// chroma_types/collection_schema.rs

1use chroma_error::{ChromaError, ErrorCodes};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::sync::{Arc, LazyLock};
6use thiserror::Error;
7use validator::Validate;
8
9use crate::chroma_proto;
10use crate::collection_configuration::{
11    EmbeddingFunctionConfiguration, InternalCollectionConfiguration,
12    UpdateVectorIndexConfiguration, VectorIndexConfiguration,
13};
14use crate::hnsw_configuration::Space;
15use crate::metadata::{MetadataComparison, MetadataValueType, Where};
16use crate::operator::QueryVector;
17use crate::{
18    default_batch_size, default_center_drift_threshold, default_construction_ef,
19    default_construction_ef_spann, default_initial_lambda, default_m, default_m_spann,
20    default_merge_threshold, default_nreplica_count, default_num_centers_to_merge_to,
21    default_num_samples_kmeans, default_num_threads, default_quantize,
22    default_reassign_neighbor_count, default_resize_factor, default_search_ef,
23    default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
24    default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
25    default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor, ConversionError,
26    HnswParametersFromSegmentError, InternalHnswConfiguration, InternalSpannConfiguration,
27    InternalUpdateCollectionConfiguration, KnnIndex, Segment, UpdateCollectionConfiguration,
28    CHROMA_KEY,
29};
30
31impl ChromaError for SchemaError {
32    fn code(&self) -> ErrorCodes {
33        match self {
34            // Internal errors (500)
35            // These indicate system/internal issues during schema operations
36            SchemaError::MissingIndexConfiguration { .. } => ErrorCodes::Internal,
37            SchemaError::InvalidSchema { .. } => ErrorCodes::Internal,
38            // DefaultsMismatch and ConfigurationConflict only occur during schema merge()
39            // which happens internally during compaction, not from user input
40            SchemaError::DefaultsMismatch => ErrorCodes::Internal,
41            SchemaError::ConfigurationConflict { .. } => ErrorCodes::Internal,
42            SchemaError::InvalidConfigurationUpdate { .. } => ErrorCodes::Internal,
43
44            // User/External errors (400)
45            // These indicate user-provided invalid input
46            SchemaError::InvalidUserInput { .. } => ErrorCodes::InvalidArgument,
47            SchemaError::ConfigAndSchemaConflict => ErrorCodes::InvalidArgument,
48            SchemaError::InvalidHnswConfig(_) => ErrorCodes::InvalidArgument,
49            SchemaError::InvalidSpannConfig(_) => ErrorCodes::InvalidArgument,
50            SchemaError::Builder(e) => e.code(),
51        }
52    }
53}
54
55#[derive(Debug, Error)]
56pub enum SchemaError {
57    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
58    MissingIndexConfiguration { key: String, value_type: String },
59    #[error("Schema reconciliation failed: {reason}")]
60    InvalidSchema { reason: String },
61    #[error("Cannot set both collection config and schema simultaneously")]
62    ConfigAndSchemaConflict,
63    #[error("Cannot merge schemas with differing defaults")]
64    DefaultsMismatch,
65    #[error("Conflicting configuration for {context}")]
66    ConfigurationConflict { context: String },
67    #[error("Invalid HNSW configuration: {0}")]
68    InvalidHnswConfig(validator::ValidationErrors),
69    #[error("Invalid SPANN configuration: {0}")]
70    InvalidSpannConfig(validator::ValidationErrors),
71    #[error("Invalid schema input: {reason}")]
72    InvalidUserInput { reason: String },
73    #[error("Invalid configuration update: {message}")]
74    InvalidConfigurationUpdate { message: String },
75    #[error(transparent)]
76    Builder(#[from] SchemaBuilderError),
77}
78
79#[derive(Debug, Error)]
80pub enum SchemaBuilderError {
81    #[error("Vector index must be configured globally using create_index(None, config), not on specific key '{key}'")]
82    VectorIndexMustBeGlobal { key: String },
83    #[error("FTS index must be configured globally using create_index(None, config), not on specific key '{key}'")]
84    FtsIndexMustBeGlobal { key: String },
85    #[error("Cannot modify special key '{key}' - it is managed automatically by the system. To customize vector search, modify the global vector config instead.")]
86    SpecialKeyModificationNotAllowed { key: String },
87    #[error("Sparse vector index requires a specific key. Use create_index(Some(\"key_name\"), config) instead of create_index(None, config)")]
88    SparseVectorRequiresKey,
89    #[error("Only one sparse vector index allowed per collection. Key '{existing_key}' already has a sparse vector index. Remove it first or use that key.")]
90    MultipleSparseVectorIndexes { existing_key: String },
91    #[error("Vector index deletion not supported. The vector index is always enabled on #embedding. To disable vector search, disable the collection instead.")]
92    VectorIndexDeletionNotSupported,
93    #[error("FTS index deletion not supported. The FTS index is always enabled on #document. To disable full-text search, use a different collection without FTS.")]
94    FtsIndexDeletionNotSupported,
95    #[error("Sparse vector index deletion not supported yet. Sparse vector indexes cannot be removed once created.")]
96    SparseVectorIndexDeletionNotSupported,
97}
98
99#[derive(Debug, Error)]
100pub enum FilterValidationError {
101    #[error(
102        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
103    )]
104    IndexingDisabled {
105        key: String,
106        value_type: MetadataValueType,
107    },
108    #[error(transparent)]
109    Schema(#[from] SchemaError),
110}
111
112impl ChromaError for SchemaBuilderError {
113    fn code(&self) -> ErrorCodes {
114        ErrorCodes::InvalidArgument
115    }
116}
117
118impl ChromaError for FilterValidationError {
119    fn code(&self) -> ErrorCodes {
120        match self {
121            FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
122            FilterValidationError::Schema(_) => ErrorCodes::Internal,
123        }
124    }
125}
126
// ============================================================================
// SCHEMA CONSTANTS
// ============================================================================
// These constants must match the Python constants in chromadb/api/types.py

// Value type name constants

/// Name of the string value type.
pub const STRING_VALUE_NAME: &str = "string";
/// Name of the integer value type.
pub const INT_VALUE_NAME: &str = "int";
/// Name of the boolean value type.
pub const BOOL_VALUE_NAME: &str = "bool";
/// Name of the float value type.
pub const FLOAT_VALUE_NAME: &str = "float";
/// Name of the float-list value type.
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
/// Name of the sparse-vector value type.
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// Index type name constants

/// Name of the full-text search index.
pub const FTS_INDEX_NAME: &str = "fts_index";
/// Name of the vector index.
pub const VECTOR_INDEX_NAME: &str = "vector_index";
/// Name of the sparse vector index.
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
/// Name of the string inverted index.
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
/// Name of the integer inverted index.
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
/// Name of the float inverted index.
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
/// Name of the boolean inverted index.
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

// Special metadata keys - must match Python constants in chromadb/api/types.py

/// Special key under which the default schema configures the FTS index.
pub const DOCUMENT_KEY: &str = "#document";
/// Special key under which the default schema configures the vector index.
pub const EMBEDDING_KEY: &str = "#embedding";
152
153// Static regex pattern to validate CMEK for GCP
154static CMEK_GCP_RE: LazyLock<Regex> = LazyLock::new(|| {
155    Regex::new(r"^projects/.+/locations/.+/keyRings/.+/cryptoKeys/.+$")
156        .expect("The CMEK pattern for GCP should be valid")
157});
158
159/// Customer-managed encryption key for storage encryption.
160///
161/// CMEK allows you to use your own encryption keys managed by cloud providers'
162/// key management services (KMS) instead of default provider-managed keys.
163#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
164#[serde(rename_all = "snake_case")]
165pub enum Cmek {
166    /// Google Cloud Platform KMS key resource name.
167    ///
168    /// Format: `projects/{project}/locations/{location}/keyRings/{keyRing}/cryptoKeys/{cryptoKey}`
169    Gcp(Arc<String>),
170}
171
172impl Cmek {
173    /// Create a GCP CMEK from a KMS resource name
174    ///
175    /// # Example
176    /// ```
177    /// use chroma_types::Cmek;
178    /// let cmek = Cmek::gcp(
179    ///     "projects/my-project/locations/us-central1/keyRings/my-ring/cryptoKeys/my-key".to_string()
180    /// );
181    /// ```
182    pub fn gcp(resource: String) -> Self {
183        Cmek::Gcp(Arc::new(resource))
184    }
185
186    /// Validates that the CMEK resource name matches the expected pattern.
187    ///
188    /// Returns `true` if the resource name is well-formed according to the
189    /// provider's format requirements. Does not verify that the key exists
190    /// or is accessible.
191    pub fn validate_pattern(&self) -> bool {
192        match self {
193            Cmek::Gcp(resource) => CMEK_GCP_RE.is_match(resource),
194        }
195    }
196}
197
198impl TryFrom<chroma_proto::Cmek> for Cmek {
199    type Error = ConversionError;
200
201    fn try_from(proto: chroma_proto::Cmek) -> Result<Self, Self::Error> {
202        match proto.provider {
203            Some(chroma_proto::cmek::Provider::Gcp(resource)) => Ok(Cmek::gcp(resource)),
204            None => Err(ConversionError::DecodeError),
205        }
206    }
207}
208
209impl From<Cmek> for chroma_proto::Cmek {
210    fn from(cmek: Cmek) -> Self {
211        match cmek {
212            Cmek::Gcp(resource) => chroma_proto::Cmek {
213                provider: Some(chroma_proto::cmek::Provider::Gcp((*resource).clone())),
214            },
215        }
216    }
217}
218
219// ============================================================================
220// SCHEMA STRUCTURES
221// ============================================================================
222
223/// Schema representation for collection index configurations
224///
225/// This represents the server-side schema structure used for index management
226
227#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
228#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
229pub struct Schema {
230    /// Default index configurations for each value type
231    pub defaults: ValueTypes,
232    /// Key-specific index overrides
233    /// TODO(Sanket): Needed for backwards compatibility. Should remove after deploy.
234    #[serde(rename = "keys", alias = "key_overrides")]
235    pub keys: HashMap<String, ValueTypes>,
236    /// Customer-managed encryption key for collection data
237    #[serde(skip_serializing_if = "Option::is_none")]
238    #[cfg_attr(feature = "utoipa", schema(value_type = Option<Object>))]
239    pub cmek: Option<Cmek>,
240    /// ID of the attached function that created this output collection (if applicable)
241    #[serde(skip_serializing_if = "Option::is_none")]
242    pub source_attached_function_id: Option<String>,
243}
244
245impl Schema {
246    pub fn update(&mut self, configuration: &InternalUpdateCollectionConfiguration) {
247        if let Some(vector_update) = &configuration.vector_index {
248            if let Some(default_vector_index) = self.defaults_vector_index_mut() {
249                Self::apply_vector_index_update(default_vector_index, vector_update);
250            }
251            if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
252                Self::apply_vector_index_update(embedding_vector_index, vector_update);
253            }
254        }
255
256        if let Some(embedding_function) = configuration.embedding_function.as_ref() {
257            if let Some(default_vector_index) = self.defaults_vector_index_mut() {
258                default_vector_index.config.embedding_function = Some(embedding_function.clone());
259            }
260            if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
261                embedding_vector_index.config.embedding_function = Some(embedding_function.clone());
262            }
263        }
264    }
265
266    /// Apply updates from UpdateCollectionConfiguration.
267    ///
268    /// Only supports updating:
269    /// - `spann`: SPANN configuration parameters (search_nprobe, ef_search)
270    /// - `embedding_function`: Embedding function configuration
271    ///
272    /// Returns an error if:
273    /// - `hnsw` is provided (HNSW updates are not supported)
274    /// - Schema is missing expected structure (defaults/embedding vector index or spann config)
275    pub fn apply_update_configuration(
276        &mut self,
277        config: &UpdateCollectionConfiguration,
278    ) -> Result<(), SchemaError> {
279        // HNSW updates are not allowed
280        if config.hnsw.is_some() {
281            return Err(SchemaError::InvalidConfigurationUpdate {
282                message: "HNSW configuration updates are not supported".to_string(),
283            });
284        }
285
286        // Apply spann updates
287        if let Some(ref spann_update) = config.spann {
288            let defaults_spann = self
289                .defaults_vector_index_mut()
290                .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
291                    message: "schema missing defaults.float_list.vector_index".to_string(),
292                })?
293                .config
294                .spann
295                .as_mut()
296                .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
297                    message: "schema missing defaults spann config".to_string(),
298                })?;
299
300            if let Some(search_nprobe) = spann_update.search_nprobe {
301                defaults_spann.search_nprobe = Some(search_nprobe);
302            }
303            if let Some(ef_search) = spann_update.ef_search {
304                defaults_spann.ef_search = Some(ef_search);
305            }
306
307            let embedding_spann = self
308                .embedding_vector_index_mut()
309                .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
310                    message: "schema missing keys[#embedding].float_list.vector_index".to_string(),
311                })?
312                .config
313                .spann
314                .as_mut()
315                .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
316                    message: "schema missing #embedding spann config".to_string(),
317                })?;
318
319            if let Some(search_nprobe) = spann_update.search_nprobe {
320                embedding_spann.search_nprobe = Some(search_nprobe);
321            }
322            if let Some(ef_search) = spann_update.ef_search {
323                embedding_spann.ef_search = Some(ef_search);
324            }
325        }
326
327        // Apply embedding function updates
328        if let Some(ref ef) = config.embedding_function {
329            self.defaults_vector_index_mut()
330                .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
331                    message: "schema missing defaults.float_list.vector_index".to_string(),
332                })?
333                .config
334                .embedding_function = Some(ef.clone());
335
336            self.embedding_vector_index_mut()
337                .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
338                    message: "schema missing keys[#embedding].float_list.vector_index".to_string(),
339                })?
340                .config
341                .embedding_function = Some(ef.clone());
342        }
343
344        Ok(())
345    }
346
347    fn defaults_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
348        self.defaults
349            .float_list
350            .as_mut()
351            .and_then(|float_list| float_list.vector_index.as_mut())
352    }
353
354    fn embedding_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
355        self.keys
356            .get_mut(EMBEDDING_KEY)
357            .and_then(|value_types| value_types.float_list.as_mut())
358            .and_then(|float_list| float_list.vector_index.as_mut())
359    }
360
361    fn apply_vector_index_update(
362        vector_index: &mut VectorIndexType,
363        update: &UpdateVectorIndexConfiguration,
364    ) {
365        match update {
366            UpdateVectorIndexConfiguration::Hnsw(Some(hnsw_update)) => {
367                if let Some(hnsw_config) = vector_index.config.hnsw.as_mut() {
368                    if let Some(ef_search) = hnsw_update.ef_search {
369                        hnsw_config.ef_search = Some(ef_search);
370                    }
371                    if let Some(max_neighbors) = hnsw_update.max_neighbors {
372                        hnsw_config.max_neighbors = Some(max_neighbors);
373                    }
374                    if let Some(num_threads) = hnsw_update.num_threads {
375                        hnsw_config.num_threads = Some(num_threads);
376                    }
377                    if let Some(resize_factor) = hnsw_update.resize_factor {
378                        hnsw_config.resize_factor = Some(resize_factor);
379                    }
380                    if let Some(sync_threshold) = hnsw_update.sync_threshold {
381                        hnsw_config.sync_threshold = Some(sync_threshold);
382                    }
383                    if let Some(batch_size) = hnsw_update.batch_size {
384                        hnsw_config.batch_size = Some(batch_size);
385                    }
386                }
387            }
388            UpdateVectorIndexConfiguration::Hnsw(None) => {}
389            UpdateVectorIndexConfiguration::Spann(Some(spann_update)) => {
390                if let Some(spann_config) = vector_index.config.spann.as_mut() {
391                    if let Some(search_nprobe) = spann_update.search_nprobe {
392                        spann_config.search_nprobe = Some(search_nprobe);
393                    }
394                    if let Some(ef_search) = spann_update.ef_search {
395                        spann_config.ef_search = Some(ef_search);
396                    }
397                }
398            }
399            UpdateVectorIndexConfiguration::Spann(None) => {}
400        }
401    }
402
403    pub fn is_sparse_index_enabled(&self) -> bool {
404        let defaults_enabled = self
405            .defaults
406            .sparse_vector
407            .as_ref()
408            .and_then(|sv| sv.sparse_vector_index.as_ref())
409            .is_some_and(|idx| idx.enabled);
410        let key_enabled = self.keys.values().any(|value_types| {
411            value_types
412                .sparse_vector
413                .as_ref()
414                .and_then(|sv| sv.sparse_vector_index.as_ref())
415                .is_some_and(|idx| idx.enabled)
416        });
417        defaults_enabled || key_enabled
418    }
419}
420
421impl Default for Schema {
422    /// Create a default Schema that matches Python's behavior exactly.
423    ///
424    /// Python creates a Schema with:
425    /// - All inverted indexes enabled by default (string, int, float, bool)
426    /// - Vector and FTS indexes disabled in defaults
427    /// - Special keys configured: #document (FTS enabled) and #embedding (vector enabled)
428    /// - Vector config has space=None, hnsw=None, spann=None (deferred to backend)
429    ///
430    /// # Examples
431    /// ```
432    /// use chroma_types::Schema;
433    ///
434    /// let schema = Schema::default();
435    /// assert!(schema.keys.contains_key("#document"));
436    /// assert!(schema.keys.contains_key("#embedding"));
437    /// ```
438    fn default() -> Self {
439        // Initialize defaults - match Python's _initialize_defaults()
440        let defaults = ValueTypes {
441            string: Some(StringValueType {
442                fts_index: Some(FtsIndexType {
443                    enabled: false,
444                    config: FtsIndexConfig {},
445                }),
446                string_inverted_index: Some(StringInvertedIndexType {
447                    enabled: true,
448                    config: StringInvertedIndexConfig {},
449                }),
450            }),
451            float_list: Some(FloatListValueType {
452                vector_index: Some(VectorIndexType {
453                    enabled: false,
454                    config: VectorIndexConfig {
455                        space: None, // Python leaves as None (resolved on serialization)
456                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
457                        source_key: None,
458                        hnsw: None,  // Python doesn't specify
459                        spann: None, // Python doesn't specify
460                    },
461                }),
462            }),
463            sparse_vector: Some(SparseVectorValueType {
464                sparse_vector_index: Some(SparseVectorIndexType {
465                    enabled: false,
466                    config: SparseVectorIndexConfig {
467                        embedding_function: None,
468                        source_key: None,
469                        bm25: None,
470                    },
471                }),
472            }),
473            int: Some(IntValueType {
474                int_inverted_index: Some(IntInvertedIndexType {
475                    enabled: true,
476                    config: IntInvertedIndexConfig {},
477                }),
478            }),
479            float: Some(FloatValueType {
480                float_inverted_index: Some(FloatInvertedIndexType {
481                    enabled: true,
482                    config: FloatInvertedIndexConfig {},
483                }),
484            }),
485            boolean: Some(BoolValueType {
486                bool_inverted_index: Some(BoolInvertedIndexType {
487                    enabled: true,
488                    config: BoolInvertedIndexConfig {},
489                }),
490            }),
491        };
492
493        // Initialize key-specific overrides - match Python's _initialize_keys()
494        let mut keys = HashMap::new();
495
496        // #document: FTS enabled, string inverted disabled
497        keys.insert(
498            DOCUMENT_KEY.to_string(),
499            ValueTypes {
500                string: Some(StringValueType {
501                    fts_index: Some(FtsIndexType {
502                        enabled: true,
503                        config: FtsIndexConfig {},
504                    }),
505                    string_inverted_index: Some(StringInvertedIndexType {
506                        enabled: false,
507                        config: StringInvertedIndexConfig {},
508                    }),
509                }),
510                ..Default::default()
511            },
512        );
513
514        // #embedding: Vector index enabled with source_key=#document
515        keys.insert(
516            EMBEDDING_KEY.to_string(),
517            ValueTypes {
518                float_list: Some(FloatListValueType {
519                    vector_index: Some(VectorIndexType {
520                        enabled: true,
521                        config: VectorIndexConfig {
522                            space: None, // Python leaves as None (resolved on serialization)
523                            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
524                            source_key: Some(DOCUMENT_KEY.to_string()),
525                            hnsw: None,  // Python doesn't specify
526                            spann: None, // Python doesn't specify
527                        },
528                    }),
529                }),
530                ..Default::default()
531            },
532        );
533
534        Schema {
535            defaults,
536            keys,
537            cmek: None,
538            source_attached_function_id: None,
539        }
540    }
541}
542
543pub fn is_embedding_function_default(
544    embedding_function: &Option<EmbeddingFunctionConfiguration>,
545) -> bool {
546    match embedding_function {
547        None => true,
548        Some(embedding_function) => embedding_function.is_default(),
549    }
550}
551
552/// Check if space is default (None means default, or if present, should be default space)
553pub fn is_space_default(space: &Option<Space>) -> bool {
554    match space {
555        None => true,                     // None means default
556        Some(s) => *s == default_space(), // If present, check if it's the default space
557    }
558}
559
560/// Check if HNSW config is default
561pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
562    hnsw_config.ef_construction == Some(default_construction_ef())
563        && hnsw_config.ef_search == Some(default_search_ef())
564        && hnsw_config.max_neighbors == Some(default_m())
565        && hnsw_config.num_threads == Some(default_num_threads())
566        && hnsw_config.batch_size == Some(default_batch_size())
567        && hnsw_config.sync_threshold == Some(default_sync_threshold())
568        && hnsw_config.resize_factor == Some(default_resize_factor())
569}
570
571// ============================================================================
572// NEW STRONGLY-TYPED SCHEMA STRUCTURES
573// ============================================================================
574
575/// Strongly-typed value type configurations
576/// Contains optional configurations for each supported value type
577#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
578#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
579pub struct ValueTypes {
580    #[serde(
581        rename = "string",
582        alias = "#string",
583        skip_serializing_if = "Option::is_none"
584    )] // STRING_VALUE_NAME
585    pub string: Option<StringValueType>,
586
587    #[serde(
588        rename = "float_list",
589        alias = "#float_list",
590        skip_serializing_if = "Option::is_none"
591    )]
592    // FLOAT_LIST_VALUE_NAME
593    pub float_list: Option<FloatListValueType>,
594
595    #[serde(
596        rename = "sparse_vector",
597        alias = "#sparse_vector",
598        skip_serializing_if = "Option::is_none"
599    )]
600    // SPARSE_VECTOR_VALUE_NAME
601    pub sparse_vector: Option<SparseVectorValueType>,
602
603    #[serde(
604        rename = "int",
605        alias = "#int",
606        skip_serializing_if = "Option::is_none"
607    )] // INT_VALUE_NAME
608    pub int: Option<IntValueType>,
609
610    #[serde(
611        rename = "float",
612        alias = "#float",
613        skip_serializing_if = "Option::is_none"
614    )] // FLOAT_VALUE_NAME
615    pub float: Option<FloatValueType>,
616
617    #[serde(
618        rename = "bool",
619        alias = "#bool",
620        skip_serializing_if = "Option::is_none"
621    )] // BOOL_VALUE_NAME
622    pub boolean: Option<BoolValueType>,
623}
624
625/// String value type index configurations
626#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
627#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
628pub struct StringValueType {
629    #[serde(
630        rename = "fts_index",
631        alias = "$fts_index",
632        skip_serializing_if = "Option::is_none"
633    )] // FTS_INDEX_NAME
634    pub fts_index: Option<FtsIndexType>,
635
636    #[serde(
637        rename = "string_inverted_index", // STRING_INVERTED_INDEX_NAME
638        alias = "$string_inverted_index",
639        skip_serializing_if = "Option::is_none"
640    )]
641    pub string_inverted_index: Option<StringInvertedIndexType>,
642}
643
644/// Float list value type index configurations (for vectors)
645#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
646#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
647pub struct FloatListValueType {
648    #[serde(
649        rename = "vector_index",
650        alias = "$vector_index",
651        skip_serializing_if = "Option::is_none"
652    )] // VECTOR_INDEX_NAME
653    pub vector_index: Option<VectorIndexType>,
654}
655
656/// Sparse vector value type index configurations
657#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
658#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
659pub struct SparseVectorValueType {
660    #[serde(
661        rename = "sparse_vector_index", // SPARSE_VECTOR_INDEX_NAME
662        alias = "$sparse_vector_index",
663        skip_serializing_if = "Option::is_none"
664    )]
665    pub sparse_vector_index: Option<SparseVectorIndexType>,
666}
667
668/// Integer value type index configurations
669#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
670#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
671pub struct IntValueType {
672    #[serde(
673        rename = "int_inverted_index",
674        alias = "$int_inverted_index",
675        skip_serializing_if = "Option::is_none"
676    )]
677    // INT_INVERTED_INDEX_NAME
678    pub int_inverted_index: Option<IntInvertedIndexType>,
679}
680
681/// Float value type index configurations
682#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
683#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
684pub struct FloatValueType {
685    #[serde(
686        rename = "float_inverted_index", // FLOAT_INVERTED_INDEX_NAME
687        alias = "$float_inverted_index",
688        skip_serializing_if = "Option::is_none"
689    )]
690    pub float_inverted_index: Option<FloatInvertedIndexType>,
691}
692
693/// Boolean value type index configurations
694#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
695#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
696pub struct BoolValueType {
697    #[serde(
698        rename = "bool_inverted_index", // BOOL_INVERTED_INDEX_NAME
699        alias = "$bool_inverted_index",
700        skip_serializing_if = "Option::is_none"
701    )]
702    pub bool_inverted_index: Option<BoolInvertedIndexType>,
703}
704
705// Individual index type structs with enabled status and config
706#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
707#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
708pub struct FtsIndexType {
709    pub enabled: bool,
710    pub config: FtsIndexConfig,
711}
712
713#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
714#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
715pub struct VectorIndexType {
716    pub enabled: bool,
717    pub config: VectorIndexConfig,
718}
719
720#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
721#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
722pub struct SparseVectorIndexType {
723    pub enabled: bool,
724    pub config: SparseVectorIndexConfig,
725}
726
727#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
728#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
729pub struct StringInvertedIndexType {
730    pub enabled: bool,
731    pub config: StringInvertedIndexConfig,
732}
733
734#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
735#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
736pub struct IntInvertedIndexType {
737    pub enabled: bool,
738    pub config: IntInvertedIndexConfig,
739}
740
741#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
742#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
743pub struct FloatInvertedIndexType {
744    pub enabled: bool,
745    pub config: FloatInvertedIndexConfig,
746}
747
/// Boolean inverted-index entry: an on/off switch plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Configuration carried alongside the enabled flag.
    pub config: BoolInvertedIndexConfig,
}
754
755impl Schema {
756    /// Create a new Schema with strongly-typed default configurations
757    pub fn new_default(default_knn_index: KnnIndex) -> Self {
758        // Vector index disabled on all keys except #embedding.
759        let vector_config = VectorIndexType {
760            enabled: false,
761            config: VectorIndexConfig {
762                space: Some(default_space()),
763                embedding_function: None,
764                source_key: None,
765                hnsw: match default_knn_index {
766                    KnnIndex::Hnsw => Some(HnswIndexConfig {
767                        ef_construction: Some(default_construction_ef()),
768                        max_neighbors: Some(default_m()),
769                        ef_search: Some(default_search_ef()),
770                        num_threads: Some(default_num_threads()),
771                        batch_size: Some(default_batch_size()),
772                        sync_threshold: Some(default_sync_threshold()),
773                        resize_factor: Some(default_resize_factor()),
774                    }),
775                    KnnIndex::Spann => None,
776                },
777                spann: match default_knn_index {
778                    KnnIndex::Hnsw => None,
779                    KnnIndex::Spann => Some(SpannIndexConfig {
780                        search_nprobe: Some(default_search_nprobe()),
781                        search_rng_factor: Some(default_search_rng_factor()),
782                        search_rng_epsilon: Some(default_search_rng_epsilon()),
783                        nreplica_count: Some(default_nreplica_count()),
784                        write_rng_factor: Some(default_write_rng_factor()),
785                        write_rng_epsilon: Some(default_write_rng_epsilon()),
786                        split_threshold: Some(default_split_threshold()),
787                        num_samples_kmeans: Some(default_num_samples_kmeans()),
788                        initial_lambda: Some(default_initial_lambda()),
789                        reassign_neighbor_count: Some(default_reassign_neighbor_count()),
790                        merge_threshold: Some(default_merge_threshold()),
791                        num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
792                        write_nprobe: Some(default_write_nprobe()),
793                        ef_construction: Some(default_construction_ef_spann()),
794                        ef_search: Some(default_search_ef_spann()),
795                        max_neighbors: Some(default_m_spann()),
796                        center_drift_threshold: None,
797                        quantize: default_quantize(),
798                    }),
799                },
800            },
801        };
802
803        // Initialize defaults struct directly instead of using Default::default() + field assignments
804        let defaults = ValueTypes {
805            string: Some(StringValueType {
806                string_inverted_index: Some(StringInvertedIndexType {
807                    enabled: true,
808                    config: StringInvertedIndexConfig {},
809                }),
810                fts_index: Some(FtsIndexType {
811                    enabled: false,
812                    config: FtsIndexConfig {},
813                }),
814            }),
815            float: Some(FloatValueType {
816                float_inverted_index: Some(FloatInvertedIndexType {
817                    enabled: true,
818                    config: FloatInvertedIndexConfig {},
819                }),
820            }),
821            int: Some(IntValueType {
822                int_inverted_index: Some(IntInvertedIndexType {
823                    enabled: true,
824                    config: IntInvertedIndexConfig {},
825                }),
826            }),
827            boolean: Some(BoolValueType {
828                bool_inverted_index: Some(BoolInvertedIndexType {
829                    enabled: true,
830                    config: BoolInvertedIndexConfig {},
831                }),
832            }),
833            float_list: Some(FloatListValueType {
834                vector_index: Some(vector_config),
835            }),
836            sparse_vector: Some(SparseVectorValueType {
837                sparse_vector_index: Some(SparseVectorIndexType {
838                    enabled: false,
839                    config: SparseVectorIndexConfig {
840                        embedding_function: Some(EmbeddingFunctionConfiguration::Unknown),
841                        source_key: None,
842                        bm25: Some(false),
843                    },
844                }),
845            }),
846        };
847
848        // Set up key overrides
849        let mut keys = HashMap::new();
850
851        // Enable vector index for #embedding.
852        let embedding_defaults = ValueTypes {
853            float_list: Some(FloatListValueType {
854                vector_index: Some(VectorIndexType {
855                    enabled: true,
856                    config: VectorIndexConfig {
857                        space: Some(default_space()),
858                        embedding_function: None,
859                        source_key: Some(DOCUMENT_KEY.to_string()),
860                        hnsw: match default_knn_index {
861                            KnnIndex::Hnsw => Some(HnswIndexConfig {
862                                ef_construction: Some(default_construction_ef()),
863                                max_neighbors: Some(default_m()),
864                                ef_search: Some(default_search_ef()),
865                                num_threads: Some(default_num_threads()),
866                                batch_size: Some(default_batch_size()),
867                                sync_threshold: Some(default_sync_threshold()),
868                                resize_factor: Some(default_resize_factor()),
869                            }),
870                            KnnIndex::Spann => None,
871                        },
872                        spann: match default_knn_index {
873                            KnnIndex::Hnsw => None,
874                            KnnIndex::Spann => Some(SpannIndexConfig {
875                                search_nprobe: Some(default_search_nprobe()),
876                                search_rng_factor: Some(default_search_rng_factor()),
877                                search_rng_epsilon: Some(default_search_rng_epsilon()),
878                                nreplica_count: Some(default_nreplica_count()),
879                                write_rng_factor: Some(default_write_rng_factor()),
880                                write_rng_epsilon: Some(default_write_rng_epsilon()),
881                                split_threshold: Some(default_split_threshold()),
882                                num_samples_kmeans: Some(default_num_samples_kmeans()),
883                                initial_lambda: Some(default_initial_lambda()),
884                                reassign_neighbor_count: Some(default_reassign_neighbor_count()),
885                                merge_threshold: Some(default_merge_threshold()),
886                                num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
887                                write_nprobe: Some(default_write_nprobe()),
888                                ef_construction: Some(default_construction_ef_spann()),
889                                ef_search: Some(default_search_ef_spann()),
890                                max_neighbors: Some(default_m_spann()),
891                                center_drift_threshold: None,
892                                quantize: default_quantize(),
893                            }),
894                        },
895                    },
896                }),
897            }),
898            ..Default::default()
899        };
900        keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
901
902        // Document defaults - initialize directly instead of Default::default() + field assignment
903        let document_defaults = ValueTypes {
904            string: Some(StringValueType {
905                fts_index: Some(FtsIndexType {
906                    enabled: true,
907                    config: FtsIndexConfig {},
908                }),
909                string_inverted_index: Some(StringInvertedIndexType {
910                    enabled: false,
911                    config: StringInvertedIndexConfig {},
912                }),
913            }),
914            ..Default::default()
915        };
916        keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
917
918        Schema {
919            defaults,
920            keys,
921            cmek: None,
922            source_attached_function_id: None,
923        }
924    }
925
926    pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
927        let to_internal = |vector_index: &VectorIndexType| {
928            let space = vector_index.config.space.clone();
929            vector_index
930                .config
931                .spann
932                .clone()
933                .map(|config| (space.as_ref(), &config).into())
934        };
935
936        self.keys
937            .get(EMBEDDING_KEY)
938            .and_then(|value_types| value_types.float_list.as_ref())
939            .and_then(|float_list| float_list.vector_index.as_ref())
940            .and_then(to_internal)
941            .or_else(|| {
942                self.defaults
943                    .float_list
944                    .as_ref()
945                    .and_then(|float_list| float_list.vector_index.as_ref())
946                    .and_then(to_internal)
947            })
948    }
949
950    /// Check if quantization is enabled in the SPANN index configuration
951    pub fn is_quantization_enabled(&self) -> bool {
952        let check_spann = |vector_index: &VectorIndexType| {
953            vector_index
954                .config
955                .spann
956                .as_ref()
957                .map(|config| config.quantize)
958                .unwrap_or(false)
959        };
960
961        self.keys
962            .get(EMBEDDING_KEY)
963            .and_then(|value_types| value_types.float_list.as_ref())
964            .and_then(|float_list| float_list.vector_index.as_ref())
965            .map(check_spann)
966            .unwrap_or_else(|| {
967                self.defaults
968                    .float_list
969                    .as_ref()
970                    .and_then(|float_list| float_list.vector_index.as_ref())
971                    .map(check_spann)
972                    .unwrap_or(false)
973            })
974    }
975
976    /// Get a mutable reference to the SPANN index configuration
977    /// Checks the #embedding key first, then falls back to defaults
978    pub fn get_spann_config_mut(&mut self) -> Option<&mut SpannIndexConfig> {
979        // Try #embedding key first
980        if let Some(value_types) = self.keys.get_mut(EMBEDDING_KEY) {
981            if let Some(float_list) = &mut value_types.float_list {
982                if let Some(vector_index) = &mut float_list.vector_index {
983                    if let Some(spann_config) = &mut vector_index.config.spann {
984                        return Some(spann_config);
985                    }
986                }
987            }
988        }
989
990        // Fall back to defaults
991        if let Some(float_list) = &mut self.defaults.float_list {
992            if let Some(vector_index) = &mut float_list.vector_index {
993                if let Some(spann_config) = &mut vector_index.config.spann {
994                    return Some(spann_config);
995                }
996            }
997        }
998
999        None
1000    }
1001
1002    pub fn get_internal_hnsw_config(&self) -> Option<InternalHnswConfiguration> {
1003        let to_internal = |vector_index: &VectorIndexType| {
1004            if vector_index.config.spann.is_some() {
1005                return None;
1006            }
1007            let space = vector_index.config.space.as_ref();
1008            let hnsw_config = vector_index.config.hnsw.as_ref();
1009            Some((space, hnsw_config).into())
1010        };
1011
1012        self.keys
1013            .get(EMBEDDING_KEY)
1014            .and_then(|value_types| value_types.float_list.as_ref())
1015            .and_then(|float_list| float_list.vector_index.as_ref())
1016            .and_then(to_internal)
1017            .or_else(|| {
1018                self.defaults
1019                    .float_list
1020                    .as_ref()
1021                    .and_then(|float_list| float_list.vector_index.as_ref())
1022                    .and_then(to_internal)
1023            })
1024    }
1025
1026    pub fn get_internal_hnsw_config_with_legacy_fallback(
1027        &self,
1028        segment: &Segment,
1029    ) -> Result<Option<InternalHnswConfiguration>, HnswParametersFromSegmentError> {
1030        if let Some(config) = self.get_internal_hnsw_config() {
1031            let config_from_metadata =
1032                InternalHnswConfiguration::from_legacy_segment_metadata(&segment.metadata)?;
1033
1034            if config == InternalHnswConfiguration::default() && config != config_from_metadata {
1035                return Ok(Some(config_from_metadata));
1036            }
1037
1038            return Ok(Some(config));
1039        }
1040
1041        Ok(None)
1042    }
1043
1044    /// Reconcile user-provided schema with system defaults
1045    ///
1046    /// This method merges user configurations with system defaults, ensuring that:
1047    /// - User overrides take precedence over defaults
1048    /// - Missing user configurations fall back to system defaults
1049    /// - Field-level merging for complex configurations (Vector, HNSW, SPANN, etc.)
1050    pub fn reconcile_with_defaults(
1051        user_schema: Option<&Schema>,
1052        knn_index: KnnIndex,
1053    ) -> Result<Self, SchemaError> {
1054        let default_schema = Schema::new_default(knn_index);
1055
1056        match user_schema {
1057            Some(user) => {
1058                // Merge defaults with user overrides
1059                let merged_defaults =
1060                    Self::merge_value_types(&default_schema.defaults, &user.defaults, knn_index)?;
1061
1062                // Merge key overrides
1063                let mut merged_keys = default_schema.keys.clone();
1064                for (key, user_value_types) in &user.keys {
1065                    if let Some(default_value_types) = merged_keys.get(key) {
1066                        // Merge with existing default key override
1067                        let merged_value_types = Self::merge_value_types(
1068                            default_value_types,
1069                            user_value_types,
1070                            knn_index,
1071                        )?;
1072                        merged_keys.insert(key.clone(), merged_value_types);
1073                    } else {
1074                        // New key override from user
1075                        merged_keys.insert(key.clone(), user_value_types.clone());
1076                    }
1077                }
1078
1079                Ok(Schema {
1080                    defaults: merged_defaults,
1081                    keys: merged_keys,
1082                    cmek: user.cmek.clone().or(default_schema.cmek.clone()),
1083                    source_attached_function_id: user
1084                        .source_attached_function_id
1085                        .clone()
1086                        .or(default_schema.source_attached_function_id.clone()),
1087                })
1088            }
1089            None => Ok(default_schema),
1090        }
1091    }
1092
1093    /// Merge two schemas together, combining key overrides when possible.
1094    pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
1095        if self.defaults != other.defaults {
1096            return Err(SchemaError::DefaultsMismatch);
1097        }
1098
1099        let mut keys = self.keys.clone();
1100
1101        for (key, other_value_types) in &other.keys {
1102            if let Some(existing) = keys.get(key).cloned() {
1103                let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
1104                keys.insert(key.clone(), merged);
1105            } else {
1106                keys.insert(key.clone(), other_value_types.clone());
1107            }
1108        }
1109
1110        Ok(Schema {
1111            defaults: self.defaults.clone(),
1112            keys,
1113            cmek: other.cmek.clone().or(self.cmek.clone()),
1114            source_attached_function_id: other
1115                .source_attached_function_id
1116                .clone()
1117                .or(self.source_attached_function_id.clone()),
1118        })
1119    }
1120
1121    fn merge_override_value_types(
1122        key: &str,
1123        left: &ValueTypes,
1124        right: &ValueTypes,
1125    ) -> Result<ValueTypes, SchemaError> {
1126        Ok(ValueTypes {
1127            string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
1128            float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
1129            int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
1130            boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
1131            float_list: Self::merge_float_list_override(
1132                key,
1133                left.float_list.as_ref(),
1134                right.float_list.as_ref(),
1135            )?,
1136            sparse_vector: Self::merge_sparse_vector_override(
1137                key,
1138                left.sparse_vector.as_ref(),
1139                right.sparse_vector.as_ref(),
1140            )?,
1141        })
1142    }
1143
1144    fn merge_string_override(
1145        key: &str,
1146        left: Option<&StringValueType>,
1147        right: Option<&StringValueType>,
1148    ) -> Result<Option<StringValueType>, SchemaError> {
1149        match (left, right) {
1150            (Some(l), Some(r)) => Ok(Some(StringValueType {
1151                string_inverted_index: Self::merge_index_or_error(
1152                    l.string_inverted_index.as_ref(),
1153                    r.string_inverted_index.as_ref(),
1154                    &format!("key '{key}' string.string_inverted_index"),
1155                )?,
1156                fts_index: Self::merge_index_or_error(
1157                    l.fts_index.as_ref(),
1158                    r.fts_index.as_ref(),
1159                    &format!("key '{key}' string.fts_index"),
1160                )?,
1161            })),
1162            (Some(l), None) => Ok(Some(l.clone())),
1163            (None, Some(r)) => Ok(Some(r.clone())),
1164            (None, None) => Ok(None),
1165        }
1166    }
1167
1168    fn merge_float_override(
1169        key: &str,
1170        left: Option<&FloatValueType>,
1171        right: Option<&FloatValueType>,
1172    ) -> Result<Option<FloatValueType>, SchemaError> {
1173        match (left, right) {
1174            (Some(l), Some(r)) => Ok(Some(FloatValueType {
1175                float_inverted_index: Self::merge_index_or_error(
1176                    l.float_inverted_index.as_ref(),
1177                    r.float_inverted_index.as_ref(),
1178                    &format!("key '{key}' float.float_inverted_index"),
1179                )?,
1180            })),
1181            (Some(l), None) => Ok(Some(l.clone())),
1182            (None, Some(r)) => Ok(Some(r.clone())),
1183            (None, None) => Ok(None),
1184        }
1185    }
1186
1187    fn merge_int_override(
1188        key: &str,
1189        left: Option<&IntValueType>,
1190        right: Option<&IntValueType>,
1191    ) -> Result<Option<IntValueType>, SchemaError> {
1192        match (left, right) {
1193            (Some(l), Some(r)) => Ok(Some(IntValueType {
1194                int_inverted_index: Self::merge_index_or_error(
1195                    l.int_inverted_index.as_ref(),
1196                    r.int_inverted_index.as_ref(),
1197                    &format!("key '{key}' int.int_inverted_index"),
1198                )?,
1199            })),
1200            (Some(l), None) => Ok(Some(l.clone())),
1201            (None, Some(r)) => Ok(Some(r.clone())),
1202            (None, None) => Ok(None),
1203        }
1204    }
1205
1206    fn merge_bool_override(
1207        key: &str,
1208        left: Option<&BoolValueType>,
1209        right: Option<&BoolValueType>,
1210    ) -> Result<Option<BoolValueType>, SchemaError> {
1211        match (left, right) {
1212            (Some(l), Some(r)) => Ok(Some(BoolValueType {
1213                bool_inverted_index: Self::merge_index_or_error(
1214                    l.bool_inverted_index.as_ref(),
1215                    r.bool_inverted_index.as_ref(),
1216                    &format!("key '{key}' bool.bool_inverted_index"),
1217                )?,
1218            })),
1219            (Some(l), None) => Ok(Some(l.clone())),
1220            (None, Some(r)) => Ok(Some(r.clone())),
1221            (None, None) => Ok(None),
1222        }
1223    }
1224
1225    fn merge_float_list_override(
1226        key: &str,
1227        left: Option<&FloatListValueType>,
1228        right: Option<&FloatListValueType>,
1229    ) -> Result<Option<FloatListValueType>, SchemaError> {
1230        match (left, right) {
1231            (Some(l), Some(r)) => Ok(Some(FloatListValueType {
1232                vector_index: Self::merge_index_or_error(
1233                    l.vector_index.as_ref(),
1234                    r.vector_index.as_ref(),
1235                    &format!("key '{key}' float_list.vector_index"),
1236                )?,
1237            })),
1238            (Some(l), None) => Ok(Some(l.clone())),
1239            (None, Some(r)) => Ok(Some(r.clone())),
1240            (None, None) => Ok(None),
1241        }
1242    }
1243
1244    fn merge_sparse_vector_override(
1245        key: &str,
1246        left: Option<&SparseVectorValueType>,
1247        right: Option<&SparseVectorValueType>,
1248    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1249        match (left, right) {
1250            (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
1251                sparse_vector_index: Self::merge_index_or_error(
1252                    l.sparse_vector_index.as_ref(),
1253                    r.sparse_vector_index.as_ref(),
1254                    &format!("key '{key}' sparse_vector.sparse_vector_index"),
1255                )?,
1256            })),
1257            (Some(l), None) => Ok(Some(l.clone())),
1258            (None, Some(r)) => Ok(Some(r.clone())),
1259            (None, None) => Ok(None),
1260        }
1261    }
1262
1263    fn merge_index_or_error<T: Clone + PartialEq>(
1264        left: Option<&T>,
1265        right: Option<&T>,
1266        context: &str,
1267    ) -> Result<Option<T>, SchemaError> {
1268        match (left, right) {
1269            (Some(l), Some(r)) => {
1270                if l == r {
1271                    Ok(Some(l.clone()))
1272                } else {
1273                    Err(SchemaError::ConfigurationConflict {
1274                        context: context.to_string(),
1275                    })
1276                }
1277            }
1278            (Some(l), None) => Ok(Some(l.clone())),
1279            (None, Some(r)) => Ok(Some(r.clone())),
1280            (None, None) => Ok(None),
1281        }
1282    }
1283
1284    /// Merge two ValueTypes with field-level merging
1285    /// User values take precedence over default values
1286    fn merge_value_types(
1287        default: &ValueTypes,
1288        user: &ValueTypes,
1289        knn_index: KnnIndex,
1290    ) -> Result<ValueTypes, SchemaError> {
1291        // Merge float_list first
1292        let float_list = Self::merge_float_list_type(
1293            default.float_list.as_ref(),
1294            user.float_list.as_ref(),
1295            knn_index,
1296        )?;
1297
1298        // Validate the merged float_list (covers all merge cases)
1299        if let Some(ref fl) = float_list {
1300            Self::validate_float_list_value_type(fl)?;
1301        }
1302
1303        Ok(ValueTypes {
1304            string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
1305            float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
1306            int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
1307            boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
1308            float_list,
1309            sparse_vector: Self::merge_sparse_vector_type(
1310                default.sparse_vector.as_ref(),
1311                user.sparse_vector.as_ref(),
1312            )?,
1313        })
1314    }
1315
1316    /// Merge StringValueType configurations
1317    fn merge_string_type(
1318        default: Option<&StringValueType>,
1319        user: Option<&StringValueType>,
1320    ) -> Result<Option<StringValueType>, SchemaError> {
1321        match (default, user) {
1322            (Some(default), Some(user)) => Ok(Some(StringValueType {
1323                string_inverted_index: Self::merge_string_inverted_index_type(
1324                    default.string_inverted_index.as_ref(),
1325                    user.string_inverted_index.as_ref(),
1326                )?,
1327                fts_index: Self::merge_fts_index_type(
1328                    default.fts_index.as_ref(),
1329                    user.fts_index.as_ref(),
1330                )?,
1331            })),
1332            (Some(default), None) => Ok(Some(default.clone())),
1333            (None, Some(user)) => Ok(Some(user.clone())),
1334            (None, None) => Ok(None),
1335        }
1336    }
1337
1338    /// Merge FloatValueType configurations
1339    fn merge_float_type(
1340        default: Option<&FloatValueType>,
1341        user: Option<&FloatValueType>,
1342    ) -> Result<Option<FloatValueType>, SchemaError> {
1343        match (default, user) {
1344            (Some(default), Some(user)) => Ok(Some(FloatValueType {
1345                float_inverted_index: Self::merge_float_inverted_index_type(
1346                    default.float_inverted_index.as_ref(),
1347                    user.float_inverted_index.as_ref(),
1348                )?,
1349            })),
1350            (Some(default), None) => Ok(Some(default.clone())),
1351            (None, Some(user)) => Ok(Some(user.clone())),
1352            (None, None) => Ok(None),
1353        }
1354    }
1355
1356    /// Merge IntValueType configurations
1357    fn merge_int_type(
1358        default: Option<&IntValueType>,
1359        user: Option<&IntValueType>,
1360    ) -> Result<Option<IntValueType>, SchemaError> {
1361        match (default, user) {
1362            (Some(default), Some(user)) => Ok(Some(IntValueType {
1363                int_inverted_index: Self::merge_int_inverted_index_type(
1364                    default.int_inverted_index.as_ref(),
1365                    user.int_inverted_index.as_ref(),
1366                )?,
1367            })),
1368            (Some(default), None) => Ok(Some(default.clone())),
1369            (None, Some(user)) => Ok(Some(user.clone())),
1370            (None, None) => Ok(None),
1371        }
1372    }
1373
1374    /// Merge BoolValueType configurations
1375    fn merge_bool_type(
1376        default: Option<&BoolValueType>,
1377        user: Option<&BoolValueType>,
1378    ) -> Result<Option<BoolValueType>, SchemaError> {
1379        match (default, user) {
1380            (Some(default), Some(user)) => Ok(Some(BoolValueType {
1381                bool_inverted_index: Self::merge_bool_inverted_index_type(
1382                    default.bool_inverted_index.as_ref(),
1383                    user.bool_inverted_index.as_ref(),
1384                )?,
1385            })),
1386            (Some(default), None) => Ok(Some(default.clone())),
1387            (None, Some(user)) => Ok(Some(user.clone())),
1388            (None, None) => Ok(None),
1389        }
1390    }
1391
1392    /// Merge FloatListValueType configurations
1393    fn merge_float_list_type(
1394        default: Option<&FloatListValueType>,
1395        user: Option<&FloatListValueType>,
1396        knn_index: KnnIndex,
1397    ) -> Result<Option<FloatListValueType>, SchemaError> {
1398        match (default, user) {
1399            (Some(default), Some(user)) => Ok(Some(FloatListValueType {
1400                vector_index: Self::merge_vector_index_type(
1401                    default.vector_index.as_ref(),
1402                    user.vector_index.as_ref(),
1403                    knn_index,
1404                )?,
1405            })),
1406            (Some(default), None) => Ok(Some(default.clone())),
1407            (None, Some(user)) => Ok(Some(user.clone())),
1408            (None, None) => Ok(None),
1409        }
1410    }
1411
1412    /// Merge SparseVectorValueType configurations
1413    fn merge_sparse_vector_type(
1414        default: Option<&SparseVectorValueType>,
1415        user: Option<&SparseVectorValueType>,
1416    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1417        match (default, user) {
1418            (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
1419                sparse_vector_index: Self::merge_sparse_vector_index_type(
1420                    default.sparse_vector_index.as_ref(),
1421                    user.sparse_vector_index.as_ref(),
1422                )?,
1423            })),
1424            (Some(default), None) => Ok(Some(default.clone())),
1425            (None, Some(user)) => Ok(Some(user.clone())),
1426            (None, None) => Ok(None),
1427        }
1428    }
1429
1430    /// Merge individual index type configurations
1431    fn merge_string_inverted_index_type(
1432        default: Option<&StringInvertedIndexType>,
1433        user: Option<&StringInvertedIndexType>,
1434    ) -> Result<Option<StringInvertedIndexType>, SchemaError> {
1435        match (default, user) {
1436            (Some(_default), Some(user)) => {
1437                Ok(Some(StringInvertedIndexType {
1438                    enabled: user.enabled,       // User enabled state takes precedence
1439                    config: user.config.clone(), // User config takes precedence
1440                }))
1441            }
1442            (Some(default), None) => Ok(Some(default.clone())),
1443            (None, Some(user)) => Ok(Some(user.clone())),
1444            (None, None) => Ok(None),
1445        }
1446    }
1447
1448    fn merge_fts_index_type(
1449        default: Option<&FtsIndexType>,
1450        user: Option<&FtsIndexType>,
1451    ) -> Result<Option<FtsIndexType>, SchemaError> {
1452        match (default, user) {
1453            (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
1454                enabled: user.enabled,
1455                config: user.config.clone(),
1456            })),
1457            (Some(default), None) => Ok(Some(default.clone())),
1458            (None, Some(user)) => Ok(Some(user.clone())),
1459            (None, None) => Ok(None),
1460        }
1461    }
1462
1463    fn merge_float_inverted_index_type(
1464        default: Option<&FloatInvertedIndexType>,
1465        user: Option<&FloatInvertedIndexType>,
1466    ) -> Result<Option<FloatInvertedIndexType>, SchemaError> {
1467        match (default, user) {
1468            (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
1469                enabled: user.enabled,
1470                config: user.config.clone(),
1471            })),
1472            (Some(default), None) => Ok(Some(default.clone())),
1473            (None, Some(user)) => Ok(Some(user.clone())),
1474            (None, None) => Ok(None),
1475        }
1476    }
1477
1478    fn merge_int_inverted_index_type(
1479        default: Option<&IntInvertedIndexType>,
1480        user: Option<&IntInvertedIndexType>,
1481    ) -> Result<Option<IntInvertedIndexType>, SchemaError> {
1482        match (default, user) {
1483            (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
1484                enabled: user.enabled,
1485                config: user.config.clone(),
1486            })),
1487            (Some(default), None) => Ok(Some(default.clone())),
1488            (None, Some(user)) => Ok(Some(user.clone())),
1489            (None, None) => Ok(None),
1490        }
1491    }
1492
1493    fn merge_bool_inverted_index_type(
1494        default: Option<&BoolInvertedIndexType>,
1495        user: Option<&BoolInvertedIndexType>,
1496    ) -> Result<Option<BoolInvertedIndexType>, SchemaError> {
1497        match (default, user) {
1498            (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
1499                enabled: user.enabled,
1500                config: user.config.clone(),
1501            })),
1502            (Some(default), None) => Ok(Some(default.clone())),
1503            (None, Some(user)) => Ok(Some(user.clone())),
1504            (None, None) => Ok(None),
1505        }
1506    }
1507
1508    fn merge_vector_index_type(
1509        default: Option<&VectorIndexType>,
1510        user: Option<&VectorIndexType>,
1511        knn_index: KnnIndex,
1512    ) -> Result<Option<VectorIndexType>, SchemaError> {
1513        match (default, user) {
1514            (Some(default), Some(user)) => Ok(Some(VectorIndexType {
1515                enabled: user.enabled,
1516                config: Self::merge_vector_index_config(&default.config, &user.config, knn_index)?,
1517            })),
1518            (Some(default), None) => Ok(Some(default.clone())),
1519            (None, Some(user)) => Ok(Some(user.clone())),
1520            (None, None) => Ok(None),
1521        }
1522    }
1523
1524    fn merge_sparse_vector_index_type(
1525        default: Option<&SparseVectorIndexType>,
1526        user: Option<&SparseVectorIndexType>,
1527    ) -> Result<Option<SparseVectorIndexType>, SchemaError> {
1528        match (default, user) {
1529            (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
1530                enabled: user.enabled,
1531                config: Self::merge_sparse_vector_index_config(&default.config, &user.config),
1532            })),
1533            (Some(default), None) => Ok(Some(default.clone())),
1534            (None, Some(user)) => Ok(Some(user.clone())),
1535            (None, None) => Ok(None),
1536        }
1537    }
1538
1539    /// Validate FloatListValueType vector index configurations
1540    /// This validates HNSW and SPANN configs within the merged float_list
1541    fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), SchemaError> {
1542        if let Some(vector_index) = &float_list.vector_index {
1543            if let Some(hnsw) = &vector_index.config.hnsw {
1544                hnsw.validate().map_err(SchemaError::InvalidHnswConfig)?;
1545            }
1546            if let Some(spann) = &vector_index.config.spann {
1547                spann.validate().map_err(SchemaError::InvalidSpannConfig)?;
1548            }
1549        }
1550        Ok(())
1551    }
1552
1553    /// Merge VectorIndexConfig with field-level merging
1554    fn merge_vector_index_config(
1555        default: &VectorIndexConfig,
1556        user: &VectorIndexConfig,
1557        knn_index: KnnIndex,
1558    ) -> Result<VectorIndexConfig, SchemaError> {
1559        match knn_index {
1560            KnnIndex::Hnsw => Ok(VectorIndexConfig {
1561                space: user.space.clone().or(default.space.clone()),
1562                embedding_function: user
1563                    .embedding_function
1564                    .clone()
1565                    .or(default.embedding_function.clone()),
1566                source_key: user.source_key.clone().or(default.source_key.clone()),
1567                hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1568                spann: None,
1569            }),
1570            KnnIndex::Spann => Ok(VectorIndexConfig {
1571                space: user.space.clone().or(default.space.clone()),
1572                embedding_function: user
1573                    .embedding_function
1574                    .clone()
1575                    .or(default.embedding_function.clone()),
1576                source_key: user.source_key.clone().or(default.source_key.clone()),
1577                hnsw: None,
1578                spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref())?,
1579            }),
1580        }
1581    }
1582
1583    /// Merge SparseVectorIndexConfig with field-level merging
1584    fn merge_sparse_vector_index_config(
1585        default: &SparseVectorIndexConfig,
1586        user: &SparseVectorIndexConfig,
1587    ) -> SparseVectorIndexConfig {
1588        SparseVectorIndexConfig {
1589            embedding_function: user
1590                .embedding_function
1591                .clone()
1592                .or(default.embedding_function.clone()),
1593            source_key: user.source_key.clone().or(default.source_key.clone()),
1594            bm25: user.bm25.or(default.bm25),
1595        }
1596    }
1597
1598    /// Merge HNSW configurations with field-level merging
1599    fn merge_hnsw_configs(
1600        default_hnsw: Option<&HnswIndexConfig>,
1601        user_hnsw: Option<&HnswIndexConfig>,
1602    ) -> Option<HnswIndexConfig> {
1603        match (default_hnsw, user_hnsw) {
1604            (Some(default), Some(user)) => Some(HnswIndexConfig {
1605                ef_construction: user.ef_construction.or(default.ef_construction),
1606                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1607                ef_search: user.ef_search.or(default.ef_search),
1608                num_threads: user.num_threads.or(default.num_threads),
1609                batch_size: user.batch_size.or(default.batch_size),
1610                sync_threshold: user.sync_threshold.or(default.sync_threshold),
1611                resize_factor: user.resize_factor.or(default.resize_factor),
1612            }),
1613            (Some(default), None) => Some(default.clone()),
1614            (None, Some(user)) => Some(user.clone()),
1615            (None, None) => None,
1616        }
1617    }
1618
1619    /// Merge SPANN configurations with field-level merging
1620    fn merge_spann_configs(
1621        default_spann: Option<&SpannIndexConfig>,
1622        user_spann: Option<&SpannIndexConfig>,
1623    ) -> Result<Option<SpannIndexConfig>, SchemaError> {
1624        match (default_spann, user_spann) {
1625            (Some(default), Some(user)) => {
1626                // Validate that quantize is always false (should only be set programmatically by frontend)
1627                if user.quantize != default_quantize() || default.quantize != default_quantize() {
1628                    return Err(SchemaError::InvalidUserInput {
1629                        reason: "quantize field cannot be set to true in user schema. Quantization can only be enabled via frontend configuration.".to_string(),
1630                    });
1631                }
1632                Ok(Some(SpannIndexConfig {
1633                    search_nprobe: user.search_nprobe.or(default.search_nprobe),
1634                    search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1635                    search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1636                    nreplica_count: user.nreplica_count.or(default.nreplica_count),
1637                    write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1638                    write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1639                    split_threshold: user.split_threshold.or(default.split_threshold),
1640                    num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1641                    initial_lambda: user.initial_lambda.or(default.initial_lambda),
1642                    reassign_neighbor_count: user
1643                        .reassign_neighbor_count
1644                        .or(default.reassign_neighbor_count),
1645                    merge_threshold: user.merge_threshold.or(default.merge_threshold),
1646                    num_centers_to_merge_to: user
1647                        .num_centers_to_merge_to
1648                        .or(default.num_centers_to_merge_to),
1649                    write_nprobe: user.write_nprobe.or(default.write_nprobe),
1650                    ef_construction: user.ef_construction.or(default.ef_construction),
1651                    ef_search: user.ef_search.or(default.ef_search),
1652                    max_neighbors: user.max_neighbors.or(default.max_neighbors),
1653                    center_drift_threshold: user
1654                        .center_drift_threshold
1655                        .or(default.center_drift_threshold),
1656                    quantize: default_quantize(), // Always false - quantization is set programmatically
1657                }))
1658            }
1659            (Some(default), None) => {
1660                // Validate default is also false
1661                if default.quantize != default_quantize() {
1662                    return Err(SchemaError::InvalidUserInput {
1663                        reason: "quantize field cannot be set to true in default schema. Quantization can only be enabled via frontend configuration.".to_string(),
1664                    });
1665                }
1666                Ok(Some(default.clone()))
1667            }
1668            (None, Some(user)) => {
1669                // Validate user is false
1670                if user.quantize != default_quantize() {
1671                    return Err(SchemaError::InvalidUserInput {
1672                        reason: "quantize field cannot be set to true in user schema. Quantization can only be enabled via frontend configuration.".to_string(),
1673                    });
1674                }
1675                Ok(Some(user.clone()))
1676            }
1677            (None, None) => Ok(None),
1678        }
1679    }
1680
1681    /// Reconcile Schema with InternalCollectionConfiguration
1682    ///
1683    /// Simple reconciliation logic:
1684    /// 1. If collection config is default → return schema (schema is source of truth)
1685    /// 2. If collection config is non-default and schema is default → override schema with collection config
1686    ///
1687    /// Note: The case where both are non-default is validated earlier in reconcile_schema_and_config
1688    pub fn reconcile_with_collection_config(
1689        schema: &Schema,
1690        collection_config: &InternalCollectionConfiguration,
1691        default_knn_index: KnnIndex,
1692    ) -> Result<Schema, SchemaError> {
1693        // 1. Check if collection config is default
1694        if collection_config.is_default() {
1695            if schema.is_default() {
1696                // if both are default, use the schema, and apply the ef from config if available
1697                // for both defaults and #embedding key
1698                let mut new_schema = Schema::new_default(default_knn_index);
1699
1700                if collection_config.embedding_function.is_some() {
1701                    if let Some(float_list) = &mut new_schema.defaults.float_list {
1702                        if let Some(vector_index) = &mut float_list.vector_index {
1703                            vector_index.config.embedding_function =
1704                                collection_config.embedding_function.clone();
1705                        }
1706                    }
1707                    if let Some(embedding_types) = new_schema.keys.get_mut(EMBEDDING_KEY) {
1708                        if let Some(float_list) = &mut embedding_types.float_list {
1709                            if let Some(vector_index) = &mut float_list.vector_index {
1710                                vector_index.config.embedding_function =
1711                                    collection_config.embedding_function.clone();
1712                            }
1713                        }
1714                    }
1715                }
1716                return Ok(new_schema);
1717            } else {
1718                // Collection config is default and schema is non-default → schema is source of truth
1719                return Ok(schema.clone());
1720            }
1721        }
1722
1723        // 2. Collection config is non-default, schema must be default (already validated earlier)
1724        // Convert collection config to schema
1725        Self::try_from(collection_config)
1726    }
1727
1728    pub fn reconcile_schema_and_config(
1729        schema: Option<&Schema>,
1730        configuration: Option<&InternalCollectionConfiguration>,
1731        knn_index: KnnIndex,
1732    ) -> Result<Schema, SchemaError> {
1733        // Early validation: check if both user-provided schema and config are non-default
1734        if let (Some(user_schema), Some(config)) = (schema, configuration) {
1735            if !user_schema.is_default() && !config.is_default() {
1736                return Err(SchemaError::ConfigAndSchemaConflict);
1737            }
1738        }
1739
1740        let reconciled_schema = Self::reconcile_with_defaults(schema, knn_index)?;
1741        if let Some(config) = configuration {
1742            Self::reconcile_with_collection_config(&reconciled_schema, config, knn_index)
1743        } else {
1744            Ok(reconciled_schema)
1745        }
1746    }
1747
1748    pub fn default_with_embedding_function(
1749        embedding_function: EmbeddingFunctionConfiguration,
1750    ) -> Schema {
1751        let mut schema = Schema::new_default(KnnIndex::Spann);
1752        if let Some(float_list) = &mut schema.defaults.float_list {
1753            if let Some(vector_index) = &mut float_list.vector_index {
1754                vector_index.config.embedding_function = Some(embedding_function.clone());
1755            }
1756        }
1757        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1758            if let Some(float_list) = &mut embedding_types.float_list {
1759                if let Some(vector_index) = &mut float_list.vector_index {
1760                    vector_index.config.embedding_function = Some(embedding_function);
1761                }
1762            }
1763        }
1764        schema
1765    }
1766
1767    /// Check if schema is default by checking each field individually
1768    pub fn is_default(&self) -> bool {
1769        // Check if defaults are default (field by field)
1770        if !Self::is_value_types_default(&self.defaults) {
1771            return false;
1772        }
1773
1774        for key in self.keys.keys() {
1775            if key != EMBEDDING_KEY && key != DOCUMENT_KEY {
1776                return false;
1777            }
1778        }
1779
1780        // Check #embedding key
1781        if let Some(embedding_value) = self.keys.get(EMBEDDING_KEY) {
1782            if !Self::is_embedding_value_types_default(embedding_value) {
1783                return false;
1784            }
1785        }
1786
1787        // Check #document key
1788        if let Some(document_value) = self.keys.get(DOCUMENT_KEY) {
1789            if !Self::is_document_value_types_default(document_value) {
1790                return false;
1791            }
1792        }
1793
1794        // Check CMEK is None (default)
1795        if self.cmek.is_some() {
1796            return false;
1797        }
1798
1799        true
1800    }
1801
1802    /// Check if ValueTypes (defaults) are in default state
1803    fn is_value_types_default(value_types: &ValueTypes) -> bool {
1804        // Check string field
1805        if let Some(string) = &value_types.string {
1806            if let Some(string_inverted) = &string.string_inverted_index {
1807                if !string_inverted.enabled {
1808                    return false;
1809                }
1810                // Config is an empty struct, so no need to check it
1811            }
1812            if let Some(fts) = &string.fts_index {
1813                if fts.enabled {
1814                    return false;
1815                }
1816                // Config is an empty struct, so no need to check it
1817            }
1818        }
1819
1820        // Check float field
1821        if let Some(float) = &value_types.float {
1822            if let Some(float_inverted) = &float.float_inverted_index {
1823                if !float_inverted.enabled {
1824                    return false;
1825                }
1826                // Config is an empty struct, so no need to check it
1827            }
1828        }
1829
1830        // Check int field
1831        if let Some(int) = &value_types.int {
1832            if let Some(int_inverted) = &int.int_inverted_index {
1833                if !int_inverted.enabled {
1834                    return false;
1835                }
1836                // Config is an empty struct, so no need to check it
1837            }
1838        }
1839
1840        // Check boolean field
1841        if let Some(boolean) = &value_types.boolean {
1842            if let Some(bool_inverted) = &boolean.bool_inverted_index {
1843                if !bool_inverted.enabled {
1844                    return false;
1845                }
1846                // Config is an empty struct, so no need to check it
1847            }
1848        }
1849
1850        // Check float_list field (vector index should be disabled)
1851        if let Some(float_list) = &value_types.float_list {
1852            if let Some(vector_index) = &float_list.vector_index {
1853                if vector_index.enabled {
1854                    return false;
1855                }
1856                if !is_embedding_function_default(&vector_index.config.embedding_function) {
1857                    return false;
1858                }
1859                if !is_space_default(&vector_index.config.space) {
1860                    return false;
1861                }
1862                // Check that the config has default structure
1863                if vector_index.config.source_key.is_some() {
1864                    return false;
1865                }
1866                // Check that either hnsw or spann config is present (not both, not neither)
1867                // and that the config values are default
1868                match (&vector_index.config.hnsw, &vector_index.config.spann) {
1869                    (Some(hnsw_config), None) => {
1870                        if !hnsw_config.is_default() {
1871                            return false;
1872                        }
1873                    }
1874                    (None, Some(spann_config)) => {
1875                        if !spann_config.is_default() {
1876                            return false;
1877                        }
1878                    }
1879                    (Some(_), Some(_)) => return false, // Both present
1880                    (None, None) => {}
1881                }
1882            }
1883        }
1884
1885        // Check sparse_vector field (should be disabled)
1886        if let Some(sparse_vector) = &value_types.sparse_vector {
1887            if let Some(sparse_index) = &sparse_vector.sparse_vector_index {
1888                if sparse_index.enabled {
1889                    return false;
1890                }
1891                // Check config structure
1892                if !is_embedding_function_default(&sparse_index.config.embedding_function) {
1893                    return false;
1894                }
1895                if sparse_index.config.source_key.is_some() {
1896                    return false;
1897                }
1898                if let Some(bm25) = &sparse_index.config.bm25 {
1899                    if bm25 != &false {
1900                        return false;
1901                    }
1902                }
1903            }
1904        }
1905
1906        true
1907    }
1908
1909    /// Check if ValueTypes for #embedding key are in default state
1910    fn is_embedding_value_types_default(value_types: &ValueTypes) -> bool {
1911        // For #embedding, only float_list should be set
1912        if value_types.string.is_some()
1913            || value_types.float.is_some()
1914            || value_types.int.is_some()
1915            || value_types.boolean.is_some()
1916            || value_types.sparse_vector.is_some()
1917        {
1918            return false;
1919        }
1920
1921        // Check float_list field (vector index should be enabled)
1922        if let Some(float_list) = &value_types.float_list {
1923            if let Some(vector_index) = &float_list.vector_index {
1924                if !vector_index.enabled {
1925                    return false;
1926                }
1927                if !is_space_default(&vector_index.config.space) {
1928                    return false;
1929                }
1930                // Check that embedding_function is default
1931                if !is_embedding_function_default(&vector_index.config.embedding_function) {
1932                    return false;
1933                }
1934                // Check that source_key is #document
1935                if vector_index.config.source_key.as_deref() != Some(DOCUMENT_KEY) {
1936                    return false;
1937                }
1938                // Check that either hnsw or spann config is present (not both, not neither)
1939                // and that the config values are default
1940                match (&vector_index.config.hnsw, &vector_index.config.spann) {
1941                    (Some(hnsw_config), None) => {
1942                        if !hnsw_config.is_default() {
1943                            return false;
1944                        }
1945                    }
1946                    (None, Some(spann_config)) => {
1947                        if !spann_config.is_default() {
1948                            return false;
1949                        }
1950                    }
1951                    (Some(_), Some(_)) => return false, // Both present
1952                    (None, None) => {}
1953                }
1954            }
1955        }
1956
1957        true
1958    }
1959
1960    /// Check if ValueTypes for #document key are in default state
1961    fn is_document_value_types_default(value_types: &ValueTypes) -> bool {
1962        // For #document, only string should be set
1963        if value_types.float_list.is_some()
1964            || value_types.float.is_some()
1965            || value_types.int.is_some()
1966            || value_types.boolean.is_some()
1967            || value_types.sparse_vector.is_some()
1968        {
1969            return false;
1970        }
1971
1972        // Check string field
1973        if let Some(string) = &value_types.string {
1974            if let Some(fts) = &string.fts_index {
1975                if !fts.enabled {
1976                    return false;
1977                }
1978                // Config is an empty struct, so no need to check it
1979            }
1980            if let Some(string_inverted) = &string.string_inverted_index {
1981                if string_inverted.enabled {
1982                    return false;
1983                }
1984                // Config is an empty struct, so no need to check it
1985            }
1986        }
1987
1988        true
1989    }
1990
1991    /// Check if a specific metadata key-value should be indexed based on schema configuration
1992    pub fn is_metadata_type_index_enabled(
1993        &self,
1994        key: &str,
1995        value_type: MetadataValueType,
1996    ) -> Result<bool, SchemaError> {
1997        let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1998
1999        match value_type {
2000            MetadataValueType::Bool => match &v_type.boolean {
2001                Some(bool_type) => match &bool_type.bool_inverted_index {
2002                    Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
2003                    None => Err(SchemaError::MissingIndexConfiguration {
2004                        key: key.to_string(),
2005                        value_type: "bool".to_string(),
2006                    }),
2007                },
2008                None => match &self.defaults.boolean {
2009                    Some(bool_type) => match &bool_type.bool_inverted_index {
2010                        Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
2011                        None => Err(SchemaError::MissingIndexConfiguration {
2012                            key: key.to_string(),
2013                            value_type: "bool".to_string(),
2014                        }),
2015                    },
2016                    None => Err(SchemaError::MissingIndexConfiguration {
2017                        key: key.to_string(),
2018                        value_type: "bool".to_string(),
2019                    }),
2020                },
2021            },
2022            MetadataValueType::Int => match &v_type.int {
2023                Some(int_type) => match &int_type.int_inverted_index {
2024                    Some(int_inverted_index) => Ok(int_inverted_index.enabled),
2025                    None => Err(SchemaError::MissingIndexConfiguration {
2026                        key: key.to_string(),
2027                        value_type: "int".to_string(),
2028                    }),
2029                },
2030                None => match &self.defaults.int {
2031                    Some(int_type) => match &int_type.int_inverted_index {
2032                        Some(int_inverted_index) => Ok(int_inverted_index.enabled),
2033                        None => Err(SchemaError::MissingIndexConfiguration {
2034                            key: key.to_string(),
2035                            value_type: "int".to_string(),
2036                        }),
2037                    },
2038                    None => Err(SchemaError::MissingIndexConfiguration {
2039                        key: key.to_string(),
2040                        value_type: "int".to_string(),
2041                    }),
2042                },
2043            },
2044            MetadataValueType::Float => match &v_type.float {
2045                Some(float_type) => match &float_type.float_inverted_index {
2046                    Some(float_inverted_index) => Ok(float_inverted_index.enabled),
2047                    None => Err(SchemaError::MissingIndexConfiguration {
2048                        key: key.to_string(),
2049                        value_type: "float".to_string(),
2050                    }),
2051                },
2052                None => match &self.defaults.float {
2053                    Some(float_type) => match &float_type.float_inverted_index {
2054                        Some(float_inverted_index) => Ok(float_inverted_index.enabled),
2055                        None => Err(SchemaError::MissingIndexConfiguration {
2056                            key: key.to_string(),
2057                            value_type: "float".to_string(),
2058                        }),
2059                    },
2060                    None => Err(SchemaError::MissingIndexConfiguration {
2061                        key: key.to_string(),
2062                        value_type: "float".to_string(),
2063                    }),
2064                },
2065            },
2066            MetadataValueType::Str => match &v_type.string {
2067                Some(string_type) => match &string_type.string_inverted_index {
2068                    Some(string_inverted_index) => Ok(string_inverted_index.enabled),
2069                    None => Err(SchemaError::MissingIndexConfiguration {
2070                        key: key.to_string(),
2071                        value_type: "string".to_string(),
2072                    }),
2073                },
2074                None => match &self.defaults.string {
2075                    Some(string_type) => match &string_type.string_inverted_index {
2076                        Some(string_inverted_index) => Ok(string_inverted_index.enabled),
2077                        None => Err(SchemaError::MissingIndexConfiguration {
2078                            key: key.to_string(),
2079                            value_type: "string".to_string(),
2080                        }),
2081                    },
2082                    None => Err(SchemaError::MissingIndexConfiguration {
2083                        key: key.to_string(),
2084                        value_type: "string".to_string(),
2085                    }),
2086                },
2087            },
2088            MetadataValueType::SparseVector => match &v_type.sparse_vector {
2089                Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
2090                    Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
2091                    None => Err(SchemaError::MissingIndexConfiguration {
2092                        key: key.to_string(),
2093                        value_type: "sparse_vector".to_string(),
2094                    }),
2095                },
2096                None => match &self.defaults.sparse_vector {
2097                    Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
2098                        Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
2099                        None => Err(SchemaError::MissingIndexConfiguration {
2100                            key: key.to_string(),
2101                            value_type: "sparse_vector".to_string(),
2102                        }),
2103                    },
2104                    None => Err(SchemaError::MissingIndexConfiguration {
2105                        key: key.to_string(),
2106                        value_type: "sparse_vector".to_string(),
2107                    }),
2108                },
2109            },
2110            // Array types use the same indexes as their scalar counterparts
2111            MetadataValueType::BoolArray => {
2112                self.is_metadata_type_index_enabled(key, MetadataValueType::Bool)
2113            }
2114            MetadataValueType::IntArray => {
2115                self.is_metadata_type_index_enabled(key, MetadataValueType::Int)
2116            }
2117            MetadataValueType::FloatArray => {
2118                self.is_metadata_type_index_enabled(key, MetadataValueType::Float)
2119            }
2120            MetadataValueType::StringArray => {
2121                self.is_metadata_type_index_enabled(key, MetadataValueType::Str)
2122            }
2123        }
2124    }
2125
2126    pub fn is_metadata_where_indexing_enabled(
2127        &self,
2128        where_clause: &Where,
2129    ) -> Result<(), FilterValidationError> {
2130        match where_clause {
2131            Where::Composite(composite) => {
2132                for child in &composite.children {
2133                    self.is_metadata_where_indexing_enabled(child)?;
2134                }
2135                Ok(())
2136            }
2137            Where::Document(_) => Ok(()),
2138            Where::Metadata(expression) => {
2139                let value_type = match &expression.comparison {
2140                    MetadataComparison::Primitive(_, value) => value.value_type(),
2141                    MetadataComparison::Set(_, set_value) => set_value.value_type(),
2142                    MetadataComparison::ArrayContains(_, value) => value.value_type(),
2143                };
2144                let is_enabled = self
2145                    .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
2146                    .map_err(FilterValidationError::Schema)?;
2147                if !is_enabled {
2148                    return Err(FilterValidationError::IndexingDisabled {
2149                        key: expression.key.clone(),
2150                        value_type,
2151                    });
2152                }
2153                Ok(())
2154            }
2155        }
2156    }
2157
2158    pub fn is_knn_key_indexing_enabled(
2159        &self,
2160        key: &str,
2161        query: &QueryVector,
2162    ) -> Result<(), FilterValidationError> {
2163        match query {
2164            QueryVector::Sparse(_) => {
2165                let is_enabled = self
2166                    .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
2167                    .map_err(FilterValidationError::Schema)?;
2168                if !is_enabled {
2169                    return Err(FilterValidationError::IndexingDisabled {
2170                        key: key.to_string(),
2171                        value_type: MetadataValueType::SparseVector,
2172                    });
2173                }
2174                Ok(())
2175            }
2176            QueryVector::Dense(_) => {
2177                // TODO: once we allow turning off dense vector indexing, we need to check if the key is enabled
2178                // Dense vectors are always indexed
2179                Ok(())
2180            }
2181        }
2182    }
2183
2184    pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
2185        if key.starts_with(CHROMA_KEY) {
2186            return false;
2187        }
2188        let value_types = self.keys.entry(key.to_string()).or_default();
2189        match value_type {
2190            MetadataValueType::Bool => {
2191                if value_types.boolean.is_none() {
2192                    value_types.boolean = self.defaults.boolean.clone();
2193                    return true;
2194                }
2195            }
2196            MetadataValueType::Int => {
2197                if value_types.int.is_none() {
2198                    value_types.int = self.defaults.int.clone();
2199                    return true;
2200                }
2201            }
2202            MetadataValueType::Float => {
2203                if value_types.float.is_none() {
2204                    value_types.float = self.defaults.float.clone();
2205                    return true;
2206                }
2207            }
2208            MetadataValueType::Str => {
2209                if value_types.string.is_none() {
2210                    value_types.string = self.defaults.string.clone();
2211                    return true;
2212                }
2213            }
2214            MetadataValueType::SparseVector => {
2215                if value_types.sparse_vector.is_none() {
2216                    value_types.sparse_vector = self.defaults.sparse_vector.clone();
2217                    return true;
2218                }
2219            }
2220            // Array types use the same indexes as their scalar counterparts
2221            MetadataValueType::BoolArray => {
2222                if value_types.boolean.is_none() {
2223                    value_types.boolean = self.defaults.boolean.clone();
2224                    return true;
2225                }
2226            }
2227            MetadataValueType::IntArray => {
2228                if value_types.int.is_none() {
2229                    value_types.int = self.defaults.int.clone();
2230                    return true;
2231                }
2232            }
2233            MetadataValueType::FloatArray => {
2234                if value_types.float.is_none() {
2235                    value_types.float = self.defaults.float.clone();
2236                    return true;
2237                }
2238            }
2239            MetadataValueType::StringArray => {
2240                if value_types.string.is_none() {
2241                    value_types.string = self.defaults.string.clone();
2242                    return true;
2243                }
2244            }
2245        }
2246        false
2247    }
2248
2249    // ========================================================================
2250    // BUILDER PATTERN METHODS
2251    // ========================================================================
2252
2253    /// Create an index configuration (builder pattern)
2254    ///
2255    /// This method allows fluent, chainable configuration of indexes on a schema.
2256    /// It matches the Python API's `.create_index()` method.
2257    ///
2258    /// # Arguments
2259    /// * `key` - Optional key name for per-key index. `None` applies to defaults/special keys
2260    /// * `config` - Index configuration to create
2261    ///
2262    /// # Returns
2263    /// `Self` for method chaining
2264    ///
2265    /// # Errors
2266    /// Returns error if:
2267    /// - Attempting to create index on special keys (`#document`, `#embedding`)
2268    /// - Invalid configuration (e.g., vector index on non-embedding key)
2269    /// - Conflicting with existing indexes (e.g., multiple sparse vector indexes)
2270    ///
2271    /// # Examples
2272    /// ```
2273    /// use chroma_types::{Schema, VectorIndexConfig, StringInvertedIndexConfig, Space, SchemaBuilderError};
2274    ///
2275    /// # fn main() -> Result<(), SchemaBuilderError> {
2276    /// let schema = Schema::default()
2277    ///     .create_index(None, VectorIndexConfig {
2278    ///         space: Some(Space::Cosine),
2279    ///         embedding_function: None,
2280    ///         source_key: None,
2281    ///         hnsw: None,
2282    ///         spann: None,
2283    ///     }.into())?
2284    ///     .create_index(Some("category"), StringInvertedIndexConfig {}.into())?;
2285    /// # Ok(())
2286    /// # }
2287    /// ```
2288    pub fn create_index(
2289        mut self,
2290        key: Option<&str>,
2291        config: IndexConfig,
2292    ) -> Result<Self, SchemaBuilderError> {
2293        // Handle special cases: Vector and FTS (global configs only)
2294        match (&key, &config) {
2295            (None, IndexConfig::Vector(cfg)) => {
2296                self._set_vector_index_config_builder(cfg.clone());
2297                return Ok(self);
2298            }
2299            (None, IndexConfig::Fts(cfg)) => {
2300                self._set_fts_index_config_builder(cfg.clone());
2301                return Ok(self);
2302            }
2303            (Some(k), IndexConfig::Vector(_)) => {
2304                return Err(SchemaBuilderError::VectorIndexMustBeGlobal { key: k.to_string() });
2305            }
2306            (Some(k), IndexConfig::Fts(_)) => {
2307                return Err(SchemaBuilderError::FtsIndexMustBeGlobal { key: k.to_string() });
2308            }
2309            _ => {}
2310        }
2311
2312        // Validate special keys
2313        if let Some(k) = key {
2314            if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2315                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2316                    key: k.to_string(),
2317                });
2318            }
2319        }
2320
2321        // Validate sparse vector requires key
2322        if key.is_none() && matches!(config, IndexConfig::SparseVector(_)) {
2323            return Err(SchemaBuilderError::SparseVectorRequiresKey);
2324        }
2325
2326        // Dispatch to appropriate helper
2327        match key {
2328            Some(k) => self._set_index_for_key_builder(k, config, true)?,
2329            None => self._set_index_in_defaults_builder(config, true)?,
2330        }
2331
2332        Ok(self)
2333    }
2334
2335    /// Delete/disable an index configuration (builder pattern)
2336    ///
2337    /// This method allows disabling indexes on a schema.
2338    /// It matches the Python API's `.delete_index()` method.
2339    ///
2340    /// # Arguments
2341    /// * `key` - Optional key name for per-key index. `None` applies to defaults
2342    /// * `config` - Index configuration to disable
2343    ///
2344    /// # Returns
2345    /// `Self` for method chaining
2346    ///
2347    /// # Errors
2348    /// Returns error if:
2349    /// - Attempting to delete index on special keys (`#document`, `#embedding`)
2350    /// - Attempting to delete vector, FTS, or sparse vector indexes (not currently supported)
2351    ///
2352    /// # Examples
2353    /// ```
2354    /// use chroma_types::{Schema, StringInvertedIndexConfig, SchemaBuilderError};
2355    ///
2356    /// # fn main() -> Result<(), SchemaBuilderError> {
2357    /// let schema = Schema::default()
2358    ///     .delete_index(Some("category"), StringInvertedIndexConfig {}.into())?;
2359    /// # Ok(())
2360    /// # }
2361    /// ```
2362    pub fn delete_index(
2363        mut self,
2364        key: Option<&str>,
2365        config: IndexConfig,
2366    ) -> Result<Self, SchemaBuilderError> {
2367        // Validate special keys
2368        if let Some(k) = key {
2369            if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2370                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2371                    key: k.to_string(),
2372                });
2373            }
2374        }
2375
2376        // Disallow deleting vector, FTS, and sparse vector indexes (match Python restrictions)
2377        match &config {
2378            IndexConfig::Vector(_) => {
2379                return Err(SchemaBuilderError::VectorIndexDeletionNotSupported);
2380            }
2381            IndexConfig::Fts(_) => {
2382                return Err(SchemaBuilderError::FtsIndexDeletionNotSupported);
2383            }
2384            IndexConfig::SparseVector(_) => {
2385                return Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported);
2386            }
2387            _ => {}
2388        }
2389
2390        // Dispatch to appropriate helper (enabled=false)
2391        match key {
2392            Some(k) => self._set_index_for_key_builder(k, config, false)?,
2393            None => self._set_index_in_defaults_builder(config, false)?,
2394        }
2395
2396        Ok(self)
2397    }
2398
2399    /// Set customer-managed encryption key for the collection (builder pattern)
2400    ///
2401    /// This method allows setting CMEK on a schema for fluent, chainable configuration.
2402    ///
2403    /// # Arguments
2404    /// * `cmek` - Customer-managed encryption key configuration
2405    ///
2406    /// # Returns
2407    /// `Self` for method chaining
2408    ///
2409    /// # Examples
2410    /// ```
2411    /// use chroma_types::{Schema, Cmek};
2412    ///
2413    /// let schema = Schema::default()
2414    ///     .with_cmek(Cmek::gcp("projects/my-project/locations/us/keyRings/my-ring/cryptoKeys/my-key".to_string()));
2415    /// ```
2416    pub fn with_cmek(mut self, cmek: Cmek) -> Self {
2417        self.cmek = Some(cmek);
2418        self
2419    }
2420
2421    /// Set vector index config globally (applies to #embedding)
2422    fn _set_vector_index_config_builder(&mut self, config: VectorIndexConfig) {
2423        // Update defaults (disabled, just config update)
2424        if let Some(float_list) = &mut self.defaults.float_list {
2425            if let Some(vector_index) = &mut float_list.vector_index {
2426                vector_index.config = config.clone();
2427            }
2428        }
2429
2430        // Update #embedding key (enabled, config update, preserve source_key=#document)
2431        if let Some(embedding_types) = self.keys.get_mut(EMBEDDING_KEY) {
2432            if let Some(float_list) = &mut embedding_types.float_list {
2433                if let Some(vector_index) = &mut float_list.vector_index {
2434                    let mut updated_config = config;
2435                    // Preserve source_key as #document
2436                    updated_config.source_key = Some(DOCUMENT_KEY.to_string());
2437                    vector_index.config = updated_config;
2438                }
2439            }
2440        }
2441    }
2442
2443    /// Set FTS index config globally (applies to #document)
2444    fn _set_fts_index_config_builder(&mut self, config: FtsIndexConfig) {
2445        // Update defaults (disabled, just config update)
2446        if let Some(string) = &mut self.defaults.string {
2447            if let Some(fts_index) = &mut string.fts_index {
2448                fts_index.config = config.clone();
2449            }
2450        }
2451
2452        // Update #document key (enabled, config update)
2453        if let Some(document_types) = self.keys.get_mut(DOCUMENT_KEY) {
2454            if let Some(string) = &mut document_types.string {
2455                if let Some(fts_index) = &mut string.fts_index {
2456                    fts_index.config = config;
2457                }
2458            }
2459        }
2460    }
2461
2462    /// Set index configuration for a specific key
2463    fn _set_index_for_key_builder(
2464        &mut self,
2465        key: &str,
2466        config: IndexConfig,
2467        enabled: bool,
2468    ) -> Result<(), SchemaBuilderError> {
2469        // Check for multiple sparse vector indexes BEFORE getting mutable reference
2470        if enabled && matches!(config, IndexConfig::SparseVector(_)) {
2471            // Find existing sparse vector index
2472            let existing_key = self
2473                .keys
2474                .iter()
2475                .find(|(k, v)| {
2476                    k.as_str() != key
2477                        && v.sparse_vector
2478                            .as_ref()
2479                            .and_then(|sv| sv.sparse_vector_index.as_ref())
2480                            .map(|idx| idx.enabled)
2481                            .unwrap_or(false)
2482                })
2483                .map(|(k, _)| k.clone());
2484
2485            if let Some(existing_key) = existing_key {
2486                return Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key });
2487            }
2488        }
2489
2490        // Get or create ValueTypes for this key
2491        let value_types = self.keys.entry(key.to_string()).or_default();
2492
2493        // Set the appropriate index based on config type
2494        match config {
2495            IndexConfig::Vector(_) => {
2496                return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2497                    key: key.to_string(),
2498                });
2499            }
2500            IndexConfig::Fts(_) => {
2501                return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2502                    key: key.to_string(),
2503                });
2504            }
2505            IndexConfig::SparseVector(cfg) => {
2506                value_types.sparse_vector = Some(SparseVectorValueType {
2507                    sparse_vector_index: Some(SparseVectorIndexType {
2508                        enabled,
2509                        config: cfg,
2510                    }),
2511                });
2512            }
2513            IndexConfig::StringInverted(cfg) => {
2514                if value_types.string.is_none() {
2515                    value_types.string = Some(StringValueType {
2516                        fts_index: None,
2517                        string_inverted_index: None,
2518                    });
2519                }
2520                if let Some(string) = &mut value_types.string {
2521                    string.string_inverted_index = Some(StringInvertedIndexType {
2522                        enabled,
2523                        config: cfg,
2524                    });
2525                }
2526            }
2527            IndexConfig::IntInverted(cfg) => {
2528                value_types.int = Some(IntValueType {
2529                    int_inverted_index: Some(IntInvertedIndexType {
2530                        enabled,
2531                        config: cfg,
2532                    }),
2533                });
2534            }
2535            IndexConfig::FloatInverted(cfg) => {
2536                value_types.float = Some(FloatValueType {
2537                    float_inverted_index: Some(FloatInvertedIndexType {
2538                        enabled,
2539                        config: cfg,
2540                    }),
2541                });
2542            }
2543            IndexConfig::BoolInverted(cfg) => {
2544                value_types.boolean = Some(BoolValueType {
2545                    bool_inverted_index: Some(BoolInvertedIndexType {
2546                        enabled,
2547                        config: cfg,
2548                    }),
2549                });
2550            }
2551        }
2552
2553        Ok(())
2554    }
2555
2556    /// Set index configuration in defaults
2557    fn _set_index_in_defaults_builder(
2558        &mut self,
2559        config: IndexConfig,
2560        enabled: bool,
2561    ) -> Result<(), SchemaBuilderError> {
2562        match config {
2563            IndexConfig::Vector(_) => {
2564                return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2565                    key: "defaults".to_string(),
2566                });
2567            }
2568            IndexConfig::Fts(_) => {
2569                return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2570                    key: "defaults".to_string(),
2571                });
2572            }
2573            IndexConfig::SparseVector(cfg) => {
2574                self.defaults.sparse_vector = Some(SparseVectorValueType {
2575                    sparse_vector_index: Some(SparseVectorIndexType {
2576                        enabled,
2577                        config: cfg,
2578                    }),
2579                });
2580            }
2581            IndexConfig::StringInverted(cfg) => {
2582                if self.defaults.string.is_none() {
2583                    self.defaults.string = Some(StringValueType {
2584                        fts_index: None,
2585                        string_inverted_index: None,
2586                    });
2587                }
2588                if let Some(string) = &mut self.defaults.string {
2589                    string.string_inverted_index = Some(StringInvertedIndexType {
2590                        enabled,
2591                        config: cfg,
2592                    });
2593                }
2594            }
2595            IndexConfig::IntInverted(cfg) => {
2596                self.defaults.int = Some(IntValueType {
2597                    int_inverted_index: Some(IntInvertedIndexType {
2598                        enabled,
2599                        config: cfg,
2600                    }),
2601                });
2602            }
2603            IndexConfig::FloatInverted(cfg) => {
2604                self.defaults.float = Some(FloatValueType {
2605                    float_inverted_index: Some(FloatInvertedIndexType {
2606                        enabled,
2607                        config: cfg,
2608                    }),
2609                });
2610            }
2611            IndexConfig::BoolInverted(cfg) => {
2612                self.defaults.boolean = Some(BoolValueType {
2613                    bool_inverted_index: Some(BoolInvertedIndexType {
2614                        enabled,
2615                        config: cfg,
2616                    }),
2617                });
2618            }
2619        }
2620
2621        Ok(())
2622    }
2623}
2624
2625// ============================================================================
2626// INDEX CONFIGURATION STRUCTURES
2627// ============================================================================
2628
/// Configuration for a vector index.
///
/// Every field is optional. Fields left as `None` are omitted when the config is
/// serialized, and deserialization rejects unknown fields (`deny_unknown_fields`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct VectorIndexConfig {
    /// Vector space for similarity calculation (cosine, l2, ip)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub space: Option<Space>,
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// HNSW algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw: Option<HnswIndexConfig>,
    /// SPANN algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub spann: Option<SpannIndexConfig>,
}
2649
2650/// Configuration for HNSW vector index algorithm parameters
2651#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
2652#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2653#[serde(deny_unknown_fields)]
2654pub struct HnswIndexConfig {
2655    #[serde(skip_serializing_if = "Option::is_none")]
2656    pub ef_construction: Option<usize>,
2657    #[serde(skip_serializing_if = "Option::is_none")]
2658    pub max_neighbors: Option<usize>,
2659    #[serde(skip_serializing_if = "Option::is_none")]
2660    pub ef_search: Option<usize>,
2661    #[serde(skip_serializing_if = "Option::is_none")]
2662    pub num_threads: Option<usize>,
2663    #[serde(skip_serializing_if = "Option::is_none")]
2664    #[validate(range(min = 2))]
2665    pub batch_size: Option<usize>,
2666    #[serde(skip_serializing_if = "Option::is_none")]
2667    #[validate(range(min = 2))]
2668    pub sync_threshold: Option<usize>,
2669    #[serde(skip_serializing_if = "Option::is_none")]
2670    pub resize_factor: Option<f64>,
2671}
2672
2673impl HnswIndexConfig {
2674    /// Check if this config has default values
2675    /// None values are considered default (not set by user)
2676    /// Note: We skip num_threads as it's variable based on available_parallelism
2677    pub fn is_default(&self) -> bool {
2678        if let Some(ef_construction) = self.ef_construction {
2679            if ef_construction != default_construction_ef() {
2680                return false;
2681            }
2682        }
2683        if let Some(max_neighbors) = self.max_neighbors {
2684            if max_neighbors != default_m() {
2685                return false;
2686            }
2687        }
2688        if let Some(ef_search) = self.ef_search {
2689            if ef_search != default_search_ef() {
2690                return false;
2691            }
2692        }
2693        if let Some(batch_size) = self.batch_size {
2694            if batch_size != default_batch_size() {
2695                return false;
2696            }
2697        }
2698        if let Some(sync_threshold) = self.sync_threshold {
2699            if sync_threshold != default_sync_threshold() {
2700                return false;
2701            }
2702        }
2703        if let Some(resize_factor) = self.resize_factor {
2704            if resize_factor != default_resize_factor() {
2705                return false;
2706            }
2707        }
2708        // Skip num_threads check as it's system-dependent
2709        true
2710    }
2711}
2712
2713/// Configuration for SPANN vector index algorithm parameters
2714#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
2715#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2716#[serde(deny_unknown_fields)]
2717pub struct SpannIndexConfig {
2718    #[serde(skip_serializing_if = "Option::is_none")]
2719    #[validate(range(max = 128))]
2720    pub search_nprobe: Option<u32>,
2721    #[serde(skip_serializing_if = "Option::is_none")]
2722    #[validate(range(min = 1.0, max = 1.0))]
2723    pub search_rng_factor: Option<f32>,
2724    #[serde(skip_serializing_if = "Option::is_none")]
2725    #[validate(range(min = 5.0, max = 10.0))]
2726    pub search_rng_epsilon: Option<f32>,
2727    #[serde(skip_serializing_if = "Option::is_none")]
2728    #[validate(range(max = 8))]
2729    pub nreplica_count: Option<u32>,
2730    #[serde(skip_serializing_if = "Option::is_none")]
2731    #[validate(range(min = 1.0, max = 1.0))]
2732    pub write_rng_factor: Option<f32>,
2733    #[serde(skip_serializing_if = "Option::is_none")]
2734    #[validate(range(min = 5.0, max = 10.0))]
2735    pub write_rng_epsilon: Option<f32>,
2736    #[serde(skip_serializing_if = "Option::is_none")]
2737    #[validate(range(min = 50, max = 200))]
2738    pub split_threshold: Option<u32>,
2739    #[serde(skip_serializing_if = "Option::is_none")]
2740    #[validate(range(max = 1000))]
2741    pub num_samples_kmeans: Option<usize>,
2742    #[serde(skip_serializing_if = "Option::is_none")]
2743    #[validate(range(min = 100.0, max = 100.0))]
2744    pub initial_lambda: Option<f32>,
2745    #[serde(skip_serializing_if = "Option::is_none")]
2746    #[validate(range(max = 64))]
2747    pub reassign_neighbor_count: Option<u32>,
2748    #[serde(skip_serializing_if = "Option::is_none")]
2749    #[validate(range(min = 25, max = 100))]
2750    pub merge_threshold: Option<u32>,
2751    #[serde(skip_serializing_if = "Option::is_none")]
2752    #[validate(range(max = 8))]
2753    pub num_centers_to_merge_to: Option<u32>,
2754    #[serde(skip_serializing_if = "Option::is_none")]
2755    #[validate(range(max = 64))]
2756    pub write_nprobe: Option<u32>,
2757    #[serde(skip_serializing_if = "Option::is_none")]
2758    #[validate(range(max = 200))]
2759    pub ef_construction: Option<usize>,
2760    #[serde(skip_serializing_if = "Option::is_none")]
2761    #[validate(range(max = 200))]
2762    pub ef_search: Option<usize>,
2763    #[serde(skip_serializing_if = "Option::is_none")]
2764    #[validate(range(max = 64))]
2765    pub max_neighbors: Option<usize>,
2766    #[serde(skip_serializing_if = "Option::is_none")]
2767    #[validate(range(min = 0.1, max = 1.0))]
2768    pub center_drift_threshold: Option<f32>,
2769    /// Enable quantization for vector search (cloud-only feature)
2770    #[serde(default = "default_quantize", skip_serializing_if = "is_false")]
2771    pub quantize: bool,
2772}
2773
2774fn is_false(v: &bool) -> bool {
2775    !*v
2776}
2777
2778impl SpannIndexConfig {
2779    /// Check if this config has default values
2780    /// None values are considered default (not set by user)
2781    pub fn is_default(&self) -> bool {
2782        if let Some(search_nprobe) = self.search_nprobe {
2783            if search_nprobe != default_search_nprobe() {
2784                return false;
2785            }
2786        }
2787        if let Some(search_rng_factor) = self.search_rng_factor {
2788            if search_rng_factor != default_search_rng_factor() {
2789                return false;
2790            }
2791        }
2792        if let Some(search_rng_epsilon) = self.search_rng_epsilon {
2793            if search_rng_epsilon != default_search_rng_epsilon() {
2794                return false;
2795            }
2796        }
2797        if let Some(nreplica_count) = self.nreplica_count {
2798            if nreplica_count != default_nreplica_count() {
2799                return false;
2800            }
2801        }
2802        if let Some(write_rng_factor) = self.write_rng_factor {
2803            if write_rng_factor != default_write_rng_factor() {
2804                return false;
2805            }
2806        }
2807        if let Some(write_rng_epsilon) = self.write_rng_epsilon {
2808            if write_rng_epsilon != default_write_rng_epsilon() {
2809                return false;
2810            }
2811        }
2812        if let Some(split_threshold) = self.split_threshold {
2813            if split_threshold != default_split_threshold() {
2814                return false;
2815            }
2816        }
2817        if let Some(num_samples_kmeans) = self.num_samples_kmeans {
2818            if num_samples_kmeans != default_num_samples_kmeans() {
2819                return false;
2820            }
2821        }
2822        if let Some(initial_lambda) = self.initial_lambda {
2823            if initial_lambda != default_initial_lambda() {
2824                return false;
2825            }
2826        }
2827        if let Some(reassign_neighbor_count) = self.reassign_neighbor_count {
2828            if reassign_neighbor_count != default_reassign_neighbor_count() {
2829                return false;
2830            }
2831        }
2832        if let Some(merge_threshold) = self.merge_threshold {
2833            if merge_threshold != default_merge_threshold() {
2834                return false;
2835            }
2836        }
2837        if let Some(num_centers_to_merge_to) = self.num_centers_to_merge_to {
2838            if num_centers_to_merge_to != default_num_centers_to_merge_to() {
2839                return false;
2840            }
2841        }
2842        if let Some(write_nprobe) = self.write_nprobe {
2843            if write_nprobe != default_write_nprobe() {
2844                return false;
2845            }
2846        }
2847        if let Some(ef_construction) = self.ef_construction {
2848            if ef_construction != default_construction_ef_spann() {
2849                return false;
2850            }
2851        }
2852        if let Some(ef_search) = self.ef_search {
2853            if ef_search != default_search_ef_spann() {
2854                return false;
2855            }
2856        }
2857        if let Some(max_neighbors) = self.max_neighbors {
2858            if max_neighbors != default_m_spann() {
2859                return false;
2860            }
2861        }
2862        if let Some(center_drift_threshold) = self.center_drift_threshold {
2863            if center_drift_threshold != default_center_drift_threshold() {
2864                return false;
2865            }
2866        }
2867        if self.quantize != default_quantize() {
2868            return false;
2869        }
2870        true
2871    }
2872}
2873
/// Configuration for a sparse vector index.
///
/// Every field is optional. Fields left as `None` are omitted when the config is
/// serialized, and deserialization rejects unknown fields (`deny_unknown_fields`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SparseVectorIndexConfig {
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the sparse vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// Whether this embedding is BM25
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bm25: Option<bool>,
}
2888
/// Configuration for an FTS index.
///
/// Currently has no fields. Because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FtsIndexConfig {
    // FTS index typically has no additional parameters
}
2895
/// Configuration for a string inverted index.
///
/// Currently has no fields. Because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct StringInvertedIndexConfig {
    // String inverted index typically has no additional parameters
}
2902
/// Configuration for an integer inverted index.
///
/// Currently has no fields. Because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct IntInvertedIndexConfig {
    // Integer inverted index typically has no additional parameters
}
2909
/// Configuration for a float inverted index.
///
/// Currently has no fields. Because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FloatInvertedIndexConfig {
    // Float inverted index typically has no additional parameters
}
2916
2917#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2918#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2919#[serde(deny_unknown_fields)]
2920pub struct BoolInvertedIndexConfig {
2921    // Boolean inverted index typically has no additional parameters
2922}
2923
2924// ============================================================================
2925// BUILDER PATTERN SUPPORT
2926// ============================================================================
2927
2928/// Union type for all index configurations (used by builder pattern)
2929#[derive(Clone, Debug)]
2930#[allow(clippy::large_enum_variant)]
2931pub enum IndexConfig {
2932    Vector(VectorIndexConfig),
2933    SparseVector(SparseVectorIndexConfig),
2934    Fts(FtsIndexConfig),
2935    StringInverted(StringInvertedIndexConfig),
2936    IntInverted(IntInvertedIndexConfig),
2937    FloatInverted(FloatInvertedIndexConfig),
2938    BoolInverted(BoolInvertedIndexConfig),
2939}
2940
2941// Convenience From implementations for ergonomic usage
2942impl From<VectorIndexConfig> for IndexConfig {
2943    fn from(config: VectorIndexConfig) -> Self {
2944        IndexConfig::Vector(config)
2945    }
2946}
2947
2948impl From<SparseVectorIndexConfig> for IndexConfig {
2949    fn from(config: SparseVectorIndexConfig) -> Self {
2950        IndexConfig::SparseVector(config)
2951    }
2952}
2953
2954impl From<FtsIndexConfig> for IndexConfig {
2955    fn from(config: FtsIndexConfig) -> Self {
2956        IndexConfig::Fts(config)
2957    }
2958}
2959
2960impl From<StringInvertedIndexConfig> for IndexConfig {
2961    fn from(config: StringInvertedIndexConfig) -> Self {
2962        IndexConfig::StringInverted(config)
2963    }
2964}
2965
2966impl From<IntInvertedIndexConfig> for IndexConfig {
2967    fn from(config: IntInvertedIndexConfig) -> Self {
2968        IndexConfig::IntInverted(config)
2969    }
2970}
2971
2972impl From<FloatInvertedIndexConfig> for IndexConfig {
2973    fn from(config: FloatInvertedIndexConfig) -> Self {
2974        IndexConfig::FloatInverted(config)
2975    }
2976}
2977
2978impl From<BoolInvertedIndexConfig> for IndexConfig {
2979    fn from(config: BoolInvertedIndexConfig) -> Self {
2980        IndexConfig::BoolInverted(config)
2981    }
2982}
2983
2984impl TryFrom<&InternalCollectionConfiguration> for Schema {
2985    type Error = SchemaError;
2986
2987    fn try_from(config: &InternalCollectionConfiguration) -> Result<Self, Self::Error> {
2988        // Start with a default schema structure
2989        let mut schema = match &config.vector_index {
2990            VectorIndexConfiguration::Hnsw(_) => Schema::new_default(KnnIndex::Hnsw),
2991            VectorIndexConfiguration::Spann(_) => Schema::new_default(KnnIndex::Spann),
2992        };
2993        // Convert vector index configuration
2994        let vector_config = match &config.vector_index {
2995            VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
2996                space: Some(hnsw_config.space.clone()),
2997                embedding_function: config.embedding_function.clone(),
2998                source_key: None,
2999                hnsw: Some(HnswIndexConfig {
3000                    ef_construction: Some(hnsw_config.ef_construction),
3001                    max_neighbors: Some(hnsw_config.max_neighbors),
3002                    ef_search: Some(hnsw_config.ef_search),
3003                    num_threads: Some(hnsw_config.num_threads),
3004                    batch_size: Some(hnsw_config.batch_size),
3005                    sync_threshold: Some(hnsw_config.sync_threshold),
3006                    resize_factor: Some(hnsw_config.resize_factor),
3007                }),
3008                spann: None,
3009            },
3010            VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
3011                space: Some(spann_config.space.clone()),
3012                embedding_function: config.embedding_function.clone(),
3013                source_key: None,
3014                hnsw: None,
3015                spann: Some(SpannIndexConfig {
3016                    search_nprobe: Some(spann_config.search_nprobe),
3017                    search_rng_factor: Some(spann_config.search_rng_factor),
3018                    search_rng_epsilon: Some(spann_config.search_rng_epsilon),
3019                    nreplica_count: Some(spann_config.nreplica_count),
3020                    write_rng_factor: Some(spann_config.write_rng_factor),
3021                    write_rng_epsilon: Some(spann_config.write_rng_epsilon),
3022                    split_threshold: Some(spann_config.split_threshold),
3023                    num_samples_kmeans: Some(spann_config.num_samples_kmeans),
3024                    initial_lambda: Some(spann_config.initial_lambda),
3025                    reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
3026                    merge_threshold: Some(spann_config.merge_threshold),
3027                    num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
3028                    write_nprobe: Some(spann_config.write_nprobe),
3029                    ef_construction: Some(spann_config.ef_construction),
3030                    ef_search: Some(spann_config.ef_search),
3031                    max_neighbors: Some(spann_config.max_neighbors),
3032                    center_drift_threshold: None,
3033                    quantize: default_quantize(),
3034                }),
3035            },
3036        };
3037
3038        // Update defaults (keep enabled=false, just update the config)
3039        // This serves as the template for any new float_list fields
3040        if let Some(float_list) = &mut schema.defaults.float_list {
3041            if let Some(vector_index) = &mut float_list.vector_index {
3042                vector_index.config = vector_config.clone();
3043            }
3044        }
3045
3046        // Update the vector_index in the existing #embedding key override
3047        // Keep enabled=true (already set by new_default) and update the config
3048        // Set source_key to DOCUMENT_KEY for the embedding key
3049        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
3050            if let Some(float_list) = &mut embedding_types.float_list {
3051                if let Some(vector_index) = &mut float_list.vector_index {
3052                    let mut vector_config = vector_config;
3053                    vector_config.source_key = Some(DOCUMENT_KEY.to_string());
3054                    vector_index.config = vector_config;
3055                }
3056            }
3057        }
3058
3059        Ok(schema)
3060    }
3061}
3062
3063#[cfg(test)]
3064mod tests {
3065    use super::*;
3066    use crate::hnsw_configuration::Space;
3067    use crate::metadata::SparseVector;
3068    use crate::{
3069        EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
3070    };
3071    use serde_json::json;
3072
3073    #[test]
3074    fn test_reconcile_with_defaults_none_user_schema() {
3075        // Test that when no user schema is provided, we get the default schema
3076        let result = Schema::reconcile_with_defaults(None, KnnIndex::Spann).unwrap();
3077        let expected = Schema::new_default(KnnIndex::Spann);
3078        assert_eq!(result, expected);
3079    }
3080
3081    #[test]
3082    fn test_reconcile_with_defaults_empty_user_schema() {
3083        // Test merging with an empty user schema
3084        let user_schema = Schema {
3085            defaults: ValueTypes::default(),
3086            keys: HashMap::new(),
3087            cmek: None,
3088            source_attached_function_id: None,
3089        };
3090
3091        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3092        let expected = Schema::new_default(KnnIndex::Spann);
3093        assert_eq!(result, expected);
3094    }
3095
3096    #[test]
3097    fn test_reconcile_with_defaults_user_overrides_string_enabled() {
3098        // Test that user can override string inverted index enabled state
3099        let mut user_schema = Schema {
3100            defaults: ValueTypes::default(),
3101            keys: HashMap::new(),
3102            cmek: None,
3103            source_attached_function_id: None,
3104        };
3105
3106        user_schema.defaults.string = Some(StringValueType {
3107            string_inverted_index: Some(StringInvertedIndexType {
3108                enabled: false, // Override default (true) to false
3109                config: StringInvertedIndexConfig {},
3110            }),
3111            fts_index: None,
3112        });
3113
3114        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3115
3116        // Check that the user override took precedence
3117        assert!(
3118            !result
3119                .defaults
3120                .string
3121                .as_ref()
3122                .unwrap()
3123                .string_inverted_index
3124                .as_ref()
3125                .unwrap()
3126                .enabled
3127        );
3128        // Check that other defaults are still present
3129        assert!(result.defaults.float.is_some());
3130        assert!(result.defaults.int.is_some());
3131    }
3132
3133    #[test]
3134    fn test_reconcile_with_defaults_user_overrides_vector_config() {
3135        // Test field-level merging for vector configurations
3136        let mut user_schema = Schema {
3137            defaults: ValueTypes::default(),
3138            keys: HashMap::new(),
3139            cmek: None,
3140            source_attached_function_id: None,
3141        };
3142
3143        user_schema.defaults.float_list = Some(FloatListValueType {
3144            vector_index: Some(VectorIndexType {
3145                enabled: true, // Enable vector index (default is false)
3146                config: VectorIndexConfig {
3147                    space: Some(Space::L2),                     // Override default space
3148                    embedding_function: None,                   // Will use default
3149                    source_key: Some("custom_key".to_string()), // Override default
3150                    hnsw: Some(HnswIndexConfig {
3151                        ef_construction: Some(500), // Override default
3152                        max_neighbors: None,        // Will use default
3153                        ef_search: None,            // Will use default
3154                        num_threads: None,
3155                        batch_size: None,
3156                        sync_threshold: None,
3157                        resize_factor: None,
3158                    }),
3159                    spann: None,
3160                },
3161            }),
3162        });
3163
3164        // Use HNSW defaults for this test so we have HNSW config to merge with
3165        let result = {
3166            let default_schema = Schema::new_default(KnnIndex::Hnsw);
3167            let merged_defaults = Schema::merge_value_types(
3168                &default_schema.defaults,
3169                &user_schema.defaults,
3170                KnnIndex::Hnsw,
3171            )
3172            .unwrap();
3173            let mut merged_keys = default_schema.keys.clone();
3174            for (key, user_value_types) in user_schema.keys {
3175                if let Some(default_value_types) = merged_keys.get(&key) {
3176                    let merged_value_types = Schema::merge_value_types(
3177                        default_value_types,
3178                        &user_value_types,
3179                        KnnIndex::Hnsw,
3180                    )
3181                    .unwrap();
3182                    merged_keys.insert(key, merged_value_types);
3183                } else {
3184                    merged_keys.insert(key, user_value_types);
3185                }
3186            }
3187            Schema {
3188                defaults: merged_defaults,
3189                keys: merged_keys,
3190                cmek: None,
3191                source_attached_function_id: None,
3192            }
3193        };
3194
3195        let vector_config = &result
3196            .defaults
3197            .float_list
3198            .as_ref()
3199            .unwrap()
3200            .vector_index
3201            .as_ref()
3202            .unwrap()
3203            .config;
3204
3205        // Check user overrides took precedence
3206        assert_eq!(vector_config.space, Some(Space::L2));
3207        assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
3208        assert_eq!(
3209            vector_config.hnsw.as_ref().unwrap().ef_construction,
3210            Some(500)
3211        );
3212
3213        // Check defaults were preserved for unspecified fields
3214        assert_eq!(vector_config.embedding_function, None);
3215        // Since user provided HNSW config, the default max_neighbors should be merged in
3216        assert_eq!(
3217            vector_config.hnsw.as_ref().unwrap().max_neighbors,
3218            Some(default_m())
3219        );
3220    }
3221
3222    #[test]
3223    fn test_reconcile_with_defaults_keys() {
3224        // Test that key overrides are properly merged
3225        let mut user_schema = Schema {
3226            defaults: ValueTypes::default(),
3227            keys: HashMap::new(),
3228            cmek: None,
3229            source_attached_function_id: None,
3230        };
3231
3232        // Add a custom key override
3233        let custom_key_types = ValueTypes {
3234            string: Some(StringValueType {
3235                fts_index: Some(FtsIndexType {
3236                    enabled: true,
3237                    config: FtsIndexConfig {},
3238                }),
3239                string_inverted_index: Some(StringInvertedIndexType {
3240                    enabled: false,
3241                    config: StringInvertedIndexConfig {},
3242                }),
3243            }),
3244            ..Default::default()
3245        };
3246        user_schema
3247            .keys
3248            .insert("custom_key".to_string(), custom_key_types);
3249
3250        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3251
3252        // Check that default key overrides are preserved
3253        assert!(result.keys.contains_key(EMBEDDING_KEY));
3254        assert!(result.keys.contains_key(DOCUMENT_KEY));
3255
3256        // Check that user key override was added
3257        assert!(result.keys.contains_key("custom_key"));
3258        let custom_override = result.keys.get("custom_key").unwrap();
3259        assert!(
3260            custom_override
3261                .string
3262                .as_ref()
3263                .unwrap()
3264                .fts_index
3265                .as_ref()
3266                .unwrap()
3267                .enabled
3268        );
3269    }
3270
3271    #[test]
3272    fn test_reconcile_with_defaults_override_existing_key() {
3273        // Test overriding an existing key override (like #embedding)
3274        let mut user_schema = Schema {
3275            defaults: ValueTypes::default(),
3276            keys: HashMap::new(),
3277            cmek: None,
3278            source_attached_function_id: None,
3279        };
3280
3281        // Override the #embedding key with custom settings
3282        let embedding_override = ValueTypes {
3283            float_list: Some(FloatListValueType {
3284                vector_index: Some(VectorIndexType {
3285                    enabled: false, // Override default enabled=true to false
3286                    config: VectorIndexConfig {
3287                        space: Some(Space::Ip), // Override default space
3288                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3289                        source_key: Some("custom_embedding_key".to_string()),
3290                        hnsw: None,
3291                        spann: None,
3292                    },
3293                }),
3294            }),
3295            ..Default::default()
3296        };
3297        user_schema
3298            .keys
3299            .insert(EMBEDDING_KEY.to_string(), embedding_override);
3300
3301        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3302
3303        let embedding_config = result.keys.get(EMBEDDING_KEY).unwrap();
3304        let vector_config = &embedding_config
3305            .float_list
3306            .as_ref()
3307            .unwrap()
3308            .vector_index
3309            .as_ref()
3310            .unwrap();
3311
3312        // Check user overrides took precedence
3313        assert!(!vector_config.enabled);
3314        assert_eq!(vector_config.config.space, Some(Space::Ip));
3315        assert_eq!(
3316            vector_config.config.source_key,
3317            Some("custom_embedding_key".to_string())
3318        );
3319    }
3320
3321    #[test]
3322    fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
3323        let collection_config = InternalCollectionConfiguration {
3324            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
3325                space: Space::Cosine,
3326                ef_construction: 128,
3327                ef_search: 96,
3328                max_neighbors: 42,
3329                num_threads: 8,
3330                resize_factor: 1.5,
3331                sync_threshold: 2_000,
3332                batch_size: 256,
3333            }),
3334            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
3335                EmbeddingFunctionNewConfiguration {
3336                    name: "custom".to_string(),
3337                    config: json!({"alpha": 1}),
3338                },
3339            )),
3340        };
3341
3342        let schema = Schema::try_from(&collection_config).unwrap();
3343        let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
3344
3345        assert_eq!(reconstructed, collection_config);
3346    }
3347
3348    #[test]
3349    fn test_convert_schema_to_collection_config_spann_roundtrip() {
3350        let spann_config = InternalSpannConfiguration {
3351            space: Space::Cosine,
3352            search_nprobe: 11,
3353            search_rng_factor: 1.7,
3354            write_nprobe: 5,
3355            nreplica_count: 3,
3356            split_threshold: 150,
3357            merge_threshold: 80,
3358            ef_construction: 120,
3359            ef_search: 90,
3360            max_neighbors: 40,
3361            ..Default::default()
3362        };
3363
3364        let collection_config = InternalCollectionConfiguration {
3365            vector_index: VectorIndexConfiguration::Spann(spann_config.clone()),
3366            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
3367                EmbeddingFunctionNewConfiguration {
3368                    name: "custom".to_string(),
3369                    config: json!({"beta": true}),
3370                },
3371            )),
3372        };
3373
3374        let schema = Schema::try_from(&collection_config).unwrap();
3375        let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
3376
3377        assert_eq!(reconstructed, collection_config);
3378    }
3379
3380    #[test]
3381    fn test_convert_schema_to_collection_config_rejects_mixed_index() {
3382        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3383        if let Some(embedding) = schema.keys.get_mut(EMBEDDING_KEY) {
3384            if let Some(float_list) = &mut embedding.float_list {
3385                if let Some(vector_index) = &mut float_list.vector_index {
3386                    vector_index.config.spann = Some(SpannIndexConfig {
3387                        search_nprobe: Some(1),
3388                        search_rng_factor: Some(1.0),
3389                        search_rng_epsilon: Some(0.1),
3390                        nreplica_count: Some(1),
3391                        write_rng_factor: Some(1.0),
3392                        write_rng_epsilon: Some(0.1),
3393                        split_threshold: Some(100),
3394                        num_samples_kmeans: Some(10),
3395                        initial_lambda: Some(0.5),
3396                        reassign_neighbor_count: Some(10),
3397                        merge_threshold: Some(50),
3398                        num_centers_to_merge_to: Some(3),
3399                        write_nprobe: Some(1),
3400                        ef_construction: Some(50),
3401                        ef_search: Some(40),
3402                        max_neighbors: Some(20),
3403                        center_drift_threshold: None,
3404                        quantize: false,
3405                    });
3406                }
3407            }
3408        }
3409
3410        let result = InternalCollectionConfiguration::try_from(&schema);
3411        assert!(result.is_err());
3412    }
3413
3414    #[test]
3415    fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
3416        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3417        let before = schema.clone();
3418        let modified = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);
3419        assert!(!modified);
3420        assert_eq!(schema, before);
3421    }
3422
3423    #[test]
3424    fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
3425        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3426        assert!(!schema.keys.contains_key("custom_field"));
3427
3428        let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3429
3430        assert!(modified);
3431        let entry = schema
3432            .keys
3433            .get("custom_field")
3434            .expect("expected new key override to be inserted");
3435        assert_eq!(entry.boolean, schema.defaults.boolean);
3436        assert!(entry.string.is_none());
3437        assert!(entry.int.is_none());
3438        assert!(entry.float.is_none());
3439        assert!(entry.float_list.is_none());
3440        assert!(entry.sparse_vector.is_none());
3441    }
3442
3443    #[test]
3444    fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
3445        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3446        let initial_len = schema.keys.len();
3447        schema.keys.insert(
3448            "custom_field".to_string(),
3449            ValueTypes {
3450                string: schema.defaults.string.clone(),
3451                ..Default::default()
3452            },
3453        );
3454
3455        let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3456
3457        assert!(modified);
3458        assert_eq!(schema.keys.len(), initial_len + 1);
3459        let entry = schema
3460            .keys
3461            .get("custom_field")
3462            .expect("expected key override to exist after ensure call");
3463        assert!(entry.string.is_some());
3464        assert_eq!(entry.boolean, schema.defaults.boolean);
3465    }
3466
3467    #[test]
3468    fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
3469        let schema = Schema::new_default(KnnIndex::Spann);
3470        let result = schema.is_knn_key_indexing_enabled(
3471            "custom_sparse",
3472            &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap()),
3473        );
3474
3475        let err = result.expect_err("expected indexing disabled error");
3476        match err {
3477            FilterValidationError::IndexingDisabled { key, value_type } => {
3478                assert_eq!(key, "custom_sparse");
3479                assert_eq!(value_type, crate::metadata::MetadataValueType::SparseVector);
3480            }
3481            other => panic!("unexpected error variant: {other:?}"),
3482        }
3483    }
3484
3485    #[test]
3486    fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
3487        let mut schema = Schema::new_default(KnnIndex::Spann);
3488        schema.keys.insert(
3489            "sparse_enabled".to_string(),
3490            ValueTypes {
3491                sparse_vector: Some(SparseVectorValueType {
3492                    sparse_vector_index: Some(SparseVectorIndexType {
3493                        enabled: true,
3494                        config: SparseVectorIndexConfig {
3495                            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3496                            source_key: None,
3497                            bm25: None,
3498                        },
3499                    }),
3500                }),
3501                ..Default::default()
3502            },
3503        );
3504
3505        let result = schema.is_knn_key_indexing_enabled(
3506            "sparse_enabled",
3507            &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap()),
3508        );
3509
3510        assert!(result.is_ok());
3511    }
3512
3513    #[test]
3514    fn test_is_knn_key_indexing_enabled_dense_succeeds() {
3515        let schema = Schema::new_default(KnnIndex::Spann);
3516        let result = schema.is_knn_key_indexing_enabled(
3517            EMBEDDING_KEY,
3518            &QueryVector::Dense(vec![0.1_f32, 0.2_f32]),
3519        );
3520
3521        assert!(result.is_ok());
3522    }
3523
3524    #[test]
3525    fn test_merge_hnsw_configs_field_level() {
3526        // Test field-level merging for HNSW configurations
3527        let default_hnsw = HnswIndexConfig {
3528            ef_construction: Some(200),
3529            max_neighbors: Some(16),
3530            ef_search: Some(10),
3531            num_threads: Some(4),
3532            batch_size: Some(100),
3533            sync_threshold: Some(1000),
3534            resize_factor: Some(1.2),
3535        };
3536
3537        let user_hnsw = HnswIndexConfig {
3538            ef_construction: Some(300), // Override
3539            max_neighbors: None,        // Will use default
3540            ef_search: Some(20),        // Override
3541            num_threads: None,          // Will use default
3542            batch_size: None,           // Will use default
3543            sync_threshold: Some(2000), // Override
3544            resize_factor: None,        // Will use default
3545        };
3546
3547        let result = Schema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();
3548
3549        // Check user overrides
3550        assert_eq!(result.ef_construction, Some(300));
3551        assert_eq!(result.ef_search, Some(20));
3552        assert_eq!(result.sync_threshold, Some(2000));
3553
3554        // Check defaults preserved
3555        assert_eq!(result.max_neighbors, Some(16));
3556        assert_eq!(result.num_threads, Some(4));
3557        assert_eq!(result.batch_size, Some(100));
3558        assert_eq!(result.resize_factor, Some(1.2));
3559    }
3560
3561    #[test]
3562    fn test_merge_spann_configs_field_level() {
3563        // Test field-level merging for SPANN configurations
3564        let default_spann = SpannIndexConfig {
3565            search_nprobe: Some(10),
3566            search_rng_factor: Some(1.0),  // Must be exactly 1.0
3567            search_rng_epsilon: Some(7.0), // Must be 5.0-10.0
3568            nreplica_count: Some(3),
3569            write_rng_factor: Some(1.0),  // Must be exactly 1.0
3570            write_rng_epsilon: Some(6.0), // Must be 5.0-10.0
3571            split_threshold: Some(100),   // Must be 50-200
3572            num_samples_kmeans: Some(100),
3573            initial_lambda: Some(100.0), // Must be exactly 100.0
3574            reassign_neighbor_count: Some(50),
3575            merge_threshold: Some(50),        // Must be 25-100
3576            num_centers_to_merge_to: Some(4), // Max is 8
3577            write_nprobe: Some(5),
3578            ef_construction: Some(100),
3579            ef_search: Some(10),
3580            max_neighbors: Some(16),
3581            center_drift_threshold: None,
3582            quantize: false,
3583        };
3584
3585        let user_spann = SpannIndexConfig {
3586            search_nprobe: Some(20),       // Override
3587            search_rng_factor: None,       // Will use default
3588            search_rng_epsilon: Some(8.0), // Override (valid: 5.0-10.0)
3589            nreplica_count: None,          // Will use default
3590            write_rng_factor: None,
3591            write_rng_epsilon: None,
3592            split_threshold: Some(150), // Override (valid: 50-200)
3593            num_samples_kmeans: None,
3594            initial_lambda: None,
3595            reassign_neighbor_count: None,
3596            merge_threshold: None,
3597            num_centers_to_merge_to: None,
3598            write_nprobe: None,
3599            ef_construction: None,
3600            ef_search: None,
3601            max_neighbors: None,
3602            center_drift_threshold: None,
3603            quantize: false,
3604        };
3605
3606        let result = Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann))
3607            .unwrap()
3608            .unwrap();
3609
3610        // Check user overrides
3611        assert_eq!(result.search_nprobe, Some(20));
3612        assert_eq!(result.search_rng_epsilon, Some(8.0));
3613        assert_eq!(result.split_threshold, Some(150));
3614
3615        // Check defaults preserved
3616        assert_eq!(result.search_rng_factor, Some(1.0));
3617        assert_eq!(result.nreplica_count, Some(3));
3618        assert_eq!(result.initial_lambda, Some(100.0));
3619    }
3620
3621    #[test]
3622    fn test_merge_spann_configs_rejects_quantize_true() {
3623        // Test that merge_spann_configs rejects quantize: true in user schema
3624        let default_spann = SpannIndexConfig {
3625            search_nprobe: Some(10),
3626            search_rng_factor: Some(1.0),
3627            search_rng_epsilon: Some(7.0),
3628            nreplica_count: Some(3),
3629            write_rng_factor: Some(1.0),
3630            write_rng_epsilon: Some(6.0),
3631            split_threshold: Some(100),
3632            num_samples_kmeans: Some(100),
3633            initial_lambda: Some(100.0),
3634            reassign_neighbor_count: Some(50),
3635            merge_threshold: Some(50),
3636            num_centers_to_merge_to: Some(4),
3637            write_nprobe: Some(5),
3638            ef_construction: Some(100),
3639            ef_search: Some(10),
3640            max_neighbors: Some(16),
3641            center_drift_threshold: None,
3642            quantize: false,
3643        };
3644
3645        let user_spann_with_quantize = SpannIndexConfig {
3646            search_nprobe: Some(20),
3647            search_rng_factor: None,
3648            search_rng_epsilon: Some(8.0),
3649            nreplica_count: None,
3650            write_rng_factor: None,
3651            write_rng_epsilon: None,
3652            split_threshold: Some(150),
3653            num_samples_kmeans: None,
3654            initial_lambda: None,
3655            reassign_neighbor_count: None,
3656            merge_threshold: None,
3657            num_centers_to_merge_to: None,
3658            write_nprobe: None,
3659            ef_construction: None,
3660            ef_search: None,
3661            max_neighbors: None,
3662            center_drift_threshold: None,
3663            quantize: true, // This should be rejected
3664        };
3665
3666        // Should reject user schema with quantize: true
3667        let result =
3668            Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann_with_quantize));
3669        assert!(result.is_err());
3670        match result {
3671            Err(SchemaError::InvalidUserInput { reason }) => {
3672                assert!(reason.contains("quantize field cannot be set to true"));
3673            }
3674            _ => panic!("Expected InvalidUserInput error"),
3675        }
3676
3677        // Should reject default schema with quantize: true
3678        let default_spann_with_quantize = SpannIndexConfig {
3679            search_nprobe: Some(10),
3680            search_rng_factor: Some(1.0),
3681            search_rng_epsilon: Some(7.0),
3682            nreplica_count: Some(3),
3683            write_rng_factor: Some(1.0),
3684            write_rng_epsilon: Some(6.0),
3685            split_threshold: Some(100),
3686            num_samples_kmeans: Some(100),
3687            initial_lambda: Some(100.0),
3688            reassign_neighbor_count: Some(50),
3689            merge_threshold: Some(50),
3690            num_centers_to_merge_to: Some(4),
3691            write_nprobe: Some(5),
3692            ef_construction: Some(100),
3693            ef_search: Some(10),
3694            max_neighbors: Some(16),
3695            center_drift_threshold: None,
3696            quantize: true, // This should be rejected
3697        };
3698
3699        let result = Schema::merge_spann_configs(Some(&default_spann_with_quantize), None);
3700        assert!(result.is_err());
3701        match result {
3702            Err(SchemaError::InvalidUserInput { reason }) => {
3703                assert!(reason.contains("quantize field cannot be set to true"));
3704            }
3705            _ => panic!("Expected InvalidUserInput error"),
3706        }
3707
3708        // Should reject user-only schema with quantize: true
3709        let result = Schema::merge_spann_configs(None, Some(&user_spann_with_quantize));
3710        assert!(result.is_err());
3711        match result {
3712            Err(SchemaError::InvalidUserInput { reason }) => {
3713                assert!(reason.contains("quantize field cannot be set to true"));
3714            }
3715            _ => panic!("Expected InvalidUserInput error"),
3716        }
3717    }
3718
3719    #[test]
3720    fn test_spann_index_config_into_internal_configuration() {
3721        let config = SpannIndexConfig {
3722            search_nprobe: Some(33),
3723            search_rng_factor: Some(1.2),
3724            search_rng_epsilon: None,
3725            nreplica_count: None,
3726            write_rng_factor: Some(1.5),
3727            write_rng_epsilon: None,
3728            split_threshold: Some(75),
3729            num_samples_kmeans: None,
3730            initial_lambda: Some(0.9),
3731            reassign_neighbor_count: Some(40),
3732            merge_threshold: None,
3733            num_centers_to_merge_to: Some(4),
3734            write_nprobe: Some(60),
3735            ef_construction: Some(180),
3736            ef_search: Some(170),
3737            max_neighbors: Some(32),
3738            center_drift_threshold: None,
3739            quantize: false,
3740        };
3741
3742        let with_space: InternalSpannConfiguration = (Some(&Space::Cosine), &config).into();
3743        assert_eq!(with_space.space, Space::Cosine);
3744        assert_eq!(with_space.search_nprobe, 33);
3745        assert_eq!(with_space.search_rng_factor, 1.2);
3746        assert_eq!(with_space.search_rng_epsilon, default_search_rng_epsilon());
3747        assert_eq!(with_space.write_rng_factor, 1.5);
3748        assert_eq!(with_space.write_nprobe, 60);
3749        assert_eq!(with_space.ef_construction, 180);
3750        assert_eq!(with_space.ef_search, 170);
3751        assert_eq!(with_space.max_neighbors, 32);
3752        assert_eq!(with_space.merge_threshold, default_merge_threshold());
3753
3754        let default_space_config: InternalSpannConfiguration = (None, &config).into();
3755        assert_eq!(default_space_config.space, default_space());
3756    }
3757
3758    #[test]
3759    fn test_merge_string_type_combinations() {
3760        // Test all combinations of default and user StringValueType
3761
3762        // Both Some - should merge
3763        let default = StringValueType {
3764            string_inverted_index: Some(StringInvertedIndexType {
3765                enabled: true,
3766                config: StringInvertedIndexConfig {},
3767            }),
3768            fts_index: Some(FtsIndexType {
3769                enabled: false,
3770                config: FtsIndexConfig {},
3771            }),
3772        };
3773
3774        let user = StringValueType {
3775            string_inverted_index: Some(StringInvertedIndexType {
3776                enabled: false, // Override
3777                config: StringInvertedIndexConfig {},
3778            }),
3779            fts_index: None, // Will use default
3780        };
3781
3782        let result = Schema::merge_string_type(Some(&default), Some(&user))
3783            .unwrap()
3784            .unwrap();
3785        assert!(!result.string_inverted_index.as_ref().unwrap().enabled); // User override
3786        assert!(!result.fts_index.as_ref().unwrap().enabled); // Default preserved
3787
3788        // Default Some, User None - should return default
3789        let result = Schema::merge_string_type(Some(&default), None)
3790            .unwrap()
3791            .unwrap();
3792        assert!(result.string_inverted_index.as_ref().unwrap().enabled);
3793
3794        // Default None, User Some - should return user
3795        let result = Schema::merge_string_type(None, Some(&user))
3796            .unwrap()
3797            .unwrap();
3798        assert!(!result.string_inverted_index.as_ref().unwrap().enabled);
3799
3800        // Both None - should return None
3801        let result = Schema::merge_string_type(None, None).unwrap();
3802        assert!(result.is_none());
3803    }
3804
3805    #[test]
3806    fn test_merge_vector_index_config_comprehensive() {
3807        // Test comprehensive vector index config merging
3808        let default_config = VectorIndexConfig {
3809            space: Some(Space::Cosine),
3810            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3811            source_key: Some("default_key".to_string()),
3812            hnsw: Some(HnswIndexConfig {
3813                ef_construction: Some(200),
3814                max_neighbors: Some(16),
3815                ef_search: Some(10),
3816                num_threads: Some(4),
3817                batch_size: Some(100),
3818                sync_threshold: Some(1000),
3819                resize_factor: Some(1.2),
3820            }),
3821            spann: None,
3822        };
3823
3824        let user_config = VectorIndexConfig {
3825            space: Some(Space::L2),                   // Override
3826            embedding_function: None,                 // Will use default
3827            source_key: Some("user_key".to_string()), // Override
3828            hnsw: Some(HnswIndexConfig {
3829                ef_construction: Some(300), // Override
3830                max_neighbors: None,        // Will use default
3831                ef_search: None,            // Will use default
3832                num_threads: None,
3833                batch_size: None,
3834                sync_threshold: None,
3835                resize_factor: None,
3836            }),
3837            spann: Some(SpannIndexConfig {
3838                search_nprobe: Some(15),
3839                search_rng_factor: None,
3840                search_rng_epsilon: None,
3841                nreplica_count: None,
3842                write_rng_factor: None,
3843                write_rng_epsilon: None,
3844                split_threshold: None,
3845                num_samples_kmeans: None,
3846                initial_lambda: None,
3847                reassign_neighbor_count: None,
3848                merge_threshold: None,
3849                num_centers_to_merge_to: None,
3850                write_nprobe: None,
3851                ef_construction: None,
3852                ef_search: None,
3853                max_neighbors: None,
3854                center_drift_threshold: None,
3855                quantize: false,
3856            }), // Add SPANN config
3857        };
3858
3859        let result =
3860            Schema::merge_vector_index_config(&default_config, &user_config, KnnIndex::Hnsw)
3861                .expect("merge should succeed");
3862
3863        // Check field-level merging
3864        assert_eq!(result.space, Some(Space::L2)); // User override
3865        assert_eq!(
3866            result.embedding_function,
3867            Some(EmbeddingFunctionConfiguration::Legacy)
3868        ); // Default preserved
3869        assert_eq!(result.source_key, Some("user_key".to_string())); // User override
3870
3871        // Check HNSW merging
3872        assert_eq!(result.hnsw.as_ref().unwrap().ef_construction, Some(300)); // User override
3873        assert_eq!(result.hnsw.as_ref().unwrap().max_neighbors, Some(16)); // Default preserved
3874
3875        // Check SPANN is not present, since merging in the context of HNSW
3876        assert!(result.spann.is_none());
3877    }
3878
3879    #[test]
3880    fn test_merge_sparse_vector_index_config() {
3881        // Test sparse vector index config merging
3882        let default_config = SparseVectorIndexConfig {
3883            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3884            source_key: Some("default_sparse_key".to_string()),
3885            bm25: None,
3886        };
3887
3888        let user_config = SparseVectorIndexConfig {
3889            embedding_function: None,                        // Will use default
3890            source_key: Some("user_sparse_key".to_string()), // Override
3891            bm25: None,
3892        };
3893
3894        let result = Schema::merge_sparse_vector_index_config(&default_config, &user_config);
3895
3896        // Check user override
3897        assert_eq!(result.source_key, Some("user_sparse_key".to_string()));
3898        // Check default preserved
3899        assert_eq!(
3900            result.embedding_function,
3901            Some(EmbeddingFunctionConfiguration::Legacy)
3902        );
3903    }
3904
3905    #[test]
3906    fn test_complex_nested_merging_scenario() {
3907        // Test a complex scenario with multiple levels of merging
3908        let mut user_schema = Schema {
3909            defaults: ValueTypes::default(),
3910            keys: HashMap::new(),
3911            cmek: None,
3912            source_attached_function_id: None,
3913        };
3914
3915        // Set up complex user defaults
3916        user_schema.defaults.string = Some(StringValueType {
3917            string_inverted_index: Some(StringInvertedIndexType {
3918                enabled: false,
3919                config: StringInvertedIndexConfig {},
3920            }),
3921            fts_index: Some(FtsIndexType {
3922                enabled: true,
3923                config: FtsIndexConfig {},
3924            }),
3925        });
3926
3927        user_schema.defaults.float_list = Some(FloatListValueType {
3928            vector_index: Some(VectorIndexType {
3929                enabled: true,
3930                config: VectorIndexConfig {
3931                    space: Some(Space::Ip),
3932                    embedding_function: None, // Will use default
3933                    source_key: Some("custom_vector_key".to_string()),
3934                    hnsw: Some(HnswIndexConfig {
3935                        ef_construction: Some(400),
3936                        max_neighbors: Some(32),
3937                        ef_search: None, // Will use default
3938                        num_threads: None,
3939                        batch_size: None,
3940                        sync_threshold: None,
3941                        resize_factor: None,
3942                    }),
3943                    spann: None,
3944                },
3945            }),
3946        });
3947
3948        // Set up key overrides
3949        let custom_key_override = ValueTypes {
3950            string: Some(StringValueType {
3951                fts_index: Some(FtsIndexType {
3952                    enabled: true,
3953                    config: FtsIndexConfig {},
3954                }),
3955                string_inverted_index: None,
3956            }),
3957            ..Default::default()
3958        };
3959        user_schema
3960            .keys
3961            .insert("custom_field".to_string(), custom_key_override);
3962
3963        // Use HNSW defaults for this test so we have HNSW config to merge with
3964        let result = {
3965            let default_schema = Schema::new_default(KnnIndex::Hnsw);
3966            let merged_defaults = Schema::merge_value_types(
3967                &default_schema.defaults,
3968                &user_schema.defaults,
3969                KnnIndex::Hnsw,
3970            )
3971            .unwrap();
3972            let mut merged_keys = default_schema.keys.clone();
3973            for (key, user_value_types) in user_schema.keys {
3974                if let Some(default_value_types) = merged_keys.get(&key) {
3975                    let merged_value_types = Schema::merge_value_types(
3976                        default_value_types,
3977                        &user_value_types,
3978                        KnnIndex::Hnsw,
3979                    )
3980                    .unwrap();
3981                    merged_keys.insert(key, merged_value_types);
3982                } else {
3983                    merged_keys.insert(key, user_value_types);
3984                }
3985            }
3986            Schema {
3987                defaults: merged_defaults,
3988                keys: merged_keys,
3989                cmek: None,
3990                source_attached_function_id: None,
3991            }
3992        };
3993
3994        // Verify complex merging worked correctly
3995
3996        // Check defaults merging
3997        assert!(
3998            !result
3999                .defaults
4000                .string
4001                .as_ref()
4002                .unwrap()
4003                .string_inverted_index
4004                .as_ref()
4005                .unwrap()
4006                .enabled
4007        );
4008        assert!(
4009            result
4010                .defaults
4011                .string
4012                .as_ref()
4013                .unwrap()
4014                .fts_index
4015                .as_ref()
4016                .unwrap()
4017                .enabled
4018        );
4019
4020        let vector_config = &result
4021            .defaults
4022            .float_list
4023            .as_ref()
4024            .unwrap()
4025            .vector_index
4026            .as_ref()
4027            .unwrap()
4028            .config;
4029        assert_eq!(vector_config.space, Some(Space::Ip));
4030        assert_eq!(vector_config.embedding_function, None); // Default preserved
4031        assert_eq!(
4032            vector_config.source_key,
4033            Some("custom_vector_key".to_string())
4034        );
4035        assert_eq!(
4036            vector_config.hnsw.as_ref().unwrap().ef_construction,
4037            Some(400)
4038        );
4039        assert_eq!(vector_config.hnsw.as_ref().unwrap().max_neighbors, Some(32));
4040        assert_eq!(
4041            vector_config.hnsw.as_ref().unwrap().ef_search,
4042            Some(default_search_ef())
4043        ); // Default preserved
4044
4045        // Check key overrides
4046        assert!(result.keys.contains_key(EMBEDDING_KEY)); // Default preserved
4047        assert!(result.keys.contains_key(DOCUMENT_KEY)); // Default preserved
4048        assert!(result.keys.contains_key("custom_field")); // User added
4049
4050        let custom_override = result.keys.get("custom_field").unwrap();
4051        assert!(
4052            custom_override
4053                .string
4054                .as_ref()
4055                .unwrap()
4056                .fts_index
4057                .as_ref()
4058                .unwrap()
4059                .enabled
4060        );
4061        assert!(custom_override
4062            .string
4063            .as_ref()
4064            .unwrap()
4065            .string_inverted_index
4066            .is_none());
4067    }
4068
4069    #[test]
4070    fn test_reconcile_with_collection_config_default_config() {
4071        // Test that when collection config is default, schema is returned as-is
4072        let collection_config = InternalCollectionConfiguration::default_hnsw();
4073        let schema = Schema::try_from(&collection_config).unwrap();
4074
4075        let result =
4076            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4077                .unwrap();
4078        assert_eq!(result, schema);
4079    }
4080
4081    // Test all 8 cases of double default scenarios
4082    #[test]
4083    fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_hnsw() {
4084        let collection_config = InternalCollectionConfiguration::default_hnsw();
4085        let schema = Schema::new_default(KnnIndex::Hnsw);
4086        let result =
4087            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4088                .unwrap();
4089
4090        // Should create new schema with default_knn_index (Hnsw)
4091        assert!(result.defaults.float_list.is_some());
4092        assert!(result
4093            .defaults
4094            .float_list
4095            .as_ref()
4096            .unwrap()
4097            .vector_index
4098            .as_ref()
4099            .unwrap()
4100            .config
4101            .hnsw
4102            .is_some());
4103        assert!(result
4104            .defaults
4105            .float_list
4106            .as_ref()
4107            .unwrap()
4108            .vector_index
4109            .as_ref()
4110            .unwrap()
4111            .config
4112            .spann
4113            .is_none());
4114    }
4115
4116    #[test]
4117    fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_spann() {
4118        let collection_config = InternalCollectionConfiguration::default_hnsw();
4119        let schema = Schema::new_default(KnnIndex::Hnsw);
4120        let result =
4121            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4122                .unwrap();
4123
4124        // Should create new schema with default_knn_index (Spann)
4125        assert!(result.defaults.float_list.is_some());
4126        assert!(result
4127            .defaults
4128            .float_list
4129            .as_ref()
4130            .unwrap()
4131            .vector_index
4132            .as_ref()
4133            .unwrap()
4134            .config
4135            .spann
4136            .is_some());
4137        assert!(result
4138            .defaults
4139            .float_list
4140            .as_ref()
4141            .unwrap()
4142            .vector_index
4143            .as_ref()
4144            .unwrap()
4145            .config
4146            .hnsw
4147            .is_none());
4148    }
4149
4150    #[test]
4151    fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_hnsw() {
4152        let collection_config = InternalCollectionConfiguration::default_hnsw();
4153        let schema = Schema::new_default(KnnIndex::Spann);
4154        let result =
4155            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4156                .unwrap();
4157
4158        // Should create new schema with default_knn_index (Hnsw)
4159        assert!(result.defaults.float_list.is_some());
4160        assert!(result
4161            .defaults
4162            .float_list
4163            .as_ref()
4164            .unwrap()
4165            .vector_index
4166            .as_ref()
4167            .unwrap()
4168            .config
4169            .hnsw
4170            .is_some());
4171        assert!(result
4172            .defaults
4173            .float_list
4174            .as_ref()
4175            .unwrap()
4176            .vector_index
4177            .as_ref()
4178            .unwrap()
4179            .config
4180            .spann
4181            .is_none());
4182    }
4183
4184    #[test]
4185    fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_spann() {
4186        let collection_config = InternalCollectionConfiguration::default_hnsw();
4187        let schema = Schema::new_default(KnnIndex::Spann);
4188        let result =
4189            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4190                .unwrap();
4191
4192        // Should create new schema with default_knn_index (Spann)
4193        assert!(result.defaults.float_list.is_some());
4194        assert!(result
4195            .defaults
4196            .float_list
4197            .as_ref()
4198            .unwrap()
4199            .vector_index
4200            .as_ref()
4201            .unwrap()
4202            .config
4203            .spann
4204            .is_some());
4205        assert!(result
4206            .defaults
4207            .float_list
4208            .as_ref()
4209            .unwrap()
4210            .vector_index
4211            .as_ref()
4212            .unwrap()
4213            .config
4214            .hnsw
4215            .is_none());
4216    }
4217
4218    #[test]
4219    fn test_reconcile_double_default_spann_config_spann_schema_default_knn_hnsw() {
4220        let collection_config = InternalCollectionConfiguration::default_spann();
4221        let schema = Schema::new_default(KnnIndex::Spann);
4222        let result =
4223            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4224                .unwrap();
4225
4226        // Should create new schema with default_knn_index (Hnsw)
4227        assert!(result.defaults.float_list.is_some());
4228        assert!(result
4229            .defaults
4230            .float_list
4231            .as_ref()
4232            .unwrap()
4233            .vector_index
4234            .as_ref()
4235            .unwrap()
4236            .config
4237            .hnsw
4238            .is_some());
4239        assert!(result
4240            .defaults
4241            .float_list
4242            .as_ref()
4243            .unwrap()
4244            .vector_index
4245            .as_ref()
4246            .unwrap()
4247            .config
4248            .spann
4249            .is_none());
4250    }
4251
4252    #[test]
4253    fn test_reconcile_double_default_spann_config_spann_schema_default_knn_spann() {
4254        let collection_config = InternalCollectionConfiguration::default_spann();
4255        let schema = Schema::new_default(KnnIndex::Spann);
4256        let result =
4257            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4258                .unwrap();
4259
4260        // Should create new schema with default_knn_index (Spann)
4261        assert!(result.defaults.float_list.is_some());
4262        assert!(result
4263            .defaults
4264            .float_list
4265            .as_ref()
4266            .unwrap()
4267            .vector_index
4268            .as_ref()
4269            .unwrap()
4270            .config
4271            .spann
4272            .is_some());
4273        assert!(result
4274            .defaults
4275            .float_list
4276            .as_ref()
4277            .unwrap()
4278            .vector_index
4279            .as_ref()
4280            .unwrap()
4281            .config
4282            .hnsw
4283            .is_none());
4284        // Defaults should have source_key=None
4285        assert_eq!(
4286            result
4287                .defaults
4288                .float_list
4289                .as_ref()
4290                .unwrap()
4291                .vector_index
4292                .as_ref()
4293                .unwrap()
4294                .config
4295                .source_key,
4296            None
4297        );
4298    }
4299
4300    #[test]
4301    fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_hnsw() {
4302        let collection_config = InternalCollectionConfiguration::default_spann();
4303        let schema = Schema::new_default(KnnIndex::Hnsw);
4304        let result =
4305            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4306                .unwrap();
4307
4308        // Should create new schema with default_knn_index (Hnsw)
4309        assert!(result.defaults.float_list.is_some());
4310        assert!(result
4311            .defaults
4312            .float_list
4313            .as_ref()
4314            .unwrap()
4315            .vector_index
4316            .as_ref()
4317            .unwrap()
4318            .config
4319            .hnsw
4320            .is_some());
4321        assert!(result
4322            .defaults
4323            .float_list
4324            .as_ref()
4325            .unwrap()
4326            .vector_index
4327            .as_ref()
4328            .unwrap()
4329            .config
4330            .spann
4331            .is_none());
4332    }
4333
4334    #[test]
4335    fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_spann() {
4336        let collection_config = InternalCollectionConfiguration::default_spann();
4337        let schema = Schema::new_default(KnnIndex::Hnsw);
4338        let result =
4339            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4340                .unwrap();
4341
4342        // Should create new schema with default_knn_index (Spann)
4343        assert!(result.defaults.float_list.is_some());
4344        assert!(result
4345            .defaults
4346            .float_list
4347            .as_ref()
4348            .unwrap()
4349            .vector_index
4350            .as_ref()
4351            .unwrap()
4352            .config
4353            .spann
4354            .is_some());
4355        assert!(result
4356            .defaults
4357            .float_list
4358            .as_ref()
4359            .unwrap()
4360            .vector_index
4361            .as_ref()
4362            .unwrap()
4363            .config
4364            .hnsw
4365            .is_none());
4366    }
4367
4368    #[test]
4369    fn test_defaults_source_key_not_document() {
4370        // Test that defaults.float_list.vector_index.config.source_key is None, not DOCUMENT_KEY
4371        let schema_hnsw = Schema::new_default(KnnIndex::Hnsw);
4372        let schema_spann = Schema::new_default(KnnIndex::Spann);
4373
4374        // Check HNSW default schema
4375        let defaults_hnsw = schema_hnsw
4376            .defaults
4377            .float_list
4378            .as_ref()
4379            .unwrap()
4380            .vector_index
4381            .as_ref()
4382            .unwrap();
4383        assert_eq!(defaults_hnsw.config.source_key, None);
4384
4385        // Check Spann default schema
4386        let defaults_spann = schema_spann
4387            .defaults
4388            .float_list
4389            .as_ref()
4390            .unwrap()
4391            .vector_index
4392            .as_ref()
4393            .unwrap();
4394        assert_eq!(defaults_spann.config.source_key, None);
4395
4396        // Test after reconcile with NON-default collection config
4397        // This path calls try_from where our fix is
4398        let collection_config_hnsw = InternalCollectionConfiguration {
4399            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4400                ef_construction: 300,
4401                max_neighbors: 32,
4402                ef_search: 50,
4403                num_threads: 8,
4404                batch_size: 200,
4405                sync_threshold: 2000,
4406                resize_factor: 1.5,
4407                space: Space::L2,
4408            }),
4409            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4410        };
4411        let result_hnsw = Schema::reconcile_with_collection_config(
4412            &schema_hnsw,
4413            &collection_config_hnsw,
4414            KnnIndex::Hnsw,
4415        )
4416        .unwrap();
4417        let reconciled_defaults_hnsw = result_hnsw
4418            .defaults
4419            .float_list
4420            .as_ref()
4421            .unwrap()
4422            .vector_index
4423            .as_ref()
4424            .unwrap();
4425        assert_eq!(reconciled_defaults_hnsw.config.source_key, None);
4426
4427        let collection_config_spann = InternalCollectionConfiguration {
4428            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4429                search_nprobe: 20,
4430                search_rng_factor: 3.0,
4431                search_rng_epsilon: 0.2,
4432                nreplica_count: 5,
4433                write_rng_factor: 2.0,
4434                write_rng_epsilon: 0.1,
4435                split_threshold: 2000,
4436                num_samples_kmeans: 200,
4437                initial_lambda: 0.8,
4438                reassign_neighbor_count: 100,
4439                merge_threshold: 800,
4440                num_centers_to_merge_to: 20,
4441                write_nprobe: 10,
4442                ef_construction: 400,
4443                ef_search: 60,
4444                max_neighbors: 24,
4445                space: Space::Cosine,
4446            }),
4447            embedding_function: None,
4448        };
4449        let result_spann = Schema::reconcile_with_collection_config(
4450            &schema_spann,
4451            &collection_config_spann,
4452            KnnIndex::Spann,
4453        )
4454        .unwrap();
4455        let reconciled_defaults_spann = result_spann
4456            .defaults
4457            .float_list
4458            .as_ref()
4459            .unwrap()
4460            .vector_index
4461            .as_ref()
4462            .unwrap();
4463        assert_eq!(reconciled_defaults_spann.config.source_key, None);
4464
4465        // Verify that #embedding key DOES have source_key set to DOCUMENT_KEY
4466        let embedding_hnsw = result_hnsw.keys.get(EMBEDDING_KEY).unwrap();
4467        let embedding_vector_index_hnsw = embedding_hnsw
4468            .float_list
4469            .as_ref()
4470            .unwrap()
4471            .vector_index
4472            .as_ref()
4473            .unwrap();
4474        assert_eq!(
4475            embedding_vector_index_hnsw.config.source_key,
4476            Some(DOCUMENT_KEY.to_string())
4477        );
4478
4479        let embedding_spann = result_spann.keys.get(EMBEDDING_KEY).unwrap();
4480        let embedding_vector_index_spann = embedding_spann
4481            .float_list
4482            .as_ref()
4483            .unwrap()
4484            .vector_index
4485            .as_ref()
4486            .unwrap();
4487        assert_eq!(
4488            embedding_vector_index_spann.config.source_key,
4489            Some(DOCUMENT_KEY.to_string())
4490        );
4491    }
4492
4493    #[test]
4494    fn test_try_from_source_key() {
4495        // Direct test of try_from to verify source_key behavior
4496        // Defaults should have source_key=None, #embedding should have source_key=DOCUMENT_KEY
4497
4498        // Test with HNSW config
4499        let collection_config_hnsw = InternalCollectionConfiguration {
4500            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4501                ef_construction: 300,
4502                max_neighbors: 32,
4503                ef_search: 50,
4504                num_threads: 8,
4505                batch_size: 200,
4506                sync_threshold: 2000,
4507                resize_factor: 1.5,
4508                space: Space::L2,
4509            }),
4510            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4511        };
4512        let schema_hnsw = Schema::try_from(&collection_config_hnsw).unwrap();
4513
4514        // Check defaults have source_key=None
4515        let defaults_hnsw = schema_hnsw
4516            .defaults
4517            .float_list
4518            .as_ref()
4519            .unwrap()
4520            .vector_index
4521            .as_ref()
4522            .unwrap();
4523        assert_eq!(defaults_hnsw.config.source_key, None);
4524
4525        // Check #embedding has source_key=DOCUMENT_KEY
4526        let embedding_hnsw = schema_hnsw.keys.get(EMBEDDING_KEY).unwrap();
4527        let embedding_vector_index_hnsw = embedding_hnsw
4528            .float_list
4529            .as_ref()
4530            .unwrap()
4531            .vector_index
4532            .as_ref()
4533            .unwrap();
4534        assert_eq!(
4535            embedding_vector_index_hnsw.config.source_key,
4536            Some(DOCUMENT_KEY.to_string())
4537        );
4538
4539        // Test with Spann config
4540        let collection_config_spann = InternalCollectionConfiguration {
4541            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4542                search_nprobe: 20,
4543                search_rng_factor: 3.0,
4544                search_rng_epsilon: 0.2,
4545                nreplica_count: 5,
4546                write_rng_factor: 2.0,
4547                write_rng_epsilon: 0.1,
4548                split_threshold: 2000,
4549                num_samples_kmeans: 200,
4550                initial_lambda: 0.8,
4551                reassign_neighbor_count: 100,
4552                merge_threshold: 800,
4553                num_centers_to_merge_to: 20,
4554                write_nprobe: 10,
4555                ef_construction: 400,
4556                ef_search: 60,
4557                max_neighbors: 24,
4558                space: Space::Cosine,
4559            }),
4560            embedding_function: None,
4561        };
4562        let schema_spann = Schema::try_from(&collection_config_spann).unwrap();
4563
4564        // Check defaults have source_key=None
4565        let defaults_spann = schema_spann
4566            .defaults
4567            .float_list
4568            .as_ref()
4569            .unwrap()
4570            .vector_index
4571            .as_ref()
4572            .unwrap();
4573        assert_eq!(defaults_spann.config.source_key, None);
4574
4575        // Check #embedding has source_key=DOCUMENT_KEY
4576        let embedding_spann = schema_spann.keys.get(EMBEDDING_KEY).unwrap();
4577        let embedding_vector_index_spann = embedding_spann
4578            .float_list
4579            .as_ref()
4580            .unwrap()
4581            .vector_index
4582            .as_ref()
4583            .unwrap();
4584        assert_eq!(
4585            embedding_vector_index_spann.config.source_key,
4586            Some(DOCUMENT_KEY.to_string())
4587        );
4588    }
4589
4590    #[test]
4591    fn test_default_hnsw_with_default_embedding_function() {
4592        // Test that when InternalCollectionConfiguration is default HNSW but has
4593        // an embedding function with name "default" and config as {}, it still
4594        // goes through the double default path and preserves source_key behavior
4595        use crate::collection_configuration::EmbeddingFunctionNewConfiguration;
4596
4597        let collection_config = InternalCollectionConfiguration {
4598            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default()),
4599            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
4600                EmbeddingFunctionNewConfiguration {
4601                    name: "default".to_string(),
4602                    config: serde_json::json!({}),
4603                },
4604            )),
4605        };
4606
4607        // Verify it's still considered default
4608        assert!(collection_config.is_default());
4609
4610        let schema = Schema::new_default(KnnIndex::Hnsw);
4611        let result =
4612            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4613                .unwrap();
4614
4615        // Check that defaults have source_key=None
4616        let defaults = result
4617            .defaults
4618            .float_list
4619            .as_ref()
4620            .unwrap()
4621            .vector_index
4622            .as_ref()
4623            .unwrap();
4624        assert_eq!(defaults.config.source_key, None);
4625
4626        // Check that #embedding has source_key=DOCUMENT_KEY
4627        let embedding = result.keys.get(EMBEDDING_KEY).unwrap();
4628        let embedding_vector_index = embedding
4629            .float_list
4630            .as_ref()
4631            .unwrap()
4632            .vector_index
4633            .as_ref()
4634            .unwrap();
4635        assert_eq!(
4636            embedding_vector_index.config.source_key,
4637            Some(DOCUMENT_KEY.to_string())
4638        );
4639
4640        // verify vector index config is set to spann
4641        let vector_index_config = defaults.config.clone();
4642        assert!(vector_index_config.spann.is_some());
4643        assert!(vector_index_config.hnsw.is_none());
4644
4645        // Verify embedding function was set correctly
4646        assert_eq!(
4647            embedding_vector_index.config.embedding_function,
4648            Some(EmbeddingFunctionConfiguration::Known(
4649                EmbeddingFunctionNewConfiguration {
4650                    name: "default".to_string(),
4651                    config: serde_json::json!({}),
4652                },
4653            ))
4654        );
4655        assert_eq!(
4656            defaults.config.embedding_function,
4657            Some(EmbeddingFunctionConfiguration::Known(
4658                EmbeddingFunctionNewConfiguration {
4659                    name: "default".to_string(),
4660                    config: serde_json::json!({}),
4661                },
4662            ))
4663        );
4664    }
4665
4666    #[test]
4667    fn test_reconcile_with_collection_config_both_non_default() {
4668        // Test that when both schema and collection config are non-default, it returns an error
4669        let mut schema = Schema::new_default(KnnIndex::Hnsw);
4670        schema.defaults.string = Some(StringValueType {
4671            fts_index: Some(FtsIndexType {
4672                enabled: true,
4673                config: FtsIndexConfig {},
4674            }),
4675            string_inverted_index: None,
4676        });
4677
4678        let mut collection_config = InternalCollectionConfiguration::default_hnsw();
4679        // Make collection config non-default by changing a parameter
4680        if let VectorIndexConfiguration::Hnsw(ref mut hnsw_config) = collection_config.vector_index
4681        {
4682            hnsw_config.ef_construction = 500; // Non-default value
4683        }
4684
4685        // Use reconcile_schema_and_config which has the early validation
4686        let result = Schema::reconcile_schema_and_config(
4687            Some(&schema),
4688            Some(&collection_config),
4689            KnnIndex::Spann,
4690        );
4691        assert!(result.is_err());
4692        assert!(matches!(
4693            result.unwrap_err(),
4694            SchemaError::ConfigAndSchemaConflict
4695        ));
4696    }
4697
4698    #[test]
4699    fn test_reconcile_with_collection_config_hnsw_override() {
4700        // Test that non-default HNSW collection config overrides default schema
4701        let schema = Schema::new_default(KnnIndex::Hnsw); // Use actual default schema
4702
4703        let collection_config = InternalCollectionConfiguration {
4704            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4705                ef_construction: 300,
4706                max_neighbors: 32,
4707                ef_search: 50,
4708                num_threads: 8,
4709                batch_size: 200,
4710                sync_threshold: 2000,
4711                resize_factor: 1.5,
4712                space: Space::L2,
4713            }),
4714            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4715        };
4716
4717        let result =
4718            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4719                .unwrap();
4720
4721        // Check that #embedding key override was created with the collection config settings
4722        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4723        let vector_index = embedding_override
4724            .float_list
4725            .as_ref()
4726            .unwrap()
4727            .vector_index
4728            .as_ref()
4729            .unwrap();
4730
4731        assert!(vector_index.enabled);
4732        assert_eq!(vector_index.config.space, Some(Space::L2));
4733        assert_eq!(
4734            vector_index.config.embedding_function,
4735            Some(EmbeddingFunctionConfiguration::Legacy)
4736        );
4737        assert_eq!(
4738            vector_index.config.source_key,
4739            Some(DOCUMENT_KEY.to_string())
4740        );
4741
4742        let hnsw_config = vector_index.config.hnsw.as_ref().unwrap();
4743        assert_eq!(hnsw_config.ef_construction, Some(300));
4744        assert_eq!(hnsw_config.max_neighbors, Some(32));
4745        assert_eq!(hnsw_config.ef_search, Some(50));
4746        assert_eq!(hnsw_config.num_threads, Some(8));
4747        assert_eq!(hnsw_config.batch_size, Some(200));
4748        assert_eq!(hnsw_config.sync_threshold, Some(2000));
4749        assert_eq!(hnsw_config.resize_factor, Some(1.5));
4750
4751        assert!(vector_index.config.spann.is_none());
4752    }
4753
4754    #[test]
4755    fn test_reconcile_with_collection_config_spann_override() {
4756        // Test that non-default SPANN collection config overrides default schema
4757        let schema = Schema::new_default(KnnIndex::Spann); // Use actual default schema
4758
4759        let collection_config = InternalCollectionConfiguration {
4760            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4761                search_nprobe: 20,
4762                search_rng_factor: 3.0,
4763                search_rng_epsilon: 0.2,
4764                nreplica_count: 5,
4765                write_rng_factor: 2.0,
4766                write_rng_epsilon: 0.1,
4767                split_threshold: 2000,
4768                num_samples_kmeans: 200,
4769                initial_lambda: 0.8,
4770                reassign_neighbor_count: 100,
4771                merge_threshold: 800,
4772                num_centers_to_merge_to: 20,
4773                write_nprobe: 10,
4774                ef_construction: 400,
4775                ef_search: 60,
4776                max_neighbors: 24,
4777                space: Space::Cosine,
4778            }),
4779            embedding_function: None,
4780        };
4781
4782        let result =
4783            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4784                .unwrap();
4785
4786        // Check that #embedding key override was created with the collection config settings
4787        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4788        let vector_index = embedding_override
4789            .float_list
4790            .as_ref()
4791            .unwrap()
4792            .vector_index
4793            .as_ref()
4794            .unwrap();
4795
4796        assert!(vector_index.enabled);
4797        assert_eq!(vector_index.config.space, Some(Space::Cosine));
4798        assert_eq!(vector_index.config.embedding_function, None);
4799        assert_eq!(
4800            vector_index.config.source_key,
4801            Some(DOCUMENT_KEY.to_string())
4802        );
4803
4804        assert!(vector_index.config.hnsw.is_none());
4805
4806        let spann_config = vector_index.config.spann.as_ref().unwrap();
4807        assert_eq!(spann_config.search_nprobe, Some(20));
4808        assert_eq!(spann_config.search_rng_factor, Some(3.0));
4809        assert_eq!(spann_config.search_rng_epsilon, Some(0.2));
4810        assert_eq!(spann_config.nreplica_count, Some(5));
4811        assert_eq!(spann_config.write_rng_factor, Some(2.0));
4812        assert_eq!(spann_config.write_rng_epsilon, Some(0.1));
4813        assert_eq!(spann_config.split_threshold, Some(2000));
4814        assert_eq!(spann_config.num_samples_kmeans, Some(200));
4815        assert_eq!(spann_config.initial_lambda, Some(0.8));
4816        assert_eq!(spann_config.reassign_neighbor_count, Some(100));
4817        assert_eq!(spann_config.merge_threshold, Some(800));
4818        assert_eq!(spann_config.num_centers_to_merge_to, Some(20));
4819        assert_eq!(spann_config.write_nprobe, Some(10));
4820        assert_eq!(spann_config.ef_construction, Some(400));
4821        assert_eq!(spann_config.ef_search, Some(60));
4822        assert_eq!(spann_config.max_neighbors, Some(24));
4823    }
4824
4825    #[test]
4826    fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
4827        // Test that collection config updates BOTH defaults.float_list.vector_index
4828        // AND keys["embedding"].float_list.vector_index
4829        let schema = Schema::new_default(KnnIndex::Hnsw);
4830
4831        let collection_config = InternalCollectionConfiguration {
4832            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4833                ef_construction: 300,
4834                max_neighbors: 32,
4835                ef_search: 50,
4836                num_threads: 8,
4837                batch_size: 200,
4838                sync_threshold: 2000,
4839                resize_factor: 1.5,
4840                space: Space::L2,
4841            }),
4842            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4843        };
4844
4845        let result =
4846            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4847                .unwrap();
4848
4849        // Check that defaults.float_list.vector_index was updated
4850        let defaults_vector_index = result
4851            .defaults
4852            .float_list
4853            .as_ref()
4854            .unwrap()
4855            .vector_index
4856            .as_ref()
4857            .unwrap();
4858
4859        // Should be disabled in defaults (template for new keys)
4860        assert!(!defaults_vector_index.enabled);
4861        // But config should be updated
4862        assert_eq!(defaults_vector_index.config.space, Some(Space::L2));
4863        assert_eq!(
4864            defaults_vector_index.config.embedding_function,
4865            Some(EmbeddingFunctionConfiguration::Legacy)
4866        );
4867        assert_eq!(defaults_vector_index.config.source_key, None);
4868        let defaults_hnsw = defaults_vector_index.config.hnsw.as_ref().unwrap();
4869        assert_eq!(defaults_hnsw.ef_construction, Some(300));
4870        assert_eq!(defaults_hnsw.max_neighbors, Some(32));
4871
4872        // Check that #embedding key override was also updated
4873        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4874        let embedding_vector_index = embedding_override
4875            .float_list
4876            .as_ref()
4877            .unwrap()
4878            .vector_index
4879            .as_ref()
4880            .unwrap();
4881
4882        // Should be enabled on #embedding
4883        assert!(embedding_vector_index.enabled);
4884        // Config should match defaults
4885        assert_eq!(embedding_vector_index.config.space, Some(Space::L2));
4886        assert_eq!(
4887            embedding_vector_index.config.embedding_function,
4888            Some(EmbeddingFunctionConfiguration::Legacy)
4889        );
4890        assert_eq!(
4891            embedding_vector_index.config.source_key,
4892            Some(DOCUMENT_KEY.to_string())
4893        );
4894        let embedding_hnsw = embedding_vector_index.config.hnsw.as_ref().unwrap();
4895        assert_eq!(embedding_hnsw.ef_construction, Some(300));
4896        assert_eq!(embedding_hnsw.max_neighbors, Some(32));
4897    }
4898
4899    #[test]
4900    fn test_is_schema_default() {
4901        // Test that actual default schemas are correctly identified
4902        let default_hnsw_schema = Schema::new_default(KnnIndex::Hnsw);
4903        assert!(default_hnsw_schema.is_default());
4904
4905        let default_spann_schema = Schema::new_default(KnnIndex::Spann);
4906        assert!(default_spann_schema.is_default());
4907
4908        // Test that a modified default schema is not considered default
4909        let mut modified_schema = Schema::new_default(KnnIndex::Hnsw);
4910        // Make a clear modification - change the string inverted index enabled state
4911        if let Some(ref mut string_type) = modified_schema.defaults.string {
4912            if let Some(ref mut string_inverted) = string_type.string_inverted_index {
4913                string_inverted.enabled = false; // Default is true, so this should make it non-default
4914            }
4915        }
4916        assert!(!modified_schema.is_default());
4917
4918        // Test that schema with additional key overrides is not default
4919        let mut schema_with_extra_overrides = Schema::new_default(KnnIndex::Hnsw);
4920        schema_with_extra_overrides
4921            .keys
4922            .insert("custom_key".to_string(), ValueTypes::default());
4923        assert!(!schema_with_extra_overrides.is_default());
4924    }
4925
4926    #[test]
4927    fn test_is_schema_default_with_space() {
4928        let schema = Schema::new_default(KnnIndex::Hnsw);
4929        assert!(schema.is_default());
4930
4931        let mut schema_with_space = Schema::new_default(KnnIndex::Hnsw);
4932        if let Some(ref mut float_list) = schema_with_space.defaults.float_list {
4933            if let Some(ref mut vector_index) = float_list.vector_index {
4934                vector_index.config.space = Some(Space::Cosine);
4935            }
4936        }
4937        assert!(!schema_with_space.is_default());
4938
4939        let mut schema_with_space_in_embedding_key = Schema::new_default(KnnIndex::Spann);
4940        if let Some(ref mut embedding_key) = schema_with_space_in_embedding_key
4941            .keys
4942            .get_mut(EMBEDDING_KEY)
4943        {
4944            if let Some(ref mut float_list) = embedding_key.float_list {
4945                if let Some(ref mut vector_index) = float_list.vector_index {
4946                    vector_index.config.space = Some(Space::Cosine);
4947                }
4948            }
4949        }
4950        assert!(!schema_with_space_in_embedding_key.is_default());
4951    }
4952
4953    #[test]
4954    fn test_is_schema_default_with_embedding_function() {
4955        let schema = Schema::new_default(KnnIndex::Hnsw);
4956        assert!(schema.is_default());
4957
4958        let mut schema_with_embedding_function = Schema::new_default(KnnIndex::Hnsw);
4959        if let Some(ref mut float_list) = schema_with_embedding_function.defaults.float_list {
4960            if let Some(ref mut vector_index) = float_list.vector_index {
4961                vector_index.config.embedding_function =
4962                    Some(EmbeddingFunctionConfiguration::Legacy);
4963            }
4964        }
4965        assert!(!schema_with_embedding_function.is_default());
4966
4967        let mut schema_with_embedding_function_in_embedding_key =
4968            Schema::new_default(KnnIndex::Spann);
4969        if let Some(ref mut embedding_key) = schema_with_embedding_function_in_embedding_key
4970            .keys
4971            .get_mut(EMBEDDING_KEY)
4972        {
4973            if let Some(ref mut float_list) = embedding_key.float_list {
4974                if let Some(ref mut vector_index) = float_list.vector_index {
4975                    vector_index.config.embedding_function =
4976                        Some(EmbeddingFunctionConfiguration::Legacy);
4977                }
4978            }
4979        }
4980        assert!(!schema_with_embedding_function_in_embedding_key.is_default());
4981    }
4982
4983    #[test]
4984    fn test_add_merges_keys_by_value_type() {
4985        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
4986        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
4987
4988        let string_override = ValueTypes {
4989            string: Some(StringValueType {
4990                string_inverted_index: Some(StringInvertedIndexType {
4991                    enabled: true,
4992                    config: StringInvertedIndexConfig {},
4993                }),
4994                fts_index: None,
4995            }),
4996            ..Default::default()
4997        };
4998        schema_a
4999            .keys
5000            .insert("custom_field".to_string(), string_override);
5001
5002        let float_override = ValueTypes {
5003            float: Some(FloatValueType {
5004                float_inverted_index: Some(FloatInvertedIndexType {
5005                    enabled: true,
5006                    config: FloatInvertedIndexConfig {},
5007                }),
5008            }),
5009            ..Default::default()
5010        };
5011        schema_b
5012            .keys
5013            .insert("custom_field".to_string(), float_override);
5014
5015        let merged = schema_a.merge(&schema_b).unwrap();
5016        let merged_override = merged.keys.get("custom_field").unwrap();
5017
5018        assert!(merged_override.string.is_some());
5019        assert!(merged_override.float.is_some());
5020        assert!(
5021            merged_override
5022                .string
5023                .as_ref()
5024                .unwrap()
5025                .string_inverted_index
5026                .as_ref()
5027                .unwrap()
5028                .enabled
5029        );
5030        assert!(
5031            merged_override
5032                .float
5033                .as_ref()
5034                .unwrap()
5035                .float_inverted_index
5036                .as_ref()
5037                .unwrap()
5038                .enabled
5039        );
5040    }
5041
5042    #[test]
5043    fn test_add_rejects_different_defaults() {
5044        let schema_a = Schema::new_default(KnnIndex::Hnsw);
5045        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
5046
5047        if let Some(string_type) = schema_b.defaults.string.as_mut() {
5048            if let Some(string_index) = string_type.string_inverted_index.as_mut() {
5049                string_index.enabled = false;
5050            }
5051        }
5052
5053        let err = schema_a.merge(&schema_b).unwrap_err();
5054        assert!(matches!(err, SchemaError::DefaultsMismatch));
5055    }
5056
5057    #[test]
5058    fn test_add_detects_conflicting_value_type_configuration() {
5059        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
5060        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
5061
5062        let string_override_enabled = ValueTypes {
5063            string: Some(StringValueType {
5064                string_inverted_index: Some(StringInvertedIndexType {
5065                    enabled: true,
5066                    config: StringInvertedIndexConfig {},
5067                }),
5068                fts_index: None,
5069            }),
5070            ..Default::default()
5071        };
5072        schema_a
5073            .keys
5074            .insert("custom_field".to_string(), string_override_enabled);
5075
5076        let string_override_disabled = ValueTypes {
5077            string: Some(StringValueType {
5078                string_inverted_index: Some(StringInvertedIndexType {
5079                    enabled: false,
5080                    config: StringInvertedIndexConfig {},
5081                }),
5082                fts_index: None,
5083            }),
5084            ..Default::default()
5085        };
5086        schema_b
5087            .keys
5088            .insert("custom_field".to_string(), string_override_disabled);
5089
5090        let err = schema_a.merge(&schema_b).unwrap_err();
5091        assert!(matches!(err, SchemaError::ConfigurationConflict { .. }));
5092    }
5093
5094    // TODO(Sanket): Remove this test once deployed
5095    #[test]
5096    fn test_backward_compatibility_aliases() {
5097        // Test that old format with # and $ prefixes and key_overrides can be deserialized
5098        let old_format_json = r###"{
5099            "defaults": {
5100                "#string": {
5101                    "$fts_index": {
5102                        "enabled": true,
5103                        "config": {}
5104                    }
5105                },
5106                "#int": {
5107                    "$int_inverted_index": {
5108                        "enabled": true,
5109                        "config": {}
5110                    }
5111                },
5112                "#float_list": {
5113                    "$vector_index": {
5114                        "enabled": true,
5115                        "config": {
5116                            "spann": {
5117                                "search_nprobe": 10
5118                            }
5119                        }
5120                    }
5121                }
5122            },
5123            "key_overrides": {
5124                "#document": {
5125                    "#string": {
5126                        "$fts_index": {
5127                            "enabled": false,
5128                            "config": {}
5129                        }
5130                    }
5131                }
5132            }
5133        }"###;
5134
5135        let schema_from_old: Schema = serde_json::from_str(old_format_json).unwrap();
5136
5137        // Test that new format without prefixes and keys can be deserialized
5138        let new_format_json = r###"{
5139            "defaults": {
5140                "string": {
5141                    "fts_index": {
5142                        "enabled": true,
5143                        "config": {}
5144                    }
5145                },
5146                "int": {
5147                    "int_inverted_index": {
5148                        "enabled": true,
5149                        "config": {}
5150                    }
5151                },
5152                "float_list": {
5153                    "vector_index": {
5154                        "enabled": true,
5155                        "config": {
5156                            "spann": {
5157                                "search_nprobe": 10
5158                            }
5159                        }
5160                    }
5161                }
5162            },
5163            "keys": {
5164                "#document": {
5165                    "string": {
5166                        "fts_index": {
5167                            "enabled": false,
5168                            "config": {}
5169                        }
5170                    }
5171                }
5172            }
5173        }"###;
5174
5175        let schema_from_new: Schema = serde_json::from_str(new_format_json).unwrap();
5176
5177        // Both should deserialize to the same structure
5178        assert_eq!(schema_from_old, schema_from_new);
5179
5180        // Verify the deserialized content is correct
5181        assert!(schema_from_old.defaults.string.is_some());
5182        assert!(schema_from_old
5183            .defaults
5184            .string
5185            .as_ref()
5186            .unwrap()
5187            .fts_index
5188            .is_some());
5189        assert!(
5190            schema_from_old
5191                .defaults
5192                .string
5193                .as_ref()
5194                .unwrap()
5195                .fts_index
5196                .as_ref()
5197                .unwrap()
5198                .enabled
5199        );
5200
5201        assert!(schema_from_old.defaults.int.is_some());
5202        assert!(schema_from_old
5203            .defaults
5204            .int
5205            .as_ref()
5206            .unwrap()
5207            .int_inverted_index
5208            .is_some());
5209
5210        assert!(schema_from_old.defaults.float_list.is_some());
5211        assert!(schema_from_old
5212            .defaults
5213            .float_list
5214            .as_ref()
5215            .unwrap()
5216            .vector_index
5217            .is_some());
5218
5219        assert!(schema_from_old.keys.contains_key(DOCUMENT_KEY));
5220        let doc_override = schema_from_old.keys.get(DOCUMENT_KEY).unwrap();
5221        assert!(doc_override.string.is_some());
5222        assert!(
5223            !doc_override
5224                .string
5225                .as_ref()
5226                .unwrap()
5227                .fts_index
5228                .as_ref()
5229                .unwrap()
5230                .enabled
5231        );
5232
5233        // Test that serialization always outputs the new format (without prefixes)
5234        let serialized = serde_json::to_string(&schema_from_old).unwrap();
5235
5236        // Should contain new format keys
5237        assert!(serialized.contains(r#""keys":"#));
5238        assert!(serialized.contains(r#""string":"#));
5239        assert!(serialized.contains(r#""fts_index":"#));
5240        assert!(serialized.contains(r#""int_inverted_index":"#));
5241        assert!(serialized.contains(r#""vector_index":"#));
5242
5243        // Should NOT contain old format keys
5244        assert!(!serialized.contains(r#""key_overrides":"#));
5245        assert!(!serialized.contains(r###""#string":"###));
5246        assert!(!serialized.contains(r###""$fts_index":"###));
5247        assert!(!serialized.contains(r###""$int_inverted_index":"###));
5248        assert!(!serialized.contains(r###""$vector_index":"###));
5249    }
5250
5251    #[test]
5252    fn test_hnsw_index_config_validation() {
5253        use validator::Validate;
5254
5255        // Valid configuration - should pass
5256        let valid_config = HnswIndexConfig {
5257            batch_size: Some(10),
5258            sync_threshold: Some(100),
5259            ef_construction: Some(100),
5260            max_neighbors: Some(16),
5261            ..Default::default()
5262        };
5263        assert!(valid_config.validate().is_ok());
5264
5265        // Invalid: batch_size too small (min 2)
5266        let invalid_batch_size = HnswIndexConfig {
5267            batch_size: Some(1),
5268            ..Default::default()
5269        };
5270        assert!(invalid_batch_size.validate().is_err());
5271
5272        // Invalid: sync_threshold too small (min 2)
5273        let invalid_sync_threshold = HnswIndexConfig {
5274            sync_threshold: Some(1),
5275            ..Default::default()
5276        };
5277        assert!(invalid_sync_threshold.validate().is_err());
5278
5279        // Valid: boundary values (exactly 2) should pass
5280        let boundary_config = HnswIndexConfig {
5281            batch_size: Some(2),
5282            sync_threshold: Some(2),
5283            ..Default::default()
5284        };
5285        assert!(boundary_config.validate().is_ok());
5286
5287        // Valid: None values should pass validation
5288        let all_none_config = HnswIndexConfig {
5289            ..Default::default()
5290        };
5291        assert!(all_none_config.validate().is_ok());
5292
5293        // Valid: fields without validation can be any value
5294        let other_fields_config = HnswIndexConfig {
5295            ef_construction: Some(1),
5296            max_neighbors: Some(1),
5297            ef_search: Some(1),
5298            num_threads: Some(1),
5299            resize_factor: Some(0.1),
5300            ..Default::default()
5301        };
5302        assert!(other_fields_config.validate().is_ok());
5303    }
5304
5305    #[test]
5306    fn test_spann_index_config_validation() {
5307        use validator::Validate;
5308
5309        // Valid configuration - should pass
5310        let valid_config = SpannIndexConfig {
5311            write_nprobe: Some(32),
5312            nreplica_count: Some(4),
5313            split_threshold: Some(100),
5314            merge_threshold: Some(50),
5315            reassign_neighbor_count: Some(32),
5316            num_centers_to_merge_to: Some(4),
5317            ef_construction: Some(100),
5318            ef_search: Some(100),
5319            max_neighbors: Some(32),
5320            search_rng_factor: Some(1.0),
5321            write_rng_factor: Some(1.0),
5322            search_rng_epsilon: Some(7.5),
5323            write_rng_epsilon: Some(7.5),
5324            ..Default::default()
5325        };
5326        assert!(valid_config.validate().is_ok());
5327
5328        // Invalid: write_nprobe too large (max 64)
5329        let invalid_write_nprobe = SpannIndexConfig {
5330            write_nprobe: Some(200),
5331            ..Default::default()
5332        };
5333        assert!(invalid_write_nprobe.validate().is_err());
5334
5335        // Invalid: split_threshold too small (min 50)
5336        let invalid_split_threshold = SpannIndexConfig {
5337            split_threshold: Some(10),
5338            ..Default::default()
5339        };
5340        assert!(invalid_split_threshold.validate().is_err());
5341
5342        // Invalid: split_threshold too large (max 200)
5343        let invalid_split_threshold_high = SpannIndexConfig {
5344            split_threshold: Some(250),
5345            ..Default::default()
5346        };
5347        assert!(invalid_split_threshold_high.validate().is_err());
5348
5349        // Invalid: nreplica_count too large (max 8)
5350        let invalid_nreplica = SpannIndexConfig {
5351            nreplica_count: Some(10),
5352            ..Default::default()
5353        };
5354        assert!(invalid_nreplica.validate().is_err());
5355
5356        // Invalid: reassign_neighbor_count too large (max 64)
5357        let invalid_reassign = SpannIndexConfig {
5358            reassign_neighbor_count: Some(100),
5359            ..Default::default()
5360        };
5361        assert!(invalid_reassign.validate().is_err());
5362
5363        // Invalid: merge_threshold out of range (min 25, max 100)
5364        let invalid_merge_threshold_low = SpannIndexConfig {
5365            merge_threshold: Some(5),
5366            ..Default::default()
5367        };
5368        assert!(invalid_merge_threshold_low.validate().is_err());
5369
5370        let invalid_merge_threshold_high = SpannIndexConfig {
5371            merge_threshold: Some(150),
5372            ..Default::default()
5373        };
5374        assert!(invalid_merge_threshold_high.validate().is_err());
5375
5376        // Invalid: num_centers_to_merge_to too large (max 8)
5377        let invalid_num_centers = SpannIndexConfig {
5378            num_centers_to_merge_to: Some(10),
5379            ..Default::default()
5380        };
5381        assert!(invalid_num_centers.validate().is_err());
5382
5383        // Invalid: ef_construction too large (max 200)
5384        let invalid_ef_construction = SpannIndexConfig {
5385            ef_construction: Some(300),
5386            ..Default::default()
5387        };
5388        assert!(invalid_ef_construction.validate().is_err());
5389
5390        // Invalid: ef_search too large (max 200)
5391        let invalid_ef_search = SpannIndexConfig {
5392            ef_search: Some(300),
5393            ..Default::default()
5394        };
5395        assert!(invalid_ef_search.validate().is_err());
5396
5397        // Invalid: max_neighbors too large (max 64)
5398        let invalid_max_neighbors = SpannIndexConfig {
5399            max_neighbors: Some(100),
5400            ..Default::default()
5401        };
5402        assert!(invalid_max_neighbors.validate().is_err());
5403
5404        // Invalid: search_nprobe too large (max 128)
5405        let invalid_search_nprobe = SpannIndexConfig {
5406            search_nprobe: Some(200),
5407            ..Default::default()
5408        };
5409        assert!(invalid_search_nprobe.validate().is_err());
5410
5411        // Invalid: search_rng_factor not exactly 1.0 (min 1.0, max 1.0)
5412        let invalid_search_rng_factor_low = SpannIndexConfig {
5413            search_rng_factor: Some(0.9),
5414            ..Default::default()
5415        };
5416        assert!(invalid_search_rng_factor_low.validate().is_err());
5417
5418        let invalid_search_rng_factor_high = SpannIndexConfig {
5419            search_rng_factor: Some(1.1),
5420            ..Default::default()
5421        };
5422        assert!(invalid_search_rng_factor_high.validate().is_err());
5423
5424        // Valid: search_rng_factor exactly 1.0
5425        let valid_search_rng_factor = SpannIndexConfig {
5426            search_rng_factor: Some(1.0),
5427            ..Default::default()
5428        };
5429        assert!(valid_search_rng_factor.validate().is_ok());
5430
5431        // Invalid: search_rng_epsilon out of range (min 5.0, max 10.0)
5432        let invalid_search_rng_epsilon_low = SpannIndexConfig {
5433            search_rng_epsilon: Some(4.0),
5434            ..Default::default()
5435        };
5436        assert!(invalid_search_rng_epsilon_low.validate().is_err());
5437
5438        let invalid_search_rng_epsilon_high = SpannIndexConfig {
5439            search_rng_epsilon: Some(11.0),
5440            ..Default::default()
5441        };
5442        assert!(invalid_search_rng_epsilon_high.validate().is_err());
5443
5444        // Valid: search_rng_epsilon within range
5445        let valid_search_rng_epsilon = SpannIndexConfig {
5446            search_rng_epsilon: Some(7.5),
5447            ..Default::default()
5448        };
5449        assert!(valid_search_rng_epsilon.validate().is_ok());
5450
5451        // Invalid: write_rng_factor not exactly 1.0 (min 1.0, max 1.0)
5452        let invalid_write_rng_factor_low = SpannIndexConfig {
5453            write_rng_factor: Some(0.9),
5454            ..Default::default()
5455        };
5456        assert!(invalid_write_rng_factor_low.validate().is_err());
5457
5458        let invalid_write_rng_factor_high = SpannIndexConfig {
5459            write_rng_factor: Some(1.1),
5460            ..Default::default()
5461        };
5462        assert!(invalid_write_rng_factor_high.validate().is_err());
5463
5464        // Valid: write_rng_factor exactly 1.0
5465        let valid_write_rng_factor = SpannIndexConfig {
5466            write_rng_factor: Some(1.0),
5467            ..Default::default()
5468        };
5469        assert!(valid_write_rng_factor.validate().is_ok());
5470
5471        // Invalid: write_rng_epsilon out of range (min 5.0, max 10.0)
5472        let invalid_write_rng_epsilon_low = SpannIndexConfig {
5473            write_rng_epsilon: Some(4.0),
5474            ..Default::default()
5475        };
5476        assert!(invalid_write_rng_epsilon_low.validate().is_err());
5477
5478        let invalid_write_rng_epsilon_high = SpannIndexConfig {
5479            write_rng_epsilon: Some(11.0),
5480            ..Default::default()
5481        };
5482        assert!(invalid_write_rng_epsilon_high.validate().is_err());
5483
5484        // Valid: write_rng_epsilon within range
5485        let valid_write_rng_epsilon = SpannIndexConfig {
5486            write_rng_epsilon: Some(7.5),
5487            ..Default::default()
5488        };
5489        assert!(valid_write_rng_epsilon.validate().is_ok());
5490
5491        // Invalid: num_samples_kmeans too large (max 1000)
5492        let invalid_num_samples_kmeans = SpannIndexConfig {
5493            num_samples_kmeans: Some(1500),
5494            ..Default::default()
5495        };
5496        assert!(invalid_num_samples_kmeans.validate().is_err());
5497
5498        // Valid: num_samples_kmeans within range
5499        let valid_num_samples_kmeans = SpannIndexConfig {
5500            num_samples_kmeans: Some(500),
5501            ..Default::default()
5502        };
5503        assert!(valid_num_samples_kmeans.validate().is_ok());
5504
5505        // Invalid: initial_lambda not exactly 100.0 (min 100.0, max 100.0)
5506        let invalid_initial_lambda_high = SpannIndexConfig {
5507            initial_lambda: Some(150.0),
5508            ..Default::default()
5509        };
5510        assert!(invalid_initial_lambda_high.validate().is_err());
5511
5512        let invalid_initial_lambda_low = SpannIndexConfig {
5513            initial_lambda: Some(50.0),
5514            ..Default::default()
5515        };
5516        assert!(invalid_initial_lambda_low.validate().is_err());
5517
5518        // Valid: initial_lambda exactly 100.0
5519        let valid_initial_lambda = SpannIndexConfig {
5520            initial_lambda: Some(100.0),
5521            ..Default::default()
5522        };
5523        assert!(valid_initial_lambda.validate().is_ok());
5524
5525        // Valid: None values should pass validation
5526        let all_none_config = SpannIndexConfig {
5527            ..Default::default()
5528        };
5529        assert!(all_none_config.validate().is_ok());
5530    }
5531
5532    #[test]
5533    fn test_builder_pattern_crud_workflow() {
5534        // Test comprehensive CRUD workflow using the builder pattern
5535
5536        // CREATE: Build a schema with multiple indexes
5537        let schema = Schema::new_default(KnnIndex::Hnsw)
5538            .create_index(
5539                None,
5540                IndexConfig::Vector(VectorIndexConfig {
5541                    space: Some(Space::Cosine),
5542                    embedding_function: None,
5543                    source_key: None,
5544                    hnsw: Some(HnswIndexConfig {
5545                        ef_construction: Some(200),
5546                        max_neighbors: Some(32),
5547                        ef_search: Some(50),
5548                        num_threads: None,
5549                        batch_size: None,
5550                        sync_threshold: None,
5551                        resize_factor: None,
5552                    }),
5553                    spann: None,
5554                }),
5555            )
5556            .expect("vector config should succeed")
5557            .create_index(
5558                Some("category"),
5559                IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5560            )
5561            .expect("string inverted on key should succeed")
5562            .create_index(
5563                Some("year"),
5564                IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5565            )
5566            .expect("int inverted on key should succeed")
5567            .create_index(
5568                Some("rating"),
5569                IndexConfig::FloatInverted(FloatInvertedIndexConfig {}),
5570            )
5571            .expect("float inverted on key should succeed")
5572            .create_index(
5573                Some("is_active"),
5574                IndexConfig::BoolInverted(BoolInvertedIndexConfig {}),
5575            )
5576            .expect("bool inverted on key should succeed");
5577
5578        // READ: Verify the schema was built correctly
5579        // Check vector config
5580        assert!(schema.keys.contains_key(EMBEDDING_KEY));
5581        let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
5582        assert!(embedding.float_list.is_some());
5583        let vector_index = embedding
5584            .float_list
5585            .as_ref()
5586            .unwrap()
5587            .vector_index
5588            .as_ref()
5589            .unwrap();
5590        assert!(vector_index.enabled);
5591        assert_eq!(vector_index.config.space, Some(Space::Cosine));
5592        assert_eq!(
5593            vector_index.config.hnsw.as_ref().unwrap().ef_construction,
5594            Some(200)
5595        );
5596
5597        // Check per-key indexes
5598        assert!(schema.keys.contains_key("category"));
5599        assert!(schema.keys.contains_key("year"));
5600        assert!(schema.keys.contains_key("rating"));
5601        assert!(schema.keys.contains_key("is_active"));
5602
5603        // Verify category string inverted index
5604        let category = schema.keys.get("category").unwrap();
5605        assert!(category.string.is_some());
5606        let string_idx = category
5607            .string
5608            .as_ref()
5609            .unwrap()
5610            .string_inverted_index
5611            .as_ref()
5612            .unwrap();
5613        assert!(string_idx.enabled);
5614
5615        // Verify year int inverted index
5616        let year = schema.keys.get("year").unwrap();
5617        assert!(year.int.is_some());
5618        let int_idx = year
5619            .int
5620            .as_ref()
5621            .unwrap()
5622            .int_inverted_index
5623            .as_ref()
5624            .unwrap();
5625        assert!(int_idx.enabled);
5626
5627        // UPDATE/DELETE: Disable some indexes
5628        let schema = schema
5629            .delete_index(
5630                Some("category"),
5631                IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5632            )
5633            .expect("delete string inverted should succeed")
5634            .delete_index(
5635                Some("year"),
5636                IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5637            )
5638            .expect("delete int inverted should succeed");
5639
5640        // VERIFY DELETE: Check that indexes were disabled
5641        let category = schema.keys.get("category").unwrap();
5642        let string_idx = category
5643            .string
5644            .as_ref()
5645            .unwrap()
5646            .string_inverted_index
5647            .as_ref()
5648            .unwrap();
5649        assert!(!string_idx.enabled); // Should be disabled now
5650
5651        let year = schema.keys.get("year").unwrap();
5652        let int_idx = year
5653            .int
5654            .as_ref()
5655            .unwrap()
5656            .int_inverted_index
5657            .as_ref()
5658            .unwrap();
5659        assert!(!int_idx.enabled); // Should be disabled now
5660
5661        // Verify other indexes still enabled
5662        let rating = schema.keys.get("rating").unwrap();
5663        let float_idx = rating
5664            .float
5665            .as_ref()
5666            .unwrap()
5667            .float_inverted_index
5668            .as_ref()
5669            .unwrap();
5670        assert!(float_idx.enabled); // Should still be enabled
5671
5672        let is_active = schema.keys.get("is_active").unwrap();
5673        let bool_idx = is_active
5674            .boolean
5675            .as_ref()
5676            .unwrap()
5677            .bool_inverted_index
5678            .as_ref()
5679            .unwrap();
5680        assert!(bool_idx.enabled); // Should still be enabled
5681    }
5682
5683    #[test]
5684    fn test_builder_create_index_validation_errors() {
5685        // Test all validation errors for create_index() as documented in the docstring:
5686        // - Attempting to create index on special keys (#document, #embedding)
5687        // - Invalid configuration (e.g., vector index on non-embedding key)
5688        // - Conflicting with existing indexes (e.g., multiple sparse vector indexes)
5689
5690        // Error: Vector index on specific key (must be global)
5691        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5692            Some("my_vectors"),
5693            IndexConfig::Vector(VectorIndexConfig {
5694                space: Some(Space::L2),
5695                embedding_function: None,
5696                source_key: None,
5697                hnsw: None,
5698                spann: None,
5699            }),
5700        );
5701        assert!(result.is_err());
5702        assert!(matches!(
5703            result.unwrap_err(),
5704            SchemaBuilderError::VectorIndexMustBeGlobal { key } if key == "my_vectors"
5705        ));
5706
5707        // Error: FTS index on specific key (must be global)
5708        let result = Schema::new_default(KnnIndex::Hnsw)
5709            .create_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
5710        assert!(result.is_err());
5711        assert!(matches!(
5712            result.unwrap_err(),
5713            SchemaBuilderError::FtsIndexMustBeGlobal { key } if key == "my_text"
5714        ));
5715
5716        // Error: Cannot create index on special key #document
5717        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5718            Some(DOCUMENT_KEY),
5719            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5720        );
5721        assert!(result.is_err());
5722        assert!(matches!(
5723            result.unwrap_err(),
5724            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5725        ));
5726
5727        // Error: Cannot create index on special key #embedding
5728        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5729            Some(EMBEDDING_KEY),
5730            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5731        );
5732        assert!(result.is_err());
5733        assert!(matches!(
5734            result.unwrap_err(),
5735            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5736        ));
5737
5738        // Error: Sparse vector without key (must specify key)
5739        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5740            None,
5741            IndexConfig::SparseVector(SparseVectorIndexConfig {
5742                embedding_function: None,
5743                source_key: None,
5744                bm25: None,
5745            }),
5746        );
5747        assert!(result.is_err());
5748        assert!(matches!(
5749            result.unwrap_err(),
5750            SchemaBuilderError::SparseVectorRequiresKey
5751        ));
5752
5753        // Error: Multiple sparse vector indexes (only one allowed per collection)
5754        let result = Schema::new_default(KnnIndex::Hnsw)
5755            .create_index(
5756                Some("sparse1"),
5757                IndexConfig::SparseVector(SparseVectorIndexConfig {
5758                    embedding_function: None,
5759                    source_key: None,
5760                    bm25: None,
5761                }),
5762            )
5763            .expect("first sparse should succeed")
5764            .create_index(
5765                Some("sparse2"),
5766                IndexConfig::SparseVector(SparseVectorIndexConfig {
5767                    embedding_function: None,
5768                    source_key: None,
5769                    bm25: None,
5770                }),
5771            );
5772        assert!(result.is_err());
5773        assert!(matches!(
5774            result.unwrap_err(),
5775            SchemaBuilderError::MultipleSparseVectorIndexes { existing_key } if existing_key == "sparse1"
5776        ));
5777    }
5778
5779    #[test]
5780    fn test_builder_delete_index_validation_errors() {
5781        // Test all validation errors for delete_index() as documented in the docstring:
5782        // - Attempting to delete index on special keys (#document, #embedding)
5783        // - Attempting to delete vector, FTS, or sparse vector indexes (not currently supported)
5784
5785        // Error: Delete on special key #embedding
5786        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5787            Some(EMBEDDING_KEY),
5788            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5789        );
5790        assert!(result.is_err());
5791        assert!(matches!(
5792            result.unwrap_err(),
5793            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5794        ));
5795
5796        // Error: Delete on special key #document
5797        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5798            Some(DOCUMENT_KEY),
5799            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5800        );
5801        assert!(result.is_err());
5802        assert!(matches!(
5803            result.unwrap_err(),
5804            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5805        ));
5806
5807        // Error: Delete vector index (not currently supported)
5808        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5809            None,
5810            IndexConfig::Vector(VectorIndexConfig {
5811                space: None,
5812                embedding_function: None,
5813                source_key: None,
5814                hnsw: None,
5815                spann: None,
5816            }),
5817        );
5818        assert!(result.is_err());
5819        assert!(matches!(
5820            result.unwrap_err(),
5821            SchemaBuilderError::VectorIndexDeletionNotSupported
5822        ));
5823
5824        // Error: Delete FTS index (not currently supported)
5825        let result = Schema::new_default(KnnIndex::Hnsw)
5826            .delete_index(None, IndexConfig::Fts(FtsIndexConfig {}));
5827        assert!(result.is_err());
5828        assert!(matches!(
5829            result.unwrap_err(),
5830            SchemaBuilderError::FtsIndexDeletionNotSupported
5831        ));
5832
5833        // Error: Delete sparse vector index (not currently supported)
5834        let result = Schema::new_default(KnnIndex::Hnsw)
5835            .create_index(
5836                Some("sparse"),
5837                IndexConfig::SparseVector(SparseVectorIndexConfig {
5838                    embedding_function: None,
5839                    source_key: None,
5840                    bm25: None,
5841                }),
5842            )
5843            .expect("create should succeed")
5844            .delete_index(
5845                Some("sparse"),
5846                IndexConfig::SparseVector(SparseVectorIndexConfig {
5847                    embedding_function: None,
5848                    source_key: None,
5849                    bm25: None,
5850                }),
5851            );
5852        assert!(result.is_err());
5853        assert!(matches!(
5854            result.unwrap_err(),
5855            SchemaBuilderError::SparseVectorIndexDeletionNotSupported
5856        ));
5857    }
5858
5859    #[test]
5860    fn test_builder_pattern_chaining() {
5861        // Test complex chaining scenario
5862        let schema = Schema::new_default(KnnIndex::Hnsw)
5863            .create_index(Some("tag1"), StringInvertedIndexConfig {}.into())
5864            .unwrap()
5865            .create_index(Some("tag2"), StringInvertedIndexConfig {}.into())
5866            .unwrap()
5867            .create_index(Some("tag3"), StringInvertedIndexConfig {}.into())
5868            .unwrap()
5869            .create_index(Some("count"), IntInvertedIndexConfig {}.into())
5870            .unwrap()
5871            .delete_index(Some("tag2"), StringInvertedIndexConfig {}.into())
5872            .unwrap()
5873            .create_index(Some("score"), FloatInvertedIndexConfig {}.into())
5874            .unwrap();
5875
5876        // Verify tag1 is enabled
5877        assert!(
5878            schema
5879                .keys
5880                .get("tag1")
5881                .unwrap()
5882                .string
5883                .as_ref()
5884                .unwrap()
5885                .string_inverted_index
5886                .as_ref()
5887                .unwrap()
5888                .enabled
5889        );
5890
5891        // Verify tag2 is disabled
5892        assert!(
5893            !schema
5894                .keys
5895                .get("tag2")
5896                .unwrap()
5897                .string
5898                .as_ref()
5899                .unwrap()
5900                .string_inverted_index
5901                .as_ref()
5902                .unwrap()
5903                .enabled
5904        );
5905
5906        // Verify tag3 is enabled
5907        assert!(
5908            schema
5909                .keys
5910                .get("tag3")
5911                .unwrap()
5912                .string
5913                .as_ref()
5914                .unwrap()
5915                .string_inverted_index
5916                .as_ref()
5917                .unwrap()
5918                .enabled
5919        );
5920
5921        // Verify count is enabled
5922        assert!(
5923            schema
5924                .keys
5925                .get("count")
5926                .unwrap()
5927                .int
5928                .as_ref()
5929                .unwrap()
5930                .int_inverted_index
5931                .as_ref()
5932                .unwrap()
5933                .enabled
5934        );
5935
5936        // Verify score is enabled
5937        assert!(
5938            schema
5939                .keys
5940                .get("score")
5941                .unwrap()
5942                .float
5943                .as_ref()
5944                .unwrap()
5945                .float_inverted_index
5946                .as_ref()
5947                .unwrap()
5948                .enabled
5949        );
5950    }
5951
5952    #[test]
5953    fn test_schema_default_matches_python() {
5954        // Test that Schema::default() matches Python's Schema() behavior exactly
5955        let schema = Schema::default();
5956
5957        // ============================================================================
5958        // VERIFY DEFAULTS (match Python's _initialize_defaults)
5959        // ============================================================================
5960
5961        // String defaults: FTS disabled, string inverted enabled
5962        assert!(schema.defaults.string.is_some());
5963        let string = schema.defaults.string.as_ref().unwrap();
5964        assert!(!string.fts_index.as_ref().unwrap().enabled);
5965        assert!(string.string_inverted_index.as_ref().unwrap().enabled);
5966
5967        // Float list defaults: vector index disabled
5968        assert!(schema.defaults.float_list.is_some());
5969        let float_list = schema.defaults.float_list.as_ref().unwrap();
5970        assert!(!float_list.vector_index.as_ref().unwrap().enabled);
5971        let vector_config = &float_list.vector_index.as_ref().unwrap().config;
5972        assert_eq!(vector_config.space, None); // Python leaves as None
5973        assert_eq!(vector_config.hnsw, None); // Python doesn't specify
5974        assert_eq!(vector_config.spann, None); // Python doesn't specify
5975        assert_eq!(vector_config.source_key, None);
5976
5977        // Sparse vector defaults: disabled
5978        assert!(schema.defaults.sparse_vector.is_some());
5979        let sparse = schema.defaults.sparse_vector.as_ref().unwrap();
5980        assert!(!sparse.sparse_vector_index.as_ref().unwrap().enabled);
5981
5982        // Int defaults: inverted index enabled
5983        assert!(schema.defaults.int.is_some());
5984        assert!(
5985            schema
5986                .defaults
5987                .int
5988                .as_ref()
5989                .unwrap()
5990                .int_inverted_index
5991                .as_ref()
5992                .unwrap()
5993                .enabled
5994        );
5995
5996        // Float defaults: inverted index enabled
5997        assert!(schema.defaults.float.is_some());
5998        assert!(
5999            schema
6000                .defaults
6001                .float
6002                .as_ref()
6003                .unwrap()
6004                .float_inverted_index
6005                .as_ref()
6006                .unwrap()
6007                .enabled
6008        );
6009
6010        // Bool defaults: inverted index enabled
6011        assert!(schema.defaults.boolean.is_some());
6012        assert!(
6013            schema
6014                .defaults
6015                .boolean
6016                .as_ref()
6017                .unwrap()
6018                .bool_inverted_index
6019                .as_ref()
6020                .unwrap()
6021                .enabled
6022        );
6023
6024        // ============================================================================
6025        // VERIFY SPECIAL KEYS (match Python's _initialize_keys)
6026        // ============================================================================
6027
6028        // #document: FTS enabled, string inverted disabled
6029        assert!(schema.keys.contains_key(DOCUMENT_KEY));
6030        let doc = schema.keys.get(DOCUMENT_KEY).unwrap();
6031        assert!(doc.string.is_some());
6032        assert!(
6033            doc.string
6034                .as_ref()
6035                .unwrap()
6036                .fts_index
6037                .as_ref()
6038                .unwrap()
6039                .enabled
6040        );
6041        assert!(
6042            !doc.string
6043                .as_ref()
6044                .unwrap()
6045                .string_inverted_index
6046                .as_ref()
6047                .unwrap()
6048                .enabled
6049        );
6050
6051        // #embedding: vector index enabled with source_key=#document
6052        assert!(schema.keys.contains_key(EMBEDDING_KEY));
6053        let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
6054        assert!(embedding.float_list.is_some());
6055        let vec_idx = embedding
6056            .float_list
6057            .as_ref()
6058            .unwrap()
6059            .vector_index
6060            .as_ref()
6061            .unwrap();
6062        assert!(vec_idx.enabled);
6063        assert_eq!(vec_idx.config.source_key, Some(DOCUMENT_KEY.to_string()));
6064        assert_eq!(vec_idx.config.space, None); // Python leaves as None
6065        assert_eq!(vec_idx.config.hnsw, None); // Python doesn't specify
6066        assert_eq!(vec_idx.config.spann, None); // Python doesn't specify
6067
6068        // Verify only these two special keys exist
6069        assert_eq!(schema.keys.len(), 2);
6070    }
6071
6072    #[test]
6073    fn test_schema_default_works_with_builder() {
6074        // Test that Schema::default() can be used with builder pattern
6075        let schema = Schema::default()
6076            .create_index(Some("category"), StringInvertedIndexConfig {}.into())
6077            .expect("should succeed");
6078
6079        // Verify the new index was added
6080        assert!(schema.keys.contains_key("category"));
6081        assert!(schema.keys.contains_key(DOCUMENT_KEY));
6082        assert!(schema.keys.contains_key(EMBEDDING_KEY));
6083        assert_eq!(schema.keys.len(), 3);
6084    }
6085
6086    #[cfg(feature = "testing")]
6087    mod proptests {
6088        use super::*;
6089        use crate::strategies::{
6090            embedding_function_strategy, internal_collection_configuration_strategy,
6091            internal_hnsw_configuration_strategy, internal_spann_configuration_strategy,
6092            knn_index_strategy, space_strategy, TEST_NAME_PATTERN,
6093        };
6094        use crate::{
6095            HnswIndexConfig, SpannIndexConfig, VectorIndexConfig, DOCUMENT_KEY, EMBEDDING_KEY,
6096        };
6097        use proptest::prelude::*;
6098        use proptest::strategy::BoxedStrategy;
6099        use proptest::string::string_regex;
6100        use serde_json::json;
6101
6102        fn default_embedding_function_strategy(
6103        ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
6104            proptest::option::of(prop_oneof![
6105                Just(EmbeddingFunctionConfiguration::Unknown),
6106                Just(EmbeddingFunctionConfiguration::Known(
6107                    EmbeddingFunctionNewConfiguration {
6108                        name: "default".to_string(),
6109                        config: json!({ "alpha": 1 }),
6110                    }
6111                )),
6112            ])
6113        }
6114
6115        fn sparse_embedding_function_strategy(
6116        ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
6117            let known_strategy = string_regex(TEST_NAME_PATTERN).unwrap().prop_map(|name| {
6118                EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
6119                    name,
6120                    config: json!({ "alpha": 1 }),
6121                })
6122            });
6123
6124            proptest::option::of(prop_oneof![
6125                Just(EmbeddingFunctionConfiguration::Unknown),
6126                known_strategy,
6127            ])
6128        }
6129
6130        fn non_default_internal_collection_configuration_strategy(
6131        ) -> impl Strategy<Value = InternalCollectionConfiguration> {
6132            internal_collection_configuration_strategy()
6133                .prop_filter("non-default configuration", |config| !config.is_default())
6134        }
6135
6136        fn partial_hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
6137            (
6138                proptest::option::of(1usize..=512),
6139                proptest::option::of(1usize..=128),
6140                proptest::option::of(1usize..=512),
6141                proptest::option::of(1usize..=64),
6142                proptest::option::of(2usize..=4096),
6143                proptest::option::of(2usize..=4096),
6144                proptest::option::of(prop_oneof![
6145                    Just(0.5f64),
6146                    Just(1.0f64),
6147                    Just(1.5f64),
6148                    Just(2.0f64)
6149                ]),
6150            )
6151                .prop_map(
6152                    |(
6153                        ef_construction,
6154                        max_neighbors,
6155                        ef_search,
6156                        num_threads,
6157                        batch_size,
6158                        sync_threshold,
6159                        resize_factor,
6160                    )| HnswIndexConfig {
6161                        ef_construction,
6162                        max_neighbors,
6163                        ef_search,
6164                        num_threads,
6165                        batch_size,
6166                        sync_threshold,
6167                        resize_factor,
6168                    },
6169                )
6170        }
6171
6172        fn partial_spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
6173            let epsilon_strategy = prop_oneof![Just(5.0f32), Just(7.5f32), Just(10.0f32)];
6174            (
6175                (
6176                    proptest::option::of(1u32..=128),               // search_nprobe
6177                    proptest::option::of(Just(1.0f32)), // search_rng_factor (must be 1.0)
6178                    proptest::option::of(epsilon_strategy.clone()), // search_rng_epsilon
6179                    proptest::option::of(1u32..=8),     // nreplica_count
6180                    proptest::option::of(Just(1.0f32)), // write_rng_factor (must be 1.0)
6181                    proptest::option::of(epsilon_strategy), // write_rng_epsilon
6182                    proptest::option::of(50u32..=200),  // split_threshold
6183                    proptest::option::of(1usize..=1000), // num_samples_kmeans
6184                ),
6185                (
6186                    proptest::option::of(Just(100.0f32)), // initial_lambda (must be 100.0)
6187                    proptest::option::of(1u32..=64),      // reassign_neighbor_count
6188                    proptest::option::of(25u32..=100),    // merge_threshold
6189                    proptest::option::of(1u32..=8),       // num_centers_to_merge_to
6190                    proptest::option::of(1u32..=64),      // write_nprobe
6191                    proptest::option::of(1usize..=200),   // ef_construction
6192                    proptest::option::of(1usize..=200),   // ef_search
6193                    proptest::option::of(1usize..=64),    // max_neighbors
6194                ),
6195            )
6196                .prop_map(
6197                    |(
6198                        (
6199                            search_nprobe,
6200                            search_rng_factor,
6201                            search_rng_epsilon,
6202                            nreplica_count,
6203                            write_rng_factor,
6204                            write_rng_epsilon,
6205                            split_threshold,
6206                            num_samples_kmeans,
6207                        ),
6208                        (
6209                            initial_lambda,
6210                            reassign_neighbor_count,
6211                            merge_threshold,
6212                            num_centers_to_merge_to,
6213                            write_nprobe,
6214                            ef_construction,
6215                            ef_search,
6216                            max_neighbors,
6217                        ),
6218                    )| SpannIndexConfig {
6219                        search_nprobe,
6220                        search_rng_factor,
6221                        search_rng_epsilon,
6222                        nreplica_count,
6223                        write_rng_factor,
6224                        write_rng_epsilon,
6225                        split_threshold,
6226                        num_samples_kmeans,
6227                        initial_lambda,
6228                        reassign_neighbor_count,
6229                        merge_threshold,
6230                        num_centers_to_merge_to,
6231                        write_nprobe,
6232                        ef_construction,
6233                        ef_search,
6234                        max_neighbors,
6235                        center_drift_threshold: None,
6236                        quantize: false,
6237                    },
6238                )
6239        }
6240
6241        proptest! {
6242            #[test]
6243            fn merge_hnsw_configs_preserves_user_overrides(
6244                base in partial_hnsw_index_config_strategy(),
6245                user in partial_hnsw_index_config_strategy(),
6246            ) {
6247                let merged = Schema::merge_hnsw_configs(Some(&base), Some(&user))
6248                    .expect("merge should return Some when both are Some");
6249
6250                // Property: user values always take precedence when Some
6251                if user.ef_construction.is_some() {
6252                    prop_assert_eq!(merged.ef_construction, user.ef_construction);
6253                }
6254                if user.max_neighbors.is_some() {
6255                    prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
6256                }
6257                if user.ef_search.is_some() {
6258                    prop_assert_eq!(merged.ef_search, user.ef_search);
6259                }
6260                if user.num_threads.is_some() {
6261                    prop_assert_eq!(merged.num_threads, user.num_threads);
6262                }
6263                if user.batch_size.is_some() {
6264                    prop_assert_eq!(merged.batch_size, user.batch_size);
6265                }
6266                if user.sync_threshold.is_some() {
6267                    prop_assert_eq!(merged.sync_threshold, user.sync_threshold);
6268                }
6269                if user.resize_factor.is_some() {
6270                    prop_assert_eq!(merged.resize_factor, user.resize_factor);
6271                }
6272            }
6273
6274            #[test]
6275            fn merge_hnsw_configs_falls_back_to_base_when_user_is_none(
6276                base in partial_hnsw_index_config_strategy(),
6277            ) {
6278                let merged = Schema::merge_hnsw_configs(Some(&base), None)
6279                    .expect("merge should return Some when base is Some");
6280
6281                // Property: when user is None, base values are preserved
6282                prop_assert_eq!(merged, base);
6283            }
6284
6285            #[test]
6286            fn merge_hnsw_configs_returns_user_when_base_is_none(
6287                user in partial_hnsw_index_config_strategy(),
6288            ) {
6289                let merged = Schema::merge_hnsw_configs(None, Some(&user))
6290                    .expect("merge should return Some when user is Some");
6291
6292                // Property: when base is None, user values are preserved
6293                prop_assert_eq!(merged, user);
6294            }
6295
6296            #[test]
6297            fn merge_spann_configs_preserves_user_overrides(
6298                base in partial_spann_index_config_strategy(),
6299                user in partial_spann_index_config_strategy(),
6300            ) {
6301                let merged = Schema::merge_spann_configs(Some(&base), Some(&user))
6302                    .expect("merge should return Ok")
6303                    .expect("merge should return Some when both are Some");
6304
6305                // Property: user values always take precedence when Some
6306                if user.search_nprobe.is_some() {
6307                    prop_assert_eq!(merged.search_nprobe, user.search_nprobe);
6308                }
6309                if user.search_rng_epsilon.is_some() {
6310                    prop_assert_eq!(merged.search_rng_epsilon, user.search_rng_epsilon);
6311                }
6312                if user.split_threshold.is_some() {
6313                    prop_assert_eq!(merged.split_threshold, user.split_threshold);
6314                }
6315                if user.ef_construction.is_some() {
6316                    prop_assert_eq!(merged.ef_construction, user.ef_construction);
6317                }
6318                if user.ef_search.is_some() {
6319                    prop_assert_eq!(merged.ef_search, user.ef_search);
6320                }
6321                if user.max_neighbors.is_some() {
6322                    prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
6323                }
6324            }
6325
6326            #[test]
6327            fn merge_spann_configs_falls_back_to_base_when_user_is_none(
6328                base in partial_spann_index_config_strategy(),
6329            ) {
6330                let merged = Schema::merge_spann_configs(Some(&base), None)
6331                    .expect("merge should return Ok")
6332                    .expect("merge should return Some when base is Some");
6333
6334                // Property: when user is None, base values are preserved
6335                prop_assert_eq!(merged, base);
6336            }
6337
6338            #[test]
6339            fn merge_vector_index_config_preserves_user_overrides(
6340                base in vector_index_config_strategy(),
6341                user in vector_index_config_strategy(),
6342                knn in knn_index_strategy(),
6343            ) {
6344                let merged = Schema::merge_vector_index_config(&base, &user, knn)
6345                    .expect("merge should succeed");
6346
6347                // Property: user values take precedence for top-level fields
6348                if user.space.is_some() {
6349                    prop_assert_eq!(merged.space, user.space);
6350                }
6351                if user.embedding_function.is_some() {
6352                    prop_assert_eq!(merged.embedding_function, user.embedding_function);
6353                }
6354                if user.source_key.is_some() {
6355                    prop_assert_eq!(merged.source_key, user.source_key);
6356                }
6357
6358                // Property: nested configs are merged according to merge rules
6359                match knn {
6360                    KnnIndex::Hnsw => {
6361                        if let (Some(_base_hnsw), Some(user_hnsw)) = (&base.hnsw, &user.hnsw) {
6362                            let merged_hnsw = merged.hnsw.as_ref().expect("hnsw should be Some");
6363                            if user_hnsw.ef_construction.is_some() {
6364                                prop_assert_eq!(merged_hnsw.ef_construction, user_hnsw.ef_construction);
6365                            }
6366                        }
6367                    }
6368                    KnnIndex::Spann => {
6369                        if let (Some(_base_spann), Some(user_spann)) = (&base.spann, &user.spann) {
6370                            let merged_spann = merged.spann.as_ref().expect("spann should be Some");
6371                            if user_spann.search_nprobe.is_some() {
6372                                prop_assert_eq!(merged_spann.search_nprobe, user_spann.search_nprobe);
6373                            }
6374                        }
6375                    }
6376                }
6377            }
6378        }
6379
6380        fn expected_vector_index_config(
6381            config: &InternalCollectionConfiguration,
6382        ) -> VectorIndexConfig {
6383            match &config.vector_index {
6384                VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
6385                    space: Some(hnsw_config.space.clone()),
6386                    embedding_function: config.embedding_function.clone(),
6387                    source_key: None,
6388                    hnsw: Some(HnswIndexConfig {
6389                        ef_construction: Some(hnsw_config.ef_construction),
6390                        max_neighbors: Some(hnsw_config.max_neighbors),
6391                        ef_search: Some(hnsw_config.ef_search),
6392                        num_threads: Some(hnsw_config.num_threads),
6393                        batch_size: Some(hnsw_config.batch_size),
6394                        sync_threshold: Some(hnsw_config.sync_threshold),
6395                        resize_factor: Some(hnsw_config.resize_factor),
6396                    }),
6397                    spann: None,
6398                },
6399                VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
6400                    space: Some(spann_config.space.clone()),
6401                    embedding_function: config.embedding_function.clone(),
6402                    source_key: None,
6403                    hnsw: None,
6404                    spann: Some(SpannIndexConfig {
6405                        search_nprobe: Some(spann_config.search_nprobe),
6406                        search_rng_factor: Some(spann_config.search_rng_factor),
6407                        search_rng_epsilon: Some(spann_config.search_rng_epsilon),
6408                        nreplica_count: Some(spann_config.nreplica_count),
6409                        write_rng_factor: Some(spann_config.write_rng_factor),
6410                        write_rng_epsilon: Some(spann_config.write_rng_epsilon),
6411                        split_threshold: Some(spann_config.split_threshold),
6412                        num_samples_kmeans: Some(spann_config.num_samples_kmeans),
6413                        initial_lambda: Some(spann_config.initial_lambda),
6414                        reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
6415                        merge_threshold: Some(spann_config.merge_threshold),
6416                        num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
6417                        write_nprobe: Some(spann_config.write_nprobe),
6418                        ef_construction: Some(spann_config.ef_construction),
6419                        ef_search: Some(spann_config.ef_search),
6420                        max_neighbors: Some(spann_config.max_neighbors),
6421                        center_drift_threshold: None,
6422                        quantize: false,
6423                    }),
6424                },
6425            }
6426        }
6427
6428        fn non_special_key_strategy() -> BoxedStrategy<String> {
6429            string_regex(TEST_NAME_PATTERN)
6430                .unwrap()
6431                .prop_filter("exclude special keys", |key| {
6432                    key != DOCUMENT_KEY && key != EMBEDDING_KEY
6433                })
6434                .boxed()
6435        }
6436
6437        fn source_key_strategy() -> BoxedStrategy<Option<String>> {
6438            proptest::option::of(prop_oneof![
6439                Just(DOCUMENT_KEY.to_string()),
6440                string_regex(TEST_NAME_PATTERN).unwrap(),
6441            ])
6442            .boxed()
6443        }
6444
6445        fn fts_index_type_strategy() -> impl Strategy<Value = FtsIndexType> {
6446            any::<bool>().prop_map(|enabled| FtsIndexType {
6447                enabled,
6448                config: FtsIndexConfig {},
6449            })
6450        }
6451
6452        fn string_inverted_index_type_strategy() -> impl Strategy<Value = StringInvertedIndexType> {
6453            any::<bool>().prop_map(|enabled| StringInvertedIndexType {
6454                enabled,
6455                config: StringInvertedIndexConfig {},
6456            })
6457        }
6458
6459        fn string_value_type_strategy() -> BoxedStrategy<Option<StringValueType>> {
6460            proptest::option::of(
6461                (
6462                    proptest::option::of(string_inverted_index_type_strategy()),
6463                    proptest::option::of(fts_index_type_strategy()),
6464                )
6465                    .prop_map(|(string_inverted_index, fts_index)| {
6466                        StringValueType {
6467                            string_inverted_index,
6468                            fts_index,
6469                        }
6470                    }),
6471            )
6472            .boxed()
6473        }
6474
6475        fn float_inverted_index_type_strategy() -> impl Strategy<Value = FloatInvertedIndexType> {
6476            any::<bool>().prop_map(|enabled| FloatInvertedIndexType {
6477                enabled,
6478                config: FloatInvertedIndexConfig {},
6479            })
6480        }
6481
6482        fn float_value_type_strategy() -> BoxedStrategy<Option<FloatValueType>> {
6483            proptest::option::of(
6484                proptest::option::of(float_inverted_index_type_strategy()).prop_map(
6485                    |float_inverted_index| FloatValueType {
6486                        float_inverted_index,
6487                    },
6488                ),
6489            )
6490            .boxed()
6491        }
6492
6493        fn int_inverted_index_type_strategy() -> impl Strategy<Value = IntInvertedIndexType> {
6494            any::<bool>().prop_map(|enabled| IntInvertedIndexType {
6495                enabled,
6496                config: IntInvertedIndexConfig {},
6497            })
6498        }
6499
6500        fn int_value_type_strategy() -> BoxedStrategy<Option<IntValueType>> {
6501            proptest::option::of(
6502                proptest::option::of(int_inverted_index_type_strategy())
6503                    .prop_map(|int_inverted_index| IntValueType { int_inverted_index }),
6504            )
6505            .boxed()
6506        }
6507
6508        fn bool_inverted_index_type_strategy() -> impl Strategy<Value = BoolInvertedIndexType> {
6509            any::<bool>().prop_map(|enabled| BoolInvertedIndexType {
6510                enabled,
6511                config: BoolInvertedIndexConfig {},
6512            })
6513        }
6514
6515        fn bool_value_type_strategy() -> BoxedStrategy<Option<BoolValueType>> {
6516            proptest::option::of(
6517                proptest::option::of(bool_inverted_index_type_strategy()).prop_map(
6518                    |bool_inverted_index| BoolValueType {
6519                        bool_inverted_index,
6520                    },
6521                ),
6522            )
6523            .boxed()
6524        }
6525
6526        fn sparse_vector_index_config_strategy() -> impl Strategy<Value = SparseVectorIndexConfig> {
6527            (
6528                sparse_embedding_function_strategy(),
6529                source_key_strategy(),
6530                proptest::option::of(any::<bool>()),
6531            )
6532                .prop_map(|(embedding_function, source_key, bm25)| {
6533                    SparseVectorIndexConfig {
6534                        embedding_function,
6535                        source_key,
6536                        bm25,
6537                    }
6538                })
6539        }
6540
6541        fn sparse_vector_value_type_strategy() -> BoxedStrategy<Option<SparseVectorValueType>> {
6542            proptest::option::of(
6543                (
6544                    any::<bool>(),
6545                    proptest::option::of(sparse_vector_index_config_strategy()),
6546                )
6547                    .prop_map(|(enabled, config)| SparseVectorValueType {
6548                        sparse_vector_index: config.map(|cfg| SparseVectorIndexType {
6549                            enabled,
6550                            config: cfg,
6551                        }),
6552                    }),
6553            )
6554            .boxed()
6555        }
6556
6557        fn hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
6558            internal_hnsw_configuration_strategy().prop_map(|config| HnswIndexConfig {
6559                ef_construction: Some(config.ef_construction),
6560                max_neighbors: Some(config.max_neighbors),
6561                ef_search: Some(config.ef_search),
6562                num_threads: Some(config.num_threads),
6563                batch_size: Some(config.batch_size),
6564                sync_threshold: Some(config.sync_threshold),
6565                resize_factor: Some(config.resize_factor),
6566            })
6567        }
6568
6569        fn spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
6570            internal_spann_configuration_strategy().prop_map(|config| SpannIndexConfig {
6571                search_nprobe: Some(config.search_nprobe),
6572                search_rng_factor: Some(config.search_rng_factor),
6573                search_rng_epsilon: Some(config.search_rng_epsilon),
6574                nreplica_count: Some(config.nreplica_count),
6575                write_rng_factor: Some(config.write_rng_factor),
6576                write_rng_epsilon: Some(config.write_rng_epsilon),
6577                split_threshold: Some(config.split_threshold),
6578                num_samples_kmeans: Some(config.num_samples_kmeans),
6579                initial_lambda: Some(config.initial_lambda),
6580                reassign_neighbor_count: Some(config.reassign_neighbor_count),
6581                merge_threshold: Some(config.merge_threshold),
6582                num_centers_to_merge_to: Some(config.num_centers_to_merge_to),
6583                write_nprobe: Some(config.write_nprobe),
6584                ef_construction: Some(config.ef_construction),
6585                ef_search: Some(config.ef_search),
6586                max_neighbors: Some(config.max_neighbors),
6587                center_drift_threshold: None,
6588                quantize: false,
6589            })
6590        }
6591
6592        fn vector_index_config_strategy() -> impl Strategy<Value = VectorIndexConfig> {
6593            (
6594                proptest::option::of(space_strategy()),
6595                embedding_function_strategy(),
6596                source_key_strategy(),
6597                proptest::option::of(hnsw_index_config_strategy()),
6598                proptest::option::of(spann_index_config_strategy()),
6599            )
6600                .prop_map(|(space, embedding_function, source_key, hnsw, spann)| {
6601                    VectorIndexConfig {
6602                        space,
6603                        embedding_function,
6604                        source_key,
6605                        hnsw,
6606                        spann,
6607                    }
6608                })
6609        }
6610
6611        fn vector_index_type_strategy() -> impl Strategy<Value = VectorIndexType> {
6612            (any::<bool>(), vector_index_config_strategy())
6613                .prop_map(|(enabled, config)| VectorIndexType { enabled, config })
6614        }
6615
6616        fn float_list_value_type_strategy() -> BoxedStrategy<Option<FloatListValueType>> {
6617            proptest::option::of(
6618                proptest::option::of(vector_index_type_strategy())
6619                    .prop_map(|vector_index| FloatListValueType { vector_index }),
6620            )
6621            .boxed()
6622        }
6623
6624        fn value_types_strategy() -> BoxedStrategy<ValueTypes> {
6625            (
6626                string_value_type_strategy(),
6627                float_list_value_type_strategy(),
6628                sparse_vector_value_type_strategy(),
6629                int_value_type_strategy(),
6630                float_value_type_strategy(),
6631                bool_value_type_strategy(),
6632            )
6633                .prop_map(
6634                    |(string, float_list, sparse_vector, int, float, boolean)| ValueTypes {
6635                        string,
6636                        float_list,
6637                        sparse_vector,
6638                        int,
6639                        float,
6640                        boolean,
6641                    },
6642                )
6643                .boxed()
6644        }
6645
6646        fn schema_strategy() -> BoxedStrategy<Schema> {
6647            (
6648                value_types_strategy(),
6649                proptest::collection::hash_map(
6650                    non_special_key_strategy(),
6651                    value_types_strategy(),
6652                    0..=3,
6653                ),
6654                proptest::option::of(value_types_strategy()),
6655                proptest::option::of(value_types_strategy()),
6656            )
6657                .prop_map(
6658                    |(defaults, mut extra_keys, document_override, embedding_override)| {
6659                        if let Some(doc) = document_override {
6660                            extra_keys.insert(DOCUMENT_KEY.to_string(), doc);
6661                        }
6662                        if let Some(embed) = embedding_override {
6663                            extra_keys.insert(EMBEDDING_KEY.to_string(), embed);
6664                        }
6665                        Schema {
6666                            defaults,
6667                            keys: extra_keys,
6668                            cmek: None,
6669                            source_attached_function_id: None,
6670                        }
6671                    },
6672                )
6673                .boxed()
6674        }
6675
6676        fn force_non_default_schema(mut schema: Schema) -> Schema {
6677            if schema.is_default() {
6678                if let Some(string_value) = schema
6679                    .defaults
6680                    .string
6681                    .as_mut()
6682                    .and_then(|string_value| string_value.string_inverted_index.as_mut())
6683                {
6684                    string_value.enabled = !string_value.enabled;
6685                } else {
6686                    schema.defaults.string = Some(StringValueType {
6687                        string_inverted_index: Some(StringInvertedIndexType {
6688                            enabled: false,
6689                            config: StringInvertedIndexConfig {},
6690                        }),
6691                        fts_index: None,
6692                    });
6693                }
6694            }
6695            schema
6696        }
6697
6698        fn non_default_schema_strategy() -> BoxedStrategy<Schema> {
6699            schema_strategy().prop_map(force_non_default_schema).boxed()
6700        }
6701
6702        fn extract_vector_configs(schema: &Schema) -> (VectorIndexConfig, VectorIndexConfig) {
6703            let defaults = schema
6704                .defaults
6705                .float_list
6706                .as_ref()
6707                .and_then(|fl| fl.vector_index.as_ref())
6708                .map(|vi| vi.config.clone())
6709                .expect("defaults vector index missing");
6710
6711            let embedding = schema
6712                .keys
6713                .get(EMBEDDING_KEY)
6714                .and_then(|value_types| value_types.float_list.as_ref())
6715                .and_then(|fl| fl.vector_index.as_ref())
6716                .map(|vi| vi.config.clone())
6717                .expect("#embedding vector index missing");
6718
6719            (defaults, embedding)
6720        }
6721
6722        proptest! {
6723            #[test]
6724            fn reconcile_schema_and_config_matches_convert_for_config_only(
6725                config in internal_collection_configuration_strategy(),
6726                knn in knn_index_strategy(),
6727            ) {
6728                let result = Schema::reconcile_schema_and_config(None, Some(&config), knn)
6729                    .expect("reconciliation should succeed");
6730
6731                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6732                let expected_config = expected_vector_index_config(&config);
6733
6734                prop_assert_eq!(defaults_vi, expected_config.clone());
6735
6736                let mut expected_embedding_config = expected_config;
6737                expected_embedding_config.source_key = Some(DOCUMENT_KEY.to_string());
6738                prop_assert_eq!(embedding_vi, expected_embedding_config);
6739
6740                prop_assert_eq!(result.keys.len(), 2);
6741            }
6742        }
6743
6744        proptest! {
6745            #[test]
6746            fn reconcile_schema_and_config_errors_when_both_non_default(
6747                config in non_default_internal_collection_configuration_strategy(),
6748                knn in knn_index_strategy(),
6749            ) {
6750                let schema = Schema::try_from(&config)
6751                    .expect("conversion should succeed");
6752                prop_assume!(!schema.is_default());
6753
6754                let result = Schema::reconcile_schema_and_config(Some(&schema), Some(&config), knn);
6755
6756                prop_assert!(matches!(result, Err(SchemaError::ConfigAndSchemaConflict)));
6757            }
6758        }
6759
6760        proptest! {
                // Property: when only a schema is supplied (the configuration argument is `None`),
                // reconciliation succeeds and the values the schema sets explicitly show up in the
                // reconciled vector-index configs.
6761            #[test]
6762            fn reconcile_schema_and_config_matches_schema_only_path(
6763                schema in schema_strategy(),
6764                knn in knn_index_strategy(),
6765            ) {
6766                let result = Schema::reconcile_schema_and_config(Some(&schema), None, knn)
6767                    .expect("reconciliation should succeed");
6768
                    // NOTE(review): `extract_vector_configs` is defined outside this block; it
                    // presumably returns the vector-index config of `defaults` and of the
                    // `#embedding` key, in that order -- confirm against the helper.
6769                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6770
6771                // Property: schema defaults.float_list vector_index config should be merged into defaults
6772                if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
6773                    if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
6774                        // Property: schema values take precedence over defaults
                            // Fields the schema leaves as `None` are not asserted on at all.
6775                        if let Some(schema_space) = &schema_vi.config.space {
6776                            prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
6777                        }
6778                        if let Some(schema_ef) = &schema_vi.config.embedding_function {
6779                            prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
6780                        }
6781                        // Test nested config merging properties
                            // Only the nested section matching the generated index type is inspected,
                            // and only one representative field of it (`ef_construction` for HNSW,
                            // `search_nprobe` for SPANN).
                            // NOTE(review): if the reconciled config has no `hnsw` / `spann` section,
                            // the branch asserts nothing and the case passes silently.
6782                        match knn {
6783                            KnnIndex::Hnsw => {
6784                                if let Some(schema_hnsw) = &schema_vi.config.hnsw {
6785                                    if let Some(merged_hnsw) = &defaults_vi.hnsw {
6786                                        if let Some(schema_ef_construction) = schema_hnsw.ef_construction {
6787                                            prop_assert_eq!(merged_hnsw.ef_construction, Some(schema_ef_construction));
6788                                        }
6789                                    }
6790                                }
6791                            }
6792                            KnnIndex::Spann => {
6793                                if let Some(schema_spann) = &schema_vi.config.spann {
6794                                    if let Some(merged_spann) = &defaults_vi.spann {
6795                                        if let Some(schema_search_nprobe) = schema_spann.search_nprobe {
6796                                            prop_assert_eq!(merged_spann.search_nprobe, Some(schema_search_nprobe));
6797                                        }
6798                                    }
6799                                }
6800                            }
6801                        }
6802                    }
6803                }
6804
6805                // Property: schema #embedding float_list vector_index config should be merged into embedding
                    // For the `EMBEDDING_KEY` entry only `space` is compared.
6806                if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
6807                    if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
6808                        if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
6809                            if let Some(schema_space) = &embedding_vi_type.config.space {
6810                                prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
6811                            }
6812                        }
6813                    }
6814                }
6815            }
6816        }
6817
6818        proptest! {
                // Property: reconciling the default schema with a configuration that is default
                // except for its `embedding_function` succeeds, and that embedding function (or its
                // absence) is what both extracted vector-index configs report.
6819            #[test]
6820            fn reconcile_schema_and_config_with_default_schema_and_default_config_applies_embedding_function(
6821                embedding_function in default_embedding_function_strategy(),
6822                knn in knn_index_strategy(),
6823            ) {
6824                let schema = Schema::new_default(knn);
                    // Start from the default configuration that matches the generated index type.
6825                let mut config = match knn {
6826                    KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
6827                    KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
6828                };
                    // Cloned because the generated value is consumed by the `if let` further down.
6829                config.embedding_function = embedding_function.clone();
6830
6831                let result = Schema::reconcile_schema_and_config(
6832                    Some(&schema),
6833                    Some(&config),
6834                    knn,
6835                )
6836                .expect("reconciliation should succeed");
6837
                    // NOTE(review): `extract_vector_configs` is defined outside this block; it
                    // presumably returns the vector-index config of `defaults` and of the
                    // `#embedding` key, in that order -- confirm against the helper.
6838                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6839
6840                // Property: embedding function from config should be applied to both defaults and embedding
6841                if let Some(ef) = embedding_function {
6842                    prop_assert_eq!(defaults_vi.embedding_function, Some(ef.clone()));
6843                    prop_assert_eq!(embedding_vi.embedding_function, Some(ef));
6844                } else {
6845                    // Property: when embedding function is None, it should remain None
6846                    prop_assert_eq!(defaults_vi.embedding_function, None);
6847                    prop_assert_eq!(embedding_vi.embedding_function, None);
6848                }
6849            }
6850        }
6851
6852        proptest! {
                // Property: pairing a schema drawn from `non_default_schema_strategy()` with the
                // default configuration for the index type reconciles successfully, and the values
                // the schema sets explicitly are kept in the result.
6853            #[test]
6854            fn reconcile_schema_and_config_with_default_config_keeps_non_default_schema(
6855                schema in non_default_schema_strategy(),
6856                knn in knn_index_strategy(),
6857            ) {
                    // Use the untouched default configuration that matches the generated index type.
6858                let default_config = match knn {
6859                    KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
6860                    KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
6861                };
6862
6863                let result = Schema::reconcile_schema_and_config(
6864                    Some(&schema),
6865                    Some(&default_config),
6866                    knn,
6867                )
6868                .expect("reconciliation should succeed");
6869
                    // NOTE(review): `extract_vector_configs` is defined outside this block; it
                    // presumably returns the vector-index config of `defaults` and of the
                    // `#embedding` key, in that order -- confirm against the helper.
6870                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6871
6872                // Property: when config is default, schema values should be preserved
6873                // Test that schema defaults.float_list vector_index config is applied
                    // Only `space` and `embedding_function` are compared here; fields the schema
                    // leaves as `None` are not asserted on.
6874                if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
6875                    if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
6876                        if let Some(schema_space) = &schema_vi.config.space {
6877                            prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
6878                        }
6879                        if let Some(schema_ef) = &schema_vi.config.embedding_function {
6880                            prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
6881                        }
6882                    }
6883                }
6884
6885                // Property: schema #embedding float_list vector_index config should be applied
                    // For the `EMBEDDING_KEY` entry only `space` is compared.
6886                if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
6887                    if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
6888                        if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
6889                            if let Some(schema_space) = &embedding_vi_type.config.space {
6890                                prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
6891                            }
6892                        }
6893                    }
6894                }
6895            }
6896        }
6897    }
6898}