chroma_types/
collection_schema.rs

1use chroma_error::{ChromaError, ErrorCodes};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use thiserror::Error;
5use validator::Validate;
6
7use crate::collection_configuration::{
8    EmbeddingFunctionConfiguration, InternalCollectionConfiguration, VectorIndexConfiguration,
9};
10use crate::hnsw_configuration::Space;
11use crate::metadata::{MetadataComparison, MetadataValueType, Where};
12use crate::operator::QueryVector;
13use crate::{
14    default_batch_size, default_construction_ef, default_construction_ef_spann,
15    default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
16    default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
17    default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
18    default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
19    default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
20    default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor,
21    InternalSpannConfiguration, KnnIndex,
22};
23
24impl ChromaError for SchemaError {
25    fn code(&self) -> ErrorCodes {
26        ErrorCodes::Internal
27    }
28}
29
30#[derive(Debug, Error)]
31pub enum SchemaError {
32    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
33    MissingIndexConfiguration { key: String, value_type: String },
34    #[error("Schema reconciliation failed: {reason}")]
35    InvalidSchema { reason: String },
36    #[error("Cannot set both collection config and schema simultaneously")]
37    ConfigAndSchemaConflict,
38    #[error("Cannot merge schemas with differing defaults")]
39    DefaultsMismatch,
40    #[error("Conflicting configuration for {context}")]
41    ConfigurationConflict { context: String },
42    #[error("Invalid HNSW configuration: {0}")]
43    InvalidHnswConfig(validator::ValidationErrors),
44    #[error("Invalid SPANN configuration: {0}")]
45    InvalidSpannConfig(validator::ValidationErrors),
46    #[error(transparent)]
47    Builder(#[from] SchemaBuilderError),
48}
49
50#[derive(Debug, Error)]
51pub enum SchemaBuilderError {
52    #[error("Vector index must be configured globally using create_index(None, config), not on specific key '{key}'")]
53    VectorIndexMustBeGlobal { key: String },
54    #[error("FTS index must be configured globally using create_index(None, config), not on specific key '{key}'")]
55    FtsIndexMustBeGlobal { key: String },
56    #[error("Cannot modify special key '{key}' - it is managed automatically by the system. To customize vector search, modify the global vector config instead.")]
57    SpecialKeyModificationNotAllowed { key: String },
58    #[error("Sparse vector index requires a specific key. Use create_index(Some(\"key_name\"), config) instead of create_index(None, config)")]
59    SparseVectorRequiresKey,
60    #[error("Only one sparse vector index allowed per collection. Key '{existing_key}' already has a sparse vector index. Remove it first or use that key.")]
61    MultipleSparseVectorIndexes { existing_key: String },
62    #[error("Vector index deletion not supported. The vector index is always enabled on #embedding. To disable vector search, disable the collection instead.")]
63    VectorIndexDeletionNotSupported,
64    #[error("FTS index deletion not supported. The FTS index is always enabled on #document. To disable full-text search, use a different collection without FTS.")]
65    FtsIndexDeletionNotSupported,
66    #[error("Sparse vector index deletion not supported yet. Sparse vector indexes cannot be removed once created.")]
67    SparseVectorIndexDeletionNotSupported,
68}
69
70#[derive(Debug, Error)]
71pub enum FilterValidationError {
72    #[error(
73        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
74    )]
75    IndexingDisabled {
76        key: String,
77        value_type: MetadataValueType,
78    },
79    #[error(transparent)]
80    Schema(#[from] SchemaError),
81}
82
83impl ChromaError for SchemaBuilderError {
84    fn code(&self) -> ErrorCodes {
85        ErrorCodes::InvalidArgument
86    }
87}
88
89impl ChromaError for FilterValidationError {
90    fn code(&self) -> ErrorCodes {
91        match self {
92            FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
93            FilterValidationError::Schema(_) => ErrorCodes::Internal,
94        }
95    }
96}
97
// ============================================================================
// SCHEMA CONSTANTS
// ============================================================================
// These constants must match the Python constants in chromadb/api/types.py

// Value type name constants

/// Name of the string value type.
pub const STRING_VALUE_NAME: &str = "string";
/// Name of the integer value type.
pub const INT_VALUE_NAME: &str = "int";
/// Name of the boolean value type.
pub const BOOL_VALUE_NAME: &str = "bool";
/// Name of the float value type.
pub const FLOAT_VALUE_NAME: &str = "float";
/// Name of the float list value type.
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
/// Name of the sparse vector value type.
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// Index type name constants

/// Name of the full-text search index.
pub const FTS_INDEX_NAME: &str = "fts_index";
/// Name of the vector index.
pub const VECTOR_INDEX_NAME: &str = "vector_index";
/// Name of the sparse vector index.
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
/// Name of the string inverted index.
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
/// Name of the integer inverted index.
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
/// Name of the float inverted index.
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
/// Name of the boolean inverted index.
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

// Special metadata keys - must match Python constants in chromadb/api/types.py

/// Special key under which the document override is stored.
pub const DOCUMENT_KEY: &str = "#document";
/// Special key under which the embedding override is stored.
pub const EMBEDDING_KEY: &str = "#embedding";
123
124// ============================================================================
125// SCHEMA STRUCTURES
126// ============================================================================
127
128/// Schema representation for collection index configurations
129///
130/// This represents the server-side schema structure used for index management
131
132#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
133#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
134pub struct Schema {
135    /// Default index configurations for each value type
136    pub defaults: ValueTypes,
137    /// Key-specific index overrides
138    /// TODO(Sanket): Needed for backwards compatibility. Should remove after deploy.
139    #[serde(rename = "keys", alias = "key_overrides")]
140    pub keys: HashMap<String, ValueTypes>,
141}
142
143impl Default for Schema {
144    /// Create a default Schema that matches Python's behavior exactly.
145    ///
146    /// Python creates a Schema with:
147    /// - All inverted indexes enabled by default (string, int, float, bool)
148    /// - Vector and FTS indexes disabled in defaults
149    /// - Special keys configured: #document (FTS enabled) and #embedding (vector enabled)
150    /// - Vector config has space=None, hnsw=None, spann=None (deferred to backend)
151    ///
152    /// # Examples
153    /// ```
154    /// use chroma_types::Schema;
155    ///
156    /// let schema = Schema::default();
157    /// assert!(schema.keys.contains_key("#document"));
158    /// assert!(schema.keys.contains_key("#embedding"));
159    /// ```
160    fn default() -> Self {
161        // Initialize defaults - match Python's _initialize_defaults()
162        let defaults = ValueTypes {
163            string: Some(StringValueType {
164                fts_index: Some(FtsIndexType {
165                    enabled: false,
166                    config: FtsIndexConfig {},
167                }),
168                string_inverted_index: Some(StringInvertedIndexType {
169                    enabled: true,
170                    config: StringInvertedIndexConfig {},
171                }),
172            }),
173            float_list: Some(FloatListValueType {
174                vector_index: Some(VectorIndexType {
175                    enabled: false,
176                    config: VectorIndexConfig {
177                        space: None, // Python leaves as None (resolved on serialization)
178                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
179                        source_key: None,
180                        hnsw: None,  // Python doesn't specify
181                        spann: None, // Python doesn't specify
182                    },
183                }),
184            }),
185            sparse_vector: Some(SparseVectorValueType {
186                sparse_vector_index: Some(SparseVectorIndexType {
187                    enabled: false,
188                    config: SparseVectorIndexConfig {
189                        embedding_function: None,
190                        source_key: None,
191                        bm25: None,
192                    },
193                }),
194            }),
195            int: Some(IntValueType {
196                int_inverted_index: Some(IntInvertedIndexType {
197                    enabled: true,
198                    config: IntInvertedIndexConfig {},
199                }),
200            }),
201            float: Some(FloatValueType {
202                float_inverted_index: Some(FloatInvertedIndexType {
203                    enabled: true,
204                    config: FloatInvertedIndexConfig {},
205                }),
206            }),
207            boolean: Some(BoolValueType {
208                bool_inverted_index: Some(BoolInvertedIndexType {
209                    enabled: true,
210                    config: BoolInvertedIndexConfig {},
211                }),
212            }),
213        };
214
215        // Initialize key-specific overrides - match Python's _initialize_keys()
216        let mut keys = HashMap::new();
217
218        // #document: FTS enabled, string inverted disabled
219        keys.insert(
220            DOCUMENT_KEY.to_string(),
221            ValueTypes {
222                string: Some(StringValueType {
223                    fts_index: Some(FtsIndexType {
224                        enabled: true,
225                        config: FtsIndexConfig {},
226                    }),
227                    string_inverted_index: Some(StringInvertedIndexType {
228                        enabled: false,
229                        config: StringInvertedIndexConfig {},
230                    }),
231                }),
232                ..Default::default()
233            },
234        );
235
236        // #embedding: Vector index enabled with source_key=#document
237        keys.insert(
238            EMBEDDING_KEY.to_string(),
239            ValueTypes {
240                float_list: Some(FloatListValueType {
241                    vector_index: Some(VectorIndexType {
242                        enabled: true,
243                        config: VectorIndexConfig {
244                            space: None, // Python leaves as None (resolved on serialization)
245                            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
246                            source_key: Some(DOCUMENT_KEY.to_string()),
247                            hnsw: None,  // Python doesn't specify
248                            spann: None, // Python doesn't specify
249                        },
250                    }),
251                }),
252                ..Default::default()
253            },
254        );
255
256        Schema { defaults, keys }
257    }
258}
259
260pub fn is_embedding_function_default(
261    embedding_function: &Option<EmbeddingFunctionConfiguration>,
262) -> bool {
263    match embedding_function {
264        None => true,
265        Some(embedding_function) => embedding_function.is_default(),
266    }
267}
268
269/// Check if space is default (None means default, or if present, should be default space)
270pub fn is_space_default(space: &Option<Space>) -> bool {
271    match space {
272        None => true,                     // None means default
273        Some(s) => *s == default_space(), // If present, check if it's the default space
274    }
275}
276
277/// Check if HNSW config is default
278pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
279    hnsw_config.ef_construction == Some(default_construction_ef())
280        && hnsw_config.ef_search == Some(default_search_ef())
281        && hnsw_config.max_neighbors == Some(default_m())
282        && hnsw_config.num_threads == Some(default_num_threads())
283        && hnsw_config.batch_size == Some(default_batch_size())
284        && hnsw_config.sync_threshold == Some(default_sync_threshold())
285        && hnsw_config.resize_factor == Some(default_resize_factor())
286}
287
288// ============================================================================
289// NEW STRONGLY-TYPED SCHEMA STRUCTURES
290// ============================================================================
291
292/// Strongly-typed value type configurations
293/// Contains optional configurations for each supported value type
294#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
295#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
296pub struct ValueTypes {
297    #[serde(
298        rename = "string",
299        alias = "#string",
300        skip_serializing_if = "Option::is_none"
301    )] // STRING_VALUE_NAME
302    pub string: Option<StringValueType>,
303
304    #[serde(
305        rename = "float_list",
306        alias = "#float_list",
307        skip_serializing_if = "Option::is_none"
308    )]
309    // FLOAT_LIST_VALUE_NAME
310    pub float_list: Option<FloatListValueType>,
311
312    #[serde(
313        rename = "sparse_vector",
314        alias = "#sparse_vector",
315        skip_serializing_if = "Option::is_none"
316    )]
317    // SPARSE_VECTOR_VALUE_NAME
318    pub sparse_vector: Option<SparseVectorValueType>,
319
320    #[serde(
321        rename = "int",
322        alias = "#int",
323        skip_serializing_if = "Option::is_none"
324    )] // INT_VALUE_NAME
325    pub int: Option<IntValueType>,
326
327    #[serde(
328        rename = "float",
329        alias = "#float",
330        skip_serializing_if = "Option::is_none"
331    )] // FLOAT_VALUE_NAME
332    pub float: Option<FloatValueType>,
333
334    #[serde(
335        rename = "bool",
336        alias = "#bool",
337        skip_serializing_if = "Option::is_none"
338    )] // BOOL_VALUE_NAME
339    pub boolean: Option<BoolValueType>,
340}
341
342/// String value type index configurations
343#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
344#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
345pub struct StringValueType {
346    #[serde(
347        rename = "fts_index",
348        alias = "$fts_index",
349        skip_serializing_if = "Option::is_none"
350    )] // FTS_INDEX_NAME
351    pub fts_index: Option<FtsIndexType>,
352
353    #[serde(
354        rename = "string_inverted_index", // STRING_INVERTED_INDEX_NAME
355        alias = "$string_inverted_index",
356        skip_serializing_if = "Option::is_none"
357    )]
358    pub string_inverted_index: Option<StringInvertedIndexType>,
359}
360
361/// Float list value type index configurations (for vectors)
362#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
363#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
364pub struct FloatListValueType {
365    #[serde(
366        rename = "vector_index",
367        alias = "$vector_index",
368        skip_serializing_if = "Option::is_none"
369    )] // VECTOR_INDEX_NAME
370    pub vector_index: Option<VectorIndexType>,
371}
372
373/// Sparse vector value type index configurations
374#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
375#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
376pub struct SparseVectorValueType {
377    #[serde(
378        rename = "sparse_vector_index", // SPARSE_VECTOR_INDEX_NAME
379        alias = "$sparse_vector_index",
380        skip_serializing_if = "Option::is_none"
381    )]
382    pub sparse_vector_index: Option<SparseVectorIndexType>,
383}
384
385/// Integer value type index configurations
386#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
387#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
388pub struct IntValueType {
389    #[serde(
390        rename = "int_inverted_index",
391        alias = "$int_inverted_index",
392        skip_serializing_if = "Option::is_none"
393    )]
394    // INT_INVERTED_INDEX_NAME
395    pub int_inverted_index: Option<IntInvertedIndexType>,
396}
397
398/// Float value type index configurations
399#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
400#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
401pub struct FloatValueType {
402    #[serde(
403        rename = "float_inverted_index", // FLOAT_INVERTED_INDEX_NAME
404        alias = "$float_inverted_index",
405        skip_serializing_if = "Option::is_none"
406    )]
407    pub float_inverted_index: Option<FloatInvertedIndexType>,
408}
409
410/// Boolean value type index configurations
411#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
412#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
413pub struct BoolValueType {
414    #[serde(
415        rename = "bool_inverted_index", // BOOL_INVERTED_INDEX_NAME
416        alias = "$bool_inverted_index",
417        skip_serializing_if = "Option::is_none"
418    )]
419    pub bool_inverted_index: Option<BoolInvertedIndexType>,
420}
421
422// Individual index type structs with enabled status and config
423#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
424#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
425pub struct FtsIndexType {
426    pub enabled: bool,
427    pub config: FtsIndexConfig,
428}
429
430#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
431#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
432pub struct VectorIndexType {
433    pub enabled: bool,
434    pub config: VectorIndexConfig,
435}
436
437#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
438#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
439pub struct SparseVectorIndexType {
440    pub enabled: bool,
441    pub config: SparseVectorIndexConfig,
442}
443
444#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
445#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
446pub struct StringInvertedIndexType {
447    pub enabled: bool,
448    pub config: StringInvertedIndexConfig,
449}
450
451#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
452#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
453pub struct IntInvertedIndexType {
454    pub enabled: bool,
455    pub config: IntInvertedIndexConfig,
456}
457
458#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
459#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
460pub struct FloatInvertedIndexType {
461    pub enabled: bool,
462    pub config: FloatInvertedIndexConfig,
463}
464
465#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
466#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
467pub struct BoolInvertedIndexType {
468    pub enabled: bool,
469    pub config: BoolInvertedIndexConfig,
470}
471
472impl Schema {
473    /// Create a new Schema with strongly-typed default configurations
474    pub fn new_default(default_knn_index: KnnIndex) -> Self {
475        // Vector index disabled on all keys except #embedding.
476        let vector_config = VectorIndexType {
477            enabled: false,
478            config: VectorIndexConfig {
479                space: Some(default_space()),
480                embedding_function: None,
481                source_key: None,
482                hnsw: match default_knn_index {
483                    KnnIndex::Hnsw => Some(HnswIndexConfig {
484                        ef_construction: Some(default_construction_ef()),
485                        max_neighbors: Some(default_m()),
486                        ef_search: Some(default_search_ef()),
487                        num_threads: Some(default_num_threads()),
488                        batch_size: Some(default_batch_size()),
489                        sync_threshold: Some(default_sync_threshold()),
490                        resize_factor: Some(default_resize_factor()),
491                    }),
492                    KnnIndex::Spann => None,
493                },
494                spann: match default_knn_index {
495                    KnnIndex::Hnsw => None,
496                    KnnIndex::Spann => Some(SpannIndexConfig {
497                        search_nprobe: Some(default_search_nprobe()),
498                        search_rng_factor: Some(default_search_rng_factor()),
499                        search_rng_epsilon: Some(default_search_rng_epsilon()),
500                        nreplica_count: Some(default_nreplica_count()),
501                        write_rng_factor: Some(default_write_rng_factor()),
502                        write_rng_epsilon: Some(default_write_rng_epsilon()),
503                        split_threshold: Some(default_split_threshold()),
504                        num_samples_kmeans: Some(default_num_samples_kmeans()),
505                        initial_lambda: Some(default_initial_lambda()),
506                        reassign_neighbor_count: Some(default_reassign_neighbor_count()),
507                        merge_threshold: Some(default_merge_threshold()),
508                        num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
509                        write_nprobe: Some(default_write_nprobe()),
510                        ef_construction: Some(default_construction_ef_spann()),
511                        ef_search: Some(default_search_ef_spann()),
512                        max_neighbors: Some(default_m_spann()),
513                    }),
514                },
515            },
516        };
517
518        // Initialize defaults struct directly instead of using Default::default() + field assignments
519        let defaults = ValueTypes {
520            string: Some(StringValueType {
521                string_inverted_index: Some(StringInvertedIndexType {
522                    enabled: true,
523                    config: StringInvertedIndexConfig {},
524                }),
525                fts_index: Some(FtsIndexType {
526                    enabled: false,
527                    config: FtsIndexConfig {},
528                }),
529            }),
530            float: Some(FloatValueType {
531                float_inverted_index: Some(FloatInvertedIndexType {
532                    enabled: true,
533                    config: FloatInvertedIndexConfig {},
534                }),
535            }),
536            int: Some(IntValueType {
537                int_inverted_index: Some(IntInvertedIndexType {
538                    enabled: true,
539                    config: IntInvertedIndexConfig {},
540                }),
541            }),
542            boolean: Some(BoolValueType {
543                bool_inverted_index: Some(BoolInvertedIndexType {
544                    enabled: true,
545                    config: BoolInvertedIndexConfig {},
546                }),
547            }),
548            float_list: Some(FloatListValueType {
549                vector_index: Some(vector_config),
550            }),
551            sparse_vector: Some(SparseVectorValueType {
552                sparse_vector_index: Some(SparseVectorIndexType {
553                    enabled: false,
554                    config: SparseVectorIndexConfig {
555                        embedding_function: Some(EmbeddingFunctionConfiguration::Unknown),
556                        source_key: None,
557                        bm25: Some(false),
558                    },
559                }),
560            }),
561        };
562
563        // Set up key overrides
564        let mut keys = HashMap::new();
565
566        // Enable vector index for #embedding.
567        let embedding_defaults = ValueTypes {
568            float_list: Some(FloatListValueType {
569                vector_index: Some(VectorIndexType {
570                    enabled: true,
571                    config: VectorIndexConfig {
572                        space: Some(default_space()),
573                        embedding_function: None,
574                        source_key: Some(DOCUMENT_KEY.to_string()),
575                        hnsw: match default_knn_index {
576                            KnnIndex::Hnsw => Some(HnswIndexConfig {
577                                ef_construction: Some(default_construction_ef()),
578                                max_neighbors: Some(default_m()),
579                                ef_search: Some(default_search_ef()),
580                                num_threads: Some(default_num_threads()),
581                                batch_size: Some(default_batch_size()),
582                                sync_threshold: Some(default_sync_threshold()),
583                                resize_factor: Some(default_resize_factor()),
584                            }),
585                            KnnIndex::Spann => None,
586                        },
587                        spann: match default_knn_index {
588                            KnnIndex::Hnsw => None,
589                            KnnIndex::Spann => Some(SpannIndexConfig {
590                                search_nprobe: Some(default_search_nprobe()),
591                                search_rng_factor: Some(default_search_rng_factor()),
592                                search_rng_epsilon: Some(default_search_rng_epsilon()),
593                                nreplica_count: Some(default_nreplica_count()),
594                                write_rng_factor: Some(default_write_rng_factor()),
595                                write_rng_epsilon: Some(default_write_rng_epsilon()),
596                                split_threshold: Some(default_split_threshold()),
597                                num_samples_kmeans: Some(default_num_samples_kmeans()),
598                                initial_lambda: Some(default_initial_lambda()),
599                                reassign_neighbor_count: Some(default_reassign_neighbor_count()),
600                                merge_threshold: Some(default_merge_threshold()),
601                                num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
602                                write_nprobe: Some(default_write_nprobe()),
603                                ef_construction: Some(default_construction_ef_spann()),
604                                ef_search: Some(default_search_ef_spann()),
605                                max_neighbors: Some(default_m_spann()),
606                            }),
607                        },
608                    },
609                }),
610            }),
611            ..Default::default()
612        };
613        keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
614
615        // Document defaults - initialize directly instead of Default::default() + field assignment
616        let document_defaults = ValueTypes {
617            string: Some(StringValueType {
618                fts_index: Some(FtsIndexType {
619                    enabled: true,
620                    config: FtsIndexConfig {},
621                }),
622                string_inverted_index: Some(StringInvertedIndexType {
623                    enabled: false,
624                    config: StringInvertedIndexConfig {},
625                }),
626            }),
627            ..Default::default()
628        };
629        keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
630
631        Schema { defaults, keys }
632    }
633
634    pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
635        let to_internal = |vector_index: &VectorIndexType| {
636            let space = vector_index.config.space.clone();
637            vector_index
638                .config
639                .spann
640                .clone()
641                .map(|config| (space.as_ref(), &config).into())
642        };
643
644        self.keys
645            .get(EMBEDDING_KEY)
646            .and_then(|value_types| value_types.float_list.as_ref())
647            .and_then(|float_list| float_list.vector_index.as_ref())
648            .and_then(to_internal)
649            .or_else(|| {
650                self.defaults
651                    .float_list
652                    .as_ref()
653                    .and_then(|float_list| float_list.vector_index.as_ref())
654                    .and_then(to_internal)
655            })
656    }
657
658    /// Reconcile user-provided schema with system defaults
659    ///
660    /// This method merges user configurations with system defaults, ensuring that:
661    /// - User overrides take precedence over defaults
662    /// - Missing user configurations fall back to system defaults
663    /// - Field-level merging for complex configurations (Vector, HNSW, SPANN, etc.)
664    pub fn reconcile_with_defaults(user_schema: Option<&Schema>) -> Result<Self, SchemaError> {
665        let default_schema = Schema::new_default(KnnIndex::Spann);
666
667        match user_schema {
668            Some(user) => {
669                // Merge defaults with user overrides
670                let merged_defaults =
671                    Self::merge_value_types(&default_schema.defaults, &user.defaults)?;
672
673                // Merge key overrides
674                let mut merged_keys = default_schema.keys.clone();
675                for (key, user_value_types) in &user.keys {
676                    if let Some(default_value_types) = merged_keys.get(key) {
677                        // Merge with existing default key override
678                        let merged_value_types =
679                            Self::merge_value_types(default_value_types, user_value_types)?;
680                        merged_keys.insert(key.clone(), merged_value_types);
681                    } else {
682                        // New key override from user
683                        merged_keys.insert(key.clone(), user_value_types.clone());
684                    }
685                }
686
687                Ok(Schema {
688                    defaults: merged_defaults,
689                    keys: merged_keys,
690                })
691            }
692            None => Ok(default_schema),
693        }
694    }
695
696    /// Merge two schemas together, combining key overrides when possible.
697    pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
698        if self.defaults != other.defaults {
699            return Err(SchemaError::DefaultsMismatch);
700        }
701
702        let mut keys = self.keys.clone();
703
704        for (key, other_value_types) in &other.keys {
705            if let Some(existing) = keys.get(key).cloned() {
706                let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
707                keys.insert(key.clone(), merged);
708            } else {
709                keys.insert(key.clone(), other_value_types.clone());
710            }
711        }
712
713        Ok(Schema {
714            defaults: self.defaults.clone(),
715            keys,
716        })
717    }
718
719    fn merge_override_value_types(
720        key: &str,
721        left: &ValueTypes,
722        right: &ValueTypes,
723    ) -> Result<ValueTypes, SchemaError> {
724        Ok(ValueTypes {
725            string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
726            float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
727            int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
728            boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
729            float_list: Self::merge_float_list_override(
730                key,
731                left.float_list.as_ref(),
732                right.float_list.as_ref(),
733            )?,
734            sparse_vector: Self::merge_sparse_vector_override(
735                key,
736                left.sparse_vector.as_ref(),
737                right.sparse_vector.as_ref(),
738            )?,
739        })
740    }
741
742    fn merge_string_override(
743        key: &str,
744        left: Option<&StringValueType>,
745        right: Option<&StringValueType>,
746    ) -> Result<Option<StringValueType>, SchemaError> {
747        match (left, right) {
748            (Some(l), Some(r)) => Ok(Some(StringValueType {
749                string_inverted_index: Self::merge_index_or_error(
750                    l.string_inverted_index.as_ref(),
751                    r.string_inverted_index.as_ref(),
752                    &format!("key '{key}' string.string_inverted_index"),
753                )?,
754                fts_index: Self::merge_index_or_error(
755                    l.fts_index.as_ref(),
756                    r.fts_index.as_ref(),
757                    &format!("key '{key}' string.fts_index"),
758                )?,
759            })),
760            (Some(l), None) => Ok(Some(l.clone())),
761            (None, Some(r)) => Ok(Some(r.clone())),
762            (None, None) => Ok(None),
763        }
764    }
765
766    fn merge_float_override(
767        key: &str,
768        left: Option<&FloatValueType>,
769        right: Option<&FloatValueType>,
770    ) -> Result<Option<FloatValueType>, SchemaError> {
771        match (left, right) {
772            (Some(l), Some(r)) => Ok(Some(FloatValueType {
773                float_inverted_index: Self::merge_index_or_error(
774                    l.float_inverted_index.as_ref(),
775                    r.float_inverted_index.as_ref(),
776                    &format!("key '{key}' float.float_inverted_index"),
777                )?,
778            })),
779            (Some(l), None) => Ok(Some(l.clone())),
780            (None, Some(r)) => Ok(Some(r.clone())),
781            (None, None) => Ok(None),
782        }
783    }
784
785    fn merge_int_override(
786        key: &str,
787        left: Option<&IntValueType>,
788        right: Option<&IntValueType>,
789    ) -> Result<Option<IntValueType>, SchemaError> {
790        match (left, right) {
791            (Some(l), Some(r)) => Ok(Some(IntValueType {
792                int_inverted_index: Self::merge_index_or_error(
793                    l.int_inverted_index.as_ref(),
794                    r.int_inverted_index.as_ref(),
795                    &format!("key '{key}' int.int_inverted_index"),
796                )?,
797            })),
798            (Some(l), None) => Ok(Some(l.clone())),
799            (None, Some(r)) => Ok(Some(r.clone())),
800            (None, None) => Ok(None),
801        }
802    }
803
804    fn merge_bool_override(
805        key: &str,
806        left: Option<&BoolValueType>,
807        right: Option<&BoolValueType>,
808    ) -> Result<Option<BoolValueType>, SchemaError> {
809        match (left, right) {
810            (Some(l), Some(r)) => Ok(Some(BoolValueType {
811                bool_inverted_index: Self::merge_index_or_error(
812                    l.bool_inverted_index.as_ref(),
813                    r.bool_inverted_index.as_ref(),
814                    &format!("key '{key}' bool.bool_inverted_index"),
815                )?,
816            })),
817            (Some(l), None) => Ok(Some(l.clone())),
818            (None, Some(r)) => Ok(Some(r.clone())),
819            (None, None) => Ok(None),
820        }
821    }
822
823    fn merge_float_list_override(
824        key: &str,
825        left: Option<&FloatListValueType>,
826        right: Option<&FloatListValueType>,
827    ) -> Result<Option<FloatListValueType>, SchemaError> {
828        match (left, right) {
829            (Some(l), Some(r)) => Ok(Some(FloatListValueType {
830                vector_index: Self::merge_index_or_error(
831                    l.vector_index.as_ref(),
832                    r.vector_index.as_ref(),
833                    &format!("key '{key}' float_list.vector_index"),
834                )?,
835            })),
836            (Some(l), None) => Ok(Some(l.clone())),
837            (None, Some(r)) => Ok(Some(r.clone())),
838            (None, None) => Ok(None),
839        }
840    }
841
842    fn merge_sparse_vector_override(
843        key: &str,
844        left: Option<&SparseVectorValueType>,
845        right: Option<&SparseVectorValueType>,
846    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
847        match (left, right) {
848            (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
849                sparse_vector_index: Self::merge_index_or_error(
850                    l.sparse_vector_index.as_ref(),
851                    r.sparse_vector_index.as_ref(),
852                    &format!("key '{key}' sparse_vector.sparse_vector_index"),
853                )?,
854            })),
855            (Some(l), None) => Ok(Some(l.clone())),
856            (None, Some(r)) => Ok(Some(r.clone())),
857            (None, None) => Ok(None),
858        }
859    }
860
861    fn merge_index_or_error<T: Clone + PartialEq>(
862        left: Option<&T>,
863        right: Option<&T>,
864        context: &str,
865    ) -> Result<Option<T>, SchemaError> {
866        match (left, right) {
867            (Some(l), Some(r)) => {
868                if l == r {
869                    Ok(Some(l.clone()))
870                } else {
871                    Err(SchemaError::ConfigurationConflict {
872                        context: context.to_string(),
873                    })
874                }
875            }
876            (Some(l), None) => Ok(Some(l.clone())),
877            (None, Some(r)) => Ok(Some(r.clone())),
878            (None, None) => Ok(None),
879        }
880    }
881
882    /// Merge two ValueTypes with field-level merging
883    /// User values take precedence over default values
884    fn merge_value_types(
885        default: &ValueTypes,
886        user: &ValueTypes,
887    ) -> Result<ValueTypes, SchemaError> {
888        // Merge float_list first
889        let float_list =
890            Self::merge_float_list_type(default.float_list.as_ref(), user.float_list.as_ref());
891
892        // Validate the merged float_list (covers all merge cases)
893        if let Some(ref fl) = float_list {
894            Self::validate_float_list_value_type(fl)?;
895        }
896
897        Ok(ValueTypes {
898            string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
899            float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
900            int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
901            boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
902            float_list,
903            sparse_vector: Self::merge_sparse_vector_type(
904                default.sparse_vector.as_ref(),
905                user.sparse_vector.as_ref(),
906            )?,
907        })
908    }
909
910    /// Merge StringValueType configurations
911    fn merge_string_type(
912        default: Option<&StringValueType>,
913        user: Option<&StringValueType>,
914    ) -> Result<Option<StringValueType>, SchemaError> {
915        match (default, user) {
916            (Some(default), Some(user)) => Ok(Some(StringValueType {
917                string_inverted_index: Self::merge_string_inverted_index_type(
918                    default.string_inverted_index.as_ref(),
919                    user.string_inverted_index.as_ref(),
920                )?,
921                fts_index: Self::merge_fts_index_type(
922                    default.fts_index.as_ref(),
923                    user.fts_index.as_ref(),
924                )?,
925            })),
926            (Some(default), None) => Ok(Some(default.clone())),
927            (None, Some(user)) => Ok(Some(user.clone())),
928            (None, None) => Ok(None),
929        }
930    }
931
932    /// Merge FloatValueType configurations
933    fn merge_float_type(
934        default: Option<&FloatValueType>,
935        user: Option<&FloatValueType>,
936    ) -> Result<Option<FloatValueType>, SchemaError> {
937        match (default, user) {
938            (Some(default), Some(user)) => Ok(Some(FloatValueType {
939                float_inverted_index: Self::merge_float_inverted_index_type(
940                    default.float_inverted_index.as_ref(),
941                    user.float_inverted_index.as_ref(),
942                )?,
943            })),
944            (Some(default), None) => Ok(Some(default.clone())),
945            (None, Some(user)) => Ok(Some(user.clone())),
946            (None, None) => Ok(None),
947        }
948    }
949
950    /// Merge IntValueType configurations
951    fn merge_int_type(
952        default: Option<&IntValueType>,
953        user: Option<&IntValueType>,
954    ) -> Result<Option<IntValueType>, SchemaError> {
955        match (default, user) {
956            (Some(default), Some(user)) => Ok(Some(IntValueType {
957                int_inverted_index: Self::merge_int_inverted_index_type(
958                    default.int_inverted_index.as_ref(),
959                    user.int_inverted_index.as_ref(),
960                )?,
961            })),
962            (Some(default), None) => Ok(Some(default.clone())),
963            (None, Some(user)) => Ok(Some(user.clone())),
964            (None, None) => Ok(None),
965        }
966    }
967
968    /// Merge BoolValueType configurations
969    fn merge_bool_type(
970        default: Option<&BoolValueType>,
971        user: Option<&BoolValueType>,
972    ) -> Result<Option<BoolValueType>, SchemaError> {
973        match (default, user) {
974            (Some(default), Some(user)) => Ok(Some(BoolValueType {
975                bool_inverted_index: Self::merge_bool_inverted_index_type(
976                    default.bool_inverted_index.as_ref(),
977                    user.bool_inverted_index.as_ref(),
978                )?,
979            })),
980            (Some(default), None) => Ok(Some(default.clone())),
981            (None, Some(user)) => Ok(Some(user.clone())),
982            (None, None) => Ok(None),
983        }
984    }
985
986    /// Merge FloatListValueType configurations
987    fn merge_float_list_type(
988        default: Option<&FloatListValueType>,
989        user: Option<&FloatListValueType>,
990    ) -> Option<FloatListValueType> {
991        match (default, user) {
992            (Some(default), Some(user)) => Some(FloatListValueType {
993                vector_index: Self::merge_vector_index_type(
994                    default.vector_index.as_ref(),
995                    user.vector_index.as_ref(),
996                ),
997            }),
998            (Some(default), None) => Some(default.clone()),
999            (None, Some(user)) => Some(user.clone()),
1000            (None, None) => None,
1001        }
1002    }
1003
1004    /// Merge SparseVectorValueType configurations
1005    fn merge_sparse_vector_type(
1006        default: Option<&SparseVectorValueType>,
1007        user: Option<&SparseVectorValueType>,
1008    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1009        match (default, user) {
1010            (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
1011                sparse_vector_index: Self::merge_sparse_vector_index_type(
1012                    default.sparse_vector_index.as_ref(),
1013                    user.sparse_vector_index.as_ref(),
1014                )?,
1015            })),
1016            (Some(default), None) => Ok(Some(default.clone())),
1017            (None, Some(user)) => Ok(Some(user.clone())),
1018            (None, None) => Ok(None),
1019        }
1020    }
1021
1022    /// Merge individual index type configurations
1023    fn merge_string_inverted_index_type(
1024        default: Option<&StringInvertedIndexType>,
1025        user: Option<&StringInvertedIndexType>,
1026    ) -> Result<Option<StringInvertedIndexType>, SchemaError> {
1027        match (default, user) {
1028            (Some(_default), Some(user)) => {
1029                Ok(Some(StringInvertedIndexType {
1030                    enabled: user.enabled,       // User enabled state takes precedence
1031                    config: user.config.clone(), // User config takes precedence
1032                }))
1033            }
1034            (Some(default), None) => Ok(Some(default.clone())),
1035            (None, Some(user)) => Ok(Some(user.clone())),
1036            (None, None) => Ok(None),
1037        }
1038    }
1039
1040    fn merge_fts_index_type(
1041        default: Option<&FtsIndexType>,
1042        user: Option<&FtsIndexType>,
1043    ) -> Result<Option<FtsIndexType>, SchemaError> {
1044        match (default, user) {
1045            (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
1046                enabled: user.enabled,
1047                config: user.config.clone(),
1048            })),
1049            (Some(default), None) => Ok(Some(default.clone())),
1050            (None, Some(user)) => Ok(Some(user.clone())),
1051            (None, None) => Ok(None),
1052        }
1053    }
1054
1055    fn merge_float_inverted_index_type(
1056        default: Option<&FloatInvertedIndexType>,
1057        user: Option<&FloatInvertedIndexType>,
1058    ) -> Result<Option<FloatInvertedIndexType>, SchemaError> {
1059        match (default, user) {
1060            (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
1061                enabled: user.enabled,
1062                config: user.config.clone(),
1063            })),
1064            (Some(default), None) => Ok(Some(default.clone())),
1065            (None, Some(user)) => Ok(Some(user.clone())),
1066            (None, None) => Ok(None),
1067        }
1068    }
1069
1070    fn merge_int_inverted_index_type(
1071        default: Option<&IntInvertedIndexType>,
1072        user: Option<&IntInvertedIndexType>,
1073    ) -> Result<Option<IntInvertedIndexType>, SchemaError> {
1074        match (default, user) {
1075            (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
1076                enabled: user.enabled,
1077                config: user.config.clone(),
1078            })),
1079            (Some(default), None) => Ok(Some(default.clone())),
1080            (None, Some(user)) => Ok(Some(user.clone())),
1081            (None, None) => Ok(None),
1082        }
1083    }
1084
1085    fn merge_bool_inverted_index_type(
1086        default: Option<&BoolInvertedIndexType>,
1087        user: Option<&BoolInvertedIndexType>,
1088    ) -> Result<Option<BoolInvertedIndexType>, SchemaError> {
1089        match (default, user) {
1090            (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
1091                enabled: user.enabled,
1092                config: user.config.clone(),
1093            })),
1094            (Some(default), None) => Ok(Some(default.clone())),
1095            (None, Some(user)) => Ok(Some(user.clone())),
1096            (None, None) => Ok(None),
1097        }
1098    }
1099
1100    fn merge_vector_index_type(
1101        default: Option<&VectorIndexType>,
1102        user: Option<&VectorIndexType>,
1103    ) -> Option<VectorIndexType> {
1104        match (default, user) {
1105            (Some(default), Some(user)) => Some(VectorIndexType {
1106                enabled: user.enabled,
1107                config: Self::merge_vector_index_config(&default.config, &user.config),
1108            }),
1109            (Some(default), None) => Some(default.clone()),
1110            (None, Some(user)) => Some(user.clone()),
1111            (None, None) => None,
1112        }
1113    }
1114
1115    fn merge_sparse_vector_index_type(
1116        default: Option<&SparseVectorIndexType>,
1117        user: Option<&SparseVectorIndexType>,
1118    ) -> Result<Option<SparseVectorIndexType>, SchemaError> {
1119        match (default, user) {
1120            (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
1121                enabled: user.enabled,
1122                config: Self::merge_sparse_vector_index_config(&default.config, &user.config),
1123            })),
1124            (Some(default), None) => Ok(Some(default.clone())),
1125            (None, Some(user)) => Ok(Some(user.clone())),
1126            (None, None) => Ok(None),
1127        }
1128    }
1129
1130    /// Validate FloatListValueType vector index configurations
1131    /// This validates HNSW and SPANN configs within the merged float_list
1132    fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), SchemaError> {
1133        if let Some(vector_index) = &float_list.vector_index {
1134            if let Some(hnsw) = &vector_index.config.hnsw {
1135                hnsw.validate().map_err(SchemaError::InvalidHnswConfig)?;
1136            }
1137            if let Some(spann) = &vector_index.config.spann {
1138                spann.validate().map_err(SchemaError::InvalidSpannConfig)?;
1139            }
1140        }
1141        Ok(())
1142    }
1143
1144    /// Merge VectorIndexConfig with field-level merging
1145    fn merge_vector_index_config(
1146        default: &VectorIndexConfig,
1147        user: &VectorIndexConfig,
1148    ) -> VectorIndexConfig {
1149        VectorIndexConfig {
1150            space: user.space.clone().or(default.space.clone()),
1151            embedding_function: user
1152                .embedding_function
1153                .clone()
1154                .or(default.embedding_function.clone()),
1155            source_key: user.source_key.clone().or(default.source_key.clone()),
1156            hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1157            spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
1158        }
1159    }
1160
1161    /// Merge SparseVectorIndexConfig with field-level merging
1162    fn merge_sparse_vector_index_config(
1163        default: &SparseVectorIndexConfig,
1164        user: &SparseVectorIndexConfig,
1165    ) -> SparseVectorIndexConfig {
1166        SparseVectorIndexConfig {
1167            embedding_function: user
1168                .embedding_function
1169                .clone()
1170                .or(default.embedding_function.clone()),
1171            source_key: user.source_key.clone().or(default.source_key.clone()),
1172            bm25: user.bm25.or(default.bm25),
1173        }
1174    }
1175
1176    /// Merge HNSW configurations with field-level merging
1177    fn merge_hnsw_configs(
1178        default_hnsw: Option<&HnswIndexConfig>,
1179        user_hnsw: Option<&HnswIndexConfig>,
1180    ) -> Option<HnswIndexConfig> {
1181        match (default_hnsw, user_hnsw) {
1182            (Some(default), Some(user)) => Some(HnswIndexConfig {
1183                ef_construction: user.ef_construction.or(default.ef_construction),
1184                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1185                ef_search: user.ef_search.or(default.ef_search),
1186                num_threads: user.num_threads.or(default.num_threads),
1187                batch_size: user.batch_size.or(default.batch_size),
1188                sync_threshold: user.sync_threshold.or(default.sync_threshold),
1189                resize_factor: user.resize_factor.or(default.resize_factor),
1190            }),
1191            (Some(default), None) => Some(default.clone()),
1192            (None, Some(user)) => Some(user.clone()),
1193            (None, None) => None,
1194        }
1195    }
1196
1197    /// Merge SPANN configurations with field-level merging
1198    fn merge_spann_configs(
1199        default_spann: Option<&SpannIndexConfig>,
1200        user_spann: Option<&SpannIndexConfig>,
1201    ) -> Option<SpannIndexConfig> {
1202        match (default_spann, user_spann) {
1203            (Some(default), Some(user)) => Some(SpannIndexConfig {
1204                search_nprobe: user.search_nprobe.or(default.search_nprobe),
1205                search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1206                search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1207                nreplica_count: user.nreplica_count.or(default.nreplica_count),
1208                write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1209                write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1210                split_threshold: user.split_threshold.or(default.split_threshold),
1211                num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1212                initial_lambda: user.initial_lambda.or(default.initial_lambda),
1213                reassign_neighbor_count: user
1214                    .reassign_neighbor_count
1215                    .or(default.reassign_neighbor_count),
1216                merge_threshold: user.merge_threshold.or(default.merge_threshold),
1217                num_centers_to_merge_to: user
1218                    .num_centers_to_merge_to
1219                    .or(default.num_centers_to_merge_to),
1220                write_nprobe: user.write_nprobe.or(default.write_nprobe),
1221                ef_construction: user.ef_construction.or(default.ef_construction),
1222                ef_search: user.ef_search.or(default.ef_search),
1223                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1224            }),
1225            (Some(default), None) => Some(default.clone()),
1226            (None, Some(user)) => Some(user.clone()),
1227            (None, None) => None,
1228        }
1229    }
1230
1231    /// Reconcile Schema with InternalCollectionConfiguration
1232    ///
1233    /// Simple reconciliation logic:
1234    /// 1. If collection config is default → return schema (schema is source of truth)
1235    /// 2. If collection config is non-default and schema is default → override schema with collection config
1236    ///
1237    /// Note: The case where both are non-default is validated earlier in reconcile_schema_and_config
1238    pub fn reconcile_with_collection_config(
1239        schema: &Schema,
1240        collection_config: &InternalCollectionConfiguration,
1241    ) -> Result<Schema, SchemaError> {
1242        // 1. Check if collection config is default
1243        if collection_config.is_default() {
1244            // Collection config is default → schema is source of truth
1245            return Ok(schema.clone());
1246        }
1247
1248        // 2. Collection config is non-default, schema must be default (already validated earlier)
1249        // Convert collection config to schema
1250        Self::convert_collection_config_to_schema(collection_config)
1251    }
1252
1253    pub fn reconcile_schema_and_config(
1254        schema: Option<&Schema>,
1255        configuration: Option<&InternalCollectionConfiguration>,
1256    ) -> Result<Schema, SchemaError> {
1257        // Early validation: check if both user-provided schema and config are non-default
1258        if let (Some(user_schema), Some(config)) = (schema, configuration) {
1259            if !user_schema.is_default() && !config.is_default() {
1260                return Err(SchemaError::ConfigAndSchemaConflict);
1261            }
1262        }
1263
1264        let reconciled_schema = Self::reconcile_with_defaults(schema)?;
1265        if let Some(config) = configuration {
1266            Self::reconcile_with_collection_config(&reconciled_schema, config)
1267        } else {
1268            Ok(reconciled_schema)
1269        }
1270    }
1271
1272    pub fn default_with_embedding_function(
1273        embedding_function: EmbeddingFunctionConfiguration,
1274    ) -> Schema {
1275        let mut schema = Schema::new_default(KnnIndex::Spann);
1276        if let Some(float_list) = &mut schema.defaults.float_list {
1277            if let Some(vector_index) = &mut float_list.vector_index {
1278                vector_index.config.embedding_function = Some(embedding_function.clone());
1279            }
1280        }
1281        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1282            if let Some(float_list) = &mut embedding_types.float_list {
1283                if let Some(vector_index) = &mut float_list.vector_index {
1284                    vector_index.config.embedding_function = Some(embedding_function);
1285                }
1286            }
1287        }
1288        schema
1289    }
1290
1291    /// Check if schema is default by checking each field individually
1292    pub fn is_default(&self) -> bool {
1293        // Check if defaults are default (field by field)
1294        if !Self::is_value_types_default(&self.defaults) {
1295            return false;
1296        }
1297
1298        for key in self.keys.keys() {
1299            if key != EMBEDDING_KEY && key != DOCUMENT_KEY {
1300                return false;
1301            }
1302        }
1303
1304        // Check #embedding key
1305        if let Some(embedding_value) = self.keys.get(EMBEDDING_KEY) {
1306            if !Self::is_embedding_value_types_default(embedding_value) {
1307                return false;
1308            }
1309        }
1310
1311        // Check #document key
1312        if let Some(document_value) = self.keys.get(DOCUMENT_KEY) {
1313            if !Self::is_document_value_types_default(document_value) {
1314                return false;
1315            }
1316        }
1317
1318        true
1319    }
1320
1321    /// Check if ValueTypes (defaults) are in default state
1322    fn is_value_types_default(value_types: &ValueTypes) -> bool {
1323        // Check string field
1324        if let Some(string) = &value_types.string {
1325            if let Some(string_inverted) = &string.string_inverted_index {
1326                if !string_inverted.enabled {
1327                    return false;
1328                }
1329                // Config is an empty struct, so no need to check it
1330            }
1331            if let Some(fts) = &string.fts_index {
1332                if fts.enabled {
1333                    return false;
1334                }
1335                // Config is an empty struct, so no need to check it
1336            }
1337        }
1338
1339        // Check float field
1340        if let Some(float) = &value_types.float {
1341            if let Some(float_inverted) = &float.float_inverted_index {
1342                if !float_inverted.enabled {
1343                    return false;
1344                }
1345                // Config is an empty struct, so no need to check it
1346            }
1347        }
1348
1349        // Check int field
1350        if let Some(int) = &value_types.int {
1351            if let Some(int_inverted) = &int.int_inverted_index {
1352                if !int_inverted.enabled {
1353                    return false;
1354                }
1355                // Config is an empty struct, so no need to check it
1356            }
1357        }
1358
1359        // Check boolean field
1360        if let Some(boolean) = &value_types.boolean {
1361            if let Some(bool_inverted) = &boolean.bool_inverted_index {
1362                if !bool_inverted.enabled {
1363                    return false;
1364                }
1365                // Config is an empty struct, so no need to check it
1366            }
1367        }
1368
1369        // Check float_list field (vector index should be disabled)
1370        if let Some(float_list) = &value_types.float_list {
1371            if let Some(vector_index) = &float_list.vector_index {
1372                if vector_index.enabled {
1373                    return false;
1374                }
1375                // Check that the config has default structure
1376                // We allow space and embedding_function to vary, but check structure
1377                if vector_index.config.source_key.is_some() {
1378                    return false;
1379                }
1380                // Check that either hnsw or spann config is present (not both, not neither)
1381                // and that the config values are default
1382                match (&vector_index.config.hnsw, &vector_index.config.spann) {
1383                    (Some(hnsw_config), None) => {
1384                        if !hnsw_config.is_default() {
1385                            return false;
1386                        }
1387                    }
1388                    (None, Some(spann_config)) => {
1389                        if !spann_config.is_default() {
1390                            return false;
1391                        }
1392                    }
1393                    (Some(_), Some(_)) => return false, // Both present
1394                    (None, None) => {}
1395                }
1396            }
1397        }
1398
1399        // Check sparse_vector field (should be disabled)
1400        if let Some(sparse_vector) = &value_types.sparse_vector {
1401            if let Some(sparse_index) = &sparse_vector.sparse_vector_index {
1402                if sparse_index.enabled {
1403                    return false;
1404                }
1405                // Check config structure
1406                if !is_embedding_function_default(&sparse_index.config.embedding_function) {
1407                    return false;
1408                }
1409                if sparse_index.config.source_key.is_some() {
1410                    return false;
1411                }
1412                if let Some(bm25) = &sparse_index.config.bm25 {
1413                    if bm25 != &false {
1414                        return false;
1415                    }
1416                }
1417            }
1418        }
1419
1420        true
1421    }
1422
1423    /// Check if ValueTypes for #embedding key are in default state
1424    fn is_embedding_value_types_default(value_types: &ValueTypes) -> bool {
1425        // For #embedding, only float_list should be set
1426        if value_types.string.is_some()
1427            || value_types.float.is_some()
1428            || value_types.int.is_some()
1429            || value_types.boolean.is_some()
1430            || value_types.sparse_vector.is_some()
1431        {
1432            return false;
1433        }
1434
1435        // Check float_list field (vector index should be enabled)
1436        if let Some(float_list) = &value_types.float_list {
1437            if let Some(vector_index) = &float_list.vector_index {
1438                if !vector_index.enabled {
1439                    return false;
1440                }
1441                // Check that embedding_function is default
1442                if !is_embedding_function_default(&vector_index.config.embedding_function) {
1443                    return false;
1444                }
1445                // Check that source_key is #document
1446                if vector_index.config.source_key.as_deref() != Some(DOCUMENT_KEY) {
1447                    return false;
1448                }
1449                // Check that either hnsw or spann config is present (not both, not neither)
1450                // and that the config values are default
1451                match (&vector_index.config.hnsw, &vector_index.config.spann) {
1452                    (Some(hnsw_config), None) => {
1453                        if !hnsw_config.is_default() {
1454                            return false;
1455                        }
1456                    }
1457                    (None, Some(spann_config)) => {
1458                        if !spann_config.is_default() {
1459                            return false;
1460                        }
1461                    }
1462                    (Some(_), Some(_)) => return false, // Both present
1463                    (None, None) => {}
1464                }
1465            }
1466        }
1467
1468        true
1469    }
1470
1471    /// Check if ValueTypes for #document key are in default state
1472    fn is_document_value_types_default(value_types: &ValueTypes) -> bool {
1473        // For #document, only string should be set
1474        if value_types.float_list.is_some()
1475            || value_types.float.is_some()
1476            || value_types.int.is_some()
1477            || value_types.boolean.is_some()
1478            || value_types.sparse_vector.is_some()
1479        {
1480            return false;
1481        }
1482
1483        // Check string field
1484        if let Some(string) = &value_types.string {
1485            if let Some(fts) = &string.fts_index {
1486                if !fts.enabled {
1487                    return false;
1488                }
1489                // Config is an empty struct, so no need to check it
1490            }
1491            if let Some(string_inverted) = &string.string_inverted_index {
1492                if string_inverted.enabled {
1493                    return false;
1494                }
1495                // Config is an empty struct, so no need to check it
1496            }
1497        }
1498
1499        true
1500    }
1501
1502    /// Convert InternalCollectionConfiguration to Schema
1503    fn convert_collection_config_to_schema(
1504        collection_config: &InternalCollectionConfiguration,
1505    ) -> Result<Schema, SchemaError> {
1506        // Start with a default schema structure
1507        let mut schema = Schema::new_default(KnnIndex::Spann); // Default to HNSW, will be overridden
1508
1509        // Convert vector index configuration
1510        let vector_config = match &collection_config.vector_index {
1511            VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
1512                space: Some(hnsw_config.space.clone()),
1513                embedding_function: collection_config.embedding_function.clone(),
1514                source_key: Some(DOCUMENT_KEY.to_string()), // Default source key
1515                hnsw: Some(HnswIndexConfig {
1516                    ef_construction: Some(hnsw_config.ef_construction),
1517                    max_neighbors: Some(hnsw_config.max_neighbors),
1518                    ef_search: Some(hnsw_config.ef_search),
1519                    num_threads: Some(hnsw_config.num_threads),
1520                    batch_size: Some(hnsw_config.batch_size),
1521                    sync_threshold: Some(hnsw_config.sync_threshold),
1522                    resize_factor: Some(hnsw_config.resize_factor),
1523                }),
1524                spann: None,
1525            },
1526            VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
1527                space: Some(spann_config.space.clone()),
1528                embedding_function: collection_config.embedding_function.clone(),
1529                source_key: Some(DOCUMENT_KEY.to_string()), // Default source key
1530                hnsw: None,
1531                spann: Some(SpannIndexConfig {
1532                    search_nprobe: Some(spann_config.search_nprobe),
1533                    search_rng_factor: Some(spann_config.search_rng_factor),
1534                    search_rng_epsilon: Some(spann_config.search_rng_epsilon),
1535                    nreplica_count: Some(spann_config.nreplica_count),
1536                    write_rng_factor: Some(spann_config.write_rng_factor),
1537                    write_rng_epsilon: Some(spann_config.write_rng_epsilon),
1538                    split_threshold: Some(spann_config.split_threshold),
1539                    num_samples_kmeans: Some(spann_config.num_samples_kmeans),
1540                    initial_lambda: Some(spann_config.initial_lambda),
1541                    reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
1542                    merge_threshold: Some(spann_config.merge_threshold),
1543                    num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
1544                    write_nprobe: Some(spann_config.write_nprobe),
1545                    ef_construction: Some(spann_config.ef_construction),
1546                    ef_search: Some(spann_config.ef_search),
1547                    max_neighbors: Some(spann_config.max_neighbors),
1548                }),
1549            },
1550        };
1551
1552        // Update defaults (keep enabled=false, just update the config)
1553        // This serves as the template for any new float_list fields
1554        if let Some(float_list) = &mut schema.defaults.float_list {
1555            if let Some(vector_index) = &mut float_list.vector_index {
1556                vector_index.config = vector_config.clone();
1557            }
1558        }
1559
1560        // Update the vector_index in the existing #embedding key override
1561        // Keep enabled=true (already set by new_default) and update the config
1562        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1563            if let Some(float_list) = &mut embedding_types.float_list {
1564                if let Some(vector_index) = &mut float_list.vector_index {
1565                    vector_index.config = vector_config;
1566                }
1567            }
1568        }
1569
1570        Ok(schema)
1571    }
1572
1573    /// Check if a specific metadata key-value should be indexed based on schema configuration
1574    pub fn is_metadata_type_index_enabled(
1575        &self,
1576        key: &str,
1577        value_type: MetadataValueType,
1578    ) -> Result<bool, SchemaError> {
1579        let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1580
1581        match value_type {
1582            MetadataValueType::Bool => match &v_type.boolean {
1583                Some(bool_type) => match &bool_type.bool_inverted_index {
1584                    Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1585                    None => Err(SchemaError::MissingIndexConfiguration {
1586                        key: key.to_string(),
1587                        value_type: "bool".to_string(),
1588                    }),
1589                },
1590                None => match &self.defaults.boolean {
1591                    Some(bool_type) => match &bool_type.bool_inverted_index {
1592                        Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1593                        None => Err(SchemaError::MissingIndexConfiguration {
1594                            key: key.to_string(),
1595                            value_type: "bool".to_string(),
1596                        }),
1597                    },
1598                    None => Err(SchemaError::MissingIndexConfiguration {
1599                        key: key.to_string(),
1600                        value_type: "bool".to_string(),
1601                    }),
1602                },
1603            },
1604            MetadataValueType::Int => match &v_type.int {
1605                Some(int_type) => match &int_type.int_inverted_index {
1606                    Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1607                    None => Err(SchemaError::MissingIndexConfiguration {
1608                        key: key.to_string(),
1609                        value_type: "int".to_string(),
1610                    }),
1611                },
1612                None => match &self.defaults.int {
1613                    Some(int_type) => match &int_type.int_inverted_index {
1614                        Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1615                        None => Err(SchemaError::MissingIndexConfiguration {
1616                            key: key.to_string(),
1617                            value_type: "int".to_string(),
1618                        }),
1619                    },
1620                    None => Err(SchemaError::MissingIndexConfiguration {
1621                        key: key.to_string(),
1622                        value_type: "int".to_string(),
1623                    }),
1624                },
1625            },
1626            MetadataValueType::Float => match &v_type.float {
1627                Some(float_type) => match &float_type.float_inverted_index {
1628                    Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1629                    None => Err(SchemaError::MissingIndexConfiguration {
1630                        key: key.to_string(),
1631                        value_type: "float".to_string(),
1632                    }),
1633                },
1634                None => match &self.defaults.float {
1635                    Some(float_type) => match &float_type.float_inverted_index {
1636                        Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1637                        None => Err(SchemaError::MissingIndexConfiguration {
1638                            key: key.to_string(),
1639                            value_type: "float".to_string(),
1640                        }),
1641                    },
1642                    None => Err(SchemaError::MissingIndexConfiguration {
1643                        key: key.to_string(),
1644                        value_type: "float".to_string(),
1645                    }),
1646                },
1647            },
1648            MetadataValueType::Str => match &v_type.string {
1649                Some(string_type) => match &string_type.string_inverted_index {
1650                    Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1651                    None => Err(SchemaError::MissingIndexConfiguration {
1652                        key: key.to_string(),
1653                        value_type: "string".to_string(),
1654                    }),
1655                },
1656                None => match &self.defaults.string {
1657                    Some(string_type) => match &string_type.string_inverted_index {
1658                        Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1659                        None => Err(SchemaError::MissingIndexConfiguration {
1660                            key: key.to_string(),
1661                            value_type: "string".to_string(),
1662                        }),
1663                    },
1664                    None => Err(SchemaError::MissingIndexConfiguration {
1665                        key: key.to_string(),
1666                        value_type: "string".to_string(),
1667                    }),
1668                },
1669            },
1670            MetadataValueType::SparseVector => match &v_type.sparse_vector {
1671                Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1672                    Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1673                    None => Err(SchemaError::MissingIndexConfiguration {
1674                        key: key.to_string(),
1675                        value_type: "sparse_vector".to_string(),
1676                    }),
1677                },
1678                None => match &self.defaults.sparse_vector {
1679                    Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1680                        Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1681                        None => Err(SchemaError::MissingIndexConfiguration {
1682                            key: key.to_string(),
1683                            value_type: "sparse_vector".to_string(),
1684                        }),
1685                    },
1686                    None => Err(SchemaError::MissingIndexConfiguration {
1687                        key: key.to_string(),
1688                        value_type: "sparse_vector".to_string(),
1689                    }),
1690                },
1691            },
1692        }
1693    }
1694
1695    pub fn is_metadata_where_indexing_enabled(
1696        &self,
1697        where_clause: &Where,
1698    ) -> Result<(), FilterValidationError> {
1699        match where_clause {
1700            Where::Composite(composite) => {
1701                for child in &composite.children {
1702                    self.is_metadata_where_indexing_enabled(child)?;
1703                }
1704                Ok(())
1705            }
1706            Where::Document(_) => Ok(()),
1707            Where::Metadata(expression) => {
1708                let value_type = match &expression.comparison {
1709                    MetadataComparison::Primitive(_, value) => value.value_type(),
1710                    MetadataComparison::Set(_, set_value) => set_value.value_type(),
1711                };
1712                let is_enabled = self
1713                    .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1714                    .map_err(FilterValidationError::Schema)?;
1715                if !is_enabled {
1716                    return Err(FilterValidationError::IndexingDisabled {
1717                        key: expression.key.clone(),
1718                        value_type,
1719                    });
1720                }
1721                Ok(())
1722            }
1723        }
1724    }
1725
1726    pub fn is_knn_key_indexing_enabled(
1727        &self,
1728        key: &str,
1729        query: &QueryVector,
1730    ) -> Result<(), FilterValidationError> {
1731        match query {
1732            QueryVector::Sparse(_) => {
1733                let is_enabled = self
1734                    .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1735                    .map_err(FilterValidationError::Schema)?;
1736                if !is_enabled {
1737                    return Err(FilterValidationError::IndexingDisabled {
1738                        key: key.to_string(),
1739                        value_type: MetadataValueType::SparseVector,
1740                    });
1741                }
1742                Ok(())
1743            }
1744            QueryVector::Dense(_) => {
1745                // TODO: once we allow turning off dense vector indexing, we need to check if the key is enabled
1746                // Dense vectors are always indexed
1747                Ok(())
1748            }
1749        }
1750    }
1751
1752    pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
1753        let value_types = self.keys.entry(key.to_string()).or_default();
1754        match value_type {
1755            MetadataValueType::Bool => {
1756                if value_types.boolean.is_none() {
1757                    value_types.boolean = self.defaults.boolean.clone();
1758                    return true;
1759                }
1760            }
1761            MetadataValueType::Int => {
1762                if value_types.int.is_none() {
1763                    value_types.int = self.defaults.int.clone();
1764                    return true;
1765                }
1766            }
1767            MetadataValueType::Float => {
1768                if value_types.float.is_none() {
1769                    value_types.float = self.defaults.float.clone();
1770                    return true;
1771                }
1772            }
1773            MetadataValueType::Str => {
1774                if value_types.string.is_none() {
1775                    value_types.string = self.defaults.string.clone();
1776                    return true;
1777                }
1778            }
1779            MetadataValueType::SparseVector => {
1780                if value_types.sparse_vector.is_none() {
1781                    value_types.sparse_vector = self.defaults.sparse_vector.clone();
1782                    return true;
1783                }
1784            }
1785        }
1786        false
1787    }
1788
1789    // ========================================================================
1790    // BUILDER PATTERN METHODS
1791    // ========================================================================
1792
1793    /// Create an index configuration (builder pattern)
1794    ///
1795    /// This method allows fluent, chainable configuration of indexes on a schema.
1796    /// It matches the Python API's `.create_index()` method.
1797    ///
1798    /// # Arguments
1799    /// * `key` - Optional key name for per-key index. `None` applies to defaults/special keys
1800    /// * `config` - Index configuration to create
1801    ///
1802    /// # Returns
1803    /// `Self` for method chaining
1804    ///
1805    /// # Errors
1806    /// Returns error if:
1807    /// - Attempting to create index on special keys (`#document`, `#embedding`)
1808    /// - Invalid configuration (e.g., vector index on non-embedding key)
1809    /// - Conflicting with existing indexes (e.g., multiple sparse vector indexes)
1810    ///
1811    /// # Examples
1812    /// ```
1813    /// use chroma_types::{Schema, VectorIndexConfig, StringInvertedIndexConfig, Space};
1814    ///
1815    /// # fn main() -> Result<(), SchemaBuilderError> {
1816    /// let schema = Schema::default()
1817    ///     .create_index(None, VectorIndexConfig {
1818    ///         space: Some(Space::Cosine),
1819    ///         embedding_function: None,
1820    ///         source_key: None,
1821    ///         hnsw: None,
1822    ///         spann: None,
1823    ///     }.into())?
1824    ///     .create_index(Some("category"), StringInvertedIndexConfig {}.into())?;
1825    /// # Ok(())
1826    /// # }
1827    /// ```
1828    pub fn create_index(
1829        mut self,
1830        key: Option<&str>,
1831        config: IndexConfig,
1832    ) -> Result<Self, SchemaBuilderError> {
1833        // Handle special cases: Vector and FTS (global configs only)
1834        match (&key, &config) {
1835            (None, IndexConfig::Vector(cfg)) => {
1836                self._set_vector_index_config_builder(cfg.clone());
1837                return Ok(self);
1838            }
1839            (None, IndexConfig::Fts(cfg)) => {
1840                self._set_fts_index_config_builder(cfg.clone());
1841                return Ok(self);
1842            }
1843            (Some(k), IndexConfig::Vector(_)) => {
1844                return Err(SchemaBuilderError::VectorIndexMustBeGlobal { key: k.to_string() });
1845            }
1846            (Some(k), IndexConfig::Fts(_)) => {
1847                return Err(SchemaBuilderError::FtsIndexMustBeGlobal { key: k.to_string() });
1848            }
1849            _ => {}
1850        }
1851
1852        // Validate special keys
1853        if let Some(k) = key {
1854            if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
1855                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
1856                    key: k.to_string(),
1857                });
1858            }
1859        }
1860
1861        // Validate sparse vector requires key
1862        if key.is_none() && matches!(config, IndexConfig::SparseVector(_)) {
1863            return Err(SchemaBuilderError::SparseVectorRequiresKey);
1864        }
1865
1866        // Dispatch to appropriate helper
1867        match key {
1868            Some(k) => self._set_index_for_key_builder(k, config, true)?,
1869            None => self._set_index_in_defaults_builder(config, true)?,
1870        }
1871
1872        Ok(self)
1873    }
1874
1875    /// Delete/disable an index configuration (builder pattern)
1876    ///
1877    /// This method allows disabling indexes on a schema.
1878    /// It matches the Python API's `.delete_index()` method.
1879    ///
1880    /// # Arguments
1881    /// * `key` - Optional key name for per-key index. `None` applies to defaults
1882    /// * `config` - Index configuration to disable
1883    ///
1884    /// # Returns
1885    /// `Self` for method chaining
1886    ///
1887    /// # Errors
1888    /// Returns error if:
1889    /// - Attempting to delete index on special keys (`#document`, `#embedding`)
1890    /// - Attempting to delete vector, FTS, or sparse vector indexes (not currently supported)
1891    ///
1892    /// # Examples
1893    /// ```
1894    /// use chroma_types::{Schema, StringInvertedIndexConfig};
1895    ///
1896    /// # fn main() -> Result<(), SchemaBuilderError> {
1897    /// let schema = Schema::default()
1898    ///     .delete_index(Some("category"), StringInvertedIndexConfig {}.into())?;
1899    /// # Ok(())
1900    /// # }
1901    /// ```
1902    pub fn delete_index(
1903        mut self,
1904        key: Option<&str>,
1905        config: IndexConfig,
1906    ) -> Result<Self, SchemaBuilderError> {
1907        // Validate special keys
1908        if let Some(k) = key {
1909            if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
1910                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
1911                    key: k.to_string(),
1912                });
1913            }
1914        }
1915
1916        // Disallow deleting vector, FTS, and sparse vector indexes (match Python restrictions)
1917        match &config {
1918            IndexConfig::Vector(_) => {
1919                return Err(SchemaBuilderError::VectorIndexDeletionNotSupported);
1920            }
1921            IndexConfig::Fts(_) => {
1922                return Err(SchemaBuilderError::FtsIndexDeletionNotSupported);
1923            }
1924            IndexConfig::SparseVector(_) => {
1925                return Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported);
1926            }
1927            _ => {}
1928        }
1929
1930        // Dispatch to appropriate helper (enabled=false)
1931        match key {
1932            Some(k) => self._set_index_for_key_builder(k, config, false)?,
1933            None => self._set_index_in_defaults_builder(config, false)?,
1934        }
1935
1936        Ok(self)
1937    }
1938
1939    /// Set vector index config globally (applies to #embedding)
1940    fn _set_vector_index_config_builder(&mut self, config: VectorIndexConfig) {
1941        // Update defaults (disabled, just config update)
1942        if let Some(float_list) = &mut self.defaults.float_list {
1943            if let Some(vector_index) = &mut float_list.vector_index {
1944                vector_index.config = config.clone();
1945            }
1946        }
1947
1948        // Update #embedding key (enabled, config update, preserve source_key=#document)
1949        if let Some(embedding_types) = self.keys.get_mut(EMBEDDING_KEY) {
1950            if let Some(float_list) = &mut embedding_types.float_list {
1951                if let Some(vector_index) = &mut float_list.vector_index {
1952                    let mut updated_config = config;
1953                    // Preserve source_key as #document
1954                    updated_config.source_key = Some(DOCUMENT_KEY.to_string());
1955                    vector_index.config = updated_config;
1956                }
1957            }
1958        }
1959    }
1960
1961    /// Set FTS index config globally (applies to #document)
1962    fn _set_fts_index_config_builder(&mut self, config: FtsIndexConfig) {
1963        // Update defaults (disabled, just config update)
1964        if let Some(string) = &mut self.defaults.string {
1965            if let Some(fts_index) = &mut string.fts_index {
1966                fts_index.config = config.clone();
1967            }
1968        }
1969
1970        // Update #document key (enabled, config update)
1971        if let Some(document_types) = self.keys.get_mut(DOCUMENT_KEY) {
1972            if let Some(string) = &mut document_types.string {
1973                if let Some(fts_index) = &mut string.fts_index {
1974                    fts_index.config = config;
1975                }
1976            }
1977        }
1978    }
1979
1980    /// Set index configuration for a specific key
1981    fn _set_index_for_key_builder(
1982        &mut self,
1983        key: &str,
1984        config: IndexConfig,
1985        enabled: bool,
1986    ) -> Result<(), SchemaBuilderError> {
1987        // Check for multiple sparse vector indexes BEFORE getting mutable reference
1988        if enabled && matches!(config, IndexConfig::SparseVector(_)) {
1989            // Find existing sparse vector index
1990            let existing_key = self
1991                .keys
1992                .iter()
1993                .find(|(k, v)| {
1994                    k.as_str() != key
1995                        && v.sparse_vector
1996                            .as_ref()
1997                            .and_then(|sv| sv.sparse_vector_index.as_ref())
1998                            .map(|idx| idx.enabled)
1999                            .unwrap_or(false)
2000                })
2001                .map(|(k, _)| k.clone());
2002
2003            if let Some(existing_key) = existing_key {
2004                return Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key });
2005            }
2006        }
2007
2008        // Get or create ValueTypes for this key
2009        let value_types = self.keys.entry(key.to_string()).or_default();
2010
2011        // Set the appropriate index based on config type
2012        match config {
2013            IndexConfig::Vector(_) => {
2014                return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2015                    key: key.to_string(),
2016                });
2017            }
2018            IndexConfig::Fts(_) => {
2019                return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2020                    key: key.to_string(),
2021                });
2022            }
2023            IndexConfig::SparseVector(cfg) => {
2024                value_types.sparse_vector = Some(SparseVectorValueType {
2025                    sparse_vector_index: Some(SparseVectorIndexType {
2026                        enabled,
2027                        config: cfg,
2028                    }),
2029                });
2030            }
2031            IndexConfig::StringInverted(cfg) => {
2032                if value_types.string.is_none() {
2033                    value_types.string = Some(StringValueType {
2034                        fts_index: None,
2035                        string_inverted_index: None,
2036                    });
2037                }
2038                if let Some(string) = &mut value_types.string {
2039                    string.string_inverted_index = Some(StringInvertedIndexType {
2040                        enabled,
2041                        config: cfg,
2042                    });
2043                }
2044            }
2045            IndexConfig::IntInverted(cfg) => {
2046                value_types.int = Some(IntValueType {
2047                    int_inverted_index: Some(IntInvertedIndexType {
2048                        enabled,
2049                        config: cfg,
2050                    }),
2051                });
2052            }
2053            IndexConfig::FloatInverted(cfg) => {
2054                value_types.float = Some(FloatValueType {
2055                    float_inverted_index: Some(FloatInvertedIndexType {
2056                        enabled,
2057                        config: cfg,
2058                    }),
2059                });
2060            }
2061            IndexConfig::BoolInverted(cfg) => {
2062                value_types.boolean = Some(BoolValueType {
2063                    bool_inverted_index: Some(BoolInvertedIndexType {
2064                        enabled,
2065                        config: cfg,
2066                    }),
2067                });
2068            }
2069        }
2070
2071        Ok(())
2072    }
2073
2074    /// Set index configuration in defaults
2075    fn _set_index_in_defaults_builder(
2076        &mut self,
2077        config: IndexConfig,
2078        enabled: bool,
2079    ) -> Result<(), SchemaBuilderError> {
2080        match config {
2081            IndexConfig::Vector(_) => {
2082                return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2083                    key: "defaults".to_string(),
2084                });
2085            }
2086            IndexConfig::Fts(_) => {
2087                return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2088                    key: "defaults".to_string(),
2089                });
2090            }
2091            IndexConfig::SparseVector(cfg) => {
2092                self.defaults.sparse_vector = Some(SparseVectorValueType {
2093                    sparse_vector_index: Some(SparseVectorIndexType {
2094                        enabled,
2095                        config: cfg,
2096                    }),
2097                });
2098            }
2099            IndexConfig::StringInverted(cfg) => {
2100                if self.defaults.string.is_none() {
2101                    self.defaults.string = Some(StringValueType {
2102                        fts_index: None,
2103                        string_inverted_index: None,
2104                    });
2105                }
2106                if let Some(string) = &mut self.defaults.string {
2107                    string.string_inverted_index = Some(StringInvertedIndexType {
2108                        enabled,
2109                        config: cfg,
2110                    });
2111                }
2112            }
2113            IndexConfig::IntInverted(cfg) => {
2114                self.defaults.int = Some(IntValueType {
2115                    int_inverted_index: Some(IntInvertedIndexType {
2116                        enabled,
2117                        config: cfg,
2118                    }),
2119                });
2120            }
2121            IndexConfig::FloatInverted(cfg) => {
2122                self.defaults.float = Some(FloatValueType {
2123                    float_inverted_index: Some(FloatInvertedIndexType {
2124                        enabled,
2125                        config: cfg,
2126                    }),
2127                });
2128            }
2129            IndexConfig::BoolInverted(cfg) => {
2130                self.defaults.boolean = Some(BoolValueType {
2131                    bool_inverted_index: Some(BoolInvertedIndexType {
2132                        enabled,
2133                        config: cfg,
2134                    }),
2135                });
2136            }
2137        }
2138
2139        Ok(())
2140    }
2141}
2142
2143// ============================================================================
2144// INDEX CONFIGURATION STRUCTURES
2145// ============================================================================
2146
/// Configuration for a vector index.
///
/// Every field is optional. Fields that are `None` are omitted when the config
/// is serialized, and unknown fields are rejected during deserialization.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct VectorIndexConfig {
    /// Vector space for similarity calculation (cosine, l2, ip)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub space: Option<Space>,
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// HNSW algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw: Option<HnswIndexConfig>,
    /// SPANN algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub spann: Option<SpannIndexConfig>,
}
2167
/// Configuration for HNSW vector index algorithm parameters
///
/// Every field is optional. Fields that are `None` are omitted when the config
/// is serialized, and unknown fields are rejected during deserialization.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct HnswIndexConfig {
    /// `ef_construction` parameter; `is_default` compares it with `default_construction_ef()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_construction: Option<usize>,
    /// `max_neighbors` parameter; `is_default` compares it with `default_m()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_neighbors: Option<usize>,
    /// `ef_search` parameter; `is_default` compares it with `default_search_ef()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_search: Option<usize>,
    /// Thread count; skipped by `is_default` because it varies with available parallelism.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_threads: Option<usize>,
    /// Batch size; validation requires at least 2 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub batch_size: Option<usize>,
    /// Sync threshold; validation requires at least 2 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub sync_threshold: Option<usize>,
    /// Resize factor for the index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resize_factor: Option<f64>,
}
2190
2191impl HnswIndexConfig {
2192    /// Check if this config has default values
2193    /// None values are considered default (not set by user)
2194    /// Note: We skip num_threads as it's variable based on available_parallelism
2195    pub fn is_default(&self) -> bool {
2196        if let Some(ef_construction) = self.ef_construction {
2197            if ef_construction != default_construction_ef() {
2198                return false;
2199            }
2200        }
2201        if let Some(max_neighbors) = self.max_neighbors {
2202            if max_neighbors != default_m() {
2203                return false;
2204            }
2205        }
2206        if let Some(ef_search) = self.ef_search {
2207            if ef_search != default_search_ef() {
2208                return false;
2209            }
2210        }
2211        if let Some(batch_size) = self.batch_size {
2212            if batch_size != default_batch_size() {
2213                return false;
2214            }
2215        }
2216        if let Some(sync_threshold) = self.sync_threshold {
2217            if sync_threshold != default_sync_threshold() {
2218                return false;
2219            }
2220        }
2221        if let Some(resize_factor) = self.resize_factor {
2222            if resize_factor != default_resize_factor() {
2223                return false;
2224            }
2225        }
2226        // Skip num_threads check as it's system-dependent
2227        true
2228    }
2229}
2230
/// Configuration for SPANN vector index algorithm parameters
///
/// All parameters are optional; `None` means "not set by the user". Unset
/// fields are omitted when serializing and unknown fields are rejected when
/// deserializing. The per-field comments below restate the bounds enforced by
/// the derived `Validate` impl.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SpannIndexConfig {
    /// Validated to be at most 128.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 128))]
    pub search_nprobe: Option<u32>,
    /// Validation range is 1.0..=1.0, i.e. only exactly 1.0 is accepted.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub search_rng_factor: Option<f32>,
    /// Validated to lie between 5.0 and 10.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub search_rng_epsilon: Option<f32>,
    /// Validated to be at most 8.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub nreplica_count: Option<u32>,
    /// Validation range is 1.0..=1.0, i.e. only exactly 1.0 is accepted.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub write_rng_factor: Option<f32>,
    /// Validated to lie between 5.0 and 10.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub write_rng_epsilon: Option<f32>,
    /// Validated to lie between 50 and 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 50, max = 200))]
    pub split_threshold: Option<u32>,
    /// Validated to be at most 1000.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 1000))]
    pub num_samples_kmeans: Option<usize>,
    /// Validation range is 100.0..=100.0, i.e. only exactly 100.0 is accepted.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 100.0, max = 100.0))]
    pub initial_lambda: Option<f32>,
    /// Validated to be at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub reassign_neighbor_count: Option<u32>,
    /// Validated to lie between 25 and 100.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 25, max = 100))]
    pub merge_threshold: Option<u32>,
    /// Validated to be at most 8.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub num_centers_to_merge_to: Option<u32>,
    /// Validated to be at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub write_nprobe: Option<u32>,
    /// Validated to be at most 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_construction: Option<usize>,
    /// Validated to be at most 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_search: Option<usize>,
    /// Validated to be at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub max_neighbors: Option<usize>,
}
2285
2286impl SpannIndexConfig {
2287    /// Check if this config has default values
2288    /// None values are considered default (not set by user)
2289    pub fn is_default(&self) -> bool {
2290        if let Some(search_nprobe) = self.search_nprobe {
2291            if search_nprobe != default_search_nprobe() {
2292                return false;
2293            }
2294        }
2295        if let Some(search_rng_factor) = self.search_rng_factor {
2296            if search_rng_factor != default_search_rng_factor() {
2297                return false;
2298            }
2299        }
2300        if let Some(search_rng_epsilon) = self.search_rng_epsilon {
2301            if search_rng_epsilon != default_search_rng_epsilon() {
2302                return false;
2303            }
2304        }
2305        if let Some(nreplica_count) = self.nreplica_count {
2306            if nreplica_count != default_nreplica_count() {
2307                return false;
2308            }
2309        }
2310        if let Some(write_rng_factor) = self.write_rng_factor {
2311            if write_rng_factor != default_write_rng_factor() {
2312                return false;
2313            }
2314        }
2315        if let Some(write_rng_epsilon) = self.write_rng_epsilon {
2316            if write_rng_epsilon != default_write_rng_epsilon() {
2317                return false;
2318            }
2319        }
2320        if let Some(split_threshold) = self.split_threshold {
2321            if split_threshold != default_split_threshold() {
2322                return false;
2323            }
2324        }
2325        if let Some(num_samples_kmeans) = self.num_samples_kmeans {
2326            if num_samples_kmeans != default_num_samples_kmeans() {
2327                return false;
2328            }
2329        }
2330        if let Some(initial_lambda) = self.initial_lambda {
2331            if initial_lambda != default_initial_lambda() {
2332                return false;
2333            }
2334        }
2335        if let Some(reassign_neighbor_count) = self.reassign_neighbor_count {
2336            if reassign_neighbor_count != default_reassign_neighbor_count() {
2337                return false;
2338            }
2339        }
2340        if let Some(merge_threshold) = self.merge_threshold {
2341            if merge_threshold != default_merge_threshold() {
2342                return false;
2343            }
2344        }
2345        if let Some(num_centers_to_merge_to) = self.num_centers_to_merge_to {
2346            if num_centers_to_merge_to != default_num_centers_to_merge_to() {
2347                return false;
2348            }
2349        }
2350        if let Some(write_nprobe) = self.write_nprobe {
2351            if write_nprobe != default_write_nprobe() {
2352                return false;
2353            }
2354        }
2355        if let Some(ef_construction) = self.ef_construction {
2356            if ef_construction != default_construction_ef_spann() {
2357                return false;
2358            }
2359        }
2360        if let Some(ef_search) = self.ef_search {
2361            if ef_search != default_search_ef_spann() {
2362                return false;
2363            }
2364        }
2365        if let Some(max_neighbors) = self.max_neighbors {
2366            if max_neighbors != default_m_spann() {
2367                return false;
2368            }
2369        }
2370        true
2371    }
2372}
2373
/// Configuration attached to a sparse vector index.
///
/// Every field is optional. Fields that are `None` are left out of the
/// serialized form, and deserialization rejects unknown fields
/// (`deny_unknown_fields`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SparseVectorIndexConfig {
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the sparse vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// Whether this embedding is BM25
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bm25: Option<bool>,
}
2388
/// Configuration for the FTS index.
///
/// Declares no fields at present. Because of `deny_unknown_fields`,
/// deserialization rejects any input that carries fields.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FtsIndexConfig {
    // FTS index typically has no additional parameters
}
2395
/// Configuration for the string inverted index.
///
/// Declares no fields at present. Because of `deny_unknown_fields`,
/// deserialization rejects any input that carries fields.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct StringInvertedIndexConfig {
    // String inverted index typically has no additional parameters
}
2402
/// Configuration for the integer inverted index.
///
/// Declares no fields at present. Because of `deny_unknown_fields`,
/// deserialization rejects any input that carries fields.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct IntInvertedIndexConfig {
    // Integer inverted index typically has no additional parameters
}
2409
/// Configuration for the float inverted index.
///
/// Declares no fields at present. Because of `deny_unknown_fields`,
/// deserialization rejects any input that carries fields.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FloatInvertedIndexConfig {
    // Float inverted index typically has no additional parameters
}
2416
/// Configuration for the boolean inverted index.
///
/// Declares no fields at present. Because of `deny_unknown_fields`,
/// deserialization rejects any input that carries fields.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct BoolInvertedIndexConfig {
    // Boolean inverted index typically has no additional parameters
}
2423
2424// ============================================================================
2425// BUILDER PATTERN SUPPORT
2426// ============================================================================
2427
/// Union type for all index configurations (used by builder pattern)
///
/// Each variant wraps exactly one of the concrete `*IndexConfig` structs, and
/// a `From` impl exists for every one of them, so any concrete config can be
/// converted with `.into()`.
#[derive(Clone, Debug)]
pub enum IndexConfig {
    /// Wraps a [`VectorIndexConfig`].
    Vector(VectorIndexConfig),
    /// Wraps a [`SparseVectorIndexConfig`].
    SparseVector(SparseVectorIndexConfig),
    /// Wraps an [`FtsIndexConfig`].
    Fts(FtsIndexConfig),
    /// Wraps a [`StringInvertedIndexConfig`].
    StringInverted(StringInvertedIndexConfig),
    /// Wraps an [`IntInvertedIndexConfig`].
    IntInverted(IntInvertedIndexConfig),
    /// Wraps a [`FloatInvertedIndexConfig`].
    FloatInverted(FloatInvertedIndexConfig),
    /// Wraps a [`BoolInvertedIndexConfig`].
    BoolInverted(BoolInvertedIndexConfig),
}
2439
2440// Convenience From implementations for ergonomic usage
2441impl From<VectorIndexConfig> for IndexConfig {
2442    fn from(config: VectorIndexConfig) -> Self {
2443        IndexConfig::Vector(config)
2444    }
2445}
2446
2447impl From<SparseVectorIndexConfig> for IndexConfig {
2448    fn from(config: SparseVectorIndexConfig) -> Self {
2449        IndexConfig::SparseVector(config)
2450    }
2451}
2452
2453impl From<FtsIndexConfig> for IndexConfig {
2454    fn from(config: FtsIndexConfig) -> Self {
2455        IndexConfig::Fts(config)
2456    }
2457}
2458
2459impl From<StringInvertedIndexConfig> for IndexConfig {
2460    fn from(config: StringInvertedIndexConfig) -> Self {
2461        IndexConfig::StringInverted(config)
2462    }
2463}
2464
2465impl From<IntInvertedIndexConfig> for IndexConfig {
2466    fn from(config: IntInvertedIndexConfig) -> Self {
2467        IndexConfig::IntInverted(config)
2468    }
2469}
2470
2471impl From<FloatInvertedIndexConfig> for IndexConfig {
2472    fn from(config: FloatInvertedIndexConfig) -> Self {
2473        IndexConfig::FloatInverted(config)
2474    }
2475}
2476
2477impl From<BoolInvertedIndexConfig> for IndexConfig {
2478    fn from(config: BoolInvertedIndexConfig) -> Self {
2479        IndexConfig::BoolInverted(config)
2480    }
2481}
2482
2483#[cfg(test)]
2484mod tests {
2485    use super::*;
2486    use crate::hnsw_configuration::Space;
2487    use crate::metadata::SparseVector;
2488    use crate::{
2489        EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
2490    };
2491    use serde_json::json;
2492
#[test]
fn test_reconcile_with_defaults_none_user_schema() {
    // Without a user schema, reconciliation must hand back the SPANN default schema.
    let reconciled = Schema::reconcile_with_defaults(None).unwrap();
    assert_eq!(reconciled, Schema::new_default(KnnIndex::Spann));
}
2500
#[test]
fn test_reconcile_with_defaults_empty_user_schema() {
    // A user schema with no defaults and no keys must not alter the default schema.
    let empty_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::new(),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(&empty_schema)).unwrap();

    assert_eq!(reconciled, Schema::new_default(KnnIndex::Spann));
}
2513
#[test]
fn test_reconcile_with_defaults_user_overrides_string_enabled() {
    // The user switches the string inverted index off (the default is on) and
    // says nothing about any other value type.
    let string_override = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: None,
    };
    let user_schema = Schema {
        defaults: ValueTypes {
            string: Some(string_override),
            ..Default::default()
        },
        keys: HashMap::new(),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(&user_schema)).unwrap();

    // The user's `enabled: false` must win over the default.
    let string_index_enabled = reconciled
        .defaults
        .string
        .as_ref()
        .and_then(|string_type| string_type.string_inverted_index.as_ref())
        .map(|index| index.enabled)
        .unwrap();
    assert!(!string_index_enabled);

    // Value types the user did not touch still come from the defaults.
    assert!(reconciled.defaults.float.is_some());
    assert!(reconciled.defaults.int.is_some());
}
2548
#[test]
fn test_reconcile_with_defaults_user_overrides_vector_config() {
    // Field-level merge check: the user sets only some vector-index fields and
    // the rest must be filled in from the HNSW defaults.
    let user_hnsw = HnswIndexConfig {
        ef_construction: Some(500), // the only HNSW field the user sets
        ..Default::default()
    };
    let user_vector_index = VectorIndexType {
        enabled: true, // default is false
        config: VectorIndexConfig {
            space: Some(Space::L2),
            embedding_function: None,
            source_key: Some("custom_key".to_string()),
            hnsw: Some(user_hnsw),
            spann: None,
        },
    };
    let user_schema = Schema {
        defaults: ValueTypes {
            float_list: Some(FloatListValueType {
                vector_index: Some(user_vector_index),
            }),
            ..Default::default()
        },
        keys: HashMap::new(),
    };

    // Merge by hand against the HNSW default schema so that there is a default
    // HNSW config for the user's partial config to be merged with.
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults =
        Schema::merge_value_types(&default_schema.defaults, &user_schema.defaults).unwrap();
    let mut merged_keys = default_schema.keys.clone();
    for (key, user_types) in user_schema.keys {
        let resolved = match merged_keys.get(&key) {
            Some(default_types) => Schema::merge_value_types(default_types, &user_types).unwrap(),
            None => user_types,
        };
        merged_keys.insert(key, resolved);
    }
    let merged = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
    };

    let vector_config = &merged
        .defaults
        .float_list
        .as_ref()
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap()
        .config;

    // Values supplied by the user survive the merge.
    assert_eq!(vector_config.space, Some(Space::L2));
    assert_eq!(vector_config.source_key.as_deref(), Some("custom_key"));
    let merged_hnsw = vector_config.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(500));

    // Fields the user left unset fall back to the defaults.
    assert!(vector_config.embedding_function.is_none());
    assert_eq!(merged_hnsw.max_neighbors, Some(default_m()));
}
2625
#[test]
fn test_reconcile_with_defaults_keys() {
    // A user-defined key override must be carried into the reconciled schema
    // next to the default key overrides.
    let custom_key_types = ValueTypes {
        string: Some(StringValueType {
            fts_index: Some(FtsIndexType {
                enabled: true,
                config: FtsIndexConfig {},
            }),
            string_inverted_index: Some(StringInvertedIndexType {
                enabled: false,
                config: StringInvertedIndexConfig {},
            }),
        }),
        ..Default::default()
    };
    let user_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::from([("custom_key".to_string(), custom_key_types)]),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(&user_schema)).unwrap();

    // The default key overrides are still there.
    assert!(reconciled.keys.contains_key(EMBEDDING_KEY));
    assert!(reconciled.keys.contains_key(DOCUMENT_KEY));

    // The user's key was added, with its FTS index enabled as requested.
    assert!(reconciled.keys.contains_key("custom_key"));
    let fts_enabled = reconciled
        .keys
        .get("custom_key")
        .and_then(|types| types.string.as_ref())
        .and_then(|string_type| string_type.fts_index.as_ref())
        .map(|fts| fts.enabled)
        .unwrap();
    assert!(fts_enabled);
}
2672
#[test]
fn test_reconcile_with_defaults_override_existing_key() {
    // The user overrides a key that already has a default override (#embedding).
    let embedding_override = ValueTypes {
        float_list: Some(FloatListValueType {
            vector_index: Some(VectorIndexType {
                enabled: false, // the default for this key is enabled=true
                config: VectorIndexConfig {
                    space: Some(Space::Ip),
                    embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
                    source_key: Some("custom_embedding_key".to_string()),
                    hnsw: None,
                    spann: None,
                },
            }),
        }),
        ..Default::default()
    };
    let user_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::from([(EMBEDDING_KEY.to_string(), embedding_override)]),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(&user_schema)).unwrap();

    let vector_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .and_then(|types| types.float_list.as_ref())
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();

    // Every value the user supplied must win over the default.
    assert!(!vector_index.enabled);
    assert_eq!(vector_index.config.space, Some(Space::Ip));
    assert_eq!(
        vector_index.config.source_key.as_deref(),
        Some("custom_embedding_key")
    );
}
2720
#[test]
fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
    // HNSW collection config -> schema -> collection config must be lossless.
    let hnsw = InternalHnswConfiguration {
        space: Space::Cosine,
        ef_construction: 128,
        ef_search: 96,
        max_neighbors: 42,
        num_threads: 8,
        resize_factor: 1.5,
        sync_threshold: 2_000,
        batch_size: 256,
    };
    let embedding_function =
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "custom".to_string(),
            config: json!({"alpha": 1}),
        });
    let original = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(hnsw),
        embedding_function: Some(embedding_function),
    };

    let schema = Schema::convert_collection_config_to_schema(&original).unwrap();
    let round_tripped = InternalCollectionConfiguration::try_from(&schema).unwrap();

    assert_eq!(round_tripped, original);
}
2747
#[test]
fn test_convert_schema_to_collection_config_spann_roundtrip() {
    // SPANN collection config -> schema -> collection config must be lossless.
    let embedding_function =
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "custom".to_string(),
            config: json!({"beta": true}),
        });
    let original = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
            space: Space::Cosine,
            search_nprobe: 11,
            search_rng_factor: 1.7,
            write_nprobe: 5,
            nreplica_count: 3,
            split_threshold: 150,
            merge_threshold: 80,
            ef_construction: 120,
            ef_search: 90,
            max_neighbors: 40,
            ..Default::default()
        }),
        embedding_function: Some(embedding_function),
    };

    let schema = Schema::convert_collection_config_to_schema(&original).unwrap();
    let round_tripped = InternalCollectionConfiguration::try_from(&schema).unwrap();

    assert_eq!(round_tripped, original);
}
2779
#[test]
fn test_convert_schema_to_collection_config_rejects_mixed_index() {
    // Start from the HNSW default schema and additionally attach a SPANN
    // config to the embedding key, so both algorithms are configured at once.
    let mut schema = Schema::new_default(KnnIndex::Hnsw);

    // Fail loudly if the default schema does not expose the embedding vector
    // index. A silently skipped setup would leave the schema untouched and let
    // the final assertion pass (or fail) for an unrelated reason.
    let vector_index = schema
        .keys
        .get_mut(EMBEDDING_KEY)
        .and_then(|embedding| embedding.float_list.as_mut())
        .and_then(|float_list| float_list.vector_index.as_mut())
        .expect("default HNSW schema should define a vector index on the embedding key");

    // NOTE(review): several values below (e.g. `search_rng_epsilon`,
    // `write_rng_epsilon`, `initial_lambda`) fall outside the
    // `#[validate(range(...))]` bounds declared on `SpannIndexConfig`. If the
    // conversion validates the SPANN config, the error asserted below could
    // come from validation rather than from the mixed HNSW/SPANN setup — confirm.
    vector_index.config.spann = Some(SpannIndexConfig {
        search_nprobe: Some(1),
        search_rng_factor: Some(1.0),
        search_rng_epsilon: Some(0.1),
        nreplica_count: Some(1),
        write_rng_factor: Some(1.0),
        write_rng_epsilon: Some(0.1),
        split_threshold: Some(100),
        num_samples_kmeans: Some(10),
        initial_lambda: Some(0.5),
        reassign_neighbor_count: Some(10),
        merge_threshold: Some(50),
        num_centers_to_merge_to: Some(3),
        write_nprobe: Some(1),
        ef_construction: Some(50),
        ef_search: Some(40),
        max_neighbors: Some(20),
    });

    let result = InternalCollectionConfiguration::try_from(&schema);
    assert!(result.is_err());
}
2811
#[test]
fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
    // Asking for a (key, value type) pair the schema already covers must
    // report "not modified" and leave the schema exactly as it was.
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    let snapshot = schema.clone();

    let changed = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);

    assert!(!changed);
    assert_eq!(schema, snapshot);
}
2820
#[test]
fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    assert!(!schema.keys.contains_key("custom_field"));

    let changed = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
    assert!(changed);

    let inserted = schema
        .keys
        .get("custom_field")
        .expect("expected new key override to be inserted");

    // Only the requested value type is filled in, copied from the schema defaults.
    assert_eq!(inserted.boolean, schema.defaults.boolean);

    // Every other value type stays unset.
    assert!(inserted.string.is_none());
    assert!(inserted.int.is_none());
    assert!(inserted.float.is_none());
    assert!(inserted.float_list.is_none());
    assert!(inserted.sparse_vector.is_none());
}
2840
#[test]
fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    let key_count_before = schema.keys.len();

    // Seed the key with a string value type only.
    let string_only = ValueTypes {
        string: schema.defaults.string.clone(),
        ..Default::default()
    };
    schema.keys.insert("custom_field".to_string(), string_only);

    let changed = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
    assert!(changed);

    // Only the manual insert above added a key; the ensure call extended the
    // existing entry instead of creating a second one.
    assert_eq!(schema.keys.len(), key_count_before + 1);

    let updated = schema
        .keys
        .get("custom_field")
        .expect("expected key override to exist after ensure call");
    assert!(updated.string.is_some());
    assert_eq!(updated.boolean, schema.defaults.boolean);
}
2864
#[test]
fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
    // A sparse KNN query against a key this test never configured is expected
    // to be rejected with `IndexingDisabled`.
    let schema = Schema::new_default(KnnIndex::Spann);
    let sparse_query = QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]));

    let outcome = schema.is_knn_key_indexing_enabled("custom_sparse", &sparse_query);

    match outcome.expect_err("expected indexing disabled error") {
        FilterValidationError::IndexingDisabled { key, value_type } => {
            assert_eq!(key, "custom_sparse");
            assert_eq!(value_type, MetadataValueType::SparseVector);
        }
        other => panic!("unexpected error variant: {other:?}"),
    }
}
2882
#[test]
fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
    // Register a key whose sparse vector index is explicitly enabled.
    let enabled_sparse_index = SparseVectorIndexType {
        enabled: true,
        config: SparseVectorIndexConfig {
            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
            source_key: None,
            bm25: None,
        },
    };
    let mut schema = Schema::new_default(KnnIndex::Spann);
    schema.keys.insert(
        "sparse_enabled".to_string(),
        ValueTypes {
            sparse_vector: Some(SparseVectorValueType {
                sparse_vector_index: Some(enabled_sparse_index),
            }),
            ..Default::default()
        },
    );

    // A sparse query against that key must be accepted.
    let sparse_query = QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]));
    let outcome = schema.is_knn_key_indexing_enabled("sparse_enabled", &sparse_query);

    assert!(outcome.is_ok());
}
2910
#[test]
fn test_is_knn_key_indexing_enabled_dense_succeeds() {
    // A dense query against the embedding key of the default SPANN schema must be accepted.
    let schema = Schema::new_default(KnnIndex::Spann);
    let dense_query = QueryVector::Dense(vec![0.1_f32, 0.2_f32]);

    let outcome = schema.is_knn_key_indexing_enabled(EMBEDDING_KEY, &dense_query);

    assert!(outcome.is_ok());
}
2921
#[test]
fn test_merge_hnsw_configs_field_level() {
    // Defaults populate every field; the user overrides exactly three of them.
    let defaults = HnswIndexConfig {
        ef_construction: Some(200),
        max_neighbors: Some(16),
        ef_search: Some(10),
        num_threads: Some(4),
        batch_size: Some(100),
        sync_threshold: Some(1000),
        resize_factor: Some(1.2),
    };
    let overrides = HnswIndexConfig {
        ef_construction: Some(300),
        ef_search: Some(20),
        sync_threshold: Some(2000),
        // Everything else is left unset and must come from `defaults`.
        ..Default::default()
    };

    let merged = Schema::merge_hnsw_configs(Some(&defaults), Some(&overrides)).unwrap();

    // The merge is field by field: overrides win where set, defaults fill the rest.
    let expected = HnswIndexConfig {
        ef_construction: Some(300),
        max_neighbors: Some(16),
        ef_search: Some(20),
        num_threads: Some(4),
        batch_size: Some(100),
        sync_threshold: Some(2000),
        resize_factor: Some(1.2),
    };
    assert_eq!(merged.ef_construction, expected.ef_construction);
    assert_eq!(merged.ef_search, expected.ef_search);
    assert_eq!(merged.sync_threshold, expected.sync_threshold);
    assert_eq!(merged.max_neighbors, expected.max_neighbors);
    assert_eq!(merged.num_threads, expected.num_threads);
    assert_eq!(merged.batch_size, expected.batch_size);
    assert_eq!(merged.resize_factor, expected.resize_factor);
}
2958
/// Checks that `Schema::merge_spann_configs` merges SPANN settings per field,
/// preferring user-supplied values and falling back to the default config
/// wherever the user left a field as `None`.
#[test]
fn test_merge_spann_configs_field_level() {
    // Fully populated baseline. The range notes repeat the limits recorded by
    // the original author of this test.
    let base = SpannIndexConfig {
        search_nprobe: Some(10),
        search_rng_factor: Some(1.0),  // exactly 1.0
        search_rng_epsilon: Some(7.0), // within 5.0-10.0
        nreplica_count: Some(3),
        write_rng_factor: Some(1.0),  // exactly 1.0
        write_rng_epsilon: Some(6.0), // within 5.0-10.0
        split_threshold: Some(100),   // within 50-200
        num_samples_kmeans: Some(100),
        initial_lambda: Some(100.0), // exactly 100.0
        reassign_neighbor_count: Some(50),
        merge_threshold: Some(50),        // within 25-100
        num_centers_to_merge_to: Some(4), // at most 8
        write_nprobe: Some(5),
        ef_construction: Some(100),
        ef_search: Some(10),
        max_neighbors: Some(16),
    };

    // The user only touches three knobs; every other field stays `None`.
    let overrides = SpannIndexConfig {
        search_nprobe: Some(20),
        search_rng_epsilon: Some(8.0), // still within 5.0-10.0
        split_threshold: Some(150),    // still within 50-200
        search_rng_factor: None,
        nreplica_count: None,
        write_rng_factor: None,
        write_rng_epsilon: None,
        num_samples_kmeans: None,
        initial_lambda: None,
        reassign_neighbor_count: None,
        merge_threshold: None,
        num_centers_to_merge_to: None,
        write_nprobe: None,
        ef_construction: None,
        ef_search: None,
        max_neighbors: None,
    };

    let merged = Schema::merge_spann_configs(Some(&base), Some(&overrides)).unwrap();

    // The three user-set knobs carry the user's values.
    assert_eq!(merged.search_nprobe, Some(20));
    assert_eq!(merged.search_rng_epsilon, Some(8.0));
    assert_eq!(merged.split_threshold, Some(150));

    // A sample of untouched knobs still holds the baseline values.
    assert_eq!(merged.search_rng_factor, Some(1.0));
    assert_eq!(merged.nreplica_count, Some(3));
    assert_eq!(merged.initial_lambda, Some(100.0));
}
3012
/// Converts a partially filled `SpannIndexConfig` into an
/// `InternalSpannConfiguration` and spot-checks that set fields are carried
/// over, unset fields take the crate defaults, and a missing space resolves
/// to `default_space()`.
#[test]
fn test_spann_index_config_into_internal_configuration() {
    let partial = SpannIndexConfig {
        search_nprobe: Some(33),
        search_rng_factor: Some(1.2),
        search_rng_epsilon: None,
        nreplica_count: None,
        write_rng_factor: Some(1.5),
        write_rng_epsilon: None,
        split_threshold: Some(75),
        num_samples_kmeans: None,
        initial_lambda: Some(0.9),
        reassign_neighbor_count: Some(40),
        merge_threshold: None,
        num_centers_to_merge_to: Some(4),
        write_nprobe: Some(60),
        ef_construction: Some(180),
        ef_search: Some(170),
        max_neighbors: Some(32),
    };

    // Conversion with an explicit distance space.
    let explicit: InternalSpannConfiguration = (Some(&Space::Cosine), &partial).into();
    assert_eq!(explicit.space, Space::Cosine);

    // Fields that were set on the source config.
    assert_eq!(explicit.search_nprobe, 33);
    assert_eq!(explicit.search_rng_factor, 1.2);
    assert_eq!(explicit.write_rng_factor, 1.5);
    assert_eq!(explicit.write_nprobe, 60);
    assert_eq!(explicit.ef_construction, 180);
    assert_eq!(explicit.ef_search, 170);
    assert_eq!(explicit.max_neighbors, 32);

    // Fields that were `None` on the source config pick up the defaults.
    assert_eq!(explicit.search_rng_epsilon, default_search_rng_epsilon());
    assert_eq!(explicit.merge_threshold, default_merge_threshold());

    // Conversion without a space falls back to the default space.
    let implicit: InternalSpannConfiguration = (None, &partial).into();
    assert_eq!(implicit.space, default_space());
}
3049
/// Walks through the four `Option` combinations accepted by
/// `Schema::merge_string_type` (default side x user side) and checks the
/// resulting `StringValueType` for each.
#[test]
fn test_merge_string_type_combinations() {
    let base = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: true,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: Some(FtsIndexType {
            enabled: false,
            config: FtsIndexConfig {},
        }),
    };

    // The user flips the inverted index off and says nothing about FTS.
    let overrides = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: None,
    };

    // Some + Some: merged per index type.
    let both = Schema::merge_string_type(Some(&base), Some(&overrides))
        .unwrap()
        .unwrap();
    // The user's value wins for the inverted index...
    assert_eq!(
        both.string_inverted_index.as_ref().map(|idx| idx.enabled),
        Some(false)
    );
    // ...and the default FTS setting survives.
    assert_eq!(both.fts_index.as_ref().map(|idx| idx.enabled), Some(false));

    // Some + None: the default side is returned.
    let only_default = Schema::merge_string_type(Some(&base), None)
        .unwrap()
        .unwrap();
    assert_eq!(
        only_default
            .string_inverted_index
            .as_ref()
            .map(|idx| idx.enabled),
        Some(true)
    );

    // None + Some: the user side is returned.
    let only_user = Schema::merge_string_type(None, Some(&overrides))
        .unwrap()
        .unwrap();
    assert_eq!(
        only_user
            .string_inverted_index
            .as_ref()
            .map(|idx| idx.enabled),
        Some(false)
    );

    // None + None: nothing to merge, nothing returned.
    assert!(Schema::merge_string_type(None, None).unwrap().is_none());
}
3096
/// Exercises `Schema::merge_vector_index_config` across all of its fields:
/// scalar fields, the nested HNSW config, and a SPANN config that only the
/// user side provides.
#[test]
fn test_merge_vector_index_config_comprehensive() {
    // Default side: HNSW fully specified, no SPANN section.
    let base = VectorIndexConfig {
        space: Some(Space::Cosine),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(200),
            max_neighbors: Some(16),
            ef_search: Some(10),
            num_threads: Some(4),
            batch_size: Some(100),
            sync_threshold: Some(1000),
            resize_factor: Some(1.2),
        }),
        spann: None,
    };

    // User side: replaces space and source key, tweaks one HNSW field, and
    // brings along a SPANN section with a single value set.
    let overrides = VectorIndexConfig {
        space: Some(Space::L2),
        embedding_function: None,
        source_key: Some("user_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(300),
            max_neighbors: None,
            ef_search: None,
            num_threads: None,
            batch_size: None,
            sync_threshold: None,
            resize_factor: None,
        }),
        spann: Some(SpannIndexConfig {
            search_nprobe: Some(15),
            search_rng_factor: None,
            search_rng_epsilon: None,
            nreplica_count: None,
            write_rng_factor: None,
            write_rng_epsilon: None,
            split_threshold: None,
            num_samples_kmeans: None,
            initial_lambda: None,
            reassign_neighbor_count: None,
            merge_threshold: None,
            num_centers_to_merge_to: None,
            write_nprobe: None,
            ef_construction: None,
            ef_search: None,
            max_neighbors: None,
        }),
    };

    let merged = Schema::merge_vector_index_config(&base, &overrides);

    // Top-level fields: user values win, unset ones keep the default.
    assert_eq!(merged.space, Some(Space::L2));
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(merged.source_key.as_deref(), Some("user_key"));

    // Nested HNSW section is merged field by field as well.
    let merged_hnsw = merged.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(300));
    assert_eq!(merged_hnsw.max_neighbors, Some(16));

    // The SPANN section supplied only by the user shows up in the result.
    assert!(merged.spann.is_some());
    let merged_spann = merged.spann.as_ref().unwrap();
    assert_eq!(merged_spann.search_nprobe, Some(15));
}
3167
/// Checks `Schema::merge_sparse_vector_index_config`: the user's source key
/// replaces the default one, while the embedding function the user left
/// unset is inherited from the default config.
#[test]
fn test_merge_sparse_vector_index_config() {
    let base = SparseVectorIndexConfig {
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_sparse_key".to_string()),
        bm25: None,
    };
    let overrides = SparseVectorIndexConfig {
        embedding_function: None,
        source_key: Some("user_sparse_key".to_string()),
        bm25: None,
    };

    let merged = Schema::merge_sparse_vector_index_config(&base, &overrides);

    // User-provided source key wins.
    assert_eq!(merged.source_key.as_deref(), Some("user_sparse_key"));
    // Embedding function falls back to the default.
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
}
3193
/// End-to-end merge scenario: a user schema with custom string and
/// float-list defaults plus one per-key override is layered on top of the
/// default HNSW schema, and the merged defaults and keys are inspected.
#[test]
fn test_complex_nested_merging_scenario() {
    // User defaults: inverted index off, FTS on, and a customised vector index.
    let user_defaults = ValueTypes {
        string: Some(StringValueType {
            string_inverted_index: Some(StringInvertedIndexType {
                enabled: false,
                config: StringInvertedIndexConfig {},
            }),
            fts_index: Some(FtsIndexType {
                enabled: true,
                config: FtsIndexConfig {},
            }),
        }),
        float_list: Some(FloatListValueType {
            vector_index: Some(VectorIndexType {
                enabled: true,
                config: VectorIndexConfig {
                    space: Some(Space::Ip),
                    embedding_function: None, // left unset on purpose
                    source_key: Some("custom_vector_key".to_string()),
                    hnsw: Some(HnswIndexConfig {
                        ef_construction: Some(400),
                        max_neighbors: Some(32),
                        ef_search: None, // expected to come from the default schema
                        num_threads: None,
                        batch_size: None,
                        sync_threshold: None,
                        resize_factor: None,
                    }),
                    spann: None,
                },
            }),
        }),
        ..Default::default()
    };

    // One per-key override that only talks about the FTS index.
    let mut user_keys = HashMap::new();
    user_keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            string: Some(StringValueType {
                fts_index: Some(FtsIndexType {
                    enabled: true,
                    config: FtsIndexConfig {},
                }),
                string_inverted_index: None,
            }),
            ..Default::default()
        },
    );

    let user_schema = Schema {
        defaults: user_defaults,
        keys: user_keys,
    };

    // Merge against the HNSW default schema so there is an HNSW config on the
    // default side for the user's HNSW values to merge with.
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults =
        Schema::merge_value_types(&default_schema.defaults, &user_schema.defaults).unwrap();

    let mut merged_keys = default_schema.keys.clone();
    for (key, user_value_types) in user_schema.keys {
        // Keys already known to the default schema are merged; new keys are
        // taken from the user verbatim.
        let resolved = match merged_keys.get(&key) {
            Some(existing) => Schema::merge_value_types(existing, &user_value_types).unwrap(),
            None => user_value_types,
        };
        merged_keys.insert(key, resolved);
    }

    let result = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
    };

    // String defaults: both user choices survive the merge.
    let string_defaults = result.defaults.string.as_ref().unwrap();
    assert!(
        !string_defaults
            .string_inverted_index
            .as_ref()
            .unwrap()
            .enabled
    );
    assert!(string_defaults.fts_index.as_ref().unwrap().enabled);

    // Vector index defaults: user values where given, default values elsewhere.
    let vector_config = &result
        .defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    assert_eq!(vector_config.space, Some(Space::Ip));
    assert_eq!(vector_config.embedding_function, None);
    assert_eq!(
        vector_config.source_key.as_deref(),
        Some("custom_vector_key")
    );

    let merged_hnsw = vector_config.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(400));
    assert_eq!(merged_hnsw.max_neighbors, Some(32));
    // `ef_search` was left unset by the user, so the default applies.
    assert_eq!(merged_hnsw.ef_search, Some(default_search_ef()));

    // Keys: the built-in entries remain and the user's key is added.
    assert!(result.keys.contains_key(EMBEDDING_KEY));
    assert!(result.keys.contains_key(DOCUMENT_KEY));
    assert!(result.keys.contains_key("custom_field"));

    // The user's key keeps exactly what the user specified.
    let custom_string = result
        .keys
        .get("custom_field")
        .unwrap()
        .string
        .as_ref()
        .unwrap();
    assert!(custom_string.fts_index.as_ref().unwrap().enabled);
    assert!(custom_string.string_inverted_index.is_none());
}
3345
/// Reconciling a default HNSW schema with a default HNSW collection config
/// must hand back a schema equal to the input.
#[test]
fn test_reconcile_with_collection_config_default_config() {
    let default_config = InternalCollectionConfiguration::default_hnsw();
    let default_schema = Schema::new_default(KnnIndex::Hnsw);

    let reconciled =
        Schema::reconcile_with_collection_config(&default_schema, &default_config).unwrap();

    assert_eq!(reconciled, default_schema);
}
3355
/// When the schema and the collection config have both been customised,
/// `Schema::reconcile_schema_and_config` must refuse with
/// `SchemaError::ConfigAndSchemaConflict`.
#[test]
fn test_reconcile_with_collection_config_both_non_default() {
    // Customise the schema by replacing the default string settings.
    let mut custom_schema = Schema::new_default(KnnIndex::Hnsw);
    custom_schema.defaults.string = Some(StringValueType {
        fts_index: Some(FtsIndexType {
            enabled: true,
            config: FtsIndexConfig {},
        }),
        string_inverted_index: None,
    });

    // Customise the collection config by moving one HNSW parameter to 500.
    let mut custom_config = InternalCollectionConfiguration::default_hnsw();
    if let VectorIndexConfiguration::Hnsw(hnsw) = &mut custom_config.vector_index {
        hnsw.ef_construction = 500;
    }

    // `reconcile_schema_and_config` is the entry point that performs the
    // up-front conflict check.
    let outcome = Schema::reconcile_schema_and_config(Some(&custom_schema), Some(&custom_config));
    assert!(matches!(
        outcome,
        Err(SchemaError::ConfigAndSchemaConflict)
    ));
}
3383
/// A customised HNSW collection config reconciled against the default HNSW
/// schema must show up, value for value, on the `EMBEDDING_KEY` vector index.
#[test]
fn test_reconcile_with_collection_config_hnsw_override() {
    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };
    // Untouched default schema on the other side of the reconciliation.
    let default_schema = Schema::new_default(KnnIndex::Hnsw);

    let reconciled =
        Schema::reconcile_with_collection_config(&default_schema, &custom_config).unwrap();

    // Locate the vector index stored under the embedding key.
    let embedding_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    let index_config = &embedding_index.config;

    assert!(embedding_index.enabled);
    assert_eq!(index_config.space, Some(Space::L2));
    assert_eq!(
        index_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(index_config.source_key, Some(DOCUMENT_KEY.to_string()));

    // Every HNSW parameter mirrors the collection config.
    let hnsw = index_config.hnsw.as_ref().unwrap();
    assert_eq!(hnsw.ef_construction, Some(300));
    assert_eq!(hnsw.max_neighbors, Some(32));
    assert_eq!(hnsw.ef_search, Some(50));
    assert_eq!(hnsw.num_threads, Some(8));
    assert_eq!(hnsw.batch_size, Some(200));
    assert_eq!(hnsw.sync_threshold, Some(2000));
    assert_eq!(hnsw.resize_factor, Some(1.5));

    // No SPANN section is expected for an HNSW collection config.
    assert!(index_config.spann.is_none());
}
3437
/// A customised SPANN collection config reconciled against the default SPANN
/// schema must show up, value for value, on the `EMBEDDING_KEY` vector index.
#[test]
fn test_reconcile_with_collection_config_spann_override() {
    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
            search_nprobe: 20,
            search_rng_factor: 3.0,
            search_rng_epsilon: 0.2,
            nreplica_count: 5,
            write_rng_factor: 2.0,
            write_rng_epsilon: 0.1,
            split_threshold: 2000,
            num_samples_kmeans: 200,
            initial_lambda: 0.8,
            reassign_neighbor_count: 100,
            merge_threshold: 800,
            num_centers_to_merge_to: 20,
            write_nprobe: 10,
            ef_construction: 400,
            ef_search: 60,
            max_neighbors: 24,
            space: Space::Cosine,
        }),
        embedding_function: None,
    };
    // Untouched default schema on the other side of the reconciliation.
    let default_schema = Schema::new_default(KnnIndex::Spann);

    let reconciled =
        Schema::reconcile_with_collection_config(&default_schema, &custom_config).unwrap();

    // Locate the vector index stored under the embedding key.
    let embedding_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    let index_config = &embedding_index.config;

    assert!(embedding_index.enabled);
    assert_eq!(index_config.space, Some(Space::Cosine));
    assert_eq!(index_config.embedding_function, None);
    assert_eq!(index_config.source_key, Some(DOCUMENT_KEY.to_string()));

    // No HNSW section is expected for a SPANN collection config.
    assert!(index_config.hnsw.is_none());

    // Every SPANN parameter mirrors the collection config.
    let spann = index_config.spann.as_ref().unwrap();
    assert_eq!(spann.search_nprobe, Some(20));
    assert_eq!(spann.search_rng_factor, Some(3.0));
    assert_eq!(spann.search_rng_epsilon, Some(0.2));
    assert_eq!(spann.nreplica_count, Some(5));
    assert_eq!(spann.write_rng_factor, Some(2.0));
    assert_eq!(spann.write_rng_epsilon, Some(0.1));
    assert_eq!(spann.split_threshold, Some(2000));
    assert_eq!(spann.num_samples_kmeans, Some(200));
    assert_eq!(spann.initial_lambda, Some(0.8));
    assert_eq!(spann.reassign_neighbor_count, Some(100));
    assert_eq!(spann.merge_threshold, Some(800));
    assert_eq!(spann.num_centers_to_merge_to, Some(20));
    assert_eq!(spann.write_nprobe, Some(10));
    assert_eq!(spann.ef_construction, Some(400));
    assert_eq!(spann.ef_search, Some(60));
    assert_eq!(spann.max_neighbors, Some(24));
}
3506
/// Reconciling with a customised HNSW collection config must write the new
/// settings into two places: the schema-wide `defaults.float_list.vector_index`
/// (which stays disabled) and the vector index stored under `EMBEDDING_KEY`
/// (which is enabled).
#[test]
fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };
    let default_schema = Schema::new_default(KnnIndex::Hnsw);

    let reconciled =
        Schema::reconcile_with_collection_config(&default_schema, &custom_config).unwrap();

    // --- Schema-wide defaults ---------------------------------------------
    let from_defaults = reconciled
        .defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();

    // The defaults entry stays disabled, yet its config reflects the
    // collection config.
    assert!(!from_defaults.enabled);
    assert_eq!(from_defaults.config.space, Some(Space::L2));
    assert_eq!(
        from_defaults.config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(
        from_defaults.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
    let defaults_hnsw = from_defaults.config.hnsw.as_ref().unwrap();
    assert_eq!(defaults_hnsw.ef_construction, Some(300));
    assert_eq!(defaults_hnsw.max_neighbors, Some(32));

    // --- Entry under the embedding key ------------------------------------
    let from_embedding_key = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();

    // The embedding entry is enabled and carries the same config values.
    assert!(from_embedding_key.enabled);
    assert_eq!(from_embedding_key.config.space, Some(Space::L2));
    assert_eq!(
        from_embedding_key.config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(
        from_embedding_key.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
    let embedding_hnsw = from_embedding_key.config.hnsw.as_ref().unwrap();
    assert_eq!(embedding_hnsw.ef_construction, Some(300));
    assert_eq!(embedding_hnsw.max_neighbors, Some(32));
}
3581
/// `Schema::is_default` must be true for freshly built default schemas of
/// either index kind and false once the defaults or the key set are changed.
#[test]
fn test_is_schema_default() {
    // Pristine default schemas, one per index kind.
    assert!(Schema::new_default(KnnIndex::Hnsw).is_default());
    assert!(Schema::new_default(KnnIndex::Spann).is_default());

    // Flipping the string inverted index to disabled makes the schema
    // non-default.
    let mut tweaked = Schema::new_default(KnnIndex::Hnsw);
    let inverted_index = tweaked
        .defaults
        .string
        .as_mut()
        .and_then(|string_type| string_type.string_inverted_index.as_mut());
    if let Some(index) = inverted_index {
        index.enabled = false;
    }
    assert!(!tweaked.is_default());

    // Adding a key that the default schema does not have also makes it
    // non-default.
    let mut extended = Schema::new_default(KnnIndex::Hnsw);
    extended
        .keys
        .insert("custom_key".to_string(), ValueTypes::default());
    assert!(!extended.is_default());
}
3608
/// Two schemas that configure the same key for different value types
/// (string on one side, float on the other) must merge into a single key
/// entry that carries both configurations.
#[test]
fn test_add_merges_keys_by_value_type() {
    // Left schema: "custom_field" gets a string inverted index.
    let mut left = Schema::new_default(KnnIndex::Hnsw);
    left.keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            string: Some(StringValueType {
                string_inverted_index: Some(StringInvertedIndexType {
                    enabled: true,
                    config: StringInvertedIndexConfig {},
                }),
                fts_index: None,
            }),
            ..Default::default()
        },
    );

    // Right schema: the same key gets a float inverted index.
    let mut right = Schema::new_default(KnnIndex::Hnsw);
    right.keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            float: Some(FloatValueType {
                float_inverted_index: Some(FloatInvertedIndexType {
                    enabled: true,
                    config: FloatInvertedIndexConfig {},
                }),
            }),
            ..Default::default()
        },
    );

    let combined = left.merge(&right).unwrap();
    let field = combined.keys.get("custom_field").unwrap();

    // Both value types are present on the merged key...
    assert!(field.string.is_some());
    assert!(field.float.is_some());

    // ...and each keeps its index enabled.
    let string_index = field
        .string
        .as_ref()
        .unwrap()
        .string_inverted_index
        .as_ref()
        .unwrap();
    assert!(string_index.enabled);

    let float_index = field
        .float
        .as_ref()
        .unwrap()
        .float_inverted_index
        .as_ref()
        .unwrap();
    assert!(float_index.enabled);
}
3667
/// Merging two schemas whose defaults differ must fail with
/// `SchemaError::DefaultsMismatch`.
#[test]
fn test_add_rejects_different_defaults() {
    let left = Schema::new_default(KnnIndex::Hnsw);

    // Make the right-hand defaults diverge by disabling the string inverted
    // index.
    let mut right = Schema::new_default(KnnIndex::Hnsw);
    let inverted_index = right
        .defaults
        .string
        .as_mut()
        .and_then(|string_type| string_type.string_inverted_index.as_mut());
    if let Some(index) = inverted_index {
        index.enabled = false;
    }

    let failure = left.merge(&right).unwrap_err();
    assert!(matches!(failure, SchemaError::DefaultsMismatch));
}
3682
/// Two schemas that configure the same key and value type with opposite
/// `enabled` flags must fail to merge with
/// `SchemaError::ConfigurationConflict`.
#[test]
fn test_add_detects_conflicting_value_type_configuration() {
    // Left schema enables the string inverted index on "custom_field".
    let mut left = Schema::new_default(KnnIndex::Hnsw);
    left.keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            string: Some(StringValueType {
                string_inverted_index: Some(StringInvertedIndexType {
                    enabled: true,
                    config: StringInvertedIndexConfig {},
                }),
                fts_index: None,
            }),
            ..Default::default()
        },
    );

    // Right schema disables the very same index on the very same key.
    let mut right = Schema::new_default(KnnIndex::Hnsw);
    right.keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            string: Some(StringValueType {
                string_inverted_index: Some(StringInvertedIndexType {
                    enabled: false,
                    config: StringInvertedIndexConfig {},
                }),
                fts_index: None,
            }),
            ..Default::default()
        },
    );

    let failure = left.merge(&right).unwrap_err();
    assert!(matches!(
        failure,
        SchemaError::ConfigurationConflict { .. }
    ));
}
3719
3720    // TODO(Sanket): Remove this test once deployed
3721    #[test]
3722    fn test_backward_compatibility_aliases() {
3723        // Test that old format with # and $ prefixes and key_overrides can be deserialized
3724        let old_format_json = r###"{
3725            "defaults": {
3726                "#string": {
3727                    "$fts_index": {
3728                        "enabled": true,
3729                        "config": {}
3730                    }
3731                },
3732                "#int": {
3733                    "$int_inverted_index": {
3734                        "enabled": true,
3735                        "config": {}
3736                    }
3737                },
3738                "#float_list": {
3739                    "$vector_index": {
3740                        "enabled": true,
3741                        "config": {
3742                            "spann": {
3743                                "search_nprobe": 10
3744                            }
3745                        }
3746                    }
3747                }
3748            },
3749            "key_overrides": {
3750                "#document": {
3751                    "#string": {
3752                        "$fts_index": {
3753                            "enabled": false,
3754                            "config": {}
3755                        }
3756                    }
3757                }
3758            }
3759        }"###;
3760
3761        let schema_from_old: Schema = serde_json::from_str(old_format_json).unwrap();
3762
3763        // Test that new format without prefixes and keys can be deserialized
3764        let new_format_json = r###"{
3765            "defaults": {
3766                "string": {
3767                    "fts_index": {
3768                        "enabled": true,
3769                        "config": {}
3770                    }
3771                },
3772                "int": {
3773                    "int_inverted_index": {
3774                        "enabled": true,
3775                        "config": {}
3776                    }
3777                },
3778                "float_list": {
3779                    "vector_index": {
3780                        "enabled": true,
3781                        "config": {
3782                            "spann": {
3783                                "search_nprobe": 10
3784                            }
3785                        }
3786                    }
3787                }
3788            },
3789            "keys": {
3790                "#document": {
3791                    "string": {
3792                        "fts_index": {
3793                            "enabled": false,
3794                            "config": {}
3795                        }
3796                    }
3797                }
3798            }
3799        }"###;
3800
3801        let schema_from_new: Schema = serde_json::from_str(new_format_json).unwrap();
3802
3803        // Both should deserialize to the same structure
3804        assert_eq!(schema_from_old, schema_from_new);
3805
3806        // Verify the deserialized content is correct
3807        assert!(schema_from_old.defaults.string.is_some());
3808        assert!(schema_from_old
3809            .defaults
3810            .string
3811            .as_ref()
3812            .unwrap()
3813            .fts_index
3814            .is_some());
3815        assert!(
3816            schema_from_old
3817                .defaults
3818                .string
3819                .as_ref()
3820                .unwrap()
3821                .fts_index
3822                .as_ref()
3823                .unwrap()
3824                .enabled
3825        );
3826
3827        assert!(schema_from_old.defaults.int.is_some());
3828        assert!(schema_from_old
3829            .defaults
3830            .int
3831            .as_ref()
3832            .unwrap()
3833            .int_inverted_index
3834            .is_some());
3835
3836        assert!(schema_from_old.defaults.float_list.is_some());
3837        assert!(schema_from_old
3838            .defaults
3839            .float_list
3840            .as_ref()
3841            .unwrap()
3842            .vector_index
3843            .is_some());
3844
3845        assert!(schema_from_old.keys.contains_key(DOCUMENT_KEY));
3846        let doc_override = schema_from_old.keys.get(DOCUMENT_KEY).unwrap();
3847        assert!(doc_override.string.is_some());
3848        assert!(
3849            !doc_override
3850                .string
3851                .as_ref()
3852                .unwrap()
3853                .fts_index
3854                .as_ref()
3855                .unwrap()
3856                .enabled
3857        );
3858
3859        // Test that serialization always outputs the new format (without prefixes)
3860        let serialized = serde_json::to_string(&schema_from_old).unwrap();
3861
3862        // Should contain new format keys
3863        assert!(serialized.contains(r#""keys":"#));
3864        assert!(serialized.contains(r#""string":"#));
3865        assert!(serialized.contains(r#""fts_index":"#));
3866        assert!(serialized.contains(r#""int_inverted_index":"#));
3867        assert!(serialized.contains(r#""vector_index":"#));
3868
3869        // Should NOT contain old format keys
3870        assert!(!serialized.contains(r#""key_overrides":"#));
3871        assert!(!serialized.contains(r###""#string":"###));
3872        assert!(!serialized.contains(r###""$fts_index":"###));
3873        assert!(!serialized.contains(r###""$int_inverted_index":"###));
3874        assert!(!serialized.contains(r###""$vector_index":"###));
3875    }
3876
3877    #[test]
3878    fn test_hnsw_index_config_validation() {
3879        use validator::Validate;
3880
3881        // Valid configuration - should pass
3882        let valid_config = HnswIndexConfig {
3883            batch_size: Some(10),
3884            sync_threshold: Some(100),
3885            ef_construction: Some(100),
3886            max_neighbors: Some(16),
3887            ..Default::default()
3888        };
3889        assert!(valid_config.validate().is_ok());
3890
3891        // Invalid: batch_size too small (min 2)
3892        let invalid_batch_size = HnswIndexConfig {
3893            batch_size: Some(1),
3894            ..Default::default()
3895        };
3896        assert!(invalid_batch_size.validate().is_err());
3897
3898        // Invalid: sync_threshold too small (min 2)
3899        let invalid_sync_threshold = HnswIndexConfig {
3900            sync_threshold: Some(1),
3901            ..Default::default()
3902        };
3903        assert!(invalid_sync_threshold.validate().is_err());
3904
3905        // Valid: boundary values (exactly 2) should pass
3906        let boundary_config = HnswIndexConfig {
3907            batch_size: Some(2),
3908            sync_threshold: Some(2),
3909            ..Default::default()
3910        };
3911        assert!(boundary_config.validate().is_ok());
3912
3913        // Valid: None values should pass validation
3914        let all_none_config = HnswIndexConfig {
3915            ..Default::default()
3916        };
3917        assert!(all_none_config.validate().is_ok());
3918
3919        // Valid: fields without validation can be any value
3920        let other_fields_config = HnswIndexConfig {
3921            ef_construction: Some(1),
3922            max_neighbors: Some(1),
3923            ef_search: Some(1),
3924            num_threads: Some(1),
3925            resize_factor: Some(0.1),
3926            ..Default::default()
3927        };
3928        assert!(other_fields_config.validate().is_ok());
3929    }
3930
3931    #[test]
3932    fn test_spann_index_config_validation() {
3933        use validator::Validate;
3934
3935        // Valid configuration - should pass
3936        let valid_config = SpannIndexConfig {
3937            write_nprobe: Some(32),
3938            nreplica_count: Some(4),
3939            split_threshold: Some(100),
3940            merge_threshold: Some(50),
3941            reassign_neighbor_count: Some(32),
3942            num_centers_to_merge_to: Some(4),
3943            ef_construction: Some(100),
3944            ef_search: Some(100),
3945            max_neighbors: Some(32),
3946            search_rng_factor: Some(1.0),
3947            write_rng_factor: Some(1.0),
3948            search_rng_epsilon: Some(7.5),
3949            write_rng_epsilon: Some(7.5),
3950            ..Default::default()
3951        };
3952        assert!(valid_config.validate().is_ok());
3953
3954        // Invalid: write_nprobe too large (max 64)
3955        let invalid_write_nprobe = SpannIndexConfig {
3956            write_nprobe: Some(200),
3957            ..Default::default()
3958        };
3959        assert!(invalid_write_nprobe.validate().is_err());
3960
3961        // Invalid: split_threshold too small (min 50)
3962        let invalid_split_threshold = SpannIndexConfig {
3963            split_threshold: Some(10),
3964            ..Default::default()
3965        };
3966        assert!(invalid_split_threshold.validate().is_err());
3967
3968        // Invalid: split_threshold too large (max 200)
3969        let invalid_split_threshold_high = SpannIndexConfig {
3970            split_threshold: Some(250),
3971            ..Default::default()
3972        };
3973        assert!(invalid_split_threshold_high.validate().is_err());
3974
3975        // Invalid: nreplica_count too large (max 8)
3976        let invalid_nreplica = SpannIndexConfig {
3977            nreplica_count: Some(10),
3978            ..Default::default()
3979        };
3980        assert!(invalid_nreplica.validate().is_err());
3981
3982        // Invalid: reassign_neighbor_count too large (max 64)
3983        let invalid_reassign = SpannIndexConfig {
3984            reassign_neighbor_count: Some(100),
3985            ..Default::default()
3986        };
3987        assert!(invalid_reassign.validate().is_err());
3988
3989        // Invalid: merge_threshold out of range (min 25, max 100)
3990        let invalid_merge_threshold_low = SpannIndexConfig {
3991            merge_threshold: Some(5),
3992            ..Default::default()
3993        };
3994        assert!(invalid_merge_threshold_low.validate().is_err());
3995
3996        let invalid_merge_threshold_high = SpannIndexConfig {
3997            merge_threshold: Some(150),
3998            ..Default::default()
3999        };
4000        assert!(invalid_merge_threshold_high.validate().is_err());
4001
4002        // Invalid: num_centers_to_merge_to too large (max 8)
4003        let invalid_num_centers = SpannIndexConfig {
4004            num_centers_to_merge_to: Some(10),
4005            ..Default::default()
4006        };
4007        assert!(invalid_num_centers.validate().is_err());
4008
4009        // Invalid: ef_construction too large (max 200)
4010        let invalid_ef_construction = SpannIndexConfig {
4011            ef_construction: Some(300),
4012            ..Default::default()
4013        };
4014        assert!(invalid_ef_construction.validate().is_err());
4015
4016        // Invalid: ef_search too large (max 200)
4017        let invalid_ef_search = SpannIndexConfig {
4018            ef_search: Some(300),
4019            ..Default::default()
4020        };
4021        assert!(invalid_ef_search.validate().is_err());
4022
4023        // Invalid: max_neighbors too large (max 64)
4024        let invalid_max_neighbors = SpannIndexConfig {
4025            max_neighbors: Some(100),
4026            ..Default::default()
4027        };
4028        assert!(invalid_max_neighbors.validate().is_err());
4029
4030        // Invalid: search_nprobe too large (max 128)
4031        let invalid_search_nprobe = SpannIndexConfig {
4032            search_nprobe: Some(200),
4033            ..Default::default()
4034        };
4035        assert!(invalid_search_nprobe.validate().is_err());
4036
4037        // Invalid: search_rng_factor not exactly 1.0 (min 1.0, max 1.0)
4038        let invalid_search_rng_factor_low = SpannIndexConfig {
4039            search_rng_factor: Some(0.9),
4040            ..Default::default()
4041        };
4042        assert!(invalid_search_rng_factor_low.validate().is_err());
4043
4044        let invalid_search_rng_factor_high = SpannIndexConfig {
4045            search_rng_factor: Some(1.1),
4046            ..Default::default()
4047        };
4048        assert!(invalid_search_rng_factor_high.validate().is_err());
4049
4050        // Valid: search_rng_factor exactly 1.0
4051        let valid_search_rng_factor = SpannIndexConfig {
4052            search_rng_factor: Some(1.0),
4053            ..Default::default()
4054        };
4055        assert!(valid_search_rng_factor.validate().is_ok());
4056
4057        // Invalid: search_rng_epsilon out of range (min 5.0, max 10.0)
4058        let invalid_search_rng_epsilon_low = SpannIndexConfig {
4059            search_rng_epsilon: Some(4.0),
4060            ..Default::default()
4061        };
4062        assert!(invalid_search_rng_epsilon_low.validate().is_err());
4063
4064        let invalid_search_rng_epsilon_high = SpannIndexConfig {
4065            search_rng_epsilon: Some(11.0),
4066            ..Default::default()
4067        };
4068        assert!(invalid_search_rng_epsilon_high.validate().is_err());
4069
4070        // Valid: search_rng_epsilon within range
4071        let valid_search_rng_epsilon = SpannIndexConfig {
4072            search_rng_epsilon: Some(7.5),
4073            ..Default::default()
4074        };
4075        assert!(valid_search_rng_epsilon.validate().is_ok());
4076
4077        // Invalid: write_rng_factor not exactly 1.0 (min 1.0, max 1.0)
4078        let invalid_write_rng_factor_low = SpannIndexConfig {
4079            write_rng_factor: Some(0.9),
4080            ..Default::default()
4081        };
4082        assert!(invalid_write_rng_factor_low.validate().is_err());
4083
4084        let invalid_write_rng_factor_high = SpannIndexConfig {
4085            write_rng_factor: Some(1.1),
4086            ..Default::default()
4087        };
4088        assert!(invalid_write_rng_factor_high.validate().is_err());
4089
4090        // Valid: write_rng_factor exactly 1.0
4091        let valid_write_rng_factor = SpannIndexConfig {
4092            write_rng_factor: Some(1.0),
4093            ..Default::default()
4094        };
4095        assert!(valid_write_rng_factor.validate().is_ok());
4096
4097        // Invalid: write_rng_epsilon out of range (min 5.0, max 10.0)
4098        let invalid_write_rng_epsilon_low = SpannIndexConfig {
4099            write_rng_epsilon: Some(4.0),
4100            ..Default::default()
4101        };
4102        assert!(invalid_write_rng_epsilon_low.validate().is_err());
4103
4104        let invalid_write_rng_epsilon_high = SpannIndexConfig {
4105            write_rng_epsilon: Some(11.0),
4106            ..Default::default()
4107        };
4108        assert!(invalid_write_rng_epsilon_high.validate().is_err());
4109
4110        // Valid: write_rng_epsilon within range
4111        let valid_write_rng_epsilon = SpannIndexConfig {
4112            write_rng_epsilon: Some(7.5),
4113            ..Default::default()
4114        };
4115        assert!(valid_write_rng_epsilon.validate().is_ok());
4116
4117        // Invalid: num_samples_kmeans too large (max 1000)
4118        let invalid_num_samples_kmeans = SpannIndexConfig {
4119            num_samples_kmeans: Some(1500),
4120            ..Default::default()
4121        };
4122        assert!(invalid_num_samples_kmeans.validate().is_err());
4123
4124        // Valid: num_samples_kmeans within range
4125        let valid_num_samples_kmeans = SpannIndexConfig {
4126            num_samples_kmeans: Some(500),
4127            ..Default::default()
4128        };
4129        assert!(valid_num_samples_kmeans.validate().is_ok());
4130
4131        // Invalid: initial_lambda not exactly 100.0 (min 100.0, max 100.0)
4132        let invalid_initial_lambda_high = SpannIndexConfig {
4133            initial_lambda: Some(150.0),
4134            ..Default::default()
4135        };
4136        assert!(invalid_initial_lambda_high.validate().is_err());
4137
4138        let invalid_initial_lambda_low = SpannIndexConfig {
4139            initial_lambda: Some(50.0),
4140            ..Default::default()
4141        };
4142        assert!(invalid_initial_lambda_low.validate().is_err());
4143
4144        // Valid: initial_lambda exactly 100.0
4145        let valid_initial_lambda = SpannIndexConfig {
4146            initial_lambda: Some(100.0),
4147            ..Default::default()
4148        };
4149        assert!(valid_initial_lambda.validate().is_ok());
4150
4151        // Valid: None values should pass validation
4152        let all_none_config = SpannIndexConfig {
4153            ..Default::default()
4154        };
4155        assert!(all_none_config.validate().is_ok());
4156    }
4157
4158    #[test]
4159    fn test_builder_pattern_crud_workflow() {
4160        // Test comprehensive CRUD workflow using the builder pattern
4161
4162        // CREATE: Build a schema with multiple indexes
4163        let schema = Schema::new_default(KnnIndex::Hnsw)
4164            .create_index(
4165                None,
4166                IndexConfig::Vector(VectorIndexConfig {
4167                    space: Some(Space::Cosine),
4168                    embedding_function: None,
4169                    source_key: None,
4170                    hnsw: Some(HnswIndexConfig {
4171                        ef_construction: Some(200),
4172                        max_neighbors: Some(32),
4173                        ef_search: Some(50),
4174                        num_threads: None,
4175                        batch_size: None,
4176                        sync_threshold: None,
4177                        resize_factor: None,
4178                    }),
4179                    spann: None,
4180                }),
4181            )
4182            .expect("vector config should succeed")
4183            .create_index(
4184                Some("category"),
4185                IndexConfig::StringInverted(StringInvertedIndexConfig {}),
4186            )
4187            .expect("string inverted on key should succeed")
4188            .create_index(
4189                Some("year"),
4190                IndexConfig::IntInverted(IntInvertedIndexConfig {}),
4191            )
4192            .expect("int inverted on key should succeed")
4193            .create_index(
4194                Some("rating"),
4195                IndexConfig::FloatInverted(FloatInvertedIndexConfig {}),
4196            )
4197            .expect("float inverted on key should succeed")
4198            .create_index(
4199                Some("is_active"),
4200                IndexConfig::BoolInverted(BoolInvertedIndexConfig {}),
4201            )
4202            .expect("bool inverted on key should succeed");
4203
4204        // READ: Verify the schema was built correctly
4205        // Check vector config
4206        assert!(schema.keys.contains_key(EMBEDDING_KEY));
4207        let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
4208        assert!(embedding.float_list.is_some());
4209        let vector_index = embedding
4210            .float_list
4211            .as_ref()
4212            .unwrap()
4213            .vector_index
4214            .as_ref()
4215            .unwrap();
4216        assert!(vector_index.enabled);
4217        assert_eq!(vector_index.config.space, Some(Space::Cosine));
4218        assert_eq!(
4219            vector_index.config.hnsw.as_ref().unwrap().ef_construction,
4220            Some(200)
4221        );
4222
4223        // Check per-key indexes
4224        assert!(schema.keys.contains_key("category"));
4225        assert!(schema.keys.contains_key("year"));
4226        assert!(schema.keys.contains_key("rating"));
4227        assert!(schema.keys.contains_key("is_active"));
4228
4229        // Verify category string inverted index
4230        let category = schema.keys.get("category").unwrap();
4231        assert!(category.string.is_some());
4232        let string_idx = category
4233            .string
4234            .as_ref()
4235            .unwrap()
4236            .string_inverted_index
4237            .as_ref()
4238            .unwrap();
4239        assert!(string_idx.enabled);
4240
4241        // Verify year int inverted index
4242        let year = schema.keys.get("year").unwrap();
4243        assert!(year.int.is_some());
4244        let int_idx = year
4245            .int
4246            .as_ref()
4247            .unwrap()
4248            .int_inverted_index
4249            .as_ref()
4250            .unwrap();
4251        assert!(int_idx.enabled);
4252
4253        // UPDATE/DELETE: Disable some indexes
4254        let schema = schema
4255            .delete_index(
4256                Some("category"),
4257                IndexConfig::StringInverted(StringInvertedIndexConfig {}),
4258            )
4259            .expect("delete string inverted should succeed")
4260            .delete_index(
4261                Some("year"),
4262                IndexConfig::IntInverted(IntInvertedIndexConfig {}),
4263            )
4264            .expect("delete int inverted should succeed");
4265
4266        // VERIFY DELETE: Check that indexes were disabled
4267        let category = schema.keys.get("category").unwrap();
4268        let string_idx = category
4269            .string
4270            .as_ref()
4271            .unwrap()
4272            .string_inverted_index
4273            .as_ref()
4274            .unwrap();
4275        assert!(!string_idx.enabled); // Should be disabled now
4276
4277        let year = schema.keys.get("year").unwrap();
4278        let int_idx = year
4279            .int
4280            .as_ref()
4281            .unwrap()
4282            .int_inverted_index
4283            .as_ref()
4284            .unwrap();
4285        assert!(!int_idx.enabled); // Should be disabled now
4286
4287        // Verify other indexes still enabled
4288        let rating = schema.keys.get("rating").unwrap();
4289        let float_idx = rating
4290            .float
4291            .as_ref()
4292            .unwrap()
4293            .float_inverted_index
4294            .as_ref()
4295            .unwrap();
4296        assert!(float_idx.enabled); // Should still be enabled
4297
4298        let is_active = schema.keys.get("is_active").unwrap();
4299        let bool_idx = is_active
4300            .boolean
4301            .as_ref()
4302            .unwrap()
4303            .bool_inverted_index
4304            .as_ref()
4305            .unwrap();
4306        assert!(bool_idx.enabled); // Should still be enabled
4307    }
4308
4309    #[test]
4310    fn test_builder_create_index_validation_errors() {
4311        // Test all validation errors for create_index() as documented in the docstring:
4312        // - Attempting to create index on special keys (#document, #embedding)
4313        // - Invalid configuration (e.g., vector index on non-embedding key)
4314        // - Conflicting with existing indexes (e.g., multiple sparse vector indexes)
4315
4316        // Error: Vector index on specific key (must be global)
4317        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
4318            Some("my_vectors"),
4319            IndexConfig::Vector(VectorIndexConfig {
4320                space: Some(Space::L2),
4321                embedding_function: None,
4322                source_key: None,
4323                hnsw: None,
4324                spann: None,
4325            }),
4326        );
4327        assert!(result.is_err());
4328        assert!(matches!(
4329            result.unwrap_err(),
4330            SchemaBuilderError::VectorIndexMustBeGlobal { key } if key == "my_vectors"
4331        ));
4332
4333        // Error: FTS index on specific key (must be global)
4334        let result = Schema::new_default(KnnIndex::Hnsw)
4335            .create_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
4336        assert!(result.is_err());
4337        assert!(matches!(
4338            result.unwrap_err(),
4339            SchemaBuilderError::FtsIndexMustBeGlobal { key } if key == "my_text"
4340        ));
4341
4342        // Error: Cannot create index on special key #document
4343        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
4344            Some(DOCUMENT_KEY),
4345            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
4346        );
4347        assert!(result.is_err());
4348        assert!(matches!(
4349            result.unwrap_err(),
4350            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
4351        ));
4352
4353        // Error: Cannot create index on special key #embedding
4354        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
4355            Some(EMBEDDING_KEY),
4356            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
4357        );
4358        assert!(result.is_err());
4359        assert!(matches!(
4360            result.unwrap_err(),
4361            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
4362        ));
4363
4364        // Error: Sparse vector without key (must specify key)
4365        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
4366            None,
4367            IndexConfig::SparseVector(SparseVectorIndexConfig {
4368                embedding_function: None,
4369                source_key: None,
4370                bm25: None,
4371            }),
4372        );
4373        assert!(result.is_err());
4374        assert!(matches!(
4375            result.unwrap_err(),
4376            SchemaBuilderError::SparseVectorRequiresKey
4377        ));
4378
4379        // Error: Multiple sparse vector indexes (only one allowed per collection)
4380        let result = Schema::new_default(KnnIndex::Hnsw)
4381            .create_index(
4382                Some("sparse1"),
4383                IndexConfig::SparseVector(SparseVectorIndexConfig {
4384                    embedding_function: None,
4385                    source_key: None,
4386                    bm25: None,
4387                }),
4388            )
4389            .expect("first sparse should succeed")
4390            .create_index(
4391                Some("sparse2"),
4392                IndexConfig::SparseVector(SparseVectorIndexConfig {
4393                    embedding_function: None,
4394                    source_key: None,
4395                    bm25: None,
4396                }),
4397            );
4398        assert!(result.is_err());
4399        assert!(matches!(
4400            result.unwrap_err(),
4401            SchemaBuilderError::MultipleSparseVectorIndexes { existing_key } if existing_key == "sparse1"
4402        ));
4403    }
4404
4405    #[test]
4406    fn test_builder_delete_index_validation_errors() {
4407        // Test all validation errors for delete_index() as documented in the docstring:
4408        // - Attempting to delete index on special keys (#document, #embedding)
4409        // - Attempting to delete vector, FTS, or sparse vector indexes (not currently supported)
4410
4411        // Error: Delete on special key #embedding
4412        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
4413            Some(EMBEDDING_KEY),
4414            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
4415        );
4416        assert!(result.is_err());
4417        assert!(matches!(
4418            result.unwrap_err(),
4419            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
4420        ));
4421
4422        // Error: Delete on special key #document
4423        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
4424            Some(DOCUMENT_KEY),
4425            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
4426        );
4427        assert!(result.is_err());
4428        assert!(matches!(
4429            result.unwrap_err(),
4430            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
4431        ));
4432
4433        // Error: Delete vector index (not currently supported)
4434        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
4435            None,
4436            IndexConfig::Vector(VectorIndexConfig {
4437                space: None,
4438                embedding_function: None,
4439                source_key: None,
4440                hnsw: None,
4441                spann: None,
4442            }),
4443        );
4444        assert!(result.is_err());
4445        assert!(matches!(
4446            result.unwrap_err(),
4447            SchemaBuilderError::VectorIndexDeletionNotSupported
4448        ));
4449
4450        // Error: Delete FTS index (not currently supported)
4451        let result = Schema::new_default(KnnIndex::Hnsw)
4452            .delete_index(None, IndexConfig::Fts(FtsIndexConfig {}));
4453        assert!(result.is_err());
4454        assert!(matches!(
4455            result.unwrap_err(),
4456            SchemaBuilderError::FtsIndexDeletionNotSupported
4457        ));
4458
4459        // Error: Delete sparse vector index (not currently supported)
4460        let result = Schema::new_default(KnnIndex::Hnsw)
4461            .create_index(
4462                Some("sparse"),
4463                IndexConfig::SparseVector(SparseVectorIndexConfig {
4464                    embedding_function: None,
4465                    source_key: None,
4466                    bm25: None,
4467                }),
4468            )
4469            .expect("create should succeed")
4470            .delete_index(
4471                Some("sparse"),
4472                IndexConfig::SparseVector(SparseVectorIndexConfig {
4473                    embedding_function: None,
4474                    source_key: None,
4475                    bm25: None,
4476                }),
4477            );
4478        assert!(result.is_err());
4479        assert!(matches!(
4480            result.unwrap_err(),
4481            SchemaBuilderError::SparseVectorIndexDeletionNotSupported
4482        ));
4483    }
4484
4485    #[test]
4486    fn test_builder_pattern_chaining() {
4487        // Test complex chaining scenario
4488        let schema = Schema::new_default(KnnIndex::Hnsw)
4489            .create_index(Some("tag1"), StringInvertedIndexConfig {}.into())
4490            .unwrap()
4491            .create_index(Some("tag2"), StringInvertedIndexConfig {}.into())
4492            .unwrap()
4493            .create_index(Some("tag3"), StringInvertedIndexConfig {}.into())
4494            .unwrap()
4495            .create_index(Some("count"), IntInvertedIndexConfig {}.into())
4496            .unwrap()
4497            .delete_index(Some("tag2"), StringInvertedIndexConfig {}.into())
4498            .unwrap()
4499            .create_index(Some("score"), FloatInvertedIndexConfig {}.into())
4500            .unwrap();
4501
4502        // Verify tag1 is enabled
4503        assert!(
4504            schema
4505                .keys
4506                .get("tag1")
4507                .unwrap()
4508                .string
4509                .as_ref()
4510                .unwrap()
4511                .string_inverted_index
4512                .as_ref()
4513                .unwrap()
4514                .enabled
4515        );
4516
4517        // Verify tag2 is disabled
4518        assert!(
4519            !schema
4520                .keys
4521                .get("tag2")
4522                .unwrap()
4523                .string
4524                .as_ref()
4525                .unwrap()
4526                .string_inverted_index
4527                .as_ref()
4528                .unwrap()
4529                .enabled
4530        );
4531
4532        // Verify tag3 is enabled
4533        assert!(
4534            schema
4535                .keys
4536                .get("tag3")
4537                .unwrap()
4538                .string
4539                .as_ref()
4540                .unwrap()
4541                .string_inverted_index
4542                .as_ref()
4543                .unwrap()
4544                .enabled
4545        );
4546
4547        // Verify count is enabled
4548        assert!(
4549            schema
4550                .keys
4551                .get("count")
4552                .unwrap()
4553                .int
4554                .as_ref()
4555                .unwrap()
4556                .int_inverted_index
4557                .as_ref()
4558                .unwrap()
4559                .enabled
4560        );
4561
4562        // Verify score is enabled
4563        assert!(
4564            schema
4565                .keys
4566                .get("score")
4567                .unwrap()
4568                .float
4569                .as_ref()
4570                .unwrap()
4571                .float_inverted_index
4572                .as_ref()
4573                .unwrap()
4574                .enabled
4575        );
4576    }
4577
4578    #[test]
4579    fn test_schema_default_matches_python() {
4580        // Test that Schema::default() matches Python's Schema() behavior exactly
4581        let schema = Schema::default();
4582
4583        // ============================================================================
4584        // VERIFY DEFAULTS (match Python's _initialize_defaults)
4585        // ============================================================================
4586
4587        // String defaults: FTS disabled, string inverted enabled
4588        assert!(schema.defaults.string.is_some());
4589        let string = schema.defaults.string.as_ref().unwrap();
4590        assert!(!string.fts_index.as_ref().unwrap().enabled);
4591        assert!(string.string_inverted_index.as_ref().unwrap().enabled);
4592
4593        // Float list defaults: vector index disabled
4594        assert!(schema.defaults.float_list.is_some());
4595        let float_list = schema.defaults.float_list.as_ref().unwrap();
4596        assert!(!float_list.vector_index.as_ref().unwrap().enabled);
4597        let vector_config = &float_list.vector_index.as_ref().unwrap().config;
4598        assert_eq!(vector_config.space, None); // Python leaves as None
4599        assert_eq!(vector_config.hnsw, None); // Python doesn't specify
4600        assert_eq!(vector_config.spann, None); // Python doesn't specify
4601        assert_eq!(vector_config.source_key, None);
4602
4603        // Sparse vector defaults: disabled
4604        assert!(schema.defaults.sparse_vector.is_some());
4605        let sparse = schema.defaults.sparse_vector.as_ref().unwrap();
4606        assert!(!sparse.sparse_vector_index.as_ref().unwrap().enabled);
4607
4608        // Int defaults: inverted index enabled
4609        assert!(schema.defaults.int.is_some());
4610        assert!(
4611            schema
4612                .defaults
4613                .int
4614                .as_ref()
4615                .unwrap()
4616                .int_inverted_index
4617                .as_ref()
4618                .unwrap()
4619                .enabled
4620        );
4621
4622        // Float defaults: inverted index enabled
4623        assert!(schema.defaults.float.is_some());
4624        assert!(
4625            schema
4626                .defaults
4627                .float
4628                .as_ref()
4629                .unwrap()
4630                .float_inverted_index
4631                .as_ref()
4632                .unwrap()
4633                .enabled
4634        );
4635
4636        // Bool defaults: inverted index enabled
4637        assert!(schema.defaults.boolean.is_some());
4638        assert!(
4639            schema
4640                .defaults
4641                .boolean
4642                .as_ref()
4643                .unwrap()
4644                .bool_inverted_index
4645                .as_ref()
4646                .unwrap()
4647                .enabled
4648        );
4649
4650        // ============================================================================
4651        // VERIFY SPECIAL KEYS (match Python's _initialize_keys)
4652        // ============================================================================
4653
4654        // #document: FTS enabled, string inverted disabled
4655        assert!(schema.keys.contains_key(DOCUMENT_KEY));
4656        let doc = schema.keys.get(DOCUMENT_KEY).unwrap();
4657        assert!(doc.string.is_some());
4658        assert!(
4659            doc.string
4660                .as_ref()
4661                .unwrap()
4662                .fts_index
4663                .as_ref()
4664                .unwrap()
4665                .enabled
4666        );
4667        assert!(
4668            !doc.string
4669                .as_ref()
4670                .unwrap()
4671                .string_inverted_index
4672                .as_ref()
4673                .unwrap()
4674                .enabled
4675        );
4676
4677        // #embedding: vector index enabled with source_key=#document
4678        assert!(schema.keys.contains_key(EMBEDDING_KEY));
4679        let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
4680        assert!(embedding.float_list.is_some());
4681        let vec_idx = embedding
4682            .float_list
4683            .as_ref()
4684            .unwrap()
4685            .vector_index
4686            .as_ref()
4687            .unwrap();
4688        assert!(vec_idx.enabled);
4689        assert_eq!(vec_idx.config.source_key, Some(DOCUMENT_KEY.to_string()));
4690        assert_eq!(vec_idx.config.space, None); // Python leaves as None
4691        assert_eq!(vec_idx.config.hnsw, None); // Python doesn't specify
4692        assert_eq!(vec_idx.config.spann, None); // Python doesn't specify
4693
4694        // Verify only these two special keys exist
4695        assert_eq!(schema.keys.len(), 2);
4696    }
4697
4698    #[test]
4699    fn test_schema_default_works_with_builder() {
4700        // Test that Schema::default() can be used with builder pattern
4701        let schema = Schema::default()
4702            .create_index(Some("category"), StringInvertedIndexConfig {}.into())
4703            .expect("should succeed");
4704
4705        // Verify the new index was added
4706        assert!(schema.keys.contains_key("category"));
4707        assert!(schema.keys.contains_key(DOCUMENT_KEY));
4708        assert!(schema.keys.contains_key(EMBEDDING_KEY));
4709        assert_eq!(schema.keys.len(), 3);
4710    }
4711}