chroma_types/
collection_schema.rs

1use chroma_error::{ChromaError, ErrorCodes};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use thiserror::Error;
5use validator::Validate;
6
7use crate::collection_configuration::{
8    EmbeddingFunctionConfiguration, InternalCollectionConfiguration, VectorIndexConfiguration,
9};
10use crate::hnsw_configuration::Space;
11use crate::metadata::{MetadataComparison, MetadataValueType, Where};
12use crate::operator::QueryVector;
13use crate::{
14    default_batch_size, default_construction_ef, default_construction_ef_spann,
15    default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
16    default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
17    default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
18    default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
19    default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
20    default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor,
21    InternalSpannConfiguration, KnnIndex,
22};
23
24impl ChromaError for SchemaError {
25    fn code(&self) -> ErrorCodes {
26        ErrorCodes::Internal
27    }
28}
29
30#[derive(Debug, Error)]
31pub enum SchemaError {
32    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
33    MissingIndexConfiguration { key: String, value_type: String },
34    #[error("Schema reconciliation failed: {reason}")]
35    InvalidSchema { reason: String },
36    #[error("Cannot set both collection config and schema simultaneously")]
37    ConfigAndSchemaConflict,
38    #[error("Cannot merge schemas with differing defaults")]
39    DefaultsMismatch,
40    #[error("Conflicting configuration for {context}")]
41    ConfigurationConflict { context: String },
42    #[error("Invalid HNSW configuration: {0}")]
43    InvalidHnswConfig(validator::ValidationErrors),
44    #[error("Invalid SPANN configuration: {0}")]
45    InvalidSpannConfig(validator::ValidationErrors),
46    #[error(transparent)]
47    Builder(#[from] SchemaBuilderError),
48}
49
50#[derive(Debug, Error)]
51pub enum SchemaBuilderError {
52    #[error("Vector index must be configured globally using create_index(None, config), not on specific key '{key}'")]
53    VectorIndexMustBeGlobal { key: String },
54    #[error("FTS index must be configured globally using create_index(None, config), not on specific key '{key}'")]
55    FtsIndexMustBeGlobal { key: String },
56    #[error("Cannot modify special key '{key}' - it is managed automatically by the system. To customize vector search, modify the global vector config instead.")]
57    SpecialKeyModificationNotAllowed { key: String },
58    #[error("Sparse vector index requires a specific key. Use create_index(Some(\"key_name\"), config) instead of create_index(None, config)")]
59    SparseVectorRequiresKey,
60    #[error("Only one sparse vector index allowed per collection. Key '{existing_key}' already has a sparse vector index. Remove it first or use that key.")]
61    MultipleSparseVectorIndexes { existing_key: String },
62    #[error("Vector index deletion not supported. The vector index is always enabled on #embedding. To disable vector search, disable the collection instead.")]
63    VectorIndexDeletionNotSupported,
64    #[error("FTS index deletion not supported. The FTS index is always enabled on #document. To disable full-text search, use a different collection without FTS.")]
65    FtsIndexDeletionNotSupported,
66    #[error("Sparse vector index deletion not supported yet. Sparse vector indexes cannot be removed once created.")]
67    SparseVectorIndexDeletionNotSupported,
68}
69
70#[derive(Debug, Error)]
71pub enum FilterValidationError {
72    #[error(
73        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
74    )]
75    IndexingDisabled {
76        key: String,
77        value_type: MetadataValueType,
78    },
79    #[error(transparent)]
80    Schema(#[from] SchemaError),
81}
82
83impl ChromaError for SchemaBuilderError {
84    fn code(&self) -> ErrorCodes {
85        ErrorCodes::InvalidArgument
86    }
87}
88
89impl ChromaError for FilterValidationError {
90    fn code(&self) -> ErrorCodes {
91        match self {
92            FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
93            FilterValidationError::Schema(_) => ErrorCodes::Internal,
94        }
95    }
96}
97
// ============================================================================
// SCHEMA CONSTANTS
// ============================================================================
// These constants must match the Python constants in chromadb/api/types.py

// Value type name constants

/// Name of the string value type.
pub const STRING_VALUE_NAME: &str = "string";
/// Name of the integer value type.
pub const INT_VALUE_NAME: &str = "int";
/// Name of the boolean value type.
pub const BOOL_VALUE_NAME: &str = "bool";
/// Name of the float value type.
pub const FLOAT_VALUE_NAME: &str = "float";
/// Name of the float-list value type.
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
/// Name of the sparse-vector value type.
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// Index type name constants

/// Name of the full-text-search index.
pub const FTS_INDEX_NAME: &str = "fts_index";
/// Name of the vector index.
pub const VECTOR_INDEX_NAME: &str = "vector_index";
/// Name of the sparse vector index.
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
/// Name of the inverted index over string values.
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
/// Name of the inverted index over integer values.
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
/// Name of the inverted index over float values.
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
/// Name of the inverted index over boolean values.
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

// Special metadata keys - must match Python constants in chromadb/api/types.py

/// Special key under which the document override is stored in a schema.
pub const DOCUMENT_KEY: &str = "#document";
/// Special key under which the embedding override is stored in a schema.
pub const EMBEDDING_KEY: &str = "#embedding";
123
124// ============================================================================
125// SCHEMA STRUCTURES
126// ============================================================================
127
128/// Schema representation for collection index configurations
129///
130/// This represents the server-side schema structure used for index management
131
132#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
133#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
134pub struct Schema {
135    /// Default index configurations for each value type
136    pub defaults: ValueTypes,
137    /// Key-specific index overrides
138    /// TODO(Sanket): Needed for backwards compatibility. Should remove after deploy.
139    #[serde(rename = "keys", alias = "key_overrides")]
140    pub keys: HashMap<String, ValueTypes>,
141}
142
143impl Default for Schema {
144    /// Create a default Schema that matches Python's behavior exactly.
145    ///
146    /// Python creates a Schema with:
147    /// - All inverted indexes enabled by default (string, int, float, bool)
148    /// - Vector and FTS indexes disabled in defaults
149    /// - Special keys configured: #document (FTS enabled) and #embedding (vector enabled)
150    /// - Vector config has space=None, hnsw=None, spann=None (deferred to backend)
151    ///
152    /// # Examples
153    /// ```
154    /// use chroma_types::Schema;
155    ///
156    /// let schema = Schema::default();
157    /// assert!(schema.keys.contains_key("#document"));
158    /// assert!(schema.keys.contains_key("#embedding"));
159    /// ```
160    fn default() -> Self {
161        // Initialize defaults - match Python's _initialize_defaults()
162        let defaults = ValueTypes {
163            string: Some(StringValueType {
164                fts_index: Some(FtsIndexType {
165                    enabled: false,
166                    config: FtsIndexConfig {},
167                }),
168                string_inverted_index: Some(StringInvertedIndexType {
169                    enabled: true,
170                    config: StringInvertedIndexConfig {},
171                }),
172            }),
173            float_list: Some(FloatListValueType {
174                vector_index: Some(VectorIndexType {
175                    enabled: false,
176                    config: VectorIndexConfig {
177                        space: None, // Python leaves as None (resolved on serialization)
178                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
179                        source_key: None,
180                        hnsw: None,  // Python doesn't specify
181                        spann: None, // Python doesn't specify
182                    },
183                }),
184            }),
185            sparse_vector: Some(SparseVectorValueType {
186                sparse_vector_index: Some(SparseVectorIndexType {
187                    enabled: false,
188                    config: SparseVectorIndexConfig {
189                        embedding_function: None,
190                        source_key: None,
191                        bm25: None,
192                    },
193                }),
194            }),
195            int: Some(IntValueType {
196                int_inverted_index: Some(IntInvertedIndexType {
197                    enabled: true,
198                    config: IntInvertedIndexConfig {},
199                }),
200            }),
201            float: Some(FloatValueType {
202                float_inverted_index: Some(FloatInvertedIndexType {
203                    enabled: true,
204                    config: FloatInvertedIndexConfig {},
205                }),
206            }),
207            boolean: Some(BoolValueType {
208                bool_inverted_index: Some(BoolInvertedIndexType {
209                    enabled: true,
210                    config: BoolInvertedIndexConfig {},
211                }),
212            }),
213        };
214
215        // Initialize key-specific overrides - match Python's _initialize_keys()
216        let mut keys = HashMap::new();
217
218        // #document: FTS enabled, string inverted disabled
219        keys.insert(
220            DOCUMENT_KEY.to_string(),
221            ValueTypes {
222                string: Some(StringValueType {
223                    fts_index: Some(FtsIndexType {
224                        enabled: true,
225                        config: FtsIndexConfig {},
226                    }),
227                    string_inverted_index: Some(StringInvertedIndexType {
228                        enabled: false,
229                        config: StringInvertedIndexConfig {},
230                    }),
231                }),
232                ..Default::default()
233            },
234        );
235
236        // #embedding: Vector index enabled with source_key=#document
237        keys.insert(
238            EMBEDDING_KEY.to_string(),
239            ValueTypes {
240                float_list: Some(FloatListValueType {
241                    vector_index: Some(VectorIndexType {
242                        enabled: true,
243                        config: VectorIndexConfig {
244                            space: None, // Python leaves as None (resolved on serialization)
245                            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
246                            source_key: Some(DOCUMENT_KEY.to_string()),
247                            hnsw: None,  // Python doesn't specify
248                            spann: None, // Python doesn't specify
249                        },
250                    }),
251                }),
252                ..Default::default()
253            },
254        );
255
256        Schema { defaults, keys }
257    }
258}
259
260pub fn is_embedding_function_default(
261    embedding_function: &Option<EmbeddingFunctionConfiguration>,
262) -> bool {
263    match embedding_function {
264        None => true,
265        Some(embedding_function) => embedding_function.is_default(),
266    }
267}
268
269/// Check if space is default (None means default, or if present, should be default space)
270pub fn is_space_default(space: &Option<Space>) -> bool {
271    match space {
272        None => true,                     // None means default
273        Some(s) => *s == default_space(), // If present, check if it's the default space
274    }
275}
276
277/// Check if HNSW config is default
278pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
279    hnsw_config.ef_construction == Some(default_construction_ef())
280        && hnsw_config.ef_search == Some(default_search_ef())
281        && hnsw_config.max_neighbors == Some(default_m())
282        && hnsw_config.num_threads == Some(default_num_threads())
283        && hnsw_config.batch_size == Some(default_batch_size())
284        && hnsw_config.sync_threshold == Some(default_sync_threshold())
285        && hnsw_config.resize_factor == Some(default_resize_factor())
286}
287
288// ============================================================================
289// NEW STRONGLY-TYPED SCHEMA STRUCTURES
290// ============================================================================
291
292/// Strongly-typed value type configurations
293/// Contains optional configurations for each supported value type
294#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
295#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
296pub struct ValueTypes {
297    #[serde(
298        rename = "string",
299        alias = "#string",
300        skip_serializing_if = "Option::is_none"
301    )] // STRING_VALUE_NAME
302    pub string: Option<StringValueType>,
303
304    #[serde(
305        rename = "float_list",
306        alias = "#float_list",
307        skip_serializing_if = "Option::is_none"
308    )]
309    // FLOAT_LIST_VALUE_NAME
310    pub float_list: Option<FloatListValueType>,
311
312    #[serde(
313        rename = "sparse_vector",
314        alias = "#sparse_vector",
315        skip_serializing_if = "Option::is_none"
316    )]
317    // SPARSE_VECTOR_VALUE_NAME
318    pub sparse_vector: Option<SparseVectorValueType>,
319
320    #[serde(
321        rename = "int",
322        alias = "#int",
323        skip_serializing_if = "Option::is_none"
324    )] // INT_VALUE_NAME
325    pub int: Option<IntValueType>,
326
327    #[serde(
328        rename = "float",
329        alias = "#float",
330        skip_serializing_if = "Option::is_none"
331    )] // FLOAT_VALUE_NAME
332    pub float: Option<FloatValueType>,
333
334    #[serde(
335        rename = "bool",
336        alias = "#bool",
337        skip_serializing_if = "Option::is_none"
338    )] // BOOL_VALUE_NAME
339    pub boolean: Option<BoolValueType>,
340}
341
342/// String value type index configurations
343#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
344#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
345pub struct StringValueType {
346    #[serde(
347        rename = "fts_index",
348        alias = "$fts_index",
349        skip_serializing_if = "Option::is_none"
350    )] // FTS_INDEX_NAME
351    pub fts_index: Option<FtsIndexType>,
352
353    #[serde(
354        rename = "string_inverted_index", // STRING_INVERTED_INDEX_NAME
355        alias = "$string_inverted_index",
356        skip_serializing_if = "Option::is_none"
357    )]
358    pub string_inverted_index: Option<StringInvertedIndexType>,
359}
360
361/// Float list value type index configurations (for vectors)
362#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
363#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
364pub struct FloatListValueType {
365    #[serde(
366        rename = "vector_index",
367        alias = "$vector_index",
368        skip_serializing_if = "Option::is_none"
369    )] // VECTOR_INDEX_NAME
370    pub vector_index: Option<VectorIndexType>,
371}
372
373/// Sparse vector value type index configurations
374#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
375#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
376pub struct SparseVectorValueType {
377    #[serde(
378        rename = "sparse_vector_index", // SPARSE_VECTOR_INDEX_NAME
379        alias = "$sparse_vector_index",
380        skip_serializing_if = "Option::is_none"
381    )]
382    pub sparse_vector_index: Option<SparseVectorIndexType>,
383}
384
385/// Integer value type index configurations
386#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
387#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
388pub struct IntValueType {
389    #[serde(
390        rename = "int_inverted_index",
391        alias = "$int_inverted_index",
392        skip_serializing_if = "Option::is_none"
393    )]
394    // INT_INVERTED_INDEX_NAME
395    pub int_inverted_index: Option<IntInvertedIndexType>,
396}
397
398/// Float value type index configurations
399#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
400#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
401pub struct FloatValueType {
402    #[serde(
403        rename = "float_inverted_index", // FLOAT_INVERTED_INDEX_NAME
404        alias = "$float_inverted_index",
405        skip_serializing_if = "Option::is_none"
406    )]
407    pub float_inverted_index: Option<FloatInvertedIndexType>,
408}
409
410/// Boolean value type index configurations
411#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
412#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
413pub struct BoolValueType {
414    #[serde(
415        rename = "bool_inverted_index", // BOOL_INVERTED_INDEX_NAME
416        alias = "$bool_inverted_index",
417        skip_serializing_if = "Option::is_none"
418    )]
419    pub bool_inverted_index: Option<BoolInvertedIndexType>,
420}
421
422// Individual index type structs with enabled status and config
423#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
424#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
425pub struct FtsIndexType {
426    pub enabled: bool,
427    pub config: FtsIndexConfig,
428}
429
430#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
431#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
432pub struct VectorIndexType {
433    pub enabled: bool,
434    pub config: VectorIndexConfig,
435}
436
437#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
438#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
439pub struct SparseVectorIndexType {
440    pub enabled: bool,
441    pub config: SparseVectorIndexConfig,
442}
443
444#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
445#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
446pub struct StringInvertedIndexType {
447    pub enabled: bool,
448    pub config: StringInvertedIndexConfig,
449}
450
451#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
452#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
453pub struct IntInvertedIndexType {
454    pub enabled: bool,
455    pub config: IntInvertedIndexConfig,
456}
457
458#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
459#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
460pub struct FloatInvertedIndexType {
461    pub enabled: bool,
462    pub config: FloatInvertedIndexConfig,
463}
464
465#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
466#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
467pub struct BoolInvertedIndexType {
468    pub enabled: bool,
469    pub config: BoolInvertedIndexConfig,
470}
471
472impl Schema {
473    /// Create a new Schema with strongly-typed default configurations
474    pub fn new_default(default_knn_index: KnnIndex) -> Self {
475        // Vector index disabled on all keys except #embedding.
476        let vector_config = VectorIndexType {
477            enabled: false,
478            config: VectorIndexConfig {
479                space: Some(default_space()),
480                embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
481                source_key: None,
482                hnsw: match default_knn_index {
483                    KnnIndex::Hnsw => Some(HnswIndexConfig {
484                        ef_construction: Some(default_construction_ef()),
485                        max_neighbors: Some(default_m()),
486                        ef_search: Some(default_search_ef()),
487                        num_threads: Some(default_num_threads()),
488                        batch_size: Some(default_batch_size()),
489                        sync_threshold: Some(default_sync_threshold()),
490                        resize_factor: Some(default_resize_factor()),
491                    }),
492                    KnnIndex::Spann => None,
493                },
494                spann: match default_knn_index {
495                    KnnIndex::Hnsw => None,
496                    KnnIndex::Spann => Some(SpannIndexConfig {
497                        search_nprobe: Some(default_search_nprobe()),
498                        search_rng_factor: Some(default_search_rng_factor()),
499                        search_rng_epsilon: Some(default_search_rng_epsilon()),
500                        nreplica_count: Some(default_nreplica_count()),
501                        write_rng_factor: Some(default_write_rng_factor()),
502                        write_rng_epsilon: Some(default_write_rng_epsilon()),
503                        split_threshold: Some(default_split_threshold()),
504                        num_samples_kmeans: Some(default_num_samples_kmeans()),
505                        initial_lambda: Some(default_initial_lambda()),
506                        reassign_neighbor_count: Some(default_reassign_neighbor_count()),
507                        merge_threshold: Some(default_merge_threshold()),
508                        num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
509                        write_nprobe: Some(default_write_nprobe()),
510                        ef_construction: Some(default_construction_ef_spann()),
511                        ef_search: Some(default_search_ef_spann()),
512                        max_neighbors: Some(default_m_spann()),
513                    }),
514                },
515            },
516        };
517
518        // Initialize defaults struct directly instead of using Default::default() + field assignments
519        let defaults = ValueTypes {
520            string: Some(StringValueType {
521                string_inverted_index: Some(StringInvertedIndexType {
522                    enabled: true,
523                    config: StringInvertedIndexConfig {},
524                }),
525                fts_index: Some(FtsIndexType {
526                    enabled: false,
527                    config: FtsIndexConfig {},
528                }),
529            }),
530            float: Some(FloatValueType {
531                float_inverted_index: Some(FloatInvertedIndexType {
532                    enabled: true,
533                    config: FloatInvertedIndexConfig {},
534                }),
535            }),
536            int: Some(IntValueType {
537                int_inverted_index: Some(IntInvertedIndexType {
538                    enabled: true,
539                    config: IntInvertedIndexConfig {},
540                }),
541            }),
542            boolean: Some(BoolValueType {
543                bool_inverted_index: Some(BoolInvertedIndexType {
544                    enabled: true,
545                    config: BoolInvertedIndexConfig {},
546                }),
547            }),
548            float_list: Some(FloatListValueType {
549                vector_index: Some(vector_config),
550            }),
551            sparse_vector: Some(SparseVectorValueType {
552                sparse_vector_index: Some(SparseVectorIndexType {
553                    enabled: false,
554                    config: SparseVectorIndexConfig {
555                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
556                        source_key: None,
557                        bm25: Some(false),
558                    },
559                }),
560            }),
561        };
562
563        // Set up key overrides
564        let mut keys = HashMap::new();
565
566        // Enable vector index for #embedding.
567        let embedding_defaults = ValueTypes {
568            float_list: Some(FloatListValueType {
569                vector_index: Some(VectorIndexType {
570                    enabled: true,
571                    config: VectorIndexConfig {
572                        space: Some(default_space()),
573                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
574                        source_key: Some(DOCUMENT_KEY.to_string()),
575                        hnsw: match default_knn_index {
576                            KnnIndex::Hnsw => Some(HnswIndexConfig {
577                                ef_construction: Some(default_construction_ef()),
578                                max_neighbors: Some(default_m()),
579                                ef_search: Some(default_search_ef()),
580                                num_threads: Some(default_num_threads()),
581                                batch_size: Some(default_batch_size()),
582                                sync_threshold: Some(default_sync_threshold()),
583                                resize_factor: Some(default_resize_factor()),
584                            }),
585                            KnnIndex::Spann => None,
586                        },
587                        spann: match default_knn_index {
588                            KnnIndex::Hnsw => None,
589                            KnnIndex::Spann => Some(SpannIndexConfig {
590                                search_nprobe: Some(default_search_nprobe()),
591                                search_rng_factor: Some(default_search_rng_factor()),
592                                search_rng_epsilon: Some(default_search_rng_epsilon()),
593                                nreplica_count: Some(default_nreplica_count()),
594                                write_rng_factor: Some(default_write_rng_factor()),
595                                write_rng_epsilon: Some(default_write_rng_epsilon()),
596                                split_threshold: Some(default_split_threshold()),
597                                num_samples_kmeans: Some(default_num_samples_kmeans()),
598                                initial_lambda: Some(default_initial_lambda()),
599                                reassign_neighbor_count: Some(default_reassign_neighbor_count()),
600                                merge_threshold: Some(default_merge_threshold()),
601                                num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
602                                write_nprobe: Some(default_write_nprobe()),
603                                ef_construction: Some(default_construction_ef_spann()),
604                                ef_search: Some(default_search_ef_spann()),
605                                max_neighbors: Some(default_m_spann()),
606                            }),
607                        },
608                    },
609                }),
610            }),
611            ..Default::default()
612        };
613        keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
614
615        // Document defaults - initialize directly instead of Default::default() + field assignment
616        let document_defaults = ValueTypes {
617            string: Some(StringValueType {
618                fts_index: Some(FtsIndexType {
619                    enabled: true,
620                    config: FtsIndexConfig {},
621                }),
622                string_inverted_index: Some(StringInvertedIndexType {
623                    enabled: false,
624                    config: StringInvertedIndexConfig {},
625                }),
626            }),
627            ..Default::default()
628        };
629        keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
630
631        Schema { defaults, keys }
632    }
633
634    pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
635        let to_internal = |vector_index: &VectorIndexType| {
636            let space = vector_index.config.space.clone();
637            vector_index
638                .config
639                .spann
640                .clone()
641                .map(|config| (space.as_ref(), &config).into())
642        };
643
644        self.keys
645            .get(EMBEDDING_KEY)
646            .and_then(|value_types| value_types.float_list.as_ref())
647            .and_then(|float_list| float_list.vector_index.as_ref())
648            .and_then(to_internal)
649            .or_else(|| {
650                self.defaults
651                    .float_list
652                    .as_ref()
653                    .and_then(|float_list| float_list.vector_index.as_ref())
654                    .and_then(to_internal)
655            })
656    }
657
658    /// Reconcile user-provided schema with system defaults
659    ///
660    /// This method merges user configurations with system defaults, ensuring that:
661    /// - User overrides take precedence over defaults
662    /// - Missing user configurations fall back to system defaults
663    /// - Field-level merging for complex configurations (Vector, HNSW, SPANN, etc.)
664    pub fn reconcile_with_defaults(user_schema: Option<Schema>) -> Result<Self, SchemaError> {
665        let default_schema = Schema::new_default(KnnIndex::Spann);
666
667        match user_schema {
668            Some(user) => {
669                // Merge defaults with user overrides
670                let merged_defaults =
671                    Self::merge_value_types(&default_schema.defaults, &user.defaults)?;
672
673                // Merge key overrides
674                let mut merged_keys = default_schema.keys.clone();
675                for (key, user_value_types) in user.keys {
676                    if let Some(default_value_types) = merged_keys.get(&key) {
677                        // Merge with existing default key override
678                        let merged_value_types =
679                            Self::merge_value_types(default_value_types, &user_value_types)?;
680                        merged_keys.insert(key, merged_value_types);
681                    } else {
682                        // New key override from user
683                        merged_keys.insert(key, user_value_types);
684                    }
685                }
686
687                Ok(Schema {
688                    defaults: merged_defaults,
689                    keys: merged_keys,
690                })
691            }
692            None => Ok(default_schema),
693        }
694    }
695
696    /// Merge two schemas together, combining key overrides when possible.
697    pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
698        if self.defaults != other.defaults {
699            return Err(SchemaError::DefaultsMismatch);
700        }
701
702        let mut keys = self.keys.clone();
703
704        for (key, other_value_types) in &other.keys {
705            if let Some(existing) = keys.get(key).cloned() {
706                let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
707                keys.insert(key.clone(), merged);
708            } else {
709                keys.insert(key.clone(), other_value_types.clone());
710            }
711        }
712
713        Ok(Schema {
714            defaults: self.defaults.clone(),
715            keys,
716        })
717    }
718
719    fn merge_override_value_types(
720        key: &str,
721        left: &ValueTypes,
722        right: &ValueTypes,
723    ) -> Result<ValueTypes, SchemaError> {
724        Ok(ValueTypes {
725            string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
726            float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
727            int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
728            boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
729            float_list: Self::merge_float_list_override(
730                key,
731                left.float_list.as_ref(),
732                right.float_list.as_ref(),
733            )?,
734            sparse_vector: Self::merge_sparse_vector_override(
735                key,
736                left.sparse_vector.as_ref(),
737                right.sparse_vector.as_ref(),
738            )?,
739        })
740    }
741
742    fn merge_string_override(
743        key: &str,
744        left: Option<&StringValueType>,
745        right: Option<&StringValueType>,
746    ) -> Result<Option<StringValueType>, SchemaError> {
747        match (left, right) {
748            (Some(l), Some(r)) => Ok(Some(StringValueType {
749                string_inverted_index: Self::merge_index_or_error(
750                    l.string_inverted_index.as_ref(),
751                    r.string_inverted_index.as_ref(),
752                    &format!("key '{key}' string.string_inverted_index"),
753                )?,
754                fts_index: Self::merge_index_or_error(
755                    l.fts_index.as_ref(),
756                    r.fts_index.as_ref(),
757                    &format!("key '{key}' string.fts_index"),
758                )?,
759            })),
760            (Some(l), None) => Ok(Some(l.clone())),
761            (None, Some(r)) => Ok(Some(r.clone())),
762            (None, None) => Ok(None),
763        }
764    }
765
766    fn merge_float_override(
767        key: &str,
768        left: Option<&FloatValueType>,
769        right: Option<&FloatValueType>,
770    ) -> Result<Option<FloatValueType>, SchemaError> {
771        match (left, right) {
772            (Some(l), Some(r)) => Ok(Some(FloatValueType {
773                float_inverted_index: Self::merge_index_or_error(
774                    l.float_inverted_index.as_ref(),
775                    r.float_inverted_index.as_ref(),
776                    &format!("key '{key}' float.float_inverted_index"),
777                )?,
778            })),
779            (Some(l), None) => Ok(Some(l.clone())),
780            (None, Some(r)) => Ok(Some(r.clone())),
781            (None, None) => Ok(None),
782        }
783    }
784
785    fn merge_int_override(
786        key: &str,
787        left: Option<&IntValueType>,
788        right: Option<&IntValueType>,
789    ) -> Result<Option<IntValueType>, SchemaError> {
790        match (left, right) {
791            (Some(l), Some(r)) => Ok(Some(IntValueType {
792                int_inverted_index: Self::merge_index_or_error(
793                    l.int_inverted_index.as_ref(),
794                    r.int_inverted_index.as_ref(),
795                    &format!("key '{key}' int.int_inverted_index"),
796                )?,
797            })),
798            (Some(l), None) => Ok(Some(l.clone())),
799            (None, Some(r)) => Ok(Some(r.clone())),
800            (None, None) => Ok(None),
801        }
802    }
803
804    fn merge_bool_override(
805        key: &str,
806        left: Option<&BoolValueType>,
807        right: Option<&BoolValueType>,
808    ) -> Result<Option<BoolValueType>, SchemaError> {
809        match (left, right) {
810            (Some(l), Some(r)) => Ok(Some(BoolValueType {
811                bool_inverted_index: Self::merge_index_or_error(
812                    l.bool_inverted_index.as_ref(),
813                    r.bool_inverted_index.as_ref(),
814                    &format!("key '{key}' bool.bool_inverted_index"),
815                )?,
816            })),
817            (Some(l), None) => Ok(Some(l.clone())),
818            (None, Some(r)) => Ok(Some(r.clone())),
819            (None, None) => Ok(None),
820        }
821    }
822
823    fn merge_float_list_override(
824        key: &str,
825        left: Option<&FloatListValueType>,
826        right: Option<&FloatListValueType>,
827    ) -> Result<Option<FloatListValueType>, SchemaError> {
828        match (left, right) {
829            (Some(l), Some(r)) => Ok(Some(FloatListValueType {
830                vector_index: Self::merge_index_or_error(
831                    l.vector_index.as_ref(),
832                    r.vector_index.as_ref(),
833                    &format!("key '{key}' float_list.vector_index"),
834                )?,
835            })),
836            (Some(l), None) => Ok(Some(l.clone())),
837            (None, Some(r)) => Ok(Some(r.clone())),
838            (None, None) => Ok(None),
839        }
840    }
841
842    fn merge_sparse_vector_override(
843        key: &str,
844        left: Option<&SparseVectorValueType>,
845        right: Option<&SparseVectorValueType>,
846    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
847        match (left, right) {
848            (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
849                sparse_vector_index: Self::merge_index_or_error(
850                    l.sparse_vector_index.as_ref(),
851                    r.sparse_vector_index.as_ref(),
852                    &format!("key '{key}' sparse_vector.sparse_vector_index"),
853                )?,
854            })),
855            (Some(l), None) => Ok(Some(l.clone())),
856            (None, Some(r)) => Ok(Some(r.clone())),
857            (None, None) => Ok(None),
858        }
859    }
860
861    fn merge_index_or_error<T: Clone + PartialEq>(
862        left: Option<&T>,
863        right: Option<&T>,
864        context: &str,
865    ) -> Result<Option<T>, SchemaError> {
866        match (left, right) {
867            (Some(l), Some(r)) => {
868                if l == r {
869                    Ok(Some(l.clone()))
870                } else {
871                    Err(SchemaError::ConfigurationConflict {
872                        context: context.to_string(),
873                    })
874                }
875            }
876            (Some(l), None) => Ok(Some(l.clone())),
877            (None, Some(r)) => Ok(Some(r.clone())),
878            (None, None) => Ok(None),
879        }
880    }
881
882    /// Merge two ValueTypes with field-level merging
883    /// User values take precedence over default values
884    fn merge_value_types(
885        default: &ValueTypes,
886        user: &ValueTypes,
887    ) -> Result<ValueTypes, SchemaError> {
888        // Merge float_list first
889        let float_list =
890            Self::merge_float_list_type(default.float_list.as_ref(), user.float_list.as_ref());
891
892        // Validate the merged float_list (covers all merge cases)
893        if let Some(ref fl) = float_list {
894            Self::validate_float_list_value_type(fl)?;
895        }
896
897        Ok(ValueTypes {
898            string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
899            float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
900            int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
901            boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
902            float_list,
903            sparse_vector: Self::merge_sparse_vector_type(
904                default.sparse_vector.as_ref(),
905                user.sparse_vector.as_ref(),
906            )?,
907        })
908    }
909
910    /// Merge StringValueType configurations
911    fn merge_string_type(
912        default: Option<&StringValueType>,
913        user: Option<&StringValueType>,
914    ) -> Result<Option<StringValueType>, SchemaError> {
915        match (default, user) {
916            (Some(default), Some(user)) => Ok(Some(StringValueType {
917                string_inverted_index: Self::merge_string_inverted_index_type(
918                    default.string_inverted_index.as_ref(),
919                    user.string_inverted_index.as_ref(),
920                )?,
921                fts_index: Self::merge_fts_index_type(
922                    default.fts_index.as_ref(),
923                    user.fts_index.as_ref(),
924                )?,
925            })),
926            (Some(default), None) => Ok(Some(default.clone())),
927            (None, Some(user)) => Ok(Some(user.clone())),
928            (None, None) => Ok(None),
929        }
930    }
931
932    /// Merge FloatValueType configurations
933    fn merge_float_type(
934        default: Option<&FloatValueType>,
935        user: Option<&FloatValueType>,
936    ) -> Result<Option<FloatValueType>, SchemaError> {
937        match (default, user) {
938            (Some(default), Some(user)) => Ok(Some(FloatValueType {
939                float_inverted_index: Self::merge_float_inverted_index_type(
940                    default.float_inverted_index.as_ref(),
941                    user.float_inverted_index.as_ref(),
942                )?,
943            })),
944            (Some(default), None) => Ok(Some(default.clone())),
945            (None, Some(user)) => Ok(Some(user.clone())),
946            (None, None) => Ok(None),
947        }
948    }
949
950    /// Merge IntValueType configurations
951    fn merge_int_type(
952        default: Option<&IntValueType>,
953        user: Option<&IntValueType>,
954    ) -> Result<Option<IntValueType>, SchemaError> {
955        match (default, user) {
956            (Some(default), Some(user)) => Ok(Some(IntValueType {
957                int_inverted_index: Self::merge_int_inverted_index_type(
958                    default.int_inverted_index.as_ref(),
959                    user.int_inverted_index.as_ref(),
960                )?,
961            })),
962            (Some(default), None) => Ok(Some(default.clone())),
963            (None, Some(user)) => Ok(Some(user.clone())),
964            (None, None) => Ok(None),
965        }
966    }
967
968    /// Merge BoolValueType configurations
969    fn merge_bool_type(
970        default: Option<&BoolValueType>,
971        user: Option<&BoolValueType>,
972    ) -> Result<Option<BoolValueType>, SchemaError> {
973        match (default, user) {
974            (Some(default), Some(user)) => Ok(Some(BoolValueType {
975                bool_inverted_index: Self::merge_bool_inverted_index_type(
976                    default.bool_inverted_index.as_ref(),
977                    user.bool_inverted_index.as_ref(),
978                )?,
979            })),
980            (Some(default), None) => Ok(Some(default.clone())),
981            (None, Some(user)) => Ok(Some(user.clone())),
982            (None, None) => Ok(None),
983        }
984    }
985
986    /// Merge FloatListValueType configurations
987    fn merge_float_list_type(
988        default: Option<&FloatListValueType>,
989        user: Option<&FloatListValueType>,
990    ) -> Option<FloatListValueType> {
991        match (default, user) {
992            (Some(default), Some(user)) => Some(FloatListValueType {
993                vector_index: Self::merge_vector_index_type(
994                    default.vector_index.as_ref(),
995                    user.vector_index.as_ref(),
996                ),
997            }),
998            (Some(default), None) => Some(default.clone()),
999            (None, Some(user)) => Some(user.clone()),
1000            (None, None) => None,
1001        }
1002    }
1003
1004    /// Merge SparseVectorValueType configurations
1005    fn merge_sparse_vector_type(
1006        default: Option<&SparseVectorValueType>,
1007        user: Option<&SparseVectorValueType>,
1008    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1009        match (default, user) {
1010            (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
1011                sparse_vector_index: Self::merge_sparse_vector_index_type(
1012                    default.sparse_vector_index.as_ref(),
1013                    user.sparse_vector_index.as_ref(),
1014                )?,
1015            })),
1016            (Some(default), None) => Ok(Some(default.clone())),
1017            (None, Some(user)) => Ok(Some(user.clone())),
1018            (None, None) => Ok(None),
1019        }
1020    }
1021
1022    /// Merge individual index type configurations
1023    fn merge_string_inverted_index_type(
1024        default: Option<&StringInvertedIndexType>,
1025        user: Option<&StringInvertedIndexType>,
1026    ) -> Result<Option<StringInvertedIndexType>, SchemaError> {
1027        match (default, user) {
1028            (Some(_default), Some(user)) => {
1029                Ok(Some(StringInvertedIndexType {
1030                    enabled: user.enabled,       // User enabled state takes precedence
1031                    config: user.config.clone(), // User config takes precedence
1032                }))
1033            }
1034            (Some(default), None) => Ok(Some(default.clone())),
1035            (None, Some(user)) => Ok(Some(user.clone())),
1036            (None, None) => Ok(None),
1037        }
1038    }
1039
1040    fn merge_fts_index_type(
1041        default: Option<&FtsIndexType>,
1042        user: Option<&FtsIndexType>,
1043    ) -> Result<Option<FtsIndexType>, SchemaError> {
1044        match (default, user) {
1045            (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
1046                enabled: user.enabled,
1047                config: user.config.clone(),
1048            })),
1049            (Some(default), None) => Ok(Some(default.clone())),
1050            (None, Some(user)) => Ok(Some(user.clone())),
1051            (None, None) => Ok(None),
1052        }
1053    }
1054
1055    fn merge_float_inverted_index_type(
1056        default: Option<&FloatInvertedIndexType>,
1057        user: Option<&FloatInvertedIndexType>,
1058    ) -> Result<Option<FloatInvertedIndexType>, SchemaError> {
1059        match (default, user) {
1060            (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
1061                enabled: user.enabled,
1062                config: user.config.clone(),
1063            })),
1064            (Some(default), None) => Ok(Some(default.clone())),
1065            (None, Some(user)) => Ok(Some(user.clone())),
1066            (None, None) => Ok(None),
1067        }
1068    }
1069
1070    fn merge_int_inverted_index_type(
1071        default: Option<&IntInvertedIndexType>,
1072        user: Option<&IntInvertedIndexType>,
1073    ) -> Result<Option<IntInvertedIndexType>, SchemaError> {
1074        match (default, user) {
1075            (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
1076                enabled: user.enabled,
1077                config: user.config.clone(),
1078            })),
1079            (Some(default), None) => Ok(Some(default.clone())),
1080            (None, Some(user)) => Ok(Some(user.clone())),
1081            (None, None) => Ok(None),
1082        }
1083    }
1084
1085    fn merge_bool_inverted_index_type(
1086        default: Option<&BoolInvertedIndexType>,
1087        user: Option<&BoolInvertedIndexType>,
1088    ) -> Result<Option<BoolInvertedIndexType>, SchemaError> {
1089        match (default, user) {
1090            (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
1091                enabled: user.enabled,
1092                config: user.config.clone(),
1093            })),
1094            (Some(default), None) => Ok(Some(default.clone())),
1095            (None, Some(user)) => Ok(Some(user.clone())),
1096            (None, None) => Ok(None),
1097        }
1098    }
1099
1100    fn merge_vector_index_type(
1101        default: Option<&VectorIndexType>,
1102        user: Option<&VectorIndexType>,
1103    ) -> Option<VectorIndexType> {
1104        match (default, user) {
1105            (Some(default), Some(user)) => Some(VectorIndexType {
1106                enabled: user.enabled,
1107                config: Self::merge_vector_index_config(&default.config, &user.config),
1108            }),
1109            (Some(default), None) => Some(default.clone()),
1110            (None, Some(user)) => Some(user.clone()),
1111            (None, None) => None,
1112        }
1113    }
1114
1115    fn merge_sparse_vector_index_type(
1116        default: Option<&SparseVectorIndexType>,
1117        user: Option<&SparseVectorIndexType>,
1118    ) -> Result<Option<SparseVectorIndexType>, SchemaError> {
1119        match (default, user) {
1120            (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
1121                enabled: user.enabled,
1122                config: Self::merge_sparse_vector_index_config(&default.config, &user.config),
1123            })),
1124            (Some(default), None) => Ok(Some(default.clone())),
1125            (None, Some(user)) => Ok(Some(user.clone())),
1126            (None, None) => Ok(None),
1127        }
1128    }
1129
1130    /// Validate FloatListValueType vector index configurations
1131    /// This validates HNSW and SPANN configs within the merged float_list
1132    fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), SchemaError> {
1133        if let Some(vector_index) = &float_list.vector_index {
1134            if let Some(hnsw) = &vector_index.config.hnsw {
1135                hnsw.validate().map_err(SchemaError::InvalidHnswConfig)?;
1136            }
1137            if let Some(spann) = &vector_index.config.spann {
1138                spann.validate().map_err(SchemaError::InvalidSpannConfig)?;
1139            }
1140        }
1141        Ok(())
1142    }
1143
1144    /// Merge VectorIndexConfig with field-level merging
1145    fn merge_vector_index_config(
1146        default: &VectorIndexConfig,
1147        user: &VectorIndexConfig,
1148    ) -> VectorIndexConfig {
1149        VectorIndexConfig {
1150            space: user.space.clone().or(default.space.clone()),
1151            embedding_function: user
1152                .embedding_function
1153                .clone()
1154                .or(default.embedding_function.clone()),
1155            source_key: user.source_key.clone().or(default.source_key.clone()),
1156            hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1157            spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
1158        }
1159    }
1160
1161    /// Merge SparseVectorIndexConfig with field-level merging
1162    fn merge_sparse_vector_index_config(
1163        default: &SparseVectorIndexConfig,
1164        user: &SparseVectorIndexConfig,
1165    ) -> SparseVectorIndexConfig {
1166        SparseVectorIndexConfig {
1167            embedding_function: user
1168                .embedding_function
1169                .clone()
1170                .or(default.embedding_function.clone()),
1171            source_key: user.source_key.clone().or(default.source_key.clone()),
1172            bm25: user.bm25.or(default.bm25),
1173        }
1174    }
1175
1176    /// Merge HNSW configurations with field-level merging
1177    fn merge_hnsw_configs(
1178        default_hnsw: Option<&HnswIndexConfig>,
1179        user_hnsw: Option<&HnswIndexConfig>,
1180    ) -> Option<HnswIndexConfig> {
1181        match (default_hnsw, user_hnsw) {
1182            (Some(default), Some(user)) => Some(HnswIndexConfig {
1183                ef_construction: user.ef_construction.or(default.ef_construction),
1184                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1185                ef_search: user.ef_search.or(default.ef_search),
1186                num_threads: user.num_threads.or(default.num_threads),
1187                batch_size: user.batch_size.or(default.batch_size),
1188                sync_threshold: user.sync_threshold.or(default.sync_threshold),
1189                resize_factor: user.resize_factor.or(default.resize_factor),
1190            }),
1191            (Some(default), None) => Some(default.clone()),
1192            (None, Some(user)) => Some(user.clone()),
1193            (None, None) => None,
1194        }
1195    }
1196
1197    /// Merge SPANN configurations with field-level merging
1198    fn merge_spann_configs(
1199        default_spann: Option<&SpannIndexConfig>,
1200        user_spann: Option<&SpannIndexConfig>,
1201    ) -> Option<SpannIndexConfig> {
1202        match (default_spann, user_spann) {
1203            (Some(default), Some(user)) => Some(SpannIndexConfig {
1204                search_nprobe: user.search_nprobe.or(default.search_nprobe),
1205                search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1206                search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1207                nreplica_count: user.nreplica_count.or(default.nreplica_count),
1208                write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1209                write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1210                split_threshold: user.split_threshold.or(default.split_threshold),
1211                num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1212                initial_lambda: user.initial_lambda.or(default.initial_lambda),
1213                reassign_neighbor_count: user
1214                    .reassign_neighbor_count
1215                    .or(default.reassign_neighbor_count),
1216                merge_threshold: user.merge_threshold.or(default.merge_threshold),
1217                num_centers_to_merge_to: user
1218                    .num_centers_to_merge_to
1219                    .or(default.num_centers_to_merge_to),
1220                write_nprobe: user.write_nprobe.or(default.write_nprobe),
1221                ef_construction: user.ef_construction.or(default.ef_construction),
1222                ef_search: user.ef_search.or(default.ef_search),
1223                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1224            }),
1225            (Some(default), None) => Some(default.clone()),
1226            (None, Some(user)) => Some(user.clone()),
1227            (None, None) => None,
1228        }
1229    }
1230
1231    /// Reconcile Schema with InternalCollectionConfiguration
1232    ///
1233    /// Simple reconciliation logic:
1234    /// 1. If collection config is default → return schema (schema is source of truth)
1235    /// 2. If collection config is non-default and schema is non-default → error (both set)
1236    /// 3. If collection config is non-default and schema is default → override schema with collection config
1237    pub fn reconcile_with_collection_config(
1238        schema: Schema,
1239        collection_config: InternalCollectionConfiguration,
1240    ) -> Result<Schema, SchemaError> {
1241        // 1. Check if collection config is default
1242        if collection_config.is_default() {
1243            // Collection config is default → schema is source of truth
1244            return Ok(schema);
1245        }
1246
1247        // 2. Collection config is non-default, check if schema is also non-default
1248        if !Self::is_schema_default(&schema) {
1249            // Both are non-default → error
1250            return Err(SchemaError::ConfigAndSchemaConflict);
1251        }
1252
1253        // 3. Collection config is non-default, schema is default → override schema with collection config
1254        Self::convert_collection_config_to_schema(collection_config)
1255    }
1256
1257    pub fn reconcile_schema_and_config(
1258        schema: Option<Schema>,
1259        configuration: Option<InternalCollectionConfiguration>,
1260    ) -> Result<Schema, SchemaError> {
1261        let reconciled_schema = Self::reconcile_with_defaults(schema)?;
1262        if let Some(config) = configuration {
1263            Self::reconcile_with_collection_config(reconciled_schema, config)
1264        } else {
1265            Ok(reconciled_schema)
1266        }
1267    }
1268
1269    pub fn default_with_embedding_function(
1270        embedding_function: EmbeddingFunctionConfiguration,
1271    ) -> Schema {
1272        let mut schema = Schema::new_default(KnnIndex::Spann);
1273        if let Some(float_list) = &mut schema.defaults.float_list {
1274            if let Some(vector_index) = &mut float_list.vector_index {
1275                vector_index.config.embedding_function = Some(embedding_function.clone());
1276            }
1277        }
1278        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1279            if let Some(float_list) = &mut embedding_types.float_list {
1280                if let Some(vector_index) = &mut float_list.vector_index {
1281                    vector_index.config.embedding_function = Some(embedding_function);
1282                }
1283            }
1284        }
1285        schema
1286    }
1287
1288    /// Check if schema is default by comparing it word-by-word with new_default
1289    fn is_schema_default(schema: &Schema) -> bool {
1290        // Compare with both possible default schemas (HNSW and SPANN)
1291        let default_hnsw = Schema::new_default(KnnIndex::Hnsw);
1292        let default_spann = Schema::new_default(KnnIndex::Spann);
1293
1294        schema == &default_hnsw || schema == &default_spann
1295    }
1296
1297    /// Convert InternalCollectionConfiguration to Schema
1298    fn convert_collection_config_to_schema(
1299        collection_config: InternalCollectionConfiguration,
1300    ) -> Result<Schema, SchemaError> {
1301        // Start with a default schema structure
1302        let mut schema = Schema::new_default(KnnIndex::Spann); // Default to HNSW, will be overridden
1303
1304        // Convert vector index configuration
1305        let vector_config = match collection_config.vector_index {
1306            VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
1307                space: Some(hnsw_config.space),
1308                embedding_function: collection_config.embedding_function,
1309                source_key: Some(DOCUMENT_KEY.to_string()), // Default source key
1310                hnsw: Some(HnswIndexConfig {
1311                    ef_construction: Some(hnsw_config.ef_construction),
1312                    max_neighbors: Some(hnsw_config.max_neighbors),
1313                    ef_search: Some(hnsw_config.ef_search),
1314                    num_threads: Some(hnsw_config.num_threads),
1315                    batch_size: Some(hnsw_config.batch_size),
1316                    sync_threshold: Some(hnsw_config.sync_threshold),
1317                    resize_factor: Some(hnsw_config.resize_factor),
1318                }),
1319                spann: None,
1320            },
1321            VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
1322                space: Some(spann_config.space),
1323                embedding_function: collection_config.embedding_function,
1324                source_key: Some(DOCUMENT_KEY.to_string()), // Default source key
1325                hnsw: None,
1326                spann: Some(SpannIndexConfig {
1327                    search_nprobe: Some(spann_config.search_nprobe),
1328                    search_rng_factor: Some(spann_config.search_rng_factor),
1329                    search_rng_epsilon: Some(spann_config.search_rng_epsilon),
1330                    nreplica_count: Some(spann_config.nreplica_count),
1331                    write_rng_factor: Some(spann_config.write_rng_factor),
1332                    write_rng_epsilon: Some(spann_config.write_rng_epsilon),
1333                    split_threshold: Some(spann_config.split_threshold),
1334                    num_samples_kmeans: Some(spann_config.num_samples_kmeans),
1335                    initial_lambda: Some(spann_config.initial_lambda),
1336                    reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
1337                    merge_threshold: Some(spann_config.merge_threshold),
1338                    num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
1339                    write_nprobe: Some(spann_config.write_nprobe),
1340                    ef_construction: Some(spann_config.ef_construction),
1341                    ef_search: Some(spann_config.ef_search),
1342                    max_neighbors: Some(spann_config.max_neighbors),
1343                }),
1344            },
1345        };
1346
1347        // Update defaults (keep enabled=false, just update the config)
1348        // This serves as the template for any new float_list fields
1349        if let Some(float_list) = &mut schema.defaults.float_list {
1350            if let Some(vector_index) = &mut float_list.vector_index {
1351                vector_index.config = vector_config.clone();
1352            }
1353        }
1354
1355        // Update the vector_index in the existing #embedding key override
1356        // Keep enabled=true (already set by new_default) and update the config
1357        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1358            if let Some(float_list) = &mut embedding_types.float_list {
1359                if let Some(vector_index) = &mut float_list.vector_index {
1360                    vector_index.config = vector_config;
1361                }
1362            }
1363        }
1364
1365        Ok(schema)
1366    }
1367
1368    /// Check if a specific metadata key-value should be indexed based on schema configuration
1369    pub fn is_metadata_type_index_enabled(
1370        &self,
1371        key: &str,
1372        value_type: MetadataValueType,
1373    ) -> Result<bool, SchemaError> {
1374        let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1375
1376        match value_type {
1377            MetadataValueType::Bool => match &v_type.boolean {
1378                Some(bool_type) => match &bool_type.bool_inverted_index {
1379                    Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1380                    None => Err(SchemaError::MissingIndexConfiguration {
1381                        key: key.to_string(),
1382                        value_type: "bool".to_string(),
1383                    }),
1384                },
1385                None => match &self.defaults.boolean {
1386                    Some(bool_type) => match &bool_type.bool_inverted_index {
1387                        Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1388                        None => Err(SchemaError::MissingIndexConfiguration {
1389                            key: key.to_string(),
1390                            value_type: "bool".to_string(),
1391                        }),
1392                    },
1393                    None => Err(SchemaError::MissingIndexConfiguration {
1394                        key: key.to_string(),
1395                        value_type: "bool".to_string(),
1396                    }),
1397                },
1398            },
1399            MetadataValueType::Int => match &v_type.int {
1400                Some(int_type) => match &int_type.int_inverted_index {
1401                    Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1402                    None => Err(SchemaError::MissingIndexConfiguration {
1403                        key: key.to_string(),
1404                        value_type: "int".to_string(),
1405                    }),
1406                },
1407                None => match &self.defaults.int {
1408                    Some(int_type) => match &int_type.int_inverted_index {
1409                        Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1410                        None => Err(SchemaError::MissingIndexConfiguration {
1411                            key: key.to_string(),
1412                            value_type: "int".to_string(),
1413                        }),
1414                    },
1415                    None => Err(SchemaError::MissingIndexConfiguration {
1416                        key: key.to_string(),
1417                        value_type: "int".to_string(),
1418                    }),
1419                },
1420            },
1421            MetadataValueType::Float => match &v_type.float {
1422                Some(float_type) => match &float_type.float_inverted_index {
1423                    Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1424                    None => Err(SchemaError::MissingIndexConfiguration {
1425                        key: key.to_string(),
1426                        value_type: "float".to_string(),
1427                    }),
1428                },
1429                None => match &self.defaults.float {
1430                    Some(float_type) => match &float_type.float_inverted_index {
1431                        Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1432                        None => Err(SchemaError::MissingIndexConfiguration {
1433                            key: key.to_string(),
1434                            value_type: "float".to_string(),
1435                        }),
1436                    },
1437                    None => Err(SchemaError::MissingIndexConfiguration {
1438                        key: key.to_string(),
1439                        value_type: "float".to_string(),
1440                    }),
1441                },
1442            },
1443            MetadataValueType::Str => match &v_type.string {
1444                Some(string_type) => match &string_type.string_inverted_index {
1445                    Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1446                    None => Err(SchemaError::MissingIndexConfiguration {
1447                        key: key.to_string(),
1448                        value_type: "string".to_string(),
1449                    }),
1450                },
1451                None => match &self.defaults.string {
1452                    Some(string_type) => match &string_type.string_inverted_index {
1453                        Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1454                        None => Err(SchemaError::MissingIndexConfiguration {
1455                            key: key.to_string(),
1456                            value_type: "string".to_string(),
1457                        }),
1458                    },
1459                    None => Err(SchemaError::MissingIndexConfiguration {
1460                        key: key.to_string(),
1461                        value_type: "string".to_string(),
1462                    }),
1463                },
1464            },
1465            MetadataValueType::SparseVector => match &v_type.sparse_vector {
1466                Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1467                    Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1468                    None => Err(SchemaError::MissingIndexConfiguration {
1469                        key: key.to_string(),
1470                        value_type: "sparse_vector".to_string(),
1471                    }),
1472                },
1473                None => match &self.defaults.sparse_vector {
1474                    Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1475                        Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1476                        None => Err(SchemaError::MissingIndexConfiguration {
1477                            key: key.to_string(),
1478                            value_type: "sparse_vector".to_string(),
1479                        }),
1480                    },
1481                    None => Err(SchemaError::MissingIndexConfiguration {
1482                        key: key.to_string(),
1483                        value_type: "sparse_vector".to_string(),
1484                    }),
1485                },
1486            },
1487        }
1488    }
1489
1490    pub fn is_metadata_where_indexing_enabled(
1491        &self,
1492        where_clause: &Where,
1493    ) -> Result<(), FilterValidationError> {
1494        match where_clause {
1495            Where::Composite(composite) => {
1496                for child in &composite.children {
1497                    self.is_metadata_where_indexing_enabled(child)?;
1498                }
1499                Ok(())
1500            }
1501            Where::Document(_) => Ok(()),
1502            Where::Metadata(expression) => {
1503                let value_type = match &expression.comparison {
1504                    MetadataComparison::Primitive(_, value) => value.value_type(),
1505                    MetadataComparison::Set(_, set_value) => set_value.value_type(),
1506                };
1507                let is_enabled = self
1508                    .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1509                    .map_err(FilterValidationError::Schema)?;
1510                if !is_enabled {
1511                    return Err(FilterValidationError::IndexingDisabled {
1512                        key: expression.key.clone(),
1513                        value_type,
1514                    });
1515                }
1516                Ok(())
1517            }
1518        }
1519    }
1520
1521    pub fn is_knn_key_indexing_enabled(
1522        &self,
1523        key: &str,
1524        query: &QueryVector,
1525    ) -> Result<(), FilterValidationError> {
1526        match query {
1527            QueryVector::Sparse(_) => {
1528                let is_enabled = self
1529                    .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1530                    .map_err(FilterValidationError::Schema)?;
1531                if !is_enabled {
1532                    return Err(FilterValidationError::IndexingDisabled {
1533                        key: key.to_string(),
1534                        value_type: MetadataValueType::SparseVector,
1535                    });
1536                }
1537                Ok(())
1538            }
1539            QueryVector::Dense(_) => {
1540                // TODO: once we allow turning off dense vector indexing, we need to check if the key is enabled
1541                // Dense vectors are always indexed
1542                Ok(())
1543            }
1544        }
1545    }
1546
1547    pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
1548        let value_types = self.keys.entry(key.to_string()).or_default();
1549        match value_type {
1550            MetadataValueType::Bool => {
1551                if value_types.boolean.is_none() {
1552                    value_types.boolean = self.defaults.boolean.clone();
1553                    return true;
1554                }
1555            }
1556            MetadataValueType::Int => {
1557                if value_types.int.is_none() {
1558                    value_types.int = self.defaults.int.clone();
1559                    return true;
1560                }
1561            }
1562            MetadataValueType::Float => {
1563                if value_types.float.is_none() {
1564                    value_types.float = self.defaults.float.clone();
1565                    return true;
1566                }
1567            }
1568            MetadataValueType::Str => {
1569                if value_types.string.is_none() {
1570                    value_types.string = self.defaults.string.clone();
1571                    return true;
1572                }
1573            }
1574            MetadataValueType::SparseVector => {
1575                if value_types.sparse_vector.is_none() {
1576                    value_types.sparse_vector = self.defaults.sparse_vector.clone();
1577                    return true;
1578                }
1579            }
1580        }
1581        false
1582    }
1583
1584    // ========================================================================
1585    // BUILDER PATTERN METHODS
1586    // ========================================================================
1587
1588    /// Create an index configuration (builder pattern)
1589    ///
1590    /// This method allows fluent, chainable configuration of indexes on a schema.
1591    /// It matches the Python API's `.create_index()` method.
1592    ///
1593    /// # Arguments
1594    /// * `key` - Optional key name for per-key index. `None` applies to defaults/special keys
1595    /// * `config` - Index configuration to create
1596    ///
1597    /// # Returns
1598    /// `Self` for method chaining
1599    ///
1600    /// # Errors
1601    /// Returns error if:
1602    /// - Attempting to create index on special keys (`#document`, `#embedding`)
1603    /// - Invalid configuration (e.g., vector index on non-embedding key)
1604    /// - Conflicting with existing indexes (e.g., multiple sparse vector indexes)
1605    ///
1606    /// # Examples
1607    /// ```
1608    /// use chroma_types::{Schema, VectorIndexConfig, StringInvertedIndexConfig, Space};
1609    ///
1610    /// # fn main() -> Result<(), SchemaBuilderError> {
1611    /// let schema = Schema::default()
1612    ///     .create_index(None, VectorIndexConfig {
1613    ///         space: Some(Space::Cosine),
1614    ///         embedding_function: None,
1615    ///         source_key: None,
1616    ///         hnsw: None,
1617    ///         spann: None,
1618    ///     }.into())?
1619    ///     .create_index(Some("category"), StringInvertedIndexConfig {}.into())?;
1620    /// # Ok(())
1621    /// # }
1622    /// ```
1623    pub fn create_index(
1624        mut self,
1625        key: Option<&str>,
1626        config: IndexConfig,
1627    ) -> Result<Self, SchemaBuilderError> {
1628        // Handle special cases: Vector and FTS (global configs only)
1629        match (&key, &config) {
1630            (None, IndexConfig::Vector(cfg)) => {
1631                self._set_vector_index_config_builder(cfg.clone());
1632                return Ok(self);
1633            }
1634            (None, IndexConfig::Fts(cfg)) => {
1635                self._set_fts_index_config_builder(cfg.clone());
1636                return Ok(self);
1637            }
1638            (Some(k), IndexConfig::Vector(_)) => {
1639                return Err(SchemaBuilderError::VectorIndexMustBeGlobal { key: k.to_string() });
1640            }
1641            (Some(k), IndexConfig::Fts(_)) => {
1642                return Err(SchemaBuilderError::FtsIndexMustBeGlobal { key: k.to_string() });
1643            }
1644            _ => {}
1645        }
1646
1647        // Validate special keys
1648        if let Some(k) = key {
1649            if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
1650                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
1651                    key: k.to_string(),
1652                });
1653            }
1654        }
1655
1656        // Validate sparse vector requires key
1657        if key.is_none() && matches!(config, IndexConfig::SparseVector(_)) {
1658            return Err(SchemaBuilderError::SparseVectorRequiresKey);
1659        }
1660
1661        // Dispatch to appropriate helper
1662        match key {
1663            Some(k) => self._set_index_for_key_builder(k, config, true)?,
1664            None => self._set_index_in_defaults_builder(config, true)?,
1665        }
1666
1667        Ok(self)
1668    }
1669
1670    /// Delete/disable an index configuration (builder pattern)
1671    ///
1672    /// This method allows disabling indexes on a schema.
1673    /// It matches the Python API's `.delete_index()` method.
1674    ///
1675    /// # Arguments
1676    /// * `key` - Optional key name for per-key index. `None` applies to defaults
1677    /// * `config` - Index configuration to disable
1678    ///
1679    /// # Returns
1680    /// `Self` for method chaining
1681    ///
1682    /// # Errors
1683    /// Returns error if:
1684    /// - Attempting to delete index on special keys (`#document`, `#embedding`)
1685    /// - Attempting to delete vector, FTS, or sparse vector indexes (not currently supported)
1686    ///
1687    /// # Examples
1688    /// ```
1689    /// use chroma_types::{Schema, StringInvertedIndexConfig};
1690    ///
1691    /// # fn main() -> Result<(), SchemaBuilderError> {
1692    /// let schema = Schema::default()
1693    ///     .delete_index(Some("category"), StringInvertedIndexConfig {}.into())?;
1694    /// # Ok(())
1695    /// # }
1696    /// ```
1697    pub fn delete_index(
1698        mut self,
1699        key: Option<&str>,
1700        config: IndexConfig,
1701    ) -> Result<Self, SchemaBuilderError> {
1702        // Validate special keys
1703        if let Some(k) = key {
1704            if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
1705                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
1706                    key: k.to_string(),
1707                });
1708            }
1709        }
1710
1711        // Disallow deleting vector, FTS, and sparse vector indexes (match Python restrictions)
1712        match &config {
1713            IndexConfig::Vector(_) => {
1714                return Err(SchemaBuilderError::VectorIndexDeletionNotSupported);
1715            }
1716            IndexConfig::Fts(_) => {
1717                return Err(SchemaBuilderError::FtsIndexDeletionNotSupported);
1718            }
1719            IndexConfig::SparseVector(_) => {
1720                return Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported);
1721            }
1722            _ => {}
1723        }
1724
1725        // Dispatch to appropriate helper (enabled=false)
1726        match key {
1727            Some(k) => self._set_index_for_key_builder(k, config, false)?,
1728            None => self._set_index_in_defaults_builder(config, false)?,
1729        }
1730
1731        Ok(self)
1732    }
1733
1734    /// Set vector index config globally (applies to #embedding)
1735    fn _set_vector_index_config_builder(&mut self, config: VectorIndexConfig) {
1736        // Update defaults (disabled, just config update)
1737        if let Some(float_list) = &mut self.defaults.float_list {
1738            if let Some(vector_index) = &mut float_list.vector_index {
1739                vector_index.config = config.clone();
1740            }
1741        }
1742
1743        // Update #embedding key (enabled, config update, preserve source_key=#document)
1744        if let Some(embedding_types) = self.keys.get_mut(EMBEDDING_KEY) {
1745            if let Some(float_list) = &mut embedding_types.float_list {
1746                if let Some(vector_index) = &mut float_list.vector_index {
1747                    let mut updated_config = config;
1748                    // Preserve source_key as #document
1749                    updated_config.source_key = Some(DOCUMENT_KEY.to_string());
1750                    vector_index.config = updated_config;
1751                }
1752            }
1753        }
1754    }
1755
1756    /// Set FTS index config globally (applies to #document)
1757    fn _set_fts_index_config_builder(&mut self, config: FtsIndexConfig) {
1758        // Update defaults (disabled, just config update)
1759        if let Some(string) = &mut self.defaults.string {
1760            if let Some(fts_index) = &mut string.fts_index {
1761                fts_index.config = config.clone();
1762            }
1763        }
1764
1765        // Update #document key (enabled, config update)
1766        if let Some(document_types) = self.keys.get_mut(DOCUMENT_KEY) {
1767            if let Some(string) = &mut document_types.string {
1768                if let Some(fts_index) = &mut string.fts_index {
1769                    fts_index.config = config;
1770                }
1771            }
1772        }
1773    }
1774
1775    /// Set index configuration for a specific key
1776    fn _set_index_for_key_builder(
1777        &mut self,
1778        key: &str,
1779        config: IndexConfig,
1780        enabled: bool,
1781    ) -> Result<(), SchemaBuilderError> {
1782        // Check for multiple sparse vector indexes BEFORE getting mutable reference
1783        if enabled && matches!(config, IndexConfig::SparseVector(_)) {
1784            // Find existing sparse vector index
1785            let existing_key = self
1786                .keys
1787                .iter()
1788                .find(|(k, v)| {
1789                    k.as_str() != key
1790                        && v.sparse_vector
1791                            .as_ref()
1792                            .and_then(|sv| sv.sparse_vector_index.as_ref())
1793                            .map(|idx| idx.enabled)
1794                            .unwrap_or(false)
1795                })
1796                .map(|(k, _)| k.clone());
1797
1798            if let Some(existing_key) = existing_key {
1799                return Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key });
1800            }
1801        }
1802
1803        // Get or create ValueTypes for this key
1804        let value_types = self.keys.entry(key.to_string()).or_default();
1805
1806        // Set the appropriate index based on config type
1807        match config {
1808            IndexConfig::Vector(_) => {
1809                return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
1810                    key: key.to_string(),
1811                });
1812            }
1813            IndexConfig::Fts(_) => {
1814                return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
1815                    key: key.to_string(),
1816                });
1817            }
1818            IndexConfig::SparseVector(cfg) => {
1819                value_types.sparse_vector = Some(SparseVectorValueType {
1820                    sparse_vector_index: Some(SparseVectorIndexType {
1821                        enabled,
1822                        config: cfg,
1823                    }),
1824                });
1825            }
1826            IndexConfig::StringInverted(cfg) => {
1827                if value_types.string.is_none() {
1828                    value_types.string = Some(StringValueType {
1829                        fts_index: None,
1830                        string_inverted_index: None,
1831                    });
1832                }
1833                if let Some(string) = &mut value_types.string {
1834                    string.string_inverted_index = Some(StringInvertedIndexType {
1835                        enabled,
1836                        config: cfg,
1837                    });
1838                }
1839            }
1840            IndexConfig::IntInverted(cfg) => {
1841                value_types.int = Some(IntValueType {
1842                    int_inverted_index: Some(IntInvertedIndexType {
1843                        enabled,
1844                        config: cfg,
1845                    }),
1846                });
1847            }
1848            IndexConfig::FloatInverted(cfg) => {
1849                value_types.float = Some(FloatValueType {
1850                    float_inverted_index: Some(FloatInvertedIndexType {
1851                        enabled,
1852                        config: cfg,
1853                    }),
1854                });
1855            }
1856            IndexConfig::BoolInverted(cfg) => {
1857                value_types.boolean = Some(BoolValueType {
1858                    bool_inverted_index: Some(BoolInvertedIndexType {
1859                        enabled,
1860                        config: cfg,
1861                    }),
1862                });
1863            }
1864        }
1865
1866        Ok(())
1867    }
1868
1869    /// Set index configuration in defaults
1870    fn _set_index_in_defaults_builder(
1871        &mut self,
1872        config: IndexConfig,
1873        enabled: bool,
1874    ) -> Result<(), SchemaBuilderError> {
1875        match config {
1876            IndexConfig::Vector(_) => {
1877                return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
1878                    key: "defaults".to_string(),
1879                });
1880            }
1881            IndexConfig::Fts(_) => {
1882                return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
1883                    key: "defaults".to_string(),
1884                });
1885            }
1886            IndexConfig::SparseVector(cfg) => {
1887                self.defaults.sparse_vector = Some(SparseVectorValueType {
1888                    sparse_vector_index: Some(SparseVectorIndexType {
1889                        enabled,
1890                        config: cfg,
1891                    }),
1892                });
1893            }
1894            IndexConfig::StringInverted(cfg) => {
1895                if self.defaults.string.is_none() {
1896                    self.defaults.string = Some(StringValueType {
1897                        fts_index: None,
1898                        string_inverted_index: None,
1899                    });
1900                }
1901                if let Some(string) = &mut self.defaults.string {
1902                    string.string_inverted_index = Some(StringInvertedIndexType {
1903                        enabled,
1904                        config: cfg,
1905                    });
1906                }
1907            }
1908            IndexConfig::IntInverted(cfg) => {
1909                self.defaults.int = Some(IntValueType {
1910                    int_inverted_index: Some(IntInvertedIndexType {
1911                        enabled,
1912                        config: cfg,
1913                    }),
1914                });
1915            }
1916            IndexConfig::FloatInverted(cfg) => {
1917                self.defaults.float = Some(FloatValueType {
1918                    float_inverted_index: Some(FloatInvertedIndexType {
1919                        enabled,
1920                        config: cfg,
1921                    }),
1922                });
1923            }
1924            IndexConfig::BoolInverted(cfg) => {
1925                self.defaults.boolean = Some(BoolValueType {
1926                    bool_inverted_index: Some(BoolInvertedIndexType {
1927                        enabled,
1928                        config: cfg,
1929                    }),
1930                });
1931            }
1932        }
1933
1934        Ok(())
1935    }
1936}
1937
1938// ============================================================================
1939// INDEX CONFIGURATION STRUCTURES
1940// ============================================================================
1941
/// Configuration for a vector index (wrapped by `IndexConfig::Vector`).
///
/// Every field is optional. `None` fields are omitted when serializing, and unknown fields
/// are rejected when deserializing (`deny_unknown_fields`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct VectorIndexConfig {
    /// Vector space for similarity calculation (cosine, l2, ip)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub space: Option<Space>,
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// HNSW algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw: Option<HnswIndexConfig>,
    /// SPANN algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub spann: Option<SpannIndexConfig>,
}
1962
/// Configuration for HNSW vector index algorithm parameters
///
/// Every field is optional. `None` fields are omitted when serializing, and unknown fields
/// are rejected when deserializing. `Validate` is derived; only `batch_size` and
/// `sync_threshold` declare a range constraint.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct HnswIndexConfig {
    /// Optional `ef_construction` setting (no range constraint declared).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_construction: Option<usize>,
    /// Optional `max_neighbors` setting (no range constraint declared).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_neighbors: Option<usize>,
    /// Optional `ef_search` setting (no range constraint declared).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_search: Option<usize>,
    /// Optional `num_threads` setting (no range constraint declared).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_threads: Option<usize>,
    /// Optional `batch_size` setting; validated with `range(min = 2)`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub batch_size: Option<usize>,
    /// Optional `sync_threshold` setting; validated with `range(min = 2)`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub sync_threshold: Option<usize>,
    /// Optional `resize_factor` setting (no range constraint declared).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resize_factor: Option<f64>,
}
1985
/// Configuration for SPANN vector index algorithm parameters
///
/// Every field is optional. `None` fields are omitted when serializing, and unknown fields
/// are rejected when deserializing. Each field declares a `validate(range(..))` constraint;
/// where `min` equals `max`, only that single value passes validation.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SpannIndexConfig {
    /// Optional `search_nprobe`; validated with `range(max = 128)`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 128))]
    pub search_nprobe: Option<u32>,
    /// Optional `search_rng_factor`; `min` and `max` are both 1.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub search_rng_factor: Option<f32>,
    /// Optional `search_rng_epsilon`; validated with `range(min = 5.0, max = 10.0)`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub search_rng_epsilon: Option<f32>,
    /// Optional `nreplica_count`; validated with `range(max = 8)`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub nreplica_count: Option<u32>,
    /// Optional `write_rng_factor`; `min` and `max` are both 1.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub write_rng_factor: Option<f32>,
    /// Optional `write_rng_epsilon`; validated with `range(min = 5.0, max = 10.0)`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub write_rng_epsilon: Option<f32>,
    /// Optional `split_threshold`; validated with `range(min = 50, max = 200)`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 50, max = 200))]
    pub split_threshold: Option<u32>,
    /// Optional `num_samples_kmeans`; validated with `range(max = 1000)`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 1000))]
    pub num_samples_kmeans: Option<usize>,
    /// Optional `initial_lambda`; `min` and `max` are both 100.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 100.0, max = 100.0))]
    pub initial_lambda: Option<f32>,
    /// Optional `reassign_neighbor_count`; validated with `range(max = 64)`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub reassign_neighbor_count: Option<u32>,
    /// Optional `merge_threshold`; validated with `range(min = 25, max = 100)`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 25, max = 100))]
    pub merge_threshold: Option<u32>,
    /// Optional `num_centers_to_merge_to`; validated with `range(max = 8)`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub num_centers_to_merge_to: Option<u32>,
    /// Optional `write_nprobe`; validated with `range(max = 64)`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub write_nprobe: Option<u32>,
    /// Optional `ef_construction`; validated with `range(max = 200)`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_construction: Option<usize>,
    /// Optional `ef_search`; validated with `range(max = 200)`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_search: Option<usize>,
    /// Optional `max_neighbors`; validated with `range(max = 64)`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub max_neighbors: Option<usize>,
}
2040
/// Configuration for a sparse vector index (wrapped by `IndexConfig::SparseVector`).
///
/// Every field is optional. `None` fields are omitted when serializing, and unknown fields
/// are rejected when deserializing (`deny_unknown_fields`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SparseVectorIndexConfig {
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the sparse vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// Whether this embedding is BM25
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bm25: Option<bool>,
}
2055
/// Configuration for the FTS index (wrapped by `IndexConfig::Fts`).
///
/// The struct has no fields; because of `deny_unknown_fields`, deserialization rejects any
/// field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FtsIndexConfig {
    // FTS index typically has no additional parameters
}
2062
/// Configuration for a string inverted index (wrapped by `IndexConfig::StringInverted`).
///
/// The struct has no fields; because of `deny_unknown_fields`, deserialization rejects any
/// field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct StringInvertedIndexConfig {
    // String inverted index typically has no additional parameters
}
2069
/// Configuration for an integer inverted index (wrapped by `IndexConfig::IntInverted`).
///
/// The struct has no fields; because of `deny_unknown_fields`, deserialization rejects any
/// field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct IntInvertedIndexConfig {
    // Integer inverted index typically has no additional parameters
}
2076
/// Configuration for a float inverted index (wrapped by `IndexConfig::FloatInverted`).
///
/// The struct has no fields; because of `deny_unknown_fields`, deserialization rejects any
/// field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FloatInvertedIndexConfig {
    // Float inverted index typically has no additional parameters
}
2083
/// Configuration for a boolean inverted index (wrapped by `IndexConfig::BoolInverted`).
///
/// The struct has no fields; because of `deny_unknown_fields`, deserialization rejects any
/// field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct BoolInvertedIndexConfig {
    // Boolean inverted index typically has no additional parameters
}
2090
2091// ============================================================================
2092// BUILDER PATTERN SUPPORT
2093// ============================================================================
2094
/// Union type for all index configurations (used by builder pattern)
///
/// Each config struct converts into the matching variant through its `From` impl, so
/// callers of `Schema::create_index` / `Schema::delete_index` can pass `cfg.into()`.
#[derive(Clone, Debug)]
pub enum IndexConfig {
    /// Vector index configuration; the builder only accepts it without a key.
    Vector(VectorIndexConfig),
    /// Sparse vector index configuration; `create_index` requires a key for it.
    SparseVector(SparseVectorIndexConfig),
    /// FTS index configuration; the builder only accepts it without a key.
    Fts(FtsIndexConfig),
    /// Inverted index configuration for string values.
    StringInverted(StringInvertedIndexConfig),
    /// Inverted index configuration for integer values.
    IntInverted(IntInvertedIndexConfig),
    /// Inverted index configuration for float values.
    FloatInverted(FloatInvertedIndexConfig),
    /// Inverted index configuration for boolean values.
    BoolInverted(BoolInvertedIndexConfig),
}
2106
2107// Convenience From implementations for ergonomic usage
2108impl From<VectorIndexConfig> for IndexConfig {
2109    fn from(config: VectorIndexConfig) -> Self {
2110        IndexConfig::Vector(config)
2111    }
2112}
2113
2114impl From<SparseVectorIndexConfig> for IndexConfig {
2115    fn from(config: SparseVectorIndexConfig) -> Self {
2116        IndexConfig::SparseVector(config)
2117    }
2118}
2119
2120impl From<FtsIndexConfig> for IndexConfig {
2121    fn from(config: FtsIndexConfig) -> Self {
2122        IndexConfig::Fts(config)
2123    }
2124}
2125
2126impl From<StringInvertedIndexConfig> for IndexConfig {
2127    fn from(config: StringInvertedIndexConfig) -> Self {
2128        IndexConfig::StringInverted(config)
2129    }
2130}
2131
2132impl From<IntInvertedIndexConfig> for IndexConfig {
2133    fn from(config: IntInvertedIndexConfig) -> Self {
2134        IndexConfig::IntInverted(config)
2135    }
2136}
2137
2138impl From<FloatInvertedIndexConfig> for IndexConfig {
2139    fn from(config: FloatInvertedIndexConfig) -> Self {
2140        IndexConfig::FloatInverted(config)
2141    }
2142}
2143
2144impl From<BoolInvertedIndexConfig> for IndexConfig {
2145    fn from(config: BoolInvertedIndexConfig) -> Self {
2146        IndexConfig::BoolInverted(config)
2147    }
2148}
2149
2150#[cfg(test)]
2151mod tests {
2152    use super::*;
2153    use crate::hnsw_configuration::Space;
2154    use crate::metadata::SparseVector;
2155    use crate::{
2156        EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
2157    };
2158    use serde_json::json;
2159
2160    #[test]
2161    fn test_reconcile_with_defaults_none_user_schema() {
2162        // Test that when no user schema is provided, we get the default schema
2163        let result = Schema::reconcile_with_defaults(None).unwrap();
2164        let expected = Schema::new_default(KnnIndex::Spann);
2165        assert_eq!(result, expected);
2166    }
2167
/// Reconciling an empty user schema (no defaults, no key overrides) must
/// yield the default SPANN schema.
#[test]
fn test_reconcile_with_defaults_empty_user_schema() {
    let empty_schema = Schema {
        keys: HashMap::new(),
        defaults: ValueTypes::default(),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(empty_schema)).unwrap();
    assert_eq!(reconciled, Schema::new_default(KnnIndex::Spann));
}
2180
/// A user schema that disables the string inverted index in its defaults must
/// keep that setting after reconciliation, while the float and int defaults
/// are still populated.
#[test]
fn test_reconcile_with_defaults_user_overrides_string_enabled() {
    // Only the string value type is specified by the user; the FTS index is
    // intentionally left unset.
    let string_override = StringValueType {
        fts_index: None,
        string_inverted_index: Some(StringInvertedIndexType {
            config: StringInvertedIndexConfig {},
            enabled: false,
        }),
    };
    let user_schema = Schema {
        defaults: ValueTypes {
            string: Some(string_override),
            ..Default::default()
        },
        keys: HashMap::new(),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();

    // The user's `enabled: false` wins.
    let string_defaults = reconciled.defaults.string.as_ref().unwrap();
    let inverted_index = string_defaults.string_inverted_index.as_ref().unwrap();
    assert!(!inverted_index.enabled);

    // Value types the user did not touch are still present.
    assert!(reconciled.defaults.float.is_some());
    assert!(reconciled.defaults.int.is_some());
}
2215
/// Field-level merging of a user vector index config into the HNSW default
/// schema: user-specified fields win, unspecified fields fall back to defaults.
#[test]
fn test_reconcile_with_defaults_user_overrides_vector_config() {
    // The user sets `space`, `source_key` and `hnsw.ef_construction`; every
    // other field is left as `None`.
    let user_vector_index = VectorIndexType {
        enabled: true,
        config: VectorIndexConfig {
            space: Some(Space::L2),
            embedding_function: None,
            source_key: Some("custom_key".to_string()),
            hnsw: Some(HnswIndexConfig {
                ef_construction: Some(500),
                max_neighbors: None,
                ef_search: None,
                num_threads: None,
                batch_size: None,
                sync_threshold: None,
                resize_factor: None,
            }),
            spann: None,
        },
    };
    let user_schema = Schema {
        defaults: ValueTypes {
            float_list: Some(FloatListValueType {
                vector_index: Some(user_vector_index),
            }),
            ..Default::default()
        },
        keys: HashMap::new(),
    };

    // Merge by hand against the HNSW default schema so that there is an HNSW
    // config on the default side to merge with.
    let hnsw_defaults = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults =
        Schema::merge_value_types(&hnsw_defaults.defaults, &user_schema.defaults).unwrap();
    let mut merged_keys = hnsw_defaults.keys.clone();
    for (key, user_types) in user_schema.keys {
        let resolved = match merged_keys.get(&key) {
            Some(default_types) => Schema::merge_value_types(default_types, &user_types).unwrap(),
            None => user_types,
        };
        merged_keys.insert(key, resolved);
    }
    let merged_schema = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
    };

    let float_list = merged_schema.defaults.float_list.as_ref().unwrap();
    let vector_config = &float_list.vector_index.as_ref().unwrap().config;

    // User-provided values take precedence.
    assert_eq!(vector_config.space, Some(Space::L2));
    assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
    assert_eq!(
        vector_config.hnsw.as_ref().unwrap().ef_construction,
        Some(500)
    );

    // Fields the user left unset come from the defaults.
    assert_eq!(
        vector_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(
        vector_config.hnsw.as_ref().unwrap().max_neighbors,
        Some(default_m())
    );
}
2295
/// A user-supplied key override must be present after reconciliation, next to
/// the default `EMBEDDING_KEY` and `DOCUMENT_KEY` overrides.
#[test]
fn test_reconcile_with_defaults_keys() {
    // Override for "custom_key": FTS enabled, string inverted index disabled.
    let custom_key_types = ValueTypes {
        string: Some(StringValueType {
            string_inverted_index: Some(StringInvertedIndexType {
                enabled: false,
                config: StringInvertedIndexConfig {},
            }),
            fts_index: Some(FtsIndexType {
                enabled: true,
                config: FtsIndexConfig {},
            }),
        }),
        ..Default::default()
    };
    let user_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::from([("custom_key".to_string(), custom_key_types)]),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();

    // The default key overrides survive reconciliation.
    assert!(reconciled.keys.contains_key(EMBEDDING_KEY));
    assert!(reconciled.keys.contains_key(DOCUMENT_KEY));

    // The user's key override was added with its FTS index enabled.
    assert!(reconciled.keys.contains_key("custom_key"));
    let custom_types = reconciled.keys.get("custom_key").unwrap();
    let custom_string = custom_types.string.as_ref().unwrap();
    assert!(custom_string.fts_index.as_ref().unwrap().enabled);
}
2342
/// Overriding a key that already exists in the default schema
/// (`EMBEDDING_KEY`) must let the user's vector index settings win.
#[test]
fn test_reconcile_with_defaults_override_existing_key() {
    // User override for the embedding key: vector index disabled, inner-product
    // space and a custom source key.
    let embedding_override = ValueTypes {
        float_list: Some(FloatListValueType {
            vector_index: Some(VectorIndexType {
                enabled: false,
                config: VectorIndexConfig {
                    space: Some(Space::Ip),
                    embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
                    source_key: Some("custom_embedding_key".to_string()),
                    hnsw: None,
                    spann: None,
                },
            }),
        }),
        ..Default::default()
    };
    let user_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::from([(EMBEDDING_KEY.to_string(), embedding_override)]),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();

    let embedding_types = reconciled.keys.get(EMBEDDING_KEY).unwrap();
    let float_list = embedding_types.float_list.as_ref().unwrap();
    let vector_index = float_list.vector_index.as_ref().unwrap();

    // Every user-provided value is reflected in the reconciled schema.
    assert!(!vector_index.enabled);
    assert_eq!(vector_index.config.space, Some(Space::Ip));
    assert_eq!(
        vector_index.config.source_key,
        Some("custom_embedding_key".to_string())
    );
}
2390
/// An HNSW collection configuration converted to a schema and back must equal
/// the original configuration.
#[test]
fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
    let hnsw = InternalHnswConfiguration {
        space: Space::Cosine,
        ef_construction: 128,
        ef_search: 96,
        max_neighbors: 42,
        num_threads: 8,
        resize_factor: 1.5,
        sync_threshold: 2_000,
        batch_size: 256,
    };
    let embedding_function =
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "custom".to_string(),
            config: json!({"alpha": 1}),
        });
    let original = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(hnsw),
        embedding_function: Some(embedding_function),
    };

    // config -> schema -> config
    let schema = Schema::convert_collection_config_to_schema(original.clone()).unwrap();
    let roundtripped = InternalCollectionConfiguration::try_from(&schema).unwrap();

    assert_eq!(roundtripped, original);
}
2418
/// A SPANN collection configuration converted to a schema and back must equal
/// the original configuration.
#[test]
fn test_convert_schema_to_collection_config_spann_roundtrip() {
    // Fields not listed here take their `Default` values.
    let spann = InternalSpannConfiguration {
        space: Space::Cosine,
        search_nprobe: 11,
        search_rng_factor: 1.7,
        write_nprobe: 5,
        nreplica_count: 3,
        split_threshold: 150,
        merge_threshold: 80,
        ef_construction: 120,
        ef_search: 90,
        max_neighbors: 40,
        ..Default::default()
    };
    let embedding_function =
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "custom".to_string(),
            config: json!({"beta": true}),
        });
    let original = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(spann),
        embedding_function: Some(embedding_function),
    };

    // config -> schema -> config
    let schema = Schema::convert_collection_config_to_schema(original.clone()).unwrap();
    let roundtripped = InternalCollectionConfiguration::try_from(&schema).unwrap();

    assert_eq!(roundtripped, original);
}
2451
/// Converting a schema back to a collection configuration must fail once a
/// SPANN config has been attached to the embedding vector index of a schema
/// built from the HNSW defaults.
#[test]
fn test_convert_schema_to_collection_config_rejects_mixed_index() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);

    // Resolve the embedding vector index with `expect` rather than nested
    // `if let`: if the default schema ever stops exposing this path the setup
    // must fail loudly instead of silently skipping the mutation below.
    let vector_index = schema
        .keys
        .get_mut(EMBEDDING_KEY)
        .expect("default schema should contain the embedding key")
        .float_list
        .as_mut()
        .expect("embedding key should define a float_list value type")
        .vector_index
        .as_mut()
        .expect("embedding float_list should define a vector index");

    // Attach a fully-populated SPANN config to the embedding vector index.
    vector_index.config.spann = Some(SpannIndexConfig {
        search_nprobe: Some(1),
        search_rng_factor: Some(1.0),
        search_rng_epsilon: Some(0.1),
        nreplica_count: Some(1),
        write_rng_factor: Some(1.0),
        write_rng_epsilon: Some(0.1),
        split_threshold: Some(100),
        num_samples_kmeans: Some(10),
        initial_lambda: Some(0.5),
        reassign_neighbor_count: Some(10),
        merge_threshold: Some(50),
        num_centers_to_merge_to: Some(3),
        write_nprobe: Some(1),
        ef_construction: Some(50),
        ef_search: Some(40),
        max_neighbors: Some(20),
    });

    let result = InternalCollectionConfiguration::try_from(&schema);
    assert!(result.is_err());
}
2483
/// `ensure_key_from_metadata` on `DOCUMENT_KEY` with a string value type must
/// report no modification and leave the default HNSW schema untouched.
#[test]
fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    let snapshot = schema.clone();

    let changed = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);

    assert!(!changed);
    assert_eq!(schema, snapshot);
}
2492
/// `ensure_key_from_metadata` on an unknown key must insert a key override
/// that carries only the requested value type, copied from the schema defaults.
#[test]
fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    // Precondition: the key is not part of the default schema.
    assert!(!schema.keys.contains_key("custom_field"));

    let changed = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
    assert!(changed);

    let inserted = schema
        .keys
        .get("custom_field")
        .expect("expected new key override to be inserted");

    // Only the boolean value type is populated, and it mirrors the defaults.
    assert_eq!(inserted.boolean, schema.defaults.boolean);
    assert!(inserted.string.is_none());
    assert!(inserted.int.is_none());
    assert!(inserted.float.is_none());
    assert!(inserted.float_list.is_none());
    assert!(inserted.sparse_vector.is_none());
}
2512
/// `ensure_key_from_metadata` on a key that already has a string override must
/// add the boolean value type to that override without creating another key.
#[test]
fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    let initial_len = schema.keys.len();

    // Seed the key with a string-only override copied from the defaults.
    let string_only = ValueTypes {
        string: schema.defaults.string.clone(),
        ..Default::default()
    };
    schema.keys.insert("custom_field".to_string(), string_only);

    let changed = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
    assert!(changed);

    // Exactly one key was added overall (the seeded one).
    assert_eq!(schema.keys.len(), initial_len + 1);

    let updated = schema
        .keys
        .get("custom_field")
        .expect("expected key override to exist after ensure call");
    assert!(updated.string.is_some());
    assert_eq!(updated.boolean, schema.defaults.boolean);
}
2536
/// A sparse KNN query against a key with no sparse index override in the
/// default SPANN schema must fail with `IndexingDisabled` naming that key and
/// the sparse-vector value type.
#[test]
fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
    let schema = Schema::new_default(KnnIndex::Spann);
    let query = QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]));

    let err = schema
        .is_knn_key_indexing_enabled("custom_sparse", &query)
        .expect_err("expected indexing disabled error");

    match err {
        FilterValidationError::IndexingDisabled { key, value_type } => {
            assert_eq!(key, "custom_sparse");
            assert_eq!(value_type, MetadataValueType::SparseVector);
        }
        other => panic!("unexpected error variant: {other:?}"),
    }
}
2554
/// A sparse KNN query against a key whose sparse vector index is explicitly
/// enabled must be accepted.
#[test]
fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
    // Key override with an enabled sparse vector index.
    let sparse_index = SparseVectorIndexType {
        enabled: true,
        config: SparseVectorIndexConfig {
            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
            source_key: None,
            bm25: None,
        },
    };
    let sparse_types = ValueTypes {
        sparse_vector: Some(SparseVectorValueType {
            sparse_vector_index: Some(sparse_index),
        }),
        ..Default::default()
    };

    let mut schema = Schema::new_default(KnnIndex::Spann);
    schema.keys.insert("sparse_enabled".to_string(), sparse_types);

    let query = QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]));
    let outcome = schema.is_knn_key_indexing_enabled("sparse_enabled", &query);

    assert!(outcome.is_ok());
}
2582
/// A dense KNN query against `EMBEDDING_KEY` must be accepted by the default
/// SPANN schema.
#[test]
fn test_is_knn_key_indexing_enabled_dense_succeeds() {
    let schema = Schema::new_default(KnnIndex::Spann);
    let query = QueryVector::Dense(vec![0.1_f32, 0.2_f32]);

    let outcome = schema.is_knn_key_indexing_enabled(EMBEDDING_KEY, &query);

    assert!(outcome.is_ok());
}
2593
/// `merge_hnsw_configs` merges per field: values set on the user side win,
/// `None` on the user side falls back to the default side.
#[test]
fn test_merge_hnsw_configs_field_level() {
    // Default side: every field populated.
    let base = HnswIndexConfig {
        ef_construction: Some(200),
        max_neighbors: Some(16),
        ef_search: Some(10),
        num_threads: Some(4),
        batch_size: Some(100),
        sync_threshold: Some(1000),
        resize_factor: Some(1.2),
    };

    // User side: only ef_construction, ef_search and sync_threshold are set.
    let overrides = HnswIndexConfig {
        ef_construction: Some(300),
        ef_search: Some(20),
        sync_threshold: Some(2000),
        max_neighbors: None,
        num_threads: None,
        batch_size: None,
        resize_factor: None,
    };

    let merged = Schema::merge_hnsw_configs(Some(&base), Some(&overrides)).unwrap();

    // Overridden fields carry the user values.
    assert_eq!(merged.ef_construction, Some(300));
    assert_eq!(merged.ef_search, Some(20));
    assert_eq!(merged.sync_threshold, Some(2000));

    // Unset fields carry the default values.
    assert_eq!(merged.max_neighbors, Some(16));
    assert_eq!(merged.num_threads, Some(4));
    assert_eq!(merged.batch_size, Some(100));
    assert_eq!(merged.resize_factor, Some(1.2));
}
2630
/// `merge_spann_configs` merges per field: values set on the user side win,
/// `None` on the user side falls back to the default side.
#[test]
fn test_merge_spann_configs_field_level() {
    // Default side: every field populated.
    let base = SpannIndexConfig {
        search_nprobe: Some(10),
        search_rng_factor: Some(1.0),
        search_rng_epsilon: Some(7.0),
        nreplica_count: Some(3),
        write_rng_factor: Some(1.0),
        write_rng_epsilon: Some(6.0),
        split_threshold: Some(100),
        num_samples_kmeans: Some(100),
        initial_lambda: Some(100.0),
        reassign_neighbor_count: Some(50),
        merge_threshold: Some(50),
        num_centers_to_merge_to: Some(4),
        write_nprobe: Some(5),
        ef_construction: Some(100),
        ef_search: Some(10),
        max_neighbors: Some(16),
    };

    // User side: only search_nprobe, search_rng_epsilon and split_threshold
    // are set.
    let overrides = SpannIndexConfig {
        search_nprobe: Some(20),
        search_rng_epsilon: Some(8.0),
        split_threshold: Some(150),
        search_rng_factor: None,
        nreplica_count: None,
        write_rng_factor: None,
        write_rng_epsilon: None,
        num_samples_kmeans: None,
        initial_lambda: None,
        reassign_neighbor_count: None,
        merge_threshold: None,
        num_centers_to_merge_to: None,
        write_nprobe: None,
        ef_construction: None,
        ef_search: None,
        max_neighbors: None,
    };

    let merged = Schema::merge_spann_configs(Some(&base), Some(&overrides)).unwrap();

    // Overridden fields carry the user values.
    assert_eq!(merged.search_nprobe, Some(20));
    assert_eq!(merged.search_rng_epsilon, Some(8.0));
    assert_eq!(merged.split_threshold, Some(150));

    // Unset fields carry the default values.
    assert_eq!(merged.search_rng_factor, Some(1.0));
    assert_eq!(merged.nreplica_count, Some(3));
    assert_eq!(merged.initial_lambda, Some(100.0));
}
2684
/// Converting `(Option<&Space>, &SpannIndexConfig)` into an
/// `InternalSpannConfiguration` keeps the explicitly set fields, fills unset
/// fields from the `default_*` helpers, and uses `default_space()` when no
/// space is supplied.
#[test]
fn test_spann_index_config_into_internal_configuration() {
    // A partially specified SPANN config; `None` fields exercise the fallback.
    let partial = SpannIndexConfig {
        search_nprobe: Some(33),
        search_rng_factor: Some(1.2),
        search_rng_epsilon: None,
        nreplica_count: None,
        write_rng_factor: Some(1.5),
        write_rng_epsilon: None,
        split_threshold: Some(75),
        num_samples_kmeans: None,
        initial_lambda: Some(0.9),
        reassign_neighbor_count: Some(40),
        merge_threshold: None,
        num_centers_to_merge_to: Some(4),
        write_nprobe: Some(60),
        ef_construction: Some(180),
        ef_search: Some(170),
        max_neighbors: Some(32),
    };

    // Explicit space: set fields are carried over, unset ones use defaults.
    let explicit: InternalSpannConfiguration = (Some(&Space::Cosine), &partial).into();
    assert_eq!(explicit.space, Space::Cosine);
    assert_eq!(explicit.search_nprobe, 33);
    assert_eq!(explicit.search_rng_factor, 1.2);
    assert_eq!(explicit.search_rng_epsilon, default_search_rng_epsilon());
    assert_eq!(explicit.write_rng_factor, 1.5);
    assert_eq!(explicit.write_nprobe, 60);
    assert_eq!(explicit.ef_construction, 180);
    assert_eq!(explicit.ef_search, 170);
    assert_eq!(explicit.max_neighbors, 32);
    assert_eq!(explicit.merge_threshold, default_merge_threshold());

    // No space supplied: the default space is used.
    let implicit: InternalSpannConfiguration = (None, &partial).into();
    assert_eq!(implicit.space, default_space());
}
2721
/// Exercises `merge_string_type` for all four presence combinations of the
/// default-side and user-side `StringValueType`.
#[test]
fn test_merge_string_type_combinations() {
    // Default side: inverted index enabled, FTS disabled.
    let base = StringValueType {
        fts_index: Some(FtsIndexType {
            enabled: false,
            config: FtsIndexConfig {},
        }),
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: true,
            config: StringInvertedIndexConfig {},
        }),
    };

    // User side: disables the inverted index and leaves FTS unset.
    let overrides = StringValueType {
        fts_index: None,
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
    };

    // Both present: the user's inverted-index setting wins, FTS comes from
    // the default side.
    let both = Schema::merge_string_type(Some(&base), Some(&overrides))
        .unwrap()
        .unwrap();
    assert!(!both.string_inverted_index.as_ref().unwrap().enabled);
    assert!(!both.fts_index.as_ref().unwrap().enabled);

    // Only the default side present: the default is returned.
    let default_only = Schema::merge_string_type(Some(&base), None)
        .unwrap()
        .unwrap();
    assert!(default_only.string_inverted_index.as_ref().unwrap().enabled);

    // Only the user side present: the user value is returned.
    let user_only = Schema::merge_string_type(None, Some(&overrides))
        .unwrap()
        .unwrap();
    assert!(!user_only.string_inverted_index.as_ref().unwrap().enabled);

    // Neither present: nothing to merge.
    let neither = Schema::merge_string_type(None, None).unwrap();
    assert!(neither.is_none());
}
2768
/// `merge_vector_index_config` merges top-level fields and the nested HNSW
/// config per field, and carries over a SPANN config that exists only on the
/// user side.
#[test]
fn test_merge_vector_index_config_comprehensive() {
    // Default side: fully populated HNSW config, no SPANN config.
    let base = VectorIndexConfig {
        space: Some(Space::Cosine),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(200),
            max_neighbors: Some(16),
            ef_search: Some(10),
            num_threads: Some(4),
            batch_size: Some(100),
            sync_threshold: Some(1000),
            resize_factor: Some(1.2),
        }),
        spann: None,
    };

    // User side: overrides space, source_key and hnsw.ef_construction, and
    // adds a SPANN config with only search_nprobe set.
    let user_hnsw = HnswIndexConfig {
        ef_construction: Some(300),
        max_neighbors: None,
        ef_search: None,
        num_threads: None,
        batch_size: None,
        sync_threshold: None,
        resize_factor: None,
    };
    let user_spann = SpannIndexConfig {
        search_nprobe: Some(15),
        search_rng_factor: None,
        search_rng_epsilon: None,
        nreplica_count: None,
        write_rng_factor: None,
        write_rng_epsilon: None,
        split_threshold: None,
        num_samples_kmeans: None,
        initial_lambda: None,
        reassign_neighbor_count: None,
        merge_threshold: None,
        num_centers_to_merge_to: None,
        write_nprobe: None,
        ef_construction: None,
        ef_search: None,
        max_neighbors: None,
    };
    let overrides = VectorIndexConfig {
        space: Some(Space::L2),
        embedding_function: None,
        source_key: Some("user_key".to_string()),
        hnsw: Some(user_hnsw),
        spann: Some(user_spann),
    };

    let merged = Schema::merge_vector_index_config(&base, &overrides);

    // Top-level fields: user values win, unset ones come from the default.
    assert_eq!(merged.space, Some(Space::L2));
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(merged.source_key, Some("user_key".to_string()));

    // Nested HNSW config is merged field by field.
    assert_eq!(merged.hnsw.as_ref().unwrap().ef_construction, Some(300));
    assert_eq!(merged.hnsw.as_ref().unwrap().max_neighbors, Some(16));

    // The user-only SPANN config is present in the result.
    assert!(merged.spann.is_some());
    assert_eq!(merged.spann.as_ref().unwrap().search_nprobe, Some(15));
}
2839
/// `merge_sparse_vector_index_config` keeps the user's `source_key` and falls
/// back to the default `embedding_function` when the user leaves it unset.
#[test]
fn test_merge_sparse_vector_index_config() {
    let base = SparseVectorIndexConfig {
        source_key: Some("default_sparse_key".to_string()),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        bm25: None,
    };
    let overrides = SparseVectorIndexConfig {
        source_key: Some("user_sparse_key".to_string()),
        embedding_function: None,
        bm25: None,
    };

    let merged = Schema::merge_sparse_vector_index_config(&base, &overrides);

    // User-provided source key wins.
    assert_eq!(merged.source_key, Some("user_sparse_key".to_string()));
    // Embedding function is inherited from the default side.
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
}
2865
2866    #[test]
2867    fn test_complex_nested_merging_scenario() {
2868        // Test a complex scenario with multiple levels of merging
2869        let mut user_schema = Schema {
2870            defaults: ValueTypes::default(),
2871            keys: HashMap::new(),
2872        };
2873
2874        // Set up complex user defaults
2875        user_schema.defaults.string = Some(StringValueType {
2876            string_inverted_index: Some(StringInvertedIndexType {
2877                enabled: false,
2878                config: StringInvertedIndexConfig {},
2879            }),
2880            fts_index: Some(FtsIndexType {
2881                enabled: true,
2882                config: FtsIndexConfig {},
2883            }),
2884        });
2885
2886        user_schema.defaults.float_list = Some(FloatListValueType {
2887            vector_index: Some(VectorIndexType {
2888                enabled: true,
2889                config: VectorIndexConfig {
2890                    space: Some(Space::Ip),
2891                    embedding_function: None, // Will use default
2892                    source_key: Some("custom_vector_key".to_string()),
2893                    hnsw: Some(HnswIndexConfig {
2894                        ef_construction: Some(400),
2895                        max_neighbors: Some(32),
2896                        ef_search: None, // Will use default
2897                        num_threads: None,
2898                        batch_size: None,
2899                        sync_threshold: None,
2900                        resize_factor: None,
2901                    }),
2902                    spann: None,
2903                },
2904            }),
2905        });
2906
2907        // Set up key overrides
2908        let custom_key_override = ValueTypes {
2909            string: Some(StringValueType {
2910                fts_index: Some(FtsIndexType {
2911                    enabled: true,
2912                    config: FtsIndexConfig {},
2913                }),
2914                string_inverted_index: None,
2915            }),
2916            ..Default::default()
2917        };
2918        user_schema
2919            .keys
2920            .insert("custom_field".to_string(), custom_key_override);
2921
2922        // Use HNSW defaults for this test so we have HNSW config to merge with
2923        let result = {
2924            let default_schema = Schema::new_default(KnnIndex::Hnsw);
2925            let merged_defaults =
2926                Schema::merge_value_types(&default_schema.defaults, &user_schema.defaults).unwrap();
2927            let mut merged_keys = default_schema.keys.clone();
2928            for (key, user_value_types) in user_schema.keys {
2929                if let Some(default_value_types) = merged_keys.get(&key) {
2930                    let merged_value_types =
2931                        Schema::merge_value_types(default_value_types, &user_value_types).unwrap();
2932                    merged_keys.insert(key, merged_value_types);
2933                } else {
2934                    merged_keys.insert(key, user_value_types);
2935                }
2936            }
2937            Schema {
2938                defaults: merged_defaults,
2939                keys: merged_keys,
2940            }
2941        };
2942
2943        // Verify complex merging worked correctly
2944
2945        // Check defaults merging
2946        assert!(
2947            !result
2948                .defaults
2949                .string
2950                .as_ref()
2951                .unwrap()
2952                .string_inverted_index
2953                .as_ref()
2954                .unwrap()
2955                .enabled
2956        );
2957        assert!(
2958            result
2959                .defaults
2960                .string
2961                .as_ref()
2962                .unwrap()
2963                .fts_index
2964                .as_ref()
2965                .unwrap()
2966                .enabled
2967        );
2968
2969        let vector_config = &result
2970            .defaults
2971            .float_list
2972            .as_ref()
2973            .unwrap()
2974            .vector_index
2975            .as_ref()
2976            .unwrap()
2977            .config;
2978        assert_eq!(vector_config.space, Some(Space::Ip));
2979        assert_eq!(
2980            vector_config.embedding_function,
2981            Some(EmbeddingFunctionConfiguration::Legacy)
2982        ); // Default preserved
2983        assert_eq!(
2984            vector_config.source_key,
2985            Some("custom_vector_key".to_string())
2986        );
2987        assert_eq!(
2988            vector_config.hnsw.as_ref().unwrap().ef_construction,
2989            Some(400)
2990        );
2991        assert_eq!(vector_config.hnsw.as_ref().unwrap().max_neighbors, Some(32));
2992        assert_eq!(
2993            vector_config.hnsw.as_ref().unwrap().ef_search,
2994            Some(default_search_ef())
2995        ); // Default preserved
2996
2997        // Check key overrides
2998        assert!(result.keys.contains_key(EMBEDDING_KEY)); // Default preserved
2999        assert!(result.keys.contains_key(DOCUMENT_KEY)); // Default preserved
3000        assert!(result.keys.contains_key("custom_field")); // User added
3001
3002        let custom_override = result.keys.get("custom_field").unwrap();
3003        assert!(
3004            custom_override
3005                .string
3006                .as_ref()
3007                .unwrap()
3008                .fts_index
3009                .as_ref()
3010                .unwrap()
3011                .enabled
3012        );
3013        assert!(custom_override
3014            .string
3015            .as_ref()
3016            .unwrap()
3017            .string_inverted_index
3018            .is_none());
3019    }
3020
3021    #[test]
3022    fn test_reconcile_with_collection_config_default_config() {
3023        // Test that when collection config is default, schema is returned as-is
3024        let schema = Schema::new_default(KnnIndex::Hnsw);
3025        let collection_config = InternalCollectionConfiguration::default_hnsw();
3026
3027        let result =
3028            Schema::reconcile_with_collection_config(schema.clone(), collection_config).unwrap();
3029        assert_eq!(result, schema);
3030    }
3031
3032    #[test]
3033    fn test_reconcile_with_collection_config_both_non_default() {
3034        // Test that when both schema and collection config are non-default, it returns an error
3035        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3036        schema.defaults.string = Some(StringValueType {
3037            fts_index: Some(FtsIndexType {
3038                enabled: true,
3039                config: FtsIndexConfig {},
3040            }),
3041            string_inverted_index: None,
3042        });
3043
3044        let mut collection_config = InternalCollectionConfiguration::default_hnsw();
3045        // Make collection config non-default by changing a parameter
3046        if let VectorIndexConfiguration::Hnsw(ref mut hnsw_config) = collection_config.vector_index
3047        {
3048            hnsw_config.ef_construction = 500; // Non-default value
3049        }
3050
3051        let result = Schema::reconcile_with_collection_config(schema, collection_config);
3052        assert!(result.is_err());
3053        assert!(matches!(
3054            result.unwrap_err(),
3055            SchemaError::ConfigAndSchemaConflict
3056        ));
3057    }
3058
3059    #[test]
3060    fn test_reconcile_with_collection_config_hnsw_override() {
3061        // Test that non-default HNSW collection config overrides default schema
3062        let schema = Schema::new_default(KnnIndex::Hnsw); // Use actual default schema
3063
3064        let collection_config = InternalCollectionConfiguration {
3065            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
3066                ef_construction: 300,
3067                max_neighbors: 32,
3068                ef_search: 50,
3069                num_threads: 8,
3070                batch_size: 200,
3071                sync_threshold: 2000,
3072                resize_factor: 1.5,
3073                space: Space::L2,
3074            }),
3075            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3076        };
3077
3078        let result = Schema::reconcile_with_collection_config(schema, collection_config).unwrap();
3079
3080        // Check that #embedding key override was created with the collection config settings
3081        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
3082        let vector_index = embedding_override
3083            .float_list
3084            .as_ref()
3085            .unwrap()
3086            .vector_index
3087            .as_ref()
3088            .unwrap();
3089
3090        assert!(vector_index.enabled);
3091        assert_eq!(vector_index.config.space, Some(Space::L2));
3092        assert_eq!(
3093            vector_index.config.embedding_function,
3094            Some(EmbeddingFunctionConfiguration::Legacy)
3095        );
3096        assert_eq!(
3097            vector_index.config.source_key,
3098            Some(DOCUMENT_KEY.to_string())
3099        );
3100
3101        let hnsw_config = vector_index.config.hnsw.as_ref().unwrap();
3102        assert_eq!(hnsw_config.ef_construction, Some(300));
3103        assert_eq!(hnsw_config.max_neighbors, Some(32));
3104        assert_eq!(hnsw_config.ef_search, Some(50));
3105        assert_eq!(hnsw_config.num_threads, Some(8));
3106        assert_eq!(hnsw_config.batch_size, Some(200));
3107        assert_eq!(hnsw_config.sync_threshold, Some(2000));
3108        assert_eq!(hnsw_config.resize_factor, Some(1.5));
3109
3110        assert!(vector_index.config.spann.is_none());
3111    }
3112
3113    #[test]
3114    fn test_reconcile_with_collection_config_spann_override() {
3115        // Test that non-default SPANN collection config overrides default schema
3116        let schema = Schema::new_default(KnnIndex::Spann); // Use actual default schema
3117
3118        let collection_config = InternalCollectionConfiguration {
3119            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
3120                search_nprobe: 20,
3121                search_rng_factor: 3.0,
3122                search_rng_epsilon: 0.2,
3123                nreplica_count: 5,
3124                write_rng_factor: 2.0,
3125                write_rng_epsilon: 0.1,
3126                split_threshold: 2000,
3127                num_samples_kmeans: 200,
3128                initial_lambda: 0.8,
3129                reassign_neighbor_count: 100,
3130                merge_threshold: 800,
3131                num_centers_to_merge_to: 20,
3132                write_nprobe: 10,
3133                ef_construction: 400,
3134                ef_search: 60,
3135                max_neighbors: 24,
3136                space: Space::Cosine,
3137            }),
3138            embedding_function: None,
3139        };
3140
3141        let result = Schema::reconcile_with_collection_config(schema, collection_config).unwrap();
3142
3143        // Check that #embedding key override was created with the collection config settings
3144        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
3145        let vector_index = embedding_override
3146            .float_list
3147            .as_ref()
3148            .unwrap()
3149            .vector_index
3150            .as_ref()
3151            .unwrap();
3152
3153        assert!(vector_index.enabled);
3154        assert_eq!(vector_index.config.space, Some(Space::Cosine));
3155        assert_eq!(vector_index.config.embedding_function, None);
3156        assert_eq!(
3157            vector_index.config.source_key,
3158            Some(DOCUMENT_KEY.to_string())
3159        );
3160
3161        assert!(vector_index.config.hnsw.is_none());
3162
3163        let spann_config = vector_index.config.spann.as_ref().unwrap();
3164        assert_eq!(spann_config.search_nprobe, Some(20));
3165        assert_eq!(spann_config.search_rng_factor, Some(3.0));
3166        assert_eq!(spann_config.search_rng_epsilon, Some(0.2));
3167        assert_eq!(spann_config.nreplica_count, Some(5));
3168        assert_eq!(spann_config.write_rng_factor, Some(2.0));
3169        assert_eq!(spann_config.write_rng_epsilon, Some(0.1));
3170        assert_eq!(spann_config.split_threshold, Some(2000));
3171        assert_eq!(spann_config.num_samples_kmeans, Some(200));
3172        assert_eq!(spann_config.initial_lambda, Some(0.8));
3173        assert_eq!(spann_config.reassign_neighbor_count, Some(100));
3174        assert_eq!(spann_config.merge_threshold, Some(800));
3175        assert_eq!(spann_config.num_centers_to_merge_to, Some(20));
3176        assert_eq!(spann_config.write_nprobe, Some(10));
3177        assert_eq!(spann_config.ef_construction, Some(400));
3178        assert_eq!(spann_config.ef_search, Some(60));
3179        assert_eq!(spann_config.max_neighbors, Some(24));
3180    }
3181
3182    #[test]
3183    fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
3184        // Test that collection config updates BOTH defaults.float_list.vector_index
3185        // AND keys["embedding"].float_list.vector_index
3186        let schema = Schema::new_default(KnnIndex::Hnsw);
3187
3188        let collection_config = InternalCollectionConfiguration {
3189            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
3190                ef_construction: 300,
3191                max_neighbors: 32,
3192                ef_search: 50,
3193                num_threads: 8,
3194                batch_size: 200,
3195                sync_threshold: 2000,
3196                resize_factor: 1.5,
3197                space: Space::L2,
3198            }),
3199            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3200        };
3201
3202        let result = Schema::reconcile_with_collection_config(schema, collection_config).unwrap();
3203
3204        // Check that defaults.float_list.vector_index was updated
3205        let defaults_vector_index = result
3206            .defaults
3207            .float_list
3208            .as_ref()
3209            .unwrap()
3210            .vector_index
3211            .as_ref()
3212            .unwrap();
3213
3214        // Should be disabled in defaults (template for new keys)
3215        assert!(!defaults_vector_index.enabled);
3216        // But config should be updated
3217        assert_eq!(defaults_vector_index.config.space, Some(Space::L2));
3218        assert_eq!(
3219            defaults_vector_index.config.embedding_function,
3220            Some(EmbeddingFunctionConfiguration::Legacy)
3221        );
3222        assert_eq!(
3223            defaults_vector_index.config.source_key,
3224            Some(DOCUMENT_KEY.to_string())
3225        );
3226        let defaults_hnsw = defaults_vector_index.config.hnsw.as_ref().unwrap();
3227        assert_eq!(defaults_hnsw.ef_construction, Some(300));
3228        assert_eq!(defaults_hnsw.max_neighbors, Some(32));
3229
3230        // Check that #embedding key override was also updated
3231        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
3232        let embedding_vector_index = embedding_override
3233            .float_list
3234            .as_ref()
3235            .unwrap()
3236            .vector_index
3237            .as_ref()
3238            .unwrap();
3239
3240        // Should be enabled on #embedding
3241        assert!(embedding_vector_index.enabled);
3242        // Config should match defaults
3243        assert_eq!(embedding_vector_index.config.space, Some(Space::L2));
3244        assert_eq!(
3245            embedding_vector_index.config.embedding_function,
3246            Some(EmbeddingFunctionConfiguration::Legacy)
3247        );
3248        assert_eq!(
3249            embedding_vector_index.config.source_key,
3250            Some(DOCUMENT_KEY.to_string())
3251        );
3252        let embedding_hnsw = embedding_vector_index.config.hnsw.as_ref().unwrap();
3253        assert_eq!(embedding_hnsw.ef_construction, Some(300));
3254        assert_eq!(embedding_hnsw.max_neighbors, Some(32));
3255    }
3256
3257    #[test]
3258    fn test_is_schema_default() {
3259        // Test that actual default schemas are correctly identified
3260        let default_hnsw_schema = Schema::new_default(KnnIndex::Hnsw);
3261        assert!(Schema::is_schema_default(&default_hnsw_schema));
3262
3263        let default_spann_schema = Schema::new_default(KnnIndex::Spann);
3264        assert!(Schema::is_schema_default(&default_spann_schema));
3265
3266        // Test that an empty schema is NOT considered default (since it doesn't match new_default structure)
3267        let empty_schema = Schema {
3268            defaults: ValueTypes::default(),
3269            keys: HashMap::new(),
3270        };
3271        assert!(!Schema::is_schema_default(&empty_schema));
3272
3273        // Test that a modified default schema is not considered default
3274        let mut modified_schema = Schema::new_default(KnnIndex::Hnsw);
3275        // Make a clear modification - change the string inverted index enabled state
3276        if let Some(ref mut string_type) = modified_schema.defaults.string {
3277            if let Some(ref mut string_inverted) = string_type.string_inverted_index {
3278                string_inverted.enabled = false; // Default is true, so this should make it non-default
3279            }
3280        }
3281        assert!(!Schema::is_schema_default(&modified_schema));
3282
3283        // Test that schema with additional key overrides is not default
3284        let mut schema_with_extra_overrides = Schema::new_default(KnnIndex::Hnsw);
3285        schema_with_extra_overrides
3286            .keys
3287            .insert("custom_key".to_string(), ValueTypes::default());
3288        assert!(!Schema::is_schema_default(&schema_with_extra_overrides));
3289    }
3290
3291    #[test]
3292    fn test_add_merges_keys_by_value_type() {
3293        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
3294        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
3295
3296        let string_override = ValueTypes {
3297            string: Some(StringValueType {
3298                string_inverted_index: Some(StringInvertedIndexType {
3299                    enabled: true,
3300                    config: StringInvertedIndexConfig {},
3301                }),
3302                fts_index: None,
3303            }),
3304            ..Default::default()
3305        };
3306        schema_a
3307            .keys
3308            .insert("custom_field".to_string(), string_override);
3309
3310        let float_override = ValueTypes {
3311            float: Some(FloatValueType {
3312                float_inverted_index: Some(FloatInvertedIndexType {
3313                    enabled: true,
3314                    config: FloatInvertedIndexConfig {},
3315                }),
3316            }),
3317            ..Default::default()
3318        };
3319        schema_b
3320            .keys
3321            .insert("custom_field".to_string(), float_override);
3322
3323        let merged = schema_a.merge(&schema_b).unwrap();
3324        let merged_override = merged.keys.get("custom_field").unwrap();
3325
3326        assert!(merged_override.string.is_some());
3327        assert!(merged_override.float.is_some());
3328        assert!(
3329            merged_override
3330                .string
3331                .as_ref()
3332                .unwrap()
3333                .string_inverted_index
3334                .as_ref()
3335                .unwrap()
3336                .enabled
3337        );
3338        assert!(
3339            merged_override
3340                .float
3341                .as_ref()
3342                .unwrap()
3343                .float_inverted_index
3344                .as_ref()
3345                .unwrap()
3346                .enabled
3347        );
3348    }
3349
3350    #[test]
3351    fn test_add_rejects_different_defaults() {
3352        let schema_a = Schema::new_default(KnnIndex::Hnsw);
3353        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
3354
3355        if let Some(string_type) = schema_b.defaults.string.as_mut() {
3356            if let Some(string_index) = string_type.string_inverted_index.as_mut() {
3357                string_index.enabled = false;
3358            }
3359        }
3360
3361        let err = schema_a.merge(&schema_b).unwrap_err();
3362        assert!(matches!(err, SchemaError::DefaultsMismatch));
3363    }
3364
3365    #[test]
3366    fn test_add_detects_conflicting_value_type_configuration() {
3367        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
3368        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
3369
3370        let string_override_enabled = ValueTypes {
3371            string: Some(StringValueType {
3372                string_inverted_index: Some(StringInvertedIndexType {
3373                    enabled: true,
3374                    config: StringInvertedIndexConfig {},
3375                }),
3376                fts_index: None,
3377            }),
3378            ..Default::default()
3379        };
3380        schema_a
3381            .keys
3382            .insert("custom_field".to_string(), string_override_enabled);
3383
3384        let string_override_disabled = ValueTypes {
3385            string: Some(StringValueType {
3386                string_inverted_index: Some(StringInvertedIndexType {
3387                    enabled: false,
3388                    config: StringInvertedIndexConfig {},
3389                }),
3390                fts_index: None,
3391            }),
3392            ..Default::default()
3393        };
3394        schema_b
3395            .keys
3396            .insert("custom_field".to_string(), string_override_disabled);
3397
3398        let err = schema_a.merge(&schema_b).unwrap_err();
3399        assert!(matches!(err, SchemaError::ConfigurationConflict { .. }));
3400    }
3401
3402    // TODO(Sanket): Remove this test once deployed
3403    #[test]
3404    fn test_backward_compatibility_aliases() {
3405        // Test that old format with # and $ prefixes and key_overrides can be deserialized
3406        let old_format_json = r###"{
3407            "defaults": {
3408                "#string": {
3409                    "$fts_index": {
3410                        "enabled": true,
3411                        "config": {}
3412                    }
3413                },
3414                "#int": {
3415                    "$int_inverted_index": {
3416                        "enabled": true,
3417                        "config": {}
3418                    }
3419                },
3420                "#float_list": {
3421                    "$vector_index": {
3422                        "enabled": true,
3423                        "config": {
3424                            "spann": {
3425                                "search_nprobe": 10
3426                            }
3427                        }
3428                    }
3429                }
3430            },
3431            "key_overrides": {
3432                "#document": {
3433                    "#string": {
3434                        "$fts_index": {
3435                            "enabled": false,
3436                            "config": {}
3437                        }
3438                    }
3439                }
3440            }
3441        }"###;
3442
3443        let schema_from_old: Schema = serde_json::from_str(old_format_json).unwrap();
3444
3445        // Test that new format without prefixes and keys can be deserialized
3446        let new_format_json = r###"{
3447            "defaults": {
3448                "string": {
3449                    "fts_index": {
3450                        "enabled": true,
3451                        "config": {}
3452                    }
3453                },
3454                "int": {
3455                    "int_inverted_index": {
3456                        "enabled": true,
3457                        "config": {}
3458                    }
3459                },
3460                "float_list": {
3461                    "vector_index": {
3462                        "enabled": true,
3463                        "config": {
3464                            "spann": {
3465                                "search_nprobe": 10
3466                            }
3467                        }
3468                    }
3469                }
3470            },
3471            "keys": {
3472                "#document": {
3473                    "string": {
3474                        "fts_index": {
3475                            "enabled": false,
3476                            "config": {}
3477                        }
3478                    }
3479                }
3480            }
3481        }"###;
3482
3483        let schema_from_new: Schema = serde_json::from_str(new_format_json).unwrap();
3484
3485        // Both should deserialize to the same structure
3486        assert_eq!(schema_from_old, schema_from_new);
3487
3488        // Verify the deserialized content is correct
3489        assert!(schema_from_old.defaults.string.is_some());
3490        assert!(schema_from_old
3491            .defaults
3492            .string
3493            .as_ref()
3494            .unwrap()
3495            .fts_index
3496            .is_some());
3497        assert!(
3498            schema_from_old
3499                .defaults
3500                .string
3501                .as_ref()
3502                .unwrap()
3503                .fts_index
3504                .as_ref()
3505                .unwrap()
3506                .enabled
3507        );
3508
3509        assert!(schema_from_old.defaults.int.is_some());
3510        assert!(schema_from_old
3511            .defaults
3512            .int
3513            .as_ref()
3514            .unwrap()
3515            .int_inverted_index
3516            .is_some());
3517
3518        assert!(schema_from_old.defaults.float_list.is_some());
3519        assert!(schema_from_old
3520            .defaults
3521            .float_list
3522            .as_ref()
3523            .unwrap()
3524            .vector_index
3525            .is_some());
3526
3527        assert!(schema_from_old.keys.contains_key(DOCUMENT_KEY));
3528        let doc_override = schema_from_old.keys.get(DOCUMENT_KEY).unwrap();
3529        assert!(doc_override.string.is_some());
3530        assert!(
3531            !doc_override
3532                .string
3533                .as_ref()
3534                .unwrap()
3535                .fts_index
3536                .as_ref()
3537                .unwrap()
3538                .enabled
3539        );
3540
3541        // Test that serialization always outputs the new format (without prefixes)
3542        let serialized = serde_json::to_string(&schema_from_old).unwrap();
3543
3544        // Should contain new format keys
3545        assert!(serialized.contains(r#""keys":"#));
3546        assert!(serialized.contains(r#""string":"#));
3547        assert!(serialized.contains(r#""fts_index":"#));
3548        assert!(serialized.contains(r#""int_inverted_index":"#));
3549        assert!(serialized.contains(r#""vector_index":"#));
3550
3551        // Should NOT contain old format keys
3552        assert!(!serialized.contains(r#""key_overrides":"#));
3553        assert!(!serialized.contains(r###""#string":"###));
3554        assert!(!serialized.contains(r###""$fts_index":"###));
3555        assert!(!serialized.contains(r###""$int_inverted_index":"###));
3556        assert!(!serialized.contains(r###""$vector_index":"###));
3557    }
3558
3559    #[test]
3560    fn test_hnsw_index_config_validation() {
3561        use validator::Validate;
3562
3563        // Valid configuration - should pass
3564        let valid_config = HnswIndexConfig {
3565            batch_size: Some(10),
3566            sync_threshold: Some(100),
3567            ef_construction: Some(100),
3568            max_neighbors: Some(16),
3569            ..Default::default()
3570        };
3571        assert!(valid_config.validate().is_ok());
3572
3573        // Invalid: batch_size too small (min 2)
3574        let invalid_batch_size = HnswIndexConfig {
3575            batch_size: Some(1),
3576            ..Default::default()
3577        };
3578        assert!(invalid_batch_size.validate().is_err());
3579
3580        // Invalid: sync_threshold too small (min 2)
3581        let invalid_sync_threshold = HnswIndexConfig {
3582            sync_threshold: Some(1),
3583            ..Default::default()
3584        };
3585        assert!(invalid_sync_threshold.validate().is_err());
3586
3587        // Valid: boundary values (exactly 2) should pass
3588        let boundary_config = HnswIndexConfig {
3589            batch_size: Some(2),
3590            sync_threshold: Some(2),
3591            ..Default::default()
3592        };
3593        assert!(boundary_config.validate().is_ok());
3594
3595        // Valid: None values should pass validation
3596        let all_none_config = HnswIndexConfig {
3597            ..Default::default()
3598        };
3599        assert!(all_none_config.validate().is_ok());
3600
3601        // Valid: fields without validation can be any value
3602        let other_fields_config = HnswIndexConfig {
3603            ef_construction: Some(1),
3604            max_neighbors: Some(1),
3605            ef_search: Some(1),
3606            num_threads: Some(1),
3607            resize_factor: Some(0.1),
3608            ..Default::default()
3609        };
3610        assert!(other_fields_config.validate().is_ok());
3611    }
3612
3613    #[test]
3614    fn test_spann_index_config_validation() {
3615        use validator::Validate;
3616
3617        // Valid configuration - should pass
3618        let valid_config = SpannIndexConfig {
3619            write_nprobe: Some(32),
3620            nreplica_count: Some(4),
3621            split_threshold: Some(100),
3622            merge_threshold: Some(50),
3623            reassign_neighbor_count: Some(32),
3624            num_centers_to_merge_to: Some(4),
3625            ef_construction: Some(100),
3626            ef_search: Some(100),
3627            max_neighbors: Some(32),
3628            search_rng_factor: Some(1.0),
3629            write_rng_factor: Some(1.0),
3630            search_rng_epsilon: Some(7.5),
3631            write_rng_epsilon: Some(7.5),
3632            ..Default::default()
3633        };
3634        assert!(valid_config.validate().is_ok());
3635
3636        // Invalid: write_nprobe too large (max 64)
3637        let invalid_write_nprobe = SpannIndexConfig {
3638            write_nprobe: Some(200),
3639            ..Default::default()
3640        };
3641        assert!(invalid_write_nprobe.validate().is_err());
3642
3643        // Invalid: split_threshold too small (min 50)
3644        let invalid_split_threshold = SpannIndexConfig {
3645            split_threshold: Some(10),
3646            ..Default::default()
3647        };
3648        assert!(invalid_split_threshold.validate().is_err());
3649
3650        // Invalid: split_threshold too large (max 200)
3651        let invalid_split_threshold_high = SpannIndexConfig {
3652            split_threshold: Some(250),
3653            ..Default::default()
3654        };
3655        assert!(invalid_split_threshold_high.validate().is_err());
3656
3657        // Invalid: nreplica_count too large (max 8)
3658        let invalid_nreplica = SpannIndexConfig {
3659            nreplica_count: Some(10),
3660            ..Default::default()
3661        };
3662        assert!(invalid_nreplica.validate().is_err());
3663
3664        // Invalid: reassign_neighbor_count too large (max 64)
3665        let invalid_reassign = SpannIndexConfig {
3666            reassign_neighbor_count: Some(100),
3667            ..Default::default()
3668        };
3669        assert!(invalid_reassign.validate().is_err());
3670
3671        // Invalid: merge_threshold out of range (min 25, max 100)
3672        let invalid_merge_threshold_low = SpannIndexConfig {
3673            merge_threshold: Some(5),
3674            ..Default::default()
3675        };
3676        assert!(invalid_merge_threshold_low.validate().is_err());
3677
3678        let invalid_merge_threshold_high = SpannIndexConfig {
3679            merge_threshold: Some(150),
3680            ..Default::default()
3681        };
3682        assert!(invalid_merge_threshold_high.validate().is_err());
3683
3684        // Invalid: num_centers_to_merge_to too large (max 8)
3685        let invalid_num_centers = SpannIndexConfig {
3686            num_centers_to_merge_to: Some(10),
3687            ..Default::default()
3688        };
3689        assert!(invalid_num_centers.validate().is_err());
3690
3691        // Invalid: ef_construction too large (max 200)
3692        let invalid_ef_construction = SpannIndexConfig {
3693            ef_construction: Some(300),
3694            ..Default::default()
3695        };
3696        assert!(invalid_ef_construction.validate().is_err());
3697
3698        // Invalid: ef_search too large (max 200)
3699        let invalid_ef_search = SpannIndexConfig {
3700            ef_search: Some(300),
3701            ..Default::default()
3702        };
3703        assert!(invalid_ef_search.validate().is_err());
3704
3705        // Invalid: max_neighbors too large (max 64)
3706        let invalid_max_neighbors = SpannIndexConfig {
3707            max_neighbors: Some(100),
3708            ..Default::default()
3709        };
3710        assert!(invalid_max_neighbors.validate().is_err());
3711
3712        // Invalid: search_nprobe too large (max 128)
3713        let invalid_search_nprobe = SpannIndexConfig {
3714            search_nprobe: Some(200),
3715            ..Default::default()
3716        };
3717        assert!(invalid_search_nprobe.validate().is_err());
3718
3719        // Invalid: search_rng_factor not exactly 1.0 (min 1.0, max 1.0)
3720        let invalid_search_rng_factor_low = SpannIndexConfig {
3721            search_rng_factor: Some(0.9),
3722            ..Default::default()
3723        };
3724        assert!(invalid_search_rng_factor_low.validate().is_err());
3725
3726        let invalid_search_rng_factor_high = SpannIndexConfig {
3727            search_rng_factor: Some(1.1),
3728            ..Default::default()
3729        };
3730        assert!(invalid_search_rng_factor_high.validate().is_err());
3731
3732        // Valid: search_rng_factor exactly 1.0
3733        let valid_search_rng_factor = SpannIndexConfig {
3734            search_rng_factor: Some(1.0),
3735            ..Default::default()
3736        };
3737        assert!(valid_search_rng_factor.validate().is_ok());
3738
3739        // Invalid: search_rng_epsilon out of range (min 5.0, max 10.0)
3740        let invalid_search_rng_epsilon_low = SpannIndexConfig {
3741            search_rng_epsilon: Some(4.0),
3742            ..Default::default()
3743        };
3744        assert!(invalid_search_rng_epsilon_low.validate().is_err());
3745
3746        let invalid_search_rng_epsilon_high = SpannIndexConfig {
3747            search_rng_epsilon: Some(11.0),
3748            ..Default::default()
3749        };
3750        assert!(invalid_search_rng_epsilon_high.validate().is_err());
3751
3752        // Valid: search_rng_epsilon within range
3753        let valid_search_rng_epsilon = SpannIndexConfig {
3754            search_rng_epsilon: Some(7.5),
3755            ..Default::default()
3756        };
3757        assert!(valid_search_rng_epsilon.validate().is_ok());
3758
3759        // Invalid: write_rng_factor not exactly 1.0 (min 1.0, max 1.0)
3760        let invalid_write_rng_factor_low = SpannIndexConfig {
3761            write_rng_factor: Some(0.9),
3762            ..Default::default()
3763        };
3764        assert!(invalid_write_rng_factor_low.validate().is_err());
3765
3766        let invalid_write_rng_factor_high = SpannIndexConfig {
3767            write_rng_factor: Some(1.1),
3768            ..Default::default()
3769        };
3770        assert!(invalid_write_rng_factor_high.validate().is_err());
3771
3772        // Valid: write_rng_factor exactly 1.0
3773        let valid_write_rng_factor = SpannIndexConfig {
3774            write_rng_factor: Some(1.0),
3775            ..Default::default()
3776        };
3777        assert!(valid_write_rng_factor.validate().is_ok());
3778
3779        // Invalid: write_rng_epsilon out of range (min 5.0, max 10.0)
3780        let invalid_write_rng_epsilon_low = SpannIndexConfig {
3781            write_rng_epsilon: Some(4.0),
3782            ..Default::default()
3783        };
3784        assert!(invalid_write_rng_epsilon_low.validate().is_err());
3785
3786        let invalid_write_rng_epsilon_high = SpannIndexConfig {
3787            write_rng_epsilon: Some(11.0),
3788            ..Default::default()
3789        };
3790        assert!(invalid_write_rng_epsilon_high.validate().is_err());
3791
3792        // Valid: write_rng_epsilon within range
3793        let valid_write_rng_epsilon = SpannIndexConfig {
3794            write_rng_epsilon: Some(7.5),
3795            ..Default::default()
3796        };
3797        assert!(valid_write_rng_epsilon.validate().is_ok());
3798
3799        // Invalid: num_samples_kmeans too large (max 1000)
3800        let invalid_num_samples_kmeans = SpannIndexConfig {
3801            num_samples_kmeans: Some(1500),
3802            ..Default::default()
3803        };
3804        assert!(invalid_num_samples_kmeans.validate().is_err());
3805
3806        // Valid: num_samples_kmeans within range
3807        let valid_num_samples_kmeans = SpannIndexConfig {
3808            num_samples_kmeans: Some(500),
3809            ..Default::default()
3810        };
3811        assert!(valid_num_samples_kmeans.validate().is_ok());
3812
3813        // Invalid: initial_lambda not exactly 100.0 (min 100.0, max 100.0)
3814        let invalid_initial_lambda_high = SpannIndexConfig {
3815            initial_lambda: Some(150.0),
3816            ..Default::default()
3817        };
3818        assert!(invalid_initial_lambda_high.validate().is_err());
3819
3820        let invalid_initial_lambda_low = SpannIndexConfig {
3821            initial_lambda: Some(50.0),
3822            ..Default::default()
3823        };
3824        assert!(invalid_initial_lambda_low.validate().is_err());
3825
3826        // Valid: initial_lambda exactly 100.0
3827        let valid_initial_lambda = SpannIndexConfig {
3828            initial_lambda: Some(100.0),
3829            ..Default::default()
3830        };
3831        assert!(valid_initial_lambda.validate().is_ok());
3832
3833        // Valid: None values should pass validation
3834        let all_none_config = SpannIndexConfig {
3835            ..Default::default()
3836        };
3837        assert!(all_none_config.validate().is_ok());
3838    }
3839
3840    #[test]
3841    fn test_builder_pattern_crud_workflow() {
3842        // Test comprehensive CRUD workflow using the builder pattern
3843
3844        // CREATE: Build a schema with multiple indexes
3845        let schema = Schema::new_default(KnnIndex::Hnsw)
3846            .create_index(
3847                None,
3848                IndexConfig::Vector(VectorIndexConfig {
3849                    space: Some(Space::Cosine),
3850                    embedding_function: None,
3851                    source_key: None,
3852                    hnsw: Some(HnswIndexConfig {
3853                        ef_construction: Some(200),
3854                        max_neighbors: Some(32),
3855                        ef_search: Some(50),
3856                        num_threads: None,
3857                        batch_size: None,
3858                        sync_threshold: None,
3859                        resize_factor: None,
3860                    }),
3861                    spann: None,
3862                }),
3863            )
3864            .expect("vector config should succeed")
3865            .create_index(
3866                Some("category"),
3867                IndexConfig::StringInverted(StringInvertedIndexConfig {}),
3868            )
3869            .expect("string inverted on key should succeed")
3870            .create_index(
3871                Some("year"),
3872                IndexConfig::IntInverted(IntInvertedIndexConfig {}),
3873            )
3874            .expect("int inverted on key should succeed")
3875            .create_index(
3876                Some("rating"),
3877                IndexConfig::FloatInverted(FloatInvertedIndexConfig {}),
3878            )
3879            .expect("float inverted on key should succeed")
3880            .create_index(
3881                Some("is_active"),
3882                IndexConfig::BoolInverted(BoolInvertedIndexConfig {}),
3883            )
3884            .expect("bool inverted on key should succeed");
3885
3886        // READ: Verify the schema was built correctly
3887        // Check vector config
3888        assert!(schema.keys.contains_key(EMBEDDING_KEY));
3889        let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
3890        assert!(embedding.float_list.is_some());
3891        let vector_index = embedding
3892            .float_list
3893            .as_ref()
3894            .unwrap()
3895            .vector_index
3896            .as_ref()
3897            .unwrap();
3898        assert!(vector_index.enabled);
3899        assert_eq!(vector_index.config.space, Some(Space::Cosine));
3900        assert_eq!(
3901            vector_index.config.hnsw.as_ref().unwrap().ef_construction,
3902            Some(200)
3903        );
3904
3905        // Check per-key indexes
3906        assert!(schema.keys.contains_key("category"));
3907        assert!(schema.keys.contains_key("year"));
3908        assert!(schema.keys.contains_key("rating"));
3909        assert!(schema.keys.contains_key("is_active"));
3910
3911        // Verify category string inverted index
3912        let category = schema.keys.get("category").unwrap();
3913        assert!(category.string.is_some());
3914        let string_idx = category
3915            .string
3916            .as_ref()
3917            .unwrap()
3918            .string_inverted_index
3919            .as_ref()
3920            .unwrap();
3921        assert!(string_idx.enabled);
3922
3923        // Verify year int inverted index
3924        let year = schema.keys.get("year").unwrap();
3925        assert!(year.int.is_some());
3926        let int_idx = year
3927            .int
3928            .as_ref()
3929            .unwrap()
3930            .int_inverted_index
3931            .as_ref()
3932            .unwrap();
3933        assert!(int_idx.enabled);
3934
3935        // UPDATE/DELETE: Disable some indexes
3936        let schema = schema
3937            .delete_index(
3938                Some("category"),
3939                IndexConfig::StringInverted(StringInvertedIndexConfig {}),
3940            )
3941            .expect("delete string inverted should succeed")
3942            .delete_index(
3943                Some("year"),
3944                IndexConfig::IntInverted(IntInvertedIndexConfig {}),
3945            )
3946            .expect("delete int inverted should succeed");
3947
3948        // VERIFY DELETE: Check that indexes were disabled
3949        let category = schema.keys.get("category").unwrap();
3950        let string_idx = category
3951            .string
3952            .as_ref()
3953            .unwrap()
3954            .string_inverted_index
3955            .as_ref()
3956            .unwrap();
3957        assert!(!string_idx.enabled); // Should be disabled now
3958
3959        let year = schema.keys.get("year").unwrap();
3960        let int_idx = year
3961            .int
3962            .as_ref()
3963            .unwrap()
3964            .int_inverted_index
3965            .as_ref()
3966            .unwrap();
3967        assert!(!int_idx.enabled); // Should be disabled now
3968
3969        // Verify other indexes still enabled
3970        let rating = schema.keys.get("rating").unwrap();
3971        let float_idx = rating
3972            .float
3973            .as_ref()
3974            .unwrap()
3975            .float_inverted_index
3976            .as_ref()
3977            .unwrap();
3978        assert!(float_idx.enabled); // Should still be enabled
3979
3980        let is_active = schema.keys.get("is_active").unwrap();
3981        let bool_idx = is_active
3982            .boolean
3983            .as_ref()
3984            .unwrap()
3985            .bool_inverted_index
3986            .as_ref()
3987            .unwrap();
3988        assert!(bool_idx.enabled); // Should still be enabled
3989    }
3990
3991    #[test]
3992    fn test_builder_create_index_validation_errors() {
3993        // Test all validation errors for create_index() as documented in the docstring:
3994        // - Attempting to create index on special keys (#document, #embedding)
3995        // - Invalid configuration (e.g., vector index on non-embedding key)
3996        // - Conflicting with existing indexes (e.g., multiple sparse vector indexes)
3997
3998        // Error: Vector index on specific key (must be global)
3999        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
4000            Some("my_vectors"),
4001            IndexConfig::Vector(VectorIndexConfig {
4002                space: Some(Space::L2),
4003                embedding_function: None,
4004                source_key: None,
4005                hnsw: None,
4006                spann: None,
4007            }),
4008        );
4009        assert!(result.is_err());
4010        assert!(matches!(
4011            result.unwrap_err(),
4012            SchemaBuilderError::VectorIndexMustBeGlobal { key } if key == "my_vectors"
4013        ));
4014
4015        // Error: FTS index on specific key (must be global)
4016        let result = Schema::new_default(KnnIndex::Hnsw)
4017            .create_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
4018        assert!(result.is_err());
4019        assert!(matches!(
4020            result.unwrap_err(),
4021            SchemaBuilderError::FtsIndexMustBeGlobal { key } if key == "my_text"
4022        ));
4023
4024        // Error: Cannot create index on special key #document
4025        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
4026            Some(DOCUMENT_KEY),
4027            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
4028        );
4029        assert!(result.is_err());
4030        assert!(matches!(
4031            result.unwrap_err(),
4032            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
4033        ));
4034
4035        // Error: Cannot create index on special key #embedding
4036        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
4037            Some(EMBEDDING_KEY),
4038            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
4039        );
4040        assert!(result.is_err());
4041        assert!(matches!(
4042            result.unwrap_err(),
4043            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
4044        ));
4045
4046        // Error: Sparse vector without key (must specify key)
4047        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
4048            None,
4049            IndexConfig::SparseVector(SparseVectorIndexConfig {
4050                embedding_function: None,
4051                source_key: None,
4052                bm25: None,
4053            }),
4054        );
4055        assert!(result.is_err());
4056        assert!(matches!(
4057            result.unwrap_err(),
4058            SchemaBuilderError::SparseVectorRequiresKey
4059        ));
4060
4061        // Error: Multiple sparse vector indexes (only one allowed per collection)
4062        let result = Schema::new_default(KnnIndex::Hnsw)
4063            .create_index(
4064                Some("sparse1"),
4065                IndexConfig::SparseVector(SparseVectorIndexConfig {
4066                    embedding_function: None,
4067                    source_key: None,
4068                    bm25: None,
4069                }),
4070            )
4071            .expect("first sparse should succeed")
4072            .create_index(
4073                Some("sparse2"),
4074                IndexConfig::SparseVector(SparseVectorIndexConfig {
4075                    embedding_function: None,
4076                    source_key: None,
4077                    bm25: None,
4078                }),
4079            );
4080        assert!(result.is_err());
4081        assert!(matches!(
4082            result.unwrap_err(),
4083            SchemaBuilderError::MultipleSparseVectorIndexes { existing_key } if existing_key == "sparse1"
4084        ));
4085    }
4086
4087    #[test]
4088    fn test_builder_delete_index_validation_errors() {
4089        // Test all validation errors for delete_index() as documented in the docstring:
4090        // - Attempting to delete index on special keys (#document, #embedding)
4091        // - Attempting to delete vector, FTS, or sparse vector indexes (not currently supported)
4092
4093        // Error: Delete on special key #embedding
4094        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
4095            Some(EMBEDDING_KEY),
4096            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
4097        );
4098        assert!(result.is_err());
4099        assert!(matches!(
4100            result.unwrap_err(),
4101            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
4102        ));
4103
4104        // Error: Delete on special key #document
4105        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
4106            Some(DOCUMENT_KEY),
4107            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
4108        );
4109        assert!(result.is_err());
4110        assert!(matches!(
4111            result.unwrap_err(),
4112            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
4113        ));
4114
4115        // Error: Delete vector index (not currently supported)
4116        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
4117            None,
4118            IndexConfig::Vector(VectorIndexConfig {
4119                space: None,
4120                embedding_function: None,
4121                source_key: None,
4122                hnsw: None,
4123                spann: None,
4124            }),
4125        );
4126        assert!(result.is_err());
4127        assert!(matches!(
4128            result.unwrap_err(),
4129            SchemaBuilderError::VectorIndexDeletionNotSupported
4130        ));
4131
4132        // Error: Delete FTS index (not currently supported)
4133        let result = Schema::new_default(KnnIndex::Hnsw)
4134            .delete_index(None, IndexConfig::Fts(FtsIndexConfig {}));
4135        assert!(result.is_err());
4136        assert!(matches!(
4137            result.unwrap_err(),
4138            SchemaBuilderError::FtsIndexDeletionNotSupported
4139        ));
4140
4141        // Error: Delete sparse vector index (not currently supported)
4142        let result = Schema::new_default(KnnIndex::Hnsw)
4143            .create_index(
4144                Some("sparse"),
4145                IndexConfig::SparseVector(SparseVectorIndexConfig {
4146                    embedding_function: None,
4147                    source_key: None,
4148                    bm25: None,
4149                }),
4150            )
4151            .expect("create should succeed")
4152            .delete_index(
4153                Some("sparse"),
4154                IndexConfig::SparseVector(SparseVectorIndexConfig {
4155                    embedding_function: None,
4156                    source_key: None,
4157                    bm25: None,
4158                }),
4159            );
4160        assert!(result.is_err());
4161        assert!(matches!(
4162            result.unwrap_err(),
4163            SchemaBuilderError::SparseVectorIndexDeletionNotSupported
4164        ));
4165    }
4166
4167    #[test]
4168    fn test_builder_pattern_chaining() {
4169        // Test complex chaining scenario
4170        let schema = Schema::new_default(KnnIndex::Hnsw)
4171            .create_index(Some("tag1"), StringInvertedIndexConfig {}.into())
4172            .unwrap()
4173            .create_index(Some("tag2"), StringInvertedIndexConfig {}.into())
4174            .unwrap()
4175            .create_index(Some("tag3"), StringInvertedIndexConfig {}.into())
4176            .unwrap()
4177            .create_index(Some("count"), IntInvertedIndexConfig {}.into())
4178            .unwrap()
4179            .delete_index(Some("tag2"), StringInvertedIndexConfig {}.into())
4180            .unwrap()
4181            .create_index(Some("score"), FloatInvertedIndexConfig {}.into())
4182            .unwrap();
4183
4184        // Verify tag1 is enabled
4185        assert!(
4186            schema
4187                .keys
4188                .get("tag1")
4189                .unwrap()
4190                .string
4191                .as_ref()
4192                .unwrap()
4193                .string_inverted_index
4194                .as_ref()
4195                .unwrap()
4196                .enabled
4197        );
4198
4199        // Verify tag2 is disabled
4200        assert!(
4201            !schema
4202                .keys
4203                .get("tag2")
4204                .unwrap()
4205                .string
4206                .as_ref()
4207                .unwrap()
4208                .string_inverted_index
4209                .as_ref()
4210                .unwrap()
4211                .enabled
4212        );
4213
4214        // Verify tag3 is enabled
4215        assert!(
4216            schema
4217                .keys
4218                .get("tag3")
4219                .unwrap()
4220                .string
4221                .as_ref()
4222                .unwrap()
4223                .string_inverted_index
4224                .as_ref()
4225                .unwrap()
4226                .enabled
4227        );
4228
4229        // Verify count is enabled
4230        assert!(
4231            schema
4232                .keys
4233                .get("count")
4234                .unwrap()
4235                .int
4236                .as_ref()
4237                .unwrap()
4238                .int_inverted_index
4239                .as_ref()
4240                .unwrap()
4241                .enabled
4242        );
4243
4244        // Verify score is enabled
4245        assert!(
4246            schema
4247                .keys
4248                .get("score")
4249                .unwrap()
4250                .float
4251                .as_ref()
4252                .unwrap()
4253                .float_inverted_index
4254                .as_ref()
4255                .unwrap()
4256                .enabled
4257        );
4258    }
4259
4260    #[test]
4261    fn test_schema_default_matches_python() {
4262        // Test that Schema::default() matches Python's Schema() behavior exactly
4263        let schema = Schema::default();
4264
4265        // ============================================================================
4266        // VERIFY DEFAULTS (match Python's _initialize_defaults)
4267        // ============================================================================
4268
4269        // String defaults: FTS disabled, string inverted enabled
4270        assert!(schema.defaults.string.is_some());
4271        let string = schema.defaults.string.as_ref().unwrap();
4272        assert!(!string.fts_index.as_ref().unwrap().enabled);
4273        assert!(string.string_inverted_index.as_ref().unwrap().enabled);
4274
4275        // Float list defaults: vector index disabled
4276        assert!(schema.defaults.float_list.is_some());
4277        let float_list = schema.defaults.float_list.as_ref().unwrap();
4278        assert!(!float_list.vector_index.as_ref().unwrap().enabled);
4279        let vector_config = &float_list.vector_index.as_ref().unwrap().config;
4280        assert_eq!(vector_config.space, None); // Python leaves as None
4281        assert_eq!(vector_config.hnsw, None); // Python doesn't specify
4282        assert_eq!(vector_config.spann, None); // Python doesn't specify
4283        assert_eq!(vector_config.source_key, None);
4284
4285        // Sparse vector defaults: disabled
4286        assert!(schema.defaults.sparse_vector.is_some());
4287        let sparse = schema.defaults.sparse_vector.as_ref().unwrap();
4288        assert!(!sparse.sparse_vector_index.as_ref().unwrap().enabled);
4289
4290        // Int defaults: inverted index enabled
4291        assert!(schema.defaults.int.is_some());
4292        assert!(
4293            schema
4294                .defaults
4295                .int
4296                .as_ref()
4297                .unwrap()
4298                .int_inverted_index
4299                .as_ref()
4300                .unwrap()
4301                .enabled
4302        );
4303
4304        // Float defaults: inverted index enabled
4305        assert!(schema.defaults.float.is_some());
4306        assert!(
4307            schema
4308                .defaults
4309                .float
4310                .as_ref()
4311                .unwrap()
4312                .float_inverted_index
4313                .as_ref()
4314                .unwrap()
4315                .enabled
4316        );
4317
4318        // Bool defaults: inverted index enabled
4319        assert!(schema.defaults.boolean.is_some());
4320        assert!(
4321            schema
4322                .defaults
4323                .boolean
4324                .as_ref()
4325                .unwrap()
4326                .bool_inverted_index
4327                .as_ref()
4328                .unwrap()
4329                .enabled
4330        );
4331
4332        // ============================================================================
4333        // VERIFY SPECIAL KEYS (match Python's _initialize_keys)
4334        // ============================================================================
4335
4336        // #document: FTS enabled, string inverted disabled
4337        assert!(schema.keys.contains_key(DOCUMENT_KEY));
4338        let doc = schema.keys.get(DOCUMENT_KEY).unwrap();
4339        assert!(doc.string.is_some());
4340        assert!(
4341            doc.string
4342                .as_ref()
4343                .unwrap()
4344                .fts_index
4345                .as_ref()
4346                .unwrap()
4347                .enabled
4348        );
4349        assert!(
4350            !doc.string
4351                .as_ref()
4352                .unwrap()
4353                .string_inverted_index
4354                .as_ref()
4355                .unwrap()
4356                .enabled
4357        );
4358
4359        // #embedding: vector index enabled with source_key=#document
4360        assert!(schema.keys.contains_key(EMBEDDING_KEY));
4361        let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
4362        assert!(embedding.float_list.is_some());
4363        let vec_idx = embedding
4364            .float_list
4365            .as_ref()
4366            .unwrap()
4367            .vector_index
4368            .as_ref()
4369            .unwrap();
4370        assert!(vec_idx.enabled);
4371        assert_eq!(vec_idx.config.source_key, Some(DOCUMENT_KEY.to_string()));
4372        assert_eq!(vec_idx.config.space, None); // Python leaves as None
4373        assert_eq!(vec_idx.config.hnsw, None); // Python doesn't specify
4374        assert_eq!(vec_idx.config.spann, None); // Python doesn't specify
4375
4376        // Verify only these two special keys exist
4377        assert_eq!(schema.keys.len(), 2);
4378    }
4379
4380    #[test]
4381    fn test_schema_default_works_with_builder() {
4382        // Test that Schema::default() can be used with builder pattern
4383        let schema = Schema::default()
4384            .create_index(Some("category"), StringInvertedIndexConfig {}.into())
4385            .expect("should succeed");
4386
4387        // Verify the new index was added
4388        assert!(schema.keys.contains_key("category"));
4389        assert!(schema.keys.contains_key(DOCUMENT_KEY));
4390        assert!(schema.keys.contains_key(EMBEDDING_KEY));
4391        assert_eq!(schema.keys.len(), 3);
4392    }
4393}