chroma_types/
collection_schema.rs

1use chroma_error::{ChromaError, ErrorCodes};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use thiserror::Error;
5use validator::Validate;
6
7use crate::collection_configuration::{
8    EmbeddingFunctionConfiguration, InternalCollectionConfiguration,
9    UpdateVectorIndexConfiguration, VectorIndexConfiguration,
10};
11use crate::hnsw_configuration::Space;
12use crate::metadata::{MetadataComparison, MetadataValueType, Where};
13use crate::operator::QueryVector;
14use crate::{
15    default_batch_size, default_construction_ef, default_construction_ef_spann,
16    default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
17    default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
18    default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
19    default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
20    default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
21    default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor,
22    HnswParametersFromSegmentError, InternalHnswConfiguration, InternalSpannConfiguration,
23    InternalUpdateCollectionConfiguration, KnnIndex, Segment, CHROMA_KEY,
24};
25
26impl ChromaError for SchemaError {
27    fn code(&self) -> ErrorCodes {
28        ErrorCodes::Internal
29    }
30}
31
32#[derive(Debug, Error)]
33pub enum SchemaError {
34    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
35    MissingIndexConfiguration { key: String, value_type: String },
36    #[error("Schema reconciliation failed: {reason}")]
37    InvalidSchema { reason: String },
38    #[error("Cannot set both collection config and schema simultaneously")]
39    ConfigAndSchemaConflict,
40    #[error("Cannot merge schemas with differing defaults")]
41    DefaultsMismatch,
42    #[error("Conflicting configuration for {context}")]
43    ConfigurationConflict { context: String },
44    #[error("Invalid HNSW configuration: {0}")]
45    InvalidHnswConfig(validator::ValidationErrors),
46    #[error("Invalid SPANN configuration: {0}")]
47    InvalidSpannConfig(validator::ValidationErrors),
48    #[error(transparent)]
49    Builder(#[from] SchemaBuilderError),
50}
51
52#[derive(Debug, Error)]
53pub enum SchemaBuilderError {
54    #[error("Vector index must be configured globally using create_index(None, config), not on specific key '{key}'")]
55    VectorIndexMustBeGlobal { key: String },
56    #[error("FTS index must be configured globally using create_index(None, config), not on specific key '{key}'")]
57    FtsIndexMustBeGlobal { key: String },
58    #[error("Cannot modify special key '{key}' - it is managed automatically by the system. To customize vector search, modify the global vector config instead.")]
59    SpecialKeyModificationNotAllowed { key: String },
60    #[error("Sparse vector index requires a specific key. Use create_index(Some(\"key_name\"), config) instead of create_index(None, config)")]
61    SparseVectorRequiresKey,
62    #[error("Only one sparse vector index allowed per collection. Key '{existing_key}' already has a sparse vector index. Remove it first or use that key.")]
63    MultipleSparseVectorIndexes { existing_key: String },
64    #[error("Vector index deletion not supported. The vector index is always enabled on #embedding. To disable vector search, disable the collection instead.")]
65    VectorIndexDeletionNotSupported,
66    #[error("FTS index deletion not supported. The FTS index is always enabled on #document. To disable full-text search, use a different collection without FTS.")]
67    FtsIndexDeletionNotSupported,
68    #[error("Sparse vector index deletion not supported yet. Sparse vector indexes cannot be removed once created.")]
69    SparseVectorIndexDeletionNotSupported,
70}
71
72#[derive(Debug, Error)]
73pub enum FilterValidationError {
74    #[error(
75        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
76    )]
77    IndexingDisabled {
78        key: String,
79        value_type: MetadataValueType,
80    },
81    #[error(transparent)]
82    Schema(#[from] SchemaError),
83}
84
impl ChromaError for SchemaBuilderError {
    /// Reports every schema builder error as [`ErrorCodes::InvalidArgument`],
    /// regardless of variant.
    fn code(&self) -> ErrorCodes {
        ErrorCodes::InvalidArgument
    }
}
90
91impl ChromaError for FilterValidationError {
92    fn code(&self) -> ErrorCodes {
93        match self {
94            FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
95            FilterValidationError::Schema(_) => ErrorCodes::Internal,
96        }
97    }
98}
99
100// ============================================================================
101// SCHEMA CONSTANTS
102// ============================================================================
103// These constants must match the Python constants in chromadb/api/types.py
104
// Value type name constants
/// Name of the string value type.
pub const STRING_VALUE_NAME: &str = "string";
/// Name of the integer value type.
pub const INT_VALUE_NAME: &str = "int";
/// Name of the boolean value type.
pub const BOOL_VALUE_NAME: &str = "bool";
/// Name of the float value type.
pub const FLOAT_VALUE_NAME: &str = "float";
/// Name of the float-list value type.
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
/// Name of the sparse-vector value type.
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// Index type name constants
/// Name of the full-text search (FTS) index.
pub const FTS_INDEX_NAME: &str = "fts_index";
/// Name of the vector index.
pub const VECTOR_INDEX_NAME: &str = "vector_index";
/// Name of the sparse vector index.
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
/// Name of the inverted index over string values.
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
/// Name of the inverted index over integer values.
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
/// Name of the inverted index over float values.
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
/// Name of the inverted index over boolean values.
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

// Special metadata keys - must match Python constants in chromadb/api/types.py
/// Special key under which the document's index overrides are stored.
pub const DOCUMENT_KEY: &str = "#document";
/// Special key under which the embedding's index overrides are stored.
pub const EMBEDDING_KEY: &str = "#embedding";
125
126// ============================================================================
127// SCHEMA STRUCTURES
128// ============================================================================
129
130/// Schema representation for collection index configurations
131///
132/// This represents the server-side schema structure used for index management
133
134#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
135#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
136pub struct Schema {
137    /// Default index configurations for each value type
138    pub defaults: ValueTypes,
139    /// Key-specific index overrides
140    /// TODO(Sanket): Needed for backwards compatibility. Should remove after deploy.
141    #[serde(rename = "keys", alias = "key_overrides")]
142    pub keys: HashMap<String, ValueTypes>,
143}
144
145impl Schema {
146    pub fn update(&mut self, configuration: &InternalUpdateCollectionConfiguration) {
147        if let Some(vector_update) = &configuration.vector_index {
148            if let Some(default_vector_index) = self.defaults_vector_index_mut() {
149                Self::apply_vector_index_update(default_vector_index, vector_update);
150            }
151            if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
152                Self::apply_vector_index_update(embedding_vector_index, vector_update);
153            }
154        }
155
156        if let Some(embedding_function) = configuration.embedding_function.as_ref() {
157            if let Some(default_vector_index) = self.defaults_vector_index_mut() {
158                default_vector_index.config.embedding_function = Some(embedding_function.clone());
159            }
160            if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
161                embedding_vector_index.config.embedding_function = Some(embedding_function.clone());
162            }
163        }
164    }
165
166    fn defaults_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
167        self.defaults
168            .float_list
169            .as_mut()
170            .and_then(|float_list| float_list.vector_index.as_mut())
171    }
172
173    fn embedding_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
174        self.keys
175            .get_mut(EMBEDDING_KEY)
176            .and_then(|value_types| value_types.float_list.as_mut())
177            .and_then(|float_list| float_list.vector_index.as_mut())
178    }
179
180    fn apply_vector_index_update(
181        vector_index: &mut VectorIndexType,
182        update: &UpdateVectorIndexConfiguration,
183    ) {
184        match update {
185            UpdateVectorIndexConfiguration::Hnsw(Some(hnsw_update)) => {
186                if let Some(hnsw_config) = vector_index.config.hnsw.as_mut() {
187                    if let Some(ef_search) = hnsw_update.ef_search {
188                        hnsw_config.ef_search = Some(ef_search);
189                    }
190                    if let Some(max_neighbors) = hnsw_update.max_neighbors {
191                        hnsw_config.max_neighbors = Some(max_neighbors);
192                    }
193                    if let Some(num_threads) = hnsw_update.num_threads {
194                        hnsw_config.num_threads = Some(num_threads);
195                    }
196                    if let Some(resize_factor) = hnsw_update.resize_factor {
197                        hnsw_config.resize_factor = Some(resize_factor);
198                    }
199                    if let Some(sync_threshold) = hnsw_update.sync_threshold {
200                        hnsw_config.sync_threshold = Some(sync_threshold);
201                    }
202                    if let Some(batch_size) = hnsw_update.batch_size {
203                        hnsw_config.batch_size = Some(batch_size);
204                    }
205                }
206            }
207            UpdateVectorIndexConfiguration::Hnsw(None) => {}
208            UpdateVectorIndexConfiguration::Spann(Some(spann_update)) => {
209                if let Some(spann_config) = vector_index.config.spann.as_mut() {
210                    if let Some(search_nprobe) = spann_update.search_nprobe {
211                        spann_config.search_nprobe = Some(search_nprobe);
212                    }
213                    if let Some(ef_search) = spann_update.ef_search {
214                        spann_config.ef_search = Some(ef_search);
215                    }
216                }
217            }
218            UpdateVectorIndexConfiguration::Spann(None) => {}
219        }
220    }
221
222    pub fn is_sparse_index_enabled(&self) -> bool {
223        let defaults_enabled = self
224            .defaults
225            .sparse_vector
226            .as_ref()
227            .and_then(|sv| sv.sparse_vector_index.as_ref())
228            .is_some_and(|idx| idx.enabled);
229        let key_enabled = self.keys.values().any(|value_types| {
230            value_types
231                .sparse_vector
232                .as_ref()
233                .and_then(|sv| sv.sparse_vector_index.as_ref())
234                .is_some_and(|idx| idx.enabled)
235        });
236        defaults_enabled || key_enabled
237    }
238}
239
240impl Default for Schema {
241    /// Create a default Schema that matches Python's behavior exactly.
242    ///
243    /// Python creates a Schema with:
244    /// - All inverted indexes enabled by default (string, int, float, bool)
245    /// - Vector and FTS indexes disabled in defaults
246    /// - Special keys configured: #document (FTS enabled) and #embedding (vector enabled)
247    /// - Vector config has space=None, hnsw=None, spann=None (deferred to backend)
248    ///
249    /// # Examples
250    /// ```
251    /// use chroma_types::Schema;
252    ///
253    /// let schema = Schema::default();
254    /// assert!(schema.keys.contains_key("#document"));
255    /// assert!(schema.keys.contains_key("#embedding"));
256    /// ```
257    fn default() -> Self {
258        // Initialize defaults - match Python's _initialize_defaults()
259        let defaults = ValueTypes {
260            string: Some(StringValueType {
261                fts_index: Some(FtsIndexType {
262                    enabled: false,
263                    config: FtsIndexConfig {},
264                }),
265                string_inverted_index: Some(StringInvertedIndexType {
266                    enabled: true,
267                    config: StringInvertedIndexConfig {},
268                }),
269            }),
270            float_list: Some(FloatListValueType {
271                vector_index: Some(VectorIndexType {
272                    enabled: false,
273                    config: VectorIndexConfig {
274                        space: None, // Python leaves as None (resolved on serialization)
275                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
276                        source_key: None,
277                        hnsw: None,  // Python doesn't specify
278                        spann: None, // Python doesn't specify
279                    },
280                }),
281            }),
282            sparse_vector: Some(SparseVectorValueType {
283                sparse_vector_index: Some(SparseVectorIndexType {
284                    enabled: false,
285                    config: SparseVectorIndexConfig {
286                        embedding_function: None,
287                        source_key: None,
288                        bm25: None,
289                    },
290                }),
291            }),
292            int: Some(IntValueType {
293                int_inverted_index: Some(IntInvertedIndexType {
294                    enabled: true,
295                    config: IntInvertedIndexConfig {},
296                }),
297            }),
298            float: Some(FloatValueType {
299                float_inverted_index: Some(FloatInvertedIndexType {
300                    enabled: true,
301                    config: FloatInvertedIndexConfig {},
302                }),
303            }),
304            boolean: Some(BoolValueType {
305                bool_inverted_index: Some(BoolInvertedIndexType {
306                    enabled: true,
307                    config: BoolInvertedIndexConfig {},
308                }),
309            }),
310        };
311
312        // Initialize key-specific overrides - match Python's _initialize_keys()
313        let mut keys = HashMap::new();
314
315        // #document: FTS enabled, string inverted disabled
316        keys.insert(
317            DOCUMENT_KEY.to_string(),
318            ValueTypes {
319                string: Some(StringValueType {
320                    fts_index: Some(FtsIndexType {
321                        enabled: true,
322                        config: FtsIndexConfig {},
323                    }),
324                    string_inverted_index: Some(StringInvertedIndexType {
325                        enabled: false,
326                        config: StringInvertedIndexConfig {},
327                    }),
328                }),
329                ..Default::default()
330            },
331        );
332
333        // #embedding: Vector index enabled with source_key=#document
334        keys.insert(
335            EMBEDDING_KEY.to_string(),
336            ValueTypes {
337                float_list: Some(FloatListValueType {
338                    vector_index: Some(VectorIndexType {
339                        enabled: true,
340                        config: VectorIndexConfig {
341                            space: None, // Python leaves as None (resolved on serialization)
342                            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
343                            source_key: Some(DOCUMENT_KEY.to_string()),
344                            hnsw: None,  // Python doesn't specify
345                            spann: None, // Python doesn't specify
346                        },
347                    }),
348                }),
349                ..Default::default()
350            },
351        );
352
353        Schema { defaults, keys }
354    }
355}
356
357pub fn is_embedding_function_default(
358    embedding_function: &Option<EmbeddingFunctionConfiguration>,
359) -> bool {
360    match embedding_function {
361        None => true,
362        Some(embedding_function) => embedding_function.is_default(),
363    }
364}
365
366/// Check if space is default (None means default, or if present, should be default space)
367pub fn is_space_default(space: &Option<Space>) -> bool {
368    match space {
369        None => true,                     // None means default
370        Some(s) => *s == default_space(), // If present, check if it's the default space
371    }
372}
373
374/// Check if HNSW config is default
375pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
376    hnsw_config.ef_construction == Some(default_construction_ef())
377        && hnsw_config.ef_search == Some(default_search_ef())
378        && hnsw_config.max_neighbors == Some(default_m())
379        && hnsw_config.num_threads == Some(default_num_threads())
380        && hnsw_config.batch_size == Some(default_batch_size())
381        && hnsw_config.sync_threshold == Some(default_sync_threshold())
382        && hnsw_config.resize_factor == Some(default_resize_factor())
383}
384
385// ============================================================================
386// NEW STRONGLY-TYPED SCHEMA STRUCTURES
387// ============================================================================
388
389/// Strongly-typed value type configurations
390/// Contains optional configurations for each supported value type
391#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
392#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
393pub struct ValueTypes {
394    #[serde(
395        rename = "string",
396        alias = "#string",
397        skip_serializing_if = "Option::is_none"
398    )] // STRING_VALUE_NAME
399    pub string: Option<StringValueType>,
400
401    #[serde(
402        rename = "float_list",
403        alias = "#float_list",
404        skip_serializing_if = "Option::is_none"
405    )]
406    // FLOAT_LIST_VALUE_NAME
407    pub float_list: Option<FloatListValueType>,
408
409    #[serde(
410        rename = "sparse_vector",
411        alias = "#sparse_vector",
412        skip_serializing_if = "Option::is_none"
413    )]
414    // SPARSE_VECTOR_VALUE_NAME
415    pub sparse_vector: Option<SparseVectorValueType>,
416
417    #[serde(
418        rename = "int",
419        alias = "#int",
420        skip_serializing_if = "Option::is_none"
421    )] // INT_VALUE_NAME
422    pub int: Option<IntValueType>,
423
424    #[serde(
425        rename = "float",
426        alias = "#float",
427        skip_serializing_if = "Option::is_none"
428    )] // FLOAT_VALUE_NAME
429    pub float: Option<FloatValueType>,
430
431    #[serde(
432        rename = "bool",
433        alias = "#bool",
434        skip_serializing_if = "Option::is_none"
435    )] // BOOL_VALUE_NAME
436    pub boolean: Option<BoolValueType>,
437}
438
439/// String value type index configurations
440#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
441#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
442pub struct StringValueType {
443    #[serde(
444        rename = "fts_index",
445        alias = "$fts_index",
446        skip_serializing_if = "Option::is_none"
447    )] // FTS_INDEX_NAME
448    pub fts_index: Option<FtsIndexType>,
449
450    #[serde(
451        rename = "string_inverted_index", // STRING_INVERTED_INDEX_NAME
452        alias = "$string_inverted_index",
453        skip_serializing_if = "Option::is_none"
454    )]
455    pub string_inverted_index: Option<StringInvertedIndexType>,
456}
457
458/// Float list value type index configurations (for vectors)
459#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
460#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
461pub struct FloatListValueType {
462    #[serde(
463        rename = "vector_index",
464        alias = "$vector_index",
465        skip_serializing_if = "Option::is_none"
466    )] // VECTOR_INDEX_NAME
467    pub vector_index: Option<VectorIndexType>,
468}
469
470/// Sparse vector value type index configurations
471#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
472#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
473pub struct SparseVectorValueType {
474    #[serde(
475        rename = "sparse_vector_index", // SPARSE_VECTOR_INDEX_NAME
476        alias = "$sparse_vector_index",
477        skip_serializing_if = "Option::is_none"
478    )]
479    pub sparse_vector_index: Option<SparseVectorIndexType>,
480}
481
482/// Integer value type index configurations
483#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
484#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
485pub struct IntValueType {
486    #[serde(
487        rename = "int_inverted_index",
488        alias = "$int_inverted_index",
489        skip_serializing_if = "Option::is_none"
490    )]
491    // INT_INVERTED_INDEX_NAME
492    pub int_inverted_index: Option<IntInvertedIndexType>,
493}
494
495/// Float value type index configurations
496#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
497#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
498pub struct FloatValueType {
499    #[serde(
500        rename = "float_inverted_index", // FLOAT_INVERTED_INDEX_NAME
501        alias = "$float_inverted_index",
502        skip_serializing_if = "Option::is_none"
503    )]
504    pub float_inverted_index: Option<FloatInvertedIndexType>,
505}
506
507/// Boolean value type index configurations
508#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
509#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
510pub struct BoolValueType {
511    #[serde(
512        rename = "bool_inverted_index", // BOOL_INVERTED_INDEX_NAME
513        alias = "$bool_inverted_index",
514        skip_serializing_if = "Option::is_none"
515    )]
516    pub bool_inverted_index: Option<BoolInvertedIndexType>,
517}
518
519// Individual index type structs with enabled status and config
520#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
521#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
522pub struct FtsIndexType {
523    pub enabled: bool,
524    pub config: FtsIndexConfig,
525}
526
527#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
528#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
529pub struct VectorIndexType {
530    pub enabled: bool,
531    pub config: VectorIndexConfig,
532}
533
534#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
535#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
536pub struct SparseVectorIndexType {
537    pub enabled: bool,
538    pub config: SparseVectorIndexConfig,
539}
540
541#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
542#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
543pub struct StringInvertedIndexType {
544    pub enabled: bool,
545    pub config: StringInvertedIndexConfig,
546}
547
548#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
549#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
550pub struct IntInvertedIndexType {
551    pub enabled: bool,
552    pub config: IntInvertedIndexConfig,
553}
554
555#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
556#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
557pub struct FloatInvertedIndexType {
558    pub enabled: bool,
559    pub config: FloatInvertedIndexConfig,
560}
561
562#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
563#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
564pub struct BoolInvertedIndexType {
565    pub enabled: bool,
566    pub config: BoolInvertedIndexConfig,
567}
568
569impl Schema {
570    /// Create a new Schema with strongly-typed default configurations
571    pub fn new_default(default_knn_index: KnnIndex) -> Self {
572        // Vector index disabled on all keys except #embedding.
573        let vector_config = VectorIndexType {
574            enabled: false,
575            config: VectorIndexConfig {
576                space: Some(default_space()),
577                embedding_function: None,
578                source_key: None,
579                hnsw: match default_knn_index {
580                    KnnIndex::Hnsw => Some(HnswIndexConfig {
581                        ef_construction: Some(default_construction_ef()),
582                        max_neighbors: Some(default_m()),
583                        ef_search: Some(default_search_ef()),
584                        num_threads: Some(default_num_threads()),
585                        batch_size: Some(default_batch_size()),
586                        sync_threshold: Some(default_sync_threshold()),
587                        resize_factor: Some(default_resize_factor()),
588                    }),
589                    KnnIndex::Spann => None,
590                },
591                spann: match default_knn_index {
592                    KnnIndex::Hnsw => None,
593                    KnnIndex::Spann => Some(SpannIndexConfig {
594                        search_nprobe: Some(default_search_nprobe()),
595                        search_rng_factor: Some(default_search_rng_factor()),
596                        search_rng_epsilon: Some(default_search_rng_epsilon()),
597                        nreplica_count: Some(default_nreplica_count()),
598                        write_rng_factor: Some(default_write_rng_factor()),
599                        write_rng_epsilon: Some(default_write_rng_epsilon()),
600                        split_threshold: Some(default_split_threshold()),
601                        num_samples_kmeans: Some(default_num_samples_kmeans()),
602                        initial_lambda: Some(default_initial_lambda()),
603                        reassign_neighbor_count: Some(default_reassign_neighbor_count()),
604                        merge_threshold: Some(default_merge_threshold()),
605                        num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
606                        write_nprobe: Some(default_write_nprobe()),
607                        ef_construction: Some(default_construction_ef_spann()),
608                        ef_search: Some(default_search_ef_spann()),
609                        max_neighbors: Some(default_m_spann()),
610                    }),
611                },
612            },
613        };
614
615        // Initialize defaults struct directly instead of using Default::default() + field assignments
616        let defaults = ValueTypes {
617            string: Some(StringValueType {
618                string_inverted_index: Some(StringInvertedIndexType {
619                    enabled: true,
620                    config: StringInvertedIndexConfig {},
621                }),
622                fts_index: Some(FtsIndexType {
623                    enabled: false,
624                    config: FtsIndexConfig {},
625                }),
626            }),
627            float: Some(FloatValueType {
628                float_inverted_index: Some(FloatInvertedIndexType {
629                    enabled: true,
630                    config: FloatInvertedIndexConfig {},
631                }),
632            }),
633            int: Some(IntValueType {
634                int_inverted_index: Some(IntInvertedIndexType {
635                    enabled: true,
636                    config: IntInvertedIndexConfig {},
637                }),
638            }),
639            boolean: Some(BoolValueType {
640                bool_inverted_index: Some(BoolInvertedIndexType {
641                    enabled: true,
642                    config: BoolInvertedIndexConfig {},
643                }),
644            }),
645            float_list: Some(FloatListValueType {
646                vector_index: Some(vector_config),
647            }),
648            sparse_vector: Some(SparseVectorValueType {
649                sparse_vector_index: Some(SparseVectorIndexType {
650                    enabled: false,
651                    config: SparseVectorIndexConfig {
652                        embedding_function: Some(EmbeddingFunctionConfiguration::Unknown),
653                        source_key: None,
654                        bm25: Some(false),
655                    },
656                }),
657            }),
658        };
659
660        // Set up key overrides
661        let mut keys = HashMap::new();
662
663        // Enable vector index for #embedding.
664        let embedding_defaults = ValueTypes {
665            float_list: Some(FloatListValueType {
666                vector_index: Some(VectorIndexType {
667                    enabled: true,
668                    config: VectorIndexConfig {
669                        space: Some(default_space()),
670                        embedding_function: None,
671                        source_key: Some(DOCUMENT_KEY.to_string()),
672                        hnsw: match default_knn_index {
673                            KnnIndex::Hnsw => Some(HnswIndexConfig {
674                                ef_construction: Some(default_construction_ef()),
675                                max_neighbors: Some(default_m()),
676                                ef_search: Some(default_search_ef()),
677                                num_threads: Some(default_num_threads()),
678                                batch_size: Some(default_batch_size()),
679                                sync_threshold: Some(default_sync_threshold()),
680                                resize_factor: Some(default_resize_factor()),
681                            }),
682                            KnnIndex::Spann => None,
683                        },
684                        spann: match default_knn_index {
685                            KnnIndex::Hnsw => None,
686                            KnnIndex::Spann => Some(SpannIndexConfig {
687                                search_nprobe: Some(default_search_nprobe()),
688                                search_rng_factor: Some(default_search_rng_factor()),
689                                search_rng_epsilon: Some(default_search_rng_epsilon()),
690                                nreplica_count: Some(default_nreplica_count()),
691                                write_rng_factor: Some(default_write_rng_factor()),
692                                write_rng_epsilon: Some(default_write_rng_epsilon()),
693                                split_threshold: Some(default_split_threshold()),
694                                num_samples_kmeans: Some(default_num_samples_kmeans()),
695                                initial_lambda: Some(default_initial_lambda()),
696                                reassign_neighbor_count: Some(default_reassign_neighbor_count()),
697                                merge_threshold: Some(default_merge_threshold()),
698                                num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
699                                write_nprobe: Some(default_write_nprobe()),
700                                ef_construction: Some(default_construction_ef_spann()),
701                                ef_search: Some(default_search_ef_spann()),
702                                max_neighbors: Some(default_m_spann()),
703                            }),
704                        },
705                    },
706                }),
707            }),
708            ..Default::default()
709        };
710        keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
711
712        // Document defaults - initialize directly instead of Default::default() + field assignment
713        let document_defaults = ValueTypes {
714            string: Some(StringValueType {
715                fts_index: Some(FtsIndexType {
716                    enabled: true,
717                    config: FtsIndexConfig {},
718                }),
719                string_inverted_index: Some(StringInvertedIndexType {
720                    enabled: false,
721                    config: StringInvertedIndexConfig {},
722                }),
723            }),
724            ..Default::default()
725        };
726        keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
727
728        Schema { defaults, keys }
729    }
730
731    pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
732        let to_internal = |vector_index: &VectorIndexType| {
733            let space = vector_index.config.space.clone();
734            vector_index
735                .config
736                .spann
737                .clone()
738                .map(|config| (space.as_ref(), &config).into())
739        };
740
741        self.keys
742            .get(EMBEDDING_KEY)
743            .and_then(|value_types| value_types.float_list.as_ref())
744            .and_then(|float_list| float_list.vector_index.as_ref())
745            .and_then(to_internal)
746            .or_else(|| {
747                self.defaults
748                    .float_list
749                    .as_ref()
750                    .and_then(|float_list| float_list.vector_index.as_ref())
751                    .and_then(to_internal)
752            })
753    }
754
755    pub fn get_internal_hnsw_config(&self) -> Option<InternalHnswConfiguration> {
756        let to_internal = |vector_index: &VectorIndexType| {
757            if vector_index.config.spann.is_some() {
758                return None;
759            }
760            let space = vector_index.config.space.as_ref();
761            let hnsw_config = vector_index.config.hnsw.as_ref();
762            Some((space, hnsw_config).into())
763        };
764
765        self.keys
766            .get(EMBEDDING_KEY)
767            .and_then(|value_types| value_types.float_list.as_ref())
768            .and_then(|float_list| float_list.vector_index.as_ref())
769            .and_then(to_internal)
770            .or_else(|| {
771                self.defaults
772                    .float_list
773                    .as_ref()
774                    .and_then(|float_list| float_list.vector_index.as_ref())
775                    .and_then(to_internal)
776            })
777    }
778
779    pub fn get_internal_hnsw_config_with_legacy_fallback(
780        &self,
781        segment: &Segment,
782    ) -> Result<Option<InternalHnswConfiguration>, HnswParametersFromSegmentError> {
783        if let Some(config) = self.get_internal_hnsw_config() {
784            let config_from_metadata =
785                InternalHnswConfiguration::from_legacy_segment_metadata(&segment.metadata)?;
786
787            if config == InternalHnswConfiguration::default() && config != config_from_metadata {
788                return Ok(Some(config_from_metadata));
789            }
790
791            return Ok(Some(config));
792        }
793
794        Ok(None)
795    }
796
797    /// Reconcile user-provided schema with system defaults
798    ///
799    /// This method merges user configurations with system defaults, ensuring that:
800    /// - User overrides take precedence over defaults
801    /// - Missing user configurations fall back to system defaults
802    /// - Field-level merging for complex configurations (Vector, HNSW, SPANN, etc.)
803    pub fn reconcile_with_defaults(
804        user_schema: Option<&Schema>,
805        knn_index: KnnIndex,
806    ) -> Result<Self, SchemaError> {
807        let default_schema = Schema::new_default(knn_index);
808
809        match user_schema {
810            Some(user) => {
811                // Merge defaults with user overrides
812                let merged_defaults =
813                    Self::merge_value_types(&default_schema.defaults, &user.defaults, knn_index)?;
814
815                // Merge key overrides
816                let mut merged_keys = default_schema.keys.clone();
817                for (key, user_value_types) in &user.keys {
818                    if let Some(default_value_types) = merged_keys.get(key) {
819                        // Merge with existing default key override
820                        let merged_value_types = Self::merge_value_types(
821                            default_value_types,
822                            user_value_types,
823                            knn_index,
824                        )?;
825                        merged_keys.insert(key.clone(), merged_value_types);
826                    } else {
827                        // New key override from user
828                        merged_keys.insert(key.clone(), user_value_types.clone());
829                    }
830                }
831
832                Ok(Schema {
833                    defaults: merged_defaults,
834                    keys: merged_keys,
835                })
836            }
837            None => Ok(default_schema),
838        }
839    }
840
841    /// Merge two schemas together, combining key overrides when possible.
842    pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
843        if self.defaults != other.defaults {
844            return Err(SchemaError::DefaultsMismatch);
845        }
846
847        let mut keys = self.keys.clone();
848
849        for (key, other_value_types) in &other.keys {
850            if let Some(existing) = keys.get(key).cloned() {
851                let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
852                keys.insert(key.clone(), merged);
853            } else {
854                keys.insert(key.clone(), other_value_types.clone());
855            }
856        }
857
858        Ok(Schema {
859            defaults: self.defaults.clone(),
860            keys,
861        })
862    }
863
864    fn merge_override_value_types(
865        key: &str,
866        left: &ValueTypes,
867        right: &ValueTypes,
868    ) -> Result<ValueTypes, SchemaError> {
869        Ok(ValueTypes {
870            string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
871            float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
872            int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
873            boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
874            float_list: Self::merge_float_list_override(
875                key,
876                left.float_list.as_ref(),
877                right.float_list.as_ref(),
878            )?,
879            sparse_vector: Self::merge_sparse_vector_override(
880                key,
881                left.sparse_vector.as_ref(),
882                right.sparse_vector.as_ref(),
883            )?,
884        })
885    }
886
887    fn merge_string_override(
888        key: &str,
889        left: Option<&StringValueType>,
890        right: Option<&StringValueType>,
891    ) -> Result<Option<StringValueType>, SchemaError> {
892        match (left, right) {
893            (Some(l), Some(r)) => Ok(Some(StringValueType {
894                string_inverted_index: Self::merge_index_or_error(
895                    l.string_inverted_index.as_ref(),
896                    r.string_inverted_index.as_ref(),
897                    &format!("key '{key}' string.string_inverted_index"),
898                )?,
899                fts_index: Self::merge_index_or_error(
900                    l.fts_index.as_ref(),
901                    r.fts_index.as_ref(),
902                    &format!("key '{key}' string.fts_index"),
903                )?,
904            })),
905            (Some(l), None) => Ok(Some(l.clone())),
906            (None, Some(r)) => Ok(Some(r.clone())),
907            (None, None) => Ok(None),
908        }
909    }
910
911    fn merge_float_override(
912        key: &str,
913        left: Option<&FloatValueType>,
914        right: Option<&FloatValueType>,
915    ) -> Result<Option<FloatValueType>, SchemaError> {
916        match (left, right) {
917            (Some(l), Some(r)) => Ok(Some(FloatValueType {
918                float_inverted_index: Self::merge_index_or_error(
919                    l.float_inverted_index.as_ref(),
920                    r.float_inverted_index.as_ref(),
921                    &format!("key '{key}' float.float_inverted_index"),
922                )?,
923            })),
924            (Some(l), None) => Ok(Some(l.clone())),
925            (None, Some(r)) => Ok(Some(r.clone())),
926            (None, None) => Ok(None),
927        }
928    }
929
930    fn merge_int_override(
931        key: &str,
932        left: Option<&IntValueType>,
933        right: Option<&IntValueType>,
934    ) -> Result<Option<IntValueType>, SchemaError> {
935        match (left, right) {
936            (Some(l), Some(r)) => Ok(Some(IntValueType {
937                int_inverted_index: Self::merge_index_or_error(
938                    l.int_inverted_index.as_ref(),
939                    r.int_inverted_index.as_ref(),
940                    &format!("key '{key}' int.int_inverted_index"),
941                )?,
942            })),
943            (Some(l), None) => Ok(Some(l.clone())),
944            (None, Some(r)) => Ok(Some(r.clone())),
945            (None, None) => Ok(None),
946        }
947    }
948
949    fn merge_bool_override(
950        key: &str,
951        left: Option<&BoolValueType>,
952        right: Option<&BoolValueType>,
953    ) -> Result<Option<BoolValueType>, SchemaError> {
954        match (left, right) {
955            (Some(l), Some(r)) => Ok(Some(BoolValueType {
956                bool_inverted_index: Self::merge_index_or_error(
957                    l.bool_inverted_index.as_ref(),
958                    r.bool_inverted_index.as_ref(),
959                    &format!("key '{key}' bool.bool_inverted_index"),
960                )?,
961            })),
962            (Some(l), None) => Ok(Some(l.clone())),
963            (None, Some(r)) => Ok(Some(r.clone())),
964            (None, None) => Ok(None),
965        }
966    }
967
968    fn merge_float_list_override(
969        key: &str,
970        left: Option<&FloatListValueType>,
971        right: Option<&FloatListValueType>,
972    ) -> Result<Option<FloatListValueType>, SchemaError> {
973        match (left, right) {
974            (Some(l), Some(r)) => Ok(Some(FloatListValueType {
975                vector_index: Self::merge_index_or_error(
976                    l.vector_index.as_ref(),
977                    r.vector_index.as_ref(),
978                    &format!("key '{key}' float_list.vector_index"),
979                )?,
980            })),
981            (Some(l), None) => Ok(Some(l.clone())),
982            (None, Some(r)) => Ok(Some(r.clone())),
983            (None, None) => Ok(None),
984        }
985    }
986
987    fn merge_sparse_vector_override(
988        key: &str,
989        left: Option<&SparseVectorValueType>,
990        right: Option<&SparseVectorValueType>,
991    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
992        match (left, right) {
993            (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
994                sparse_vector_index: Self::merge_index_or_error(
995                    l.sparse_vector_index.as_ref(),
996                    r.sparse_vector_index.as_ref(),
997                    &format!("key '{key}' sparse_vector.sparse_vector_index"),
998                )?,
999            })),
1000            (Some(l), None) => Ok(Some(l.clone())),
1001            (None, Some(r)) => Ok(Some(r.clone())),
1002            (None, None) => Ok(None),
1003        }
1004    }
1005
1006    fn merge_index_or_error<T: Clone + PartialEq>(
1007        left: Option<&T>,
1008        right: Option<&T>,
1009        context: &str,
1010    ) -> Result<Option<T>, SchemaError> {
1011        match (left, right) {
1012            (Some(l), Some(r)) => {
1013                if l == r {
1014                    Ok(Some(l.clone()))
1015                } else {
1016                    Err(SchemaError::ConfigurationConflict {
1017                        context: context.to_string(),
1018                    })
1019                }
1020            }
1021            (Some(l), None) => Ok(Some(l.clone())),
1022            (None, Some(r)) => Ok(Some(r.clone())),
1023            (None, None) => Ok(None),
1024        }
1025    }
1026
1027    /// Merge two ValueTypes with field-level merging
1028    /// User values take precedence over default values
1029    fn merge_value_types(
1030        default: &ValueTypes,
1031        user: &ValueTypes,
1032        knn_index: KnnIndex,
1033    ) -> Result<ValueTypes, SchemaError> {
1034        // Merge float_list first
1035        let float_list = Self::merge_float_list_type(
1036            default.float_list.as_ref(),
1037            user.float_list.as_ref(),
1038            knn_index,
1039        );
1040
1041        // Validate the merged float_list (covers all merge cases)
1042        if let Some(ref fl) = float_list {
1043            Self::validate_float_list_value_type(fl)?;
1044        }
1045
1046        Ok(ValueTypes {
1047            string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
1048            float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
1049            int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
1050            boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
1051            float_list,
1052            sparse_vector: Self::merge_sparse_vector_type(
1053                default.sparse_vector.as_ref(),
1054                user.sparse_vector.as_ref(),
1055            )?,
1056        })
1057    }
1058
1059    /// Merge StringValueType configurations
1060    fn merge_string_type(
1061        default: Option<&StringValueType>,
1062        user: Option<&StringValueType>,
1063    ) -> Result<Option<StringValueType>, SchemaError> {
1064        match (default, user) {
1065            (Some(default), Some(user)) => Ok(Some(StringValueType {
1066                string_inverted_index: Self::merge_string_inverted_index_type(
1067                    default.string_inverted_index.as_ref(),
1068                    user.string_inverted_index.as_ref(),
1069                )?,
1070                fts_index: Self::merge_fts_index_type(
1071                    default.fts_index.as_ref(),
1072                    user.fts_index.as_ref(),
1073                )?,
1074            })),
1075            (Some(default), None) => Ok(Some(default.clone())),
1076            (None, Some(user)) => Ok(Some(user.clone())),
1077            (None, None) => Ok(None),
1078        }
1079    }
1080
1081    /// Merge FloatValueType configurations
1082    fn merge_float_type(
1083        default: Option<&FloatValueType>,
1084        user: Option<&FloatValueType>,
1085    ) -> Result<Option<FloatValueType>, SchemaError> {
1086        match (default, user) {
1087            (Some(default), Some(user)) => Ok(Some(FloatValueType {
1088                float_inverted_index: Self::merge_float_inverted_index_type(
1089                    default.float_inverted_index.as_ref(),
1090                    user.float_inverted_index.as_ref(),
1091                )?,
1092            })),
1093            (Some(default), None) => Ok(Some(default.clone())),
1094            (None, Some(user)) => Ok(Some(user.clone())),
1095            (None, None) => Ok(None),
1096        }
1097    }
1098
1099    /// Merge IntValueType configurations
1100    fn merge_int_type(
1101        default: Option<&IntValueType>,
1102        user: Option<&IntValueType>,
1103    ) -> Result<Option<IntValueType>, SchemaError> {
1104        match (default, user) {
1105            (Some(default), Some(user)) => Ok(Some(IntValueType {
1106                int_inverted_index: Self::merge_int_inverted_index_type(
1107                    default.int_inverted_index.as_ref(),
1108                    user.int_inverted_index.as_ref(),
1109                )?,
1110            })),
1111            (Some(default), None) => Ok(Some(default.clone())),
1112            (None, Some(user)) => Ok(Some(user.clone())),
1113            (None, None) => Ok(None),
1114        }
1115    }
1116
1117    /// Merge BoolValueType configurations
1118    fn merge_bool_type(
1119        default: Option<&BoolValueType>,
1120        user: Option<&BoolValueType>,
1121    ) -> Result<Option<BoolValueType>, SchemaError> {
1122        match (default, user) {
1123            (Some(default), Some(user)) => Ok(Some(BoolValueType {
1124                bool_inverted_index: Self::merge_bool_inverted_index_type(
1125                    default.bool_inverted_index.as_ref(),
1126                    user.bool_inverted_index.as_ref(),
1127                )?,
1128            })),
1129            (Some(default), None) => Ok(Some(default.clone())),
1130            (None, Some(user)) => Ok(Some(user.clone())),
1131            (None, None) => Ok(None),
1132        }
1133    }
1134
1135    /// Merge FloatListValueType configurations
1136    fn merge_float_list_type(
1137        default: Option<&FloatListValueType>,
1138        user: Option<&FloatListValueType>,
1139        knn_index: KnnIndex,
1140    ) -> Option<FloatListValueType> {
1141        match (default, user) {
1142            (Some(default), Some(user)) => Some(FloatListValueType {
1143                vector_index: Self::merge_vector_index_type(
1144                    default.vector_index.as_ref(),
1145                    user.vector_index.as_ref(),
1146                    knn_index,
1147                ),
1148            }),
1149            (Some(default), None) => Some(default.clone()),
1150            (None, Some(user)) => Some(user.clone()),
1151            (None, None) => None,
1152        }
1153    }
1154
1155    /// Merge SparseVectorValueType configurations
1156    fn merge_sparse_vector_type(
1157        default: Option<&SparseVectorValueType>,
1158        user: Option<&SparseVectorValueType>,
1159    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1160        match (default, user) {
1161            (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
1162                sparse_vector_index: Self::merge_sparse_vector_index_type(
1163                    default.sparse_vector_index.as_ref(),
1164                    user.sparse_vector_index.as_ref(),
1165                )?,
1166            })),
1167            (Some(default), None) => Ok(Some(default.clone())),
1168            (None, Some(user)) => Ok(Some(user.clone())),
1169            (None, None) => Ok(None),
1170        }
1171    }
1172
1173    /// Merge individual index type configurations
1174    fn merge_string_inverted_index_type(
1175        default: Option<&StringInvertedIndexType>,
1176        user: Option<&StringInvertedIndexType>,
1177    ) -> Result<Option<StringInvertedIndexType>, SchemaError> {
1178        match (default, user) {
1179            (Some(_default), Some(user)) => {
1180                Ok(Some(StringInvertedIndexType {
1181                    enabled: user.enabled,       // User enabled state takes precedence
1182                    config: user.config.clone(), // User config takes precedence
1183                }))
1184            }
1185            (Some(default), None) => Ok(Some(default.clone())),
1186            (None, Some(user)) => Ok(Some(user.clone())),
1187            (None, None) => Ok(None),
1188        }
1189    }
1190
1191    fn merge_fts_index_type(
1192        default: Option<&FtsIndexType>,
1193        user: Option<&FtsIndexType>,
1194    ) -> Result<Option<FtsIndexType>, SchemaError> {
1195        match (default, user) {
1196            (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
1197                enabled: user.enabled,
1198                config: user.config.clone(),
1199            })),
1200            (Some(default), None) => Ok(Some(default.clone())),
1201            (None, Some(user)) => Ok(Some(user.clone())),
1202            (None, None) => Ok(None),
1203        }
1204    }
1205
1206    fn merge_float_inverted_index_type(
1207        default: Option<&FloatInvertedIndexType>,
1208        user: Option<&FloatInvertedIndexType>,
1209    ) -> Result<Option<FloatInvertedIndexType>, SchemaError> {
1210        match (default, user) {
1211            (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
1212                enabled: user.enabled,
1213                config: user.config.clone(),
1214            })),
1215            (Some(default), None) => Ok(Some(default.clone())),
1216            (None, Some(user)) => Ok(Some(user.clone())),
1217            (None, None) => Ok(None),
1218        }
1219    }
1220
1221    fn merge_int_inverted_index_type(
1222        default: Option<&IntInvertedIndexType>,
1223        user: Option<&IntInvertedIndexType>,
1224    ) -> Result<Option<IntInvertedIndexType>, SchemaError> {
1225        match (default, user) {
1226            (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
1227                enabled: user.enabled,
1228                config: user.config.clone(),
1229            })),
1230            (Some(default), None) => Ok(Some(default.clone())),
1231            (None, Some(user)) => Ok(Some(user.clone())),
1232            (None, None) => Ok(None),
1233        }
1234    }
1235
1236    fn merge_bool_inverted_index_type(
1237        default: Option<&BoolInvertedIndexType>,
1238        user: Option<&BoolInvertedIndexType>,
1239    ) -> Result<Option<BoolInvertedIndexType>, SchemaError> {
1240        match (default, user) {
1241            (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
1242                enabled: user.enabled,
1243                config: user.config.clone(),
1244            })),
1245            (Some(default), None) => Ok(Some(default.clone())),
1246            (None, Some(user)) => Ok(Some(user.clone())),
1247            (None, None) => Ok(None),
1248        }
1249    }
1250
1251    fn merge_vector_index_type(
1252        default: Option<&VectorIndexType>,
1253        user: Option<&VectorIndexType>,
1254        knn_index: KnnIndex,
1255    ) -> Option<VectorIndexType> {
1256        match (default, user) {
1257            (Some(default), Some(user)) => Some(VectorIndexType {
1258                enabled: user.enabled,
1259                config: Self::merge_vector_index_config(&default.config, &user.config, knn_index),
1260            }),
1261            (Some(default), None) => Some(default.clone()),
1262            (None, Some(user)) => Some(user.clone()),
1263            (None, None) => None,
1264        }
1265    }
1266
1267    fn merge_sparse_vector_index_type(
1268        default: Option<&SparseVectorIndexType>,
1269        user: Option<&SparseVectorIndexType>,
1270    ) -> Result<Option<SparseVectorIndexType>, SchemaError> {
1271        match (default, user) {
1272            (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
1273                enabled: user.enabled,
1274                config: Self::merge_sparse_vector_index_config(&default.config, &user.config),
1275            })),
1276            (Some(default), None) => Ok(Some(default.clone())),
1277            (None, Some(user)) => Ok(Some(user.clone())),
1278            (None, None) => Ok(None),
1279        }
1280    }
1281
1282    /// Validate FloatListValueType vector index configurations
1283    /// This validates HNSW and SPANN configs within the merged float_list
1284    fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), SchemaError> {
1285        if let Some(vector_index) = &float_list.vector_index {
1286            if let Some(hnsw) = &vector_index.config.hnsw {
1287                hnsw.validate().map_err(SchemaError::InvalidHnswConfig)?;
1288            }
1289            if let Some(spann) = &vector_index.config.spann {
1290                spann.validate().map_err(SchemaError::InvalidSpannConfig)?;
1291            }
1292        }
1293        Ok(())
1294    }
1295
1296    /// Merge VectorIndexConfig with field-level merging
1297    fn merge_vector_index_config(
1298        default: &VectorIndexConfig,
1299        user: &VectorIndexConfig,
1300        knn_index: KnnIndex,
1301    ) -> VectorIndexConfig {
1302        match knn_index {
1303            KnnIndex::Hnsw => VectorIndexConfig {
1304                space: user.space.clone().or(default.space.clone()),
1305                embedding_function: user
1306                    .embedding_function
1307                    .clone()
1308                    .or(default.embedding_function.clone()),
1309                source_key: user.source_key.clone().or(default.source_key.clone()),
1310                hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1311                spann: None,
1312            },
1313            KnnIndex::Spann => VectorIndexConfig {
1314                space: user.space.clone().or(default.space.clone()),
1315                embedding_function: user
1316                    .embedding_function
1317                    .clone()
1318                    .or(default.embedding_function.clone()),
1319                source_key: user.source_key.clone().or(default.source_key.clone()),
1320                hnsw: None,
1321                spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
1322            },
1323        }
1324    }
1325
1326    /// Merge SparseVectorIndexConfig with field-level merging
1327    fn merge_sparse_vector_index_config(
1328        default: &SparseVectorIndexConfig,
1329        user: &SparseVectorIndexConfig,
1330    ) -> SparseVectorIndexConfig {
1331        SparseVectorIndexConfig {
1332            embedding_function: user
1333                .embedding_function
1334                .clone()
1335                .or(default.embedding_function.clone()),
1336            source_key: user.source_key.clone().or(default.source_key.clone()),
1337            bm25: user.bm25.or(default.bm25),
1338        }
1339    }
1340
1341    /// Merge HNSW configurations with field-level merging
1342    fn merge_hnsw_configs(
1343        default_hnsw: Option<&HnswIndexConfig>,
1344        user_hnsw: Option<&HnswIndexConfig>,
1345    ) -> Option<HnswIndexConfig> {
1346        match (default_hnsw, user_hnsw) {
1347            (Some(default), Some(user)) => Some(HnswIndexConfig {
1348                ef_construction: user.ef_construction.or(default.ef_construction),
1349                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1350                ef_search: user.ef_search.or(default.ef_search),
1351                num_threads: user.num_threads.or(default.num_threads),
1352                batch_size: user.batch_size.or(default.batch_size),
1353                sync_threshold: user.sync_threshold.or(default.sync_threshold),
1354                resize_factor: user.resize_factor.or(default.resize_factor),
1355            }),
1356            (Some(default), None) => Some(default.clone()),
1357            (None, Some(user)) => Some(user.clone()),
1358            (None, None) => None,
1359        }
1360    }
1361
1362    /// Merge SPANN configurations with field-level merging
1363    fn merge_spann_configs(
1364        default_spann: Option<&SpannIndexConfig>,
1365        user_spann: Option<&SpannIndexConfig>,
1366    ) -> Option<SpannIndexConfig> {
1367        match (default_spann, user_spann) {
1368            (Some(default), Some(user)) => Some(SpannIndexConfig {
1369                search_nprobe: user.search_nprobe.or(default.search_nprobe),
1370                search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1371                search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1372                nreplica_count: user.nreplica_count.or(default.nreplica_count),
1373                write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1374                write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1375                split_threshold: user.split_threshold.or(default.split_threshold),
1376                num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1377                initial_lambda: user.initial_lambda.or(default.initial_lambda),
1378                reassign_neighbor_count: user
1379                    .reassign_neighbor_count
1380                    .or(default.reassign_neighbor_count),
1381                merge_threshold: user.merge_threshold.or(default.merge_threshold),
1382                num_centers_to_merge_to: user
1383                    .num_centers_to_merge_to
1384                    .or(default.num_centers_to_merge_to),
1385                write_nprobe: user.write_nprobe.or(default.write_nprobe),
1386                ef_construction: user.ef_construction.or(default.ef_construction),
1387                ef_search: user.ef_search.or(default.ef_search),
1388                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1389            }),
1390            (Some(default), None) => Some(default.clone()),
1391            (None, Some(user)) => Some(user.clone()),
1392            (None, None) => None,
1393        }
1394    }
1395
1396    /// Reconcile Schema with InternalCollectionConfiguration
1397    ///
1398    /// Simple reconciliation logic:
1399    /// 1. If collection config is default → return schema (schema is source of truth)
1400    /// 2. If collection config is non-default and schema is default → override schema with collection config
1401    ///
1402    /// Note: The case where both are non-default is validated earlier in reconcile_schema_and_config
1403    pub fn reconcile_with_collection_config(
1404        schema: &Schema,
1405        collection_config: &InternalCollectionConfiguration,
1406    ) -> Result<Schema, SchemaError> {
1407        // 1. Check if collection config is default
1408        if collection_config.is_default() {
1409            if schema.is_default() {
1410                // if both are default, use collection config to create schema
1411                // this handles the case where user did not provide schema or config.
1412                // since default schema doesnt have an ef, we need to use the coll config to create
1413                // a schema with the ef.
1414                let new_schema = Self::convert_collection_config_to_schema(collection_config)?;
1415                return Ok(new_schema);
1416            } else {
1417                // Collection config is default and schema is non-default → schema is source of truth
1418                return Ok(schema.clone());
1419            }
1420        }
1421
1422        // 2. Collection config is non-default, schema must be default (already validated earlier)
1423        // Convert collection config to schema
1424        Self::convert_collection_config_to_schema(collection_config)
1425    }
1426
1427    pub fn reconcile_schema_and_config(
1428        schema: Option<&Schema>,
1429        configuration: Option<&InternalCollectionConfiguration>,
1430        knn_index: KnnIndex,
1431    ) -> Result<Schema, SchemaError> {
1432        // Early validation: check if both user-provided schema and config are non-default
1433        if let (Some(user_schema), Some(config)) = (schema, configuration) {
1434            if !user_schema.is_default() && !config.is_default() {
1435                return Err(SchemaError::ConfigAndSchemaConflict);
1436            }
1437        }
1438
1439        let reconciled_schema = Self::reconcile_with_defaults(schema, knn_index)?;
1440        if let Some(config) = configuration {
1441            Self::reconcile_with_collection_config(&reconciled_schema, config)
1442        } else {
1443            Ok(reconciled_schema)
1444        }
1445    }
1446
1447    pub fn default_with_embedding_function(
1448        embedding_function: EmbeddingFunctionConfiguration,
1449    ) -> Schema {
1450        let mut schema = Schema::new_default(KnnIndex::Spann);
1451        if let Some(float_list) = &mut schema.defaults.float_list {
1452            if let Some(vector_index) = &mut float_list.vector_index {
1453                vector_index.config.embedding_function = Some(embedding_function.clone());
1454            }
1455        }
1456        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1457            if let Some(float_list) = &mut embedding_types.float_list {
1458                if let Some(vector_index) = &mut float_list.vector_index {
1459                    vector_index.config.embedding_function = Some(embedding_function);
1460                }
1461            }
1462        }
1463        schema
1464    }
1465
1466    /// Check if schema is default by checking each field individually
1467    pub fn is_default(&self) -> bool {
1468        // Check if defaults are default (field by field)
1469        if !Self::is_value_types_default(&self.defaults) {
1470            return false;
1471        }
1472
1473        for key in self.keys.keys() {
1474            if key != EMBEDDING_KEY && key != DOCUMENT_KEY {
1475                return false;
1476            }
1477        }
1478
1479        // Check #embedding key
1480        if let Some(embedding_value) = self.keys.get(EMBEDDING_KEY) {
1481            if !Self::is_embedding_value_types_default(embedding_value) {
1482                return false;
1483            }
1484        }
1485
1486        // Check #document key
1487        if let Some(document_value) = self.keys.get(DOCUMENT_KEY) {
1488            if !Self::is_document_value_types_default(document_value) {
1489                return false;
1490            }
1491        }
1492
1493        true
1494    }
1495
1496    /// Check if ValueTypes (defaults) are in default state
1497    fn is_value_types_default(value_types: &ValueTypes) -> bool {
1498        // Check string field
1499        if let Some(string) = &value_types.string {
1500            if let Some(string_inverted) = &string.string_inverted_index {
1501                if !string_inverted.enabled {
1502                    return false;
1503                }
1504                // Config is an empty struct, so no need to check it
1505            }
1506            if let Some(fts) = &string.fts_index {
1507                if fts.enabled {
1508                    return false;
1509                }
1510                // Config is an empty struct, so no need to check it
1511            }
1512        }
1513
1514        // Check float field
1515        if let Some(float) = &value_types.float {
1516            if let Some(float_inverted) = &float.float_inverted_index {
1517                if !float_inverted.enabled {
1518                    return false;
1519                }
1520                // Config is an empty struct, so no need to check it
1521            }
1522        }
1523
1524        // Check int field
1525        if let Some(int) = &value_types.int {
1526            if let Some(int_inverted) = &int.int_inverted_index {
1527                if !int_inverted.enabled {
1528                    return false;
1529                }
1530                // Config is an empty struct, so no need to check it
1531            }
1532        }
1533
1534        // Check boolean field
1535        if let Some(boolean) = &value_types.boolean {
1536            if let Some(bool_inverted) = &boolean.bool_inverted_index {
1537                if !bool_inverted.enabled {
1538                    return false;
1539                }
1540                // Config is an empty struct, so no need to check it
1541            }
1542        }
1543
1544        // Check float_list field (vector index should be disabled)
1545        if let Some(float_list) = &value_types.float_list {
1546            if let Some(vector_index) = &float_list.vector_index {
1547                if vector_index.enabled {
1548                    return false;
1549                }
1550                // Check that the config has default structure
1551                // We allow space and embedding_function to vary, but check structure
1552                if vector_index.config.source_key.is_some() {
1553                    return false;
1554                }
1555                // Check that either hnsw or spann config is present (not both, not neither)
1556                // and that the config values are default
1557                match (&vector_index.config.hnsw, &vector_index.config.spann) {
1558                    (Some(hnsw_config), None) => {
1559                        if !hnsw_config.is_default() {
1560                            return false;
1561                        }
1562                    }
1563                    (None, Some(spann_config)) => {
1564                        if !spann_config.is_default() {
1565                            return false;
1566                        }
1567                    }
1568                    (Some(_), Some(_)) => return false, // Both present
1569                    (None, None) => {}
1570                }
1571            }
1572        }
1573
1574        // Check sparse_vector field (should be disabled)
1575        if let Some(sparse_vector) = &value_types.sparse_vector {
1576            if let Some(sparse_index) = &sparse_vector.sparse_vector_index {
1577                if sparse_index.enabled {
1578                    return false;
1579                }
1580                // Check config structure
1581                if !is_embedding_function_default(&sparse_index.config.embedding_function) {
1582                    return false;
1583                }
1584                if sparse_index.config.source_key.is_some() {
1585                    return false;
1586                }
1587                if let Some(bm25) = &sparse_index.config.bm25 {
1588                    if bm25 != &false {
1589                        return false;
1590                    }
1591                }
1592            }
1593        }
1594
1595        true
1596    }
1597
1598    /// Check if ValueTypes for #embedding key are in default state
1599    fn is_embedding_value_types_default(value_types: &ValueTypes) -> bool {
1600        // For #embedding, only float_list should be set
1601        if value_types.string.is_some()
1602            || value_types.float.is_some()
1603            || value_types.int.is_some()
1604            || value_types.boolean.is_some()
1605            || value_types.sparse_vector.is_some()
1606        {
1607            return false;
1608        }
1609
1610        // Check float_list field (vector index should be enabled)
1611        if let Some(float_list) = &value_types.float_list {
1612            if let Some(vector_index) = &float_list.vector_index {
1613                if !vector_index.enabled {
1614                    return false;
1615                }
1616                // Check that embedding_function is default
1617                if !is_embedding_function_default(&vector_index.config.embedding_function) {
1618                    return false;
1619                }
1620                // Check that source_key is #document
1621                if vector_index.config.source_key.as_deref() != Some(DOCUMENT_KEY) {
1622                    return false;
1623                }
1624                // Check that either hnsw or spann config is present (not both, not neither)
1625                // and that the config values are default
1626                match (&vector_index.config.hnsw, &vector_index.config.spann) {
1627                    (Some(hnsw_config), None) => {
1628                        if !hnsw_config.is_default() {
1629                            return false;
1630                        }
1631                    }
1632                    (None, Some(spann_config)) => {
1633                        if !spann_config.is_default() {
1634                            return false;
1635                        }
1636                    }
1637                    (Some(_), Some(_)) => return false, // Both present
1638                    (None, None) => {}
1639                }
1640            }
1641        }
1642
1643        true
1644    }
1645
1646    /// Check if ValueTypes for #document key are in default state
1647    fn is_document_value_types_default(value_types: &ValueTypes) -> bool {
1648        // For #document, only string should be set
1649        if value_types.float_list.is_some()
1650            || value_types.float.is_some()
1651            || value_types.int.is_some()
1652            || value_types.boolean.is_some()
1653            || value_types.sparse_vector.is_some()
1654        {
1655            return false;
1656        }
1657
1658        // Check string field
1659        if let Some(string) = &value_types.string {
1660            if let Some(fts) = &string.fts_index {
1661                if !fts.enabled {
1662                    return false;
1663                }
1664                // Config is an empty struct, so no need to check it
1665            }
1666            if let Some(string_inverted) = &string.string_inverted_index {
1667                if string_inverted.enabled {
1668                    return false;
1669                }
1670                // Config is an empty struct, so no need to check it
1671            }
1672        }
1673
1674        true
1675    }
1676
1677    /// Convert InternalCollectionConfiguration to Schema
1678    fn convert_collection_config_to_schema(
1679        collection_config: &InternalCollectionConfiguration,
1680    ) -> Result<Schema, SchemaError> {
1681        // Start with a default schema structure
1682        let mut schema = Schema::new_default(KnnIndex::Spann); // Default to HNSW, will be overridden
1683
1684        // Convert vector index configuration
1685        let vector_config = match &collection_config.vector_index {
1686            VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
1687                space: Some(hnsw_config.space.clone()),
1688                embedding_function: collection_config.embedding_function.clone(),
1689                source_key: Some(DOCUMENT_KEY.to_string()), // Default source key
1690                hnsw: Some(HnswIndexConfig {
1691                    ef_construction: Some(hnsw_config.ef_construction),
1692                    max_neighbors: Some(hnsw_config.max_neighbors),
1693                    ef_search: Some(hnsw_config.ef_search),
1694                    num_threads: Some(hnsw_config.num_threads),
1695                    batch_size: Some(hnsw_config.batch_size),
1696                    sync_threshold: Some(hnsw_config.sync_threshold),
1697                    resize_factor: Some(hnsw_config.resize_factor),
1698                }),
1699                spann: None,
1700            },
1701            VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
1702                space: Some(spann_config.space.clone()),
1703                embedding_function: collection_config.embedding_function.clone(),
1704                source_key: Some(DOCUMENT_KEY.to_string()), // Default source key
1705                hnsw: None,
1706                spann: Some(SpannIndexConfig {
1707                    search_nprobe: Some(spann_config.search_nprobe),
1708                    search_rng_factor: Some(spann_config.search_rng_factor),
1709                    search_rng_epsilon: Some(spann_config.search_rng_epsilon),
1710                    nreplica_count: Some(spann_config.nreplica_count),
1711                    write_rng_factor: Some(spann_config.write_rng_factor),
1712                    write_rng_epsilon: Some(spann_config.write_rng_epsilon),
1713                    split_threshold: Some(spann_config.split_threshold),
1714                    num_samples_kmeans: Some(spann_config.num_samples_kmeans),
1715                    initial_lambda: Some(spann_config.initial_lambda),
1716                    reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
1717                    merge_threshold: Some(spann_config.merge_threshold),
1718                    num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
1719                    write_nprobe: Some(spann_config.write_nprobe),
1720                    ef_construction: Some(spann_config.ef_construction),
1721                    ef_search: Some(spann_config.ef_search),
1722                    max_neighbors: Some(spann_config.max_neighbors),
1723                }),
1724            },
1725        };
1726
1727        // Update defaults (keep enabled=false, just update the config)
1728        // This serves as the template for any new float_list fields
1729        if let Some(float_list) = &mut schema.defaults.float_list {
1730            if let Some(vector_index) = &mut float_list.vector_index {
1731                vector_index.config = vector_config.clone();
1732            }
1733        }
1734
1735        // Update the vector_index in the existing #embedding key override
1736        // Keep enabled=true (already set by new_default) and update the config
1737        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1738            if let Some(float_list) = &mut embedding_types.float_list {
1739                if let Some(vector_index) = &mut float_list.vector_index {
1740                    vector_index.config = vector_config;
1741                }
1742            }
1743        }
1744
1745        Ok(schema)
1746    }
1747
1748    /// Check if a specific metadata key-value should be indexed based on schema configuration
1749    pub fn is_metadata_type_index_enabled(
1750        &self,
1751        key: &str,
1752        value_type: MetadataValueType,
1753    ) -> Result<bool, SchemaError> {
1754        let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1755
1756        match value_type {
1757            MetadataValueType::Bool => match &v_type.boolean {
1758                Some(bool_type) => match &bool_type.bool_inverted_index {
1759                    Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1760                    None => Err(SchemaError::MissingIndexConfiguration {
1761                        key: key.to_string(),
1762                        value_type: "bool".to_string(),
1763                    }),
1764                },
1765                None => match &self.defaults.boolean {
1766                    Some(bool_type) => match &bool_type.bool_inverted_index {
1767                        Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1768                        None => Err(SchemaError::MissingIndexConfiguration {
1769                            key: key.to_string(),
1770                            value_type: "bool".to_string(),
1771                        }),
1772                    },
1773                    None => Err(SchemaError::MissingIndexConfiguration {
1774                        key: key.to_string(),
1775                        value_type: "bool".to_string(),
1776                    }),
1777                },
1778            },
1779            MetadataValueType::Int => match &v_type.int {
1780                Some(int_type) => match &int_type.int_inverted_index {
1781                    Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1782                    None => Err(SchemaError::MissingIndexConfiguration {
1783                        key: key.to_string(),
1784                        value_type: "int".to_string(),
1785                    }),
1786                },
1787                None => match &self.defaults.int {
1788                    Some(int_type) => match &int_type.int_inverted_index {
1789                        Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1790                        None => Err(SchemaError::MissingIndexConfiguration {
1791                            key: key.to_string(),
1792                            value_type: "int".to_string(),
1793                        }),
1794                    },
1795                    None => Err(SchemaError::MissingIndexConfiguration {
1796                        key: key.to_string(),
1797                        value_type: "int".to_string(),
1798                    }),
1799                },
1800            },
1801            MetadataValueType::Float => match &v_type.float {
1802                Some(float_type) => match &float_type.float_inverted_index {
1803                    Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1804                    None => Err(SchemaError::MissingIndexConfiguration {
1805                        key: key.to_string(),
1806                        value_type: "float".to_string(),
1807                    }),
1808                },
1809                None => match &self.defaults.float {
1810                    Some(float_type) => match &float_type.float_inverted_index {
1811                        Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1812                        None => Err(SchemaError::MissingIndexConfiguration {
1813                            key: key.to_string(),
1814                            value_type: "float".to_string(),
1815                        }),
1816                    },
1817                    None => Err(SchemaError::MissingIndexConfiguration {
1818                        key: key.to_string(),
1819                        value_type: "float".to_string(),
1820                    }),
1821                },
1822            },
1823            MetadataValueType::Str => match &v_type.string {
1824                Some(string_type) => match &string_type.string_inverted_index {
1825                    Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1826                    None => Err(SchemaError::MissingIndexConfiguration {
1827                        key: key.to_string(),
1828                        value_type: "string".to_string(),
1829                    }),
1830                },
1831                None => match &self.defaults.string {
1832                    Some(string_type) => match &string_type.string_inverted_index {
1833                        Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1834                        None => Err(SchemaError::MissingIndexConfiguration {
1835                            key: key.to_string(),
1836                            value_type: "string".to_string(),
1837                        }),
1838                    },
1839                    None => Err(SchemaError::MissingIndexConfiguration {
1840                        key: key.to_string(),
1841                        value_type: "string".to_string(),
1842                    }),
1843                },
1844            },
1845            MetadataValueType::SparseVector => match &v_type.sparse_vector {
1846                Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1847                    Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1848                    None => Err(SchemaError::MissingIndexConfiguration {
1849                        key: key.to_string(),
1850                        value_type: "sparse_vector".to_string(),
1851                    }),
1852                },
1853                None => match &self.defaults.sparse_vector {
1854                    Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1855                        Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1856                        None => Err(SchemaError::MissingIndexConfiguration {
1857                            key: key.to_string(),
1858                            value_type: "sparse_vector".to_string(),
1859                        }),
1860                    },
1861                    None => Err(SchemaError::MissingIndexConfiguration {
1862                        key: key.to_string(),
1863                        value_type: "sparse_vector".to_string(),
1864                    }),
1865                },
1866            },
1867        }
1868    }
1869
1870    pub fn is_metadata_where_indexing_enabled(
1871        &self,
1872        where_clause: &Where,
1873    ) -> Result<(), FilterValidationError> {
1874        match where_clause {
1875            Where::Composite(composite) => {
1876                for child in &composite.children {
1877                    self.is_metadata_where_indexing_enabled(child)?;
1878                }
1879                Ok(())
1880            }
1881            Where::Document(_) => Ok(()),
1882            Where::Metadata(expression) => {
1883                let value_type = match &expression.comparison {
1884                    MetadataComparison::Primitive(_, value) => value.value_type(),
1885                    MetadataComparison::Set(_, set_value) => set_value.value_type(),
1886                };
1887                let is_enabled = self
1888                    .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1889                    .map_err(FilterValidationError::Schema)?;
1890                if !is_enabled {
1891                    return Err(FilterValidationError::IndexingDisabled {
1892                        key: expression.key.clone(),
1893                        value_type,
1894                    });
1895                }
1896                Ok(())
1897            }
1898        }
1899    }
1900
1901    pub fn is_knn_key_indexing_enabled(
1902        &self,
1903        key: &str,
1904        query: &QueryVector,
1905    ) -> Result<(), FilterValidationError> {
1906        match query {
1907            QueryVector::Sparse(_) => {
1908                let is_enabled = self
1909                    .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1910                    .map_err(FilterValidationError::Schema)?;
1911                if !is_enabled {
1912                    return Err(FilterValidationError::IndexingDisabled {
1913                        key: key.to_string(),
1914                        value_type: MetadataValueType::SparseVector,
1915                    });
1916                }
1917                Ok(())
1918            }
1919            QueryVector::Dense(_) => {
1920                // TODO: once we allow turning off dense vector indexing, we need to check if the key is enabled
1921                // Dense vectors are always indexed
1922                Ok(())
1923            }
1924        }
1925    }
1926
1927    pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
1928        if key.starts_with(CHROMA_KEY) {
1929            return false;
1930        }
1931        let value_types = self.keys.entry(key.to_string()).or_default();
1932        match value_type {
1933            MetadataValueType::Bool => {
1934                if value_types.boolean.is_none() {
1935                    value_types.boolean = self.defaults.boolean.clone();
1936                    return true;
1937                }
1938            }
1939            MetadataValueType::Int => {
1940                if value_types.int.is_none() {
1941                    value_types.int = self.defaults.int.clone();
1942                    return true;
1943                }
1944            }
1945            MetadataValueType::Float => {
1946                if value_types.float.is_none() {
1947                    value_types.float = self.defaults.float.clone();
1948                    return true;
1949                }
1950            }
1951            MetadataValueType::Str => {
1952                if value_types.string.is_none() {
1953                    value_types.string = self.defaults.string.clone();
1954                    return true;
1955                }
1956            }
1957            MetadataValueType::SparseVector => {
1958                if value_types.sparse_vector.is_none() {
1959                    value_types.sparse_vector = self.defaults.sparse_vector.clone();
1960                    return true;
1961                }
1962            }
1963        }
1964        false
1965    }
1966
1967    // ========================================================================
1968    // BUILDER PATTERN METHODS
1969    // ========================================================================
1970
1971    /// Create an index configuration (builder pattern)
1972    ///
1973    /// This method allows fluent, chainable configuration of indexes on a schema.
1974    /// It matches the Python API's `.create_index()` method.
1975    ///
1976    /// # Arguments
1977    /// * `key` - Optional key name for per-key index. `None` applies to defaults/special keys
1978    /// * `config` - Index configuration to create
1979    ///
1980    /// # Returns
1981    /// `Self` for method chaining
1982    ///
1983    /// # Errors
1984    /// Returns error if:
1985    /// - Attempting to create index on special keys (`#document`, `#embedding`)
1986    /// - Invalid configuration (e.g., vector index on non-embedding key)
1987    /// - Conflicting with existing indexes (e.g., multiple sparse vector indexes)
1988    ///
1989    /// # Examples
1990    /// ```
1991    /// use chroma_types::{Schema, VectorIndexConfig, StringInvertedIndexConfig, Space};
1992    ///
1993    /// # fn main() -> Result<(), SchemaBuilderError> {
1994    /// let schema = Schema::default()
1995    ///     .create_index(None, VectorIndexConfig {
1996    ///         space: Some(Space::Cosine),
1997    ///         embedding_function: None,
1998    ///         source_key: None,
1999    ///         hnsw: None,
2000    ///         spann: None,
2001    ///     }.into())?
2002    ///     .create_index(Some("category"), StringInvertedIndexConfig {}.into())?;
2003    /// # Ok(())
2004    /// # }
2005    /// ```
2006    pub fn create_index(
2007        mut self,
2008        key: Option<&str>,
2009        config: IndexConfig,
2010    ) -> Result<Self, SchemaBuilderError> {
2011        // Handle special cases: Vector and FTS (global configs only)
2012        match (&key, &config) {
2013            (None, IndexConfig::Vector(cfg)) => {
2014                self._set_vector_index_config_builder(cfg.clone());
2015                return Ok(self);
2016            }
2017            (None, IndexConfig::Fts(cfg)) => {
2018                self._set_fts_index_config_builder(cfg.clone());
2019                return Ok(self);
2020            }
2021            (Some(k), IndexConfig::Vector(_)) => {
2022                return Err(SchemaBuilderError::VectorIndexMustBeGlobal { key: k.to_string() });
2023            }
2024            (Some(k), IndexConfig::Fts(_)) => {
2025                return Err(SchemaBuilderError::FtsIndexMustBeGlobal { key: k.to_string() });
2026            }
2027            _ => {}
2028        }
2029
2030        // Validate special keys
2031        if let Some(k) = key {
2032            if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2033                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2034                    key: k.to_string(),
2035                });
2036            }
2037        }
2038
2039        // Validate sparse vector requires key
2040        if key.is_none() && matches!(config, IndexConfig::SparseVector(_)) {
2041            return Err(SchemaBuilderError::SparseVectorRequiresKey);
2042        }
2043
2044        // Dispatch to appropriate helper
2045        match key {
2046            Some(k) => self._set_index_for_key_builder(k, config, true)?,
2047            None => self._set_index_in_defaults_builder(config, true)?,
2048        }
2049
2050        Ok(self)
2051    }
2052
2053    /// Delete/disable an index configuration (builder pattern)
2054    ///
2055    /// This method allows disabling indexes on a schema.
2056    /// It matches the Python API's `.delete_index()` method.
2057    ///
2058    /// # Arguments
2059    /// * `key` - Optional key name for per-key index. `None` applies to defaults
2060    /// * `config` - Index configuration to disable
2061    ///
2062    /// # Returns
2063    /// `Self` for method chaining
2064    ///
2065    /// # Errors
2066    /// Returns error if:
2067    /// - Attempting to delete index on special keys (`#document`, `#embedding`)
2068    /// - Attempting to delete vector, FTS, or sparse vector indexes (not currently supported)
2069    ///
2070    /// # Examples
2071    /// ```
2072    /// use chroma_types::{Schema, StringInvertedIndexConfig};
2073    ///
2074    /// # fn main() -> Result<(), SchemaBuilderError> {
2075    /// let schema = Schema::default()
2076    ///     .delete_index(Some("category"), StringInvertedIndexConfig {}.into())?;
2077    /// # Ok(())
2078    /// # }
2079    /// ```
2080    pub fn delete_index(
2081        mut self,
2082        key: Option<&str>,
2083        config: IndexConfig,
2084    ) -> Result<Self, SchemaBuilderError> {
2085        // Validate special keys
2086        if let Some(k) = key {
2087            if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2088                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2089                    key: k.to_string(),
2090                });
2091            }
2092        }
2093
2094        // Disallow deleting vector, FTS, and sparse vector indexes (match Python restrictions)
2095        match &config {
2096            IndexConfig::Vector(_) => {
2097                return Err(SchemaBuilderError::VectorIndexDeletionNotSupported);
2098            }
2099            IndexConfig::Fts(_) => {
2100                return Err(SchemaBuilderError::FtsIndexDeletionNotSupported);
2101            }
2102            IndexConfig::SparseVector(_) => {
2103                return Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported);
2104            }
2105            _ => {}
2106        }
2107
2108        // Dispatch to appropriate helper (enabled=false)
2109        match key {
2110            Some(k) => self._set_index_for_key_builder(k, config, false)?,
2111            None => self._set_index_in_defaults_builder(config, false)?,
2112        }
2113
2114        Ok(self)
2115    }
2116
2117    /// Set vector index config globally (applies to #embedding)
2118    fn _set_vector_index_config_builder(&mut self, config: VectorIndexConfig) {
2119        // Update defaults (disabled, just config update)
2120        if let Some(float_list) = &mut self.defaults.float_list {
2121            if let Some(vector_index) = &mut float_list.vector_index {
2122                vector_index.config = config.clone();
2123            }
2124        }
2125
2126        // Update #embedding key (enabled, config update, preserve source_key=#document)
2127        if let Some(embedding_types) = self.keys.get_mut(EMBEDDING_KEY) {
2128            if let Some(float_list) = &mut embedding_types.float_list {
2129                if let Some(vector_index) = &mut float_list.vector_index {
2130                    let mut updated_config = config;
2131                    // Preserve source_key as #document
2132                    updated_config.source_key = Some(DOCUMENT_KEY.to_string());
2133                    vector_index.config = updated_config;
2134                }
2135            }
2136        }
2137    }
2138
2139    /// Set FTS index config globally (applies to #document)
2140    fn _set_fts_index_config_builder(&mut self, config: FtsIndexConfig) {
2141        // Update defaults (disabled, just config update)
2142        if let Some(string) = &mut self.defaults.string {
2143            if let Some(fts_index) = &mut string.fts_index {
2144                fts_index.config = config.clone();
2145            }
2146        }
2147
2148        // Update #document key (enabled, config update)
2149        if let Some(document_types) = self.keys.get_mut(DOCUMENT_KEY) {
2150            if let Some(string) = &mut document_types.string {
2151                if let Some(fts_index) = &mut string.fts_index {
2152                    fts_index.config = config;
2153                }
2154            }
2155        }
2156    }
2157
2158    /// Set index configuration for a specific key
2159    fn _set_index_for_key_builder(
2160        &mut self,
2161        key: &str,
2162        config: IndexConfig,
2163        enabled: bool,
2164    ) -> Result<(), SchemaBuilderError> {
2165        // Check for multiple sparse vector indexes BEFORE getting mutable reference
2166        if enabled && matches!(config, IndexConfig::SparseVector(_)) {
2167            // Find existing sparse vector index
2168            let existing_key = self
2169                .keys
2170                .iter()
2171                .find(|(k, v)| {
2172                    k.as_str() != key
2173                        && v.sparse_vector
2174                            .as_ref()
2175                            .and_then(|sv| sv.sparse_vector_index.as_ref())
2176                            .map(|idx| idx.enabled)
2177                            .unwrap_or(false)
2178                })
2179                .map(|(k, _)| k.clone());
2180
2181            if let Some(existing_key) = existing_key {
2182                return Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key });
2183            }
2184        }
2185
2186        // Get or create ValueTypes for this key
2187        let value_types = self.keys.entry(key.to_string()).or_default();
2188
2189        // Set the appropriate index based on config type
2190        match config {
2191            IndexConfig::Vector(_) => {
2192                return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2193                    key: key.to_string(),
2194                });
2195            }
2196            IndexConfig::Fts(_) => {
2197                return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2198                    key: key.to_string(),
2199                });
2200            }
2201            IndexConfig::SparseVector(cfg) => {
2202                value_types.sparse_vector = Some(SparseVectorValueType {
2203                    sparse_vector_index: Some(SparseVectorIndexType {
2204                        enabled,
2205                        config: cfg,
2206                    }),
2207                });
2208            }
2209            IndexConfig::StringInverted(cfg) => {
2210                if value_types.string.is_none() {
2211                    value_types.string = Some(StringValueType {
2212                        fts_index: None,
2213                        string_inverted_index: None,
2214                    });
2215                }
2216                if let Some(string) = &mut value_types.string {
2217                    string.string_inverted_index = Some(StringInvertedIndexType {
2218                        enabled,
2219                        config: cfg,
2220                    });
2221                }
2222            }
2223            IndexConfig::IntInverted(cfg) => {
2224                value_types.int = Some(IntValueType {
2225                    int_inverted_index: Some(IntInvertedIndexType {
2226                        enabled,
2227                        config: cfg,
2228                    }),
2229                });
2230            }
2231            IndexConfig::FloatInverted(cfg) => {
2232                value_types.float = Some(FloatValueType {
2233                    float_inverted_index: Some(FloatInvertedIndexType {
2234                        enabled,
2235                        config: cfg,
2236                    }),
2237                });
2238            }
2239            IndexConfig::BoolInverted(cfg) => {
2240                value_types.boolean = Some(BoolValueType {
2241                    bool_inverted_index: Some(BoolInvertedIndexType {
2242                        enabled,
2243                        config: cfg,
2244                    }),
2245                });
2246            }
2247        }
2248
2249        Ok(())
2250    }
2251
2252    /// Set index configuration in defaults
2253    fn _set_index_in_defaults_builder(
2254        &mut self,
2255        config: IndexConfig,
2256        enabled: bool,
2257    ) -> Result<(), SchemaBuilderError> {
2258        match config {
2259            IndexConfig::Vector(_) => {
2260                return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2261                    key: "defaults".to_string(),
2262                });
2263            }
2264            IndexConfig::Fts(_) => {
2265                return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2266                    key: "defaults".to_string(),
2267                });
2268            }
2269            IndexConfig::SparseVector(cfg) => {
2270                self.defaults.sparse_vector = Some(SparseVectorValueType {
2271                    sparse_vector_index: Some(SparseVectorIndexType {
2272                        enabled,
2273                        config: cfg,
2274                    }),
2275                });
2276            }
2277            IndexConfig::StringInverted(cfg) => {
2278                if self.defaults.string.is_none() {
2279                    self.defaults.string = Some(StringValueType {
2280                        fts_index: None,
2281                        string_inverted_index: None,
2282                    });
2283                }
2284                if let Some(string) = &mut self.defaults.string {
2285                    string.string_inverted_index = Some(StringInvertedIndexType {
2286                        enabled,
2287                        config: cfg,
2288                    });
2289                }
2290            }
2291            IndexConfig::IntInverted(cfg) => {
2292                self.defaults.int = Some(IntValueType {
2293                    int_inverted_index: Some(IntInvertedIndexType {
2294                        enabled,
2295                        config: cfg,
2296                    }),
2297                });
2298            }
2299            IndexConfig::FloatInverted(cfg) => {
2300                self.defaults.float = Some(FloatValueType {
2301                    float_inverted_index: Some(FloatInvertedIndexType {
2302                        enabled,
2303                        config: cfg,
2304                    }),
2305                });
2306            }
2307            IndexConfig::BoolInverted(cfg) => {
2308                self.defaults.boolean = Some(BoolValueType {
2309                    bool_inverted_index: Some(BoolInvertedIndexType {
2310                        enabled,
2311                        config: cfg,
2312                    }),
2313                });
2314            }
2315        }
2316
2317        Ok(())
2318    }
2319}
2320
2321// ============================================================================
2322// INDEX CONFIGURATION STRUCTURES
2323// ============================================================================
2324
/// Configuration for a vector index.
///
/// Every field is optional and is left out of the serialized form when it is `None`.
/// Because of `deny_unknown_fields`, deserialization fails on any unrecognized field.
// NOTE(review): `hnsw` and `spann` look like alternative algorithm settings; a test in this
// file is named as rejecting a schema with both set — confirm the exact rule.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct VectorIndexConfig {
    /// Vector space for similarity calculation (cosine, l2, ip)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub space: Option<Space>,
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// HNSW algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw: Option<HnswIndexConfig>,
    /// SPANN algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub spann: Option<SpannIndexConfig>,
}
2345
/// Configuration for HNSW vector index algorithm parameters
///
/// Every field is optional and is left out of the serialized form when it is `None`;
/// `is_default` treats an unset field as "not set by the user". Because of
/// `deny_unknown_fields`, deserialization fails on any unrecognized field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct HnswIndexConfig {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_construction: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_neighbors: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_search: Option<usize>,
    /// Not taken into account by `is_default`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_threads: Option<usize>,
    /// Validation requires a value of at least 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub batch_size: Option<usize>,
    /// Validation requires a value of at least 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub sync_threshold: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resize_factor: Option<f64>,
}
2368
2369impl HnswIndexConfig {
2370    /// Check if this config has default values
2371    /// None values are considered default (not set by user)
2372    /// Note: We skip num_threads as it's variable based on available_parallelism
2373    pub fn is_default(&self) -> bool {
2374        if let Some(ef_construction) = self.ef_construction {
2375            if ef_construction != default_construction_ef() {
2376                return false;
2377            }
2378        }
2379        if let Some(max_neighbors) = self.max_neighbors {
2380            if max_neighbors != default_m() {
2381                return false;
2382            }
2383        }
2384        if let Some(ef_search) = self.ef_search {
2385            if ef_search != default_search_ef() {
2386                return false;
2387            }
2388        }
2389        if let Some(batch_size) = self.batch_size {
2390            if batch_size != default_batch_size() {
2391                return false;
2392            }
2393        }
2394        if let Some(sync_threshold) = self.sync_threshold {
2395            if sync_threshold != default_sync_threshold() {
2396                return false;
2397            }
2398        }
2399        if let Some(resize_factor) = self.resize_factor {
2400            if resize_factor != default_resize_factor() {
2401                return false;
2402            }
2403        }
2404        // Skip num_threads check as it's system-dependent
2405        true
2406    }
2407}
2408
/// Configuration for SPANN vector index algorithm parameters
///
/// Every field is optional and is left out of the serialized form when it is `None`;
/// `is_default` treats an unset field as "not set by the user". Because of
/// `deny_unknown_fields`, deserialization fails on any unrecognized field.
///
/// Several fields declare a validation range whose minimum equals its maximum, which
/// leaves exactly one value that passes validation.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SpannIndexConfig {
    /// Validation allows at most 128.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 128))]
    pub search_nprobe: Option<u32>,
    /// Validation allows only 1.0 (min == max).
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub search_rng_factor: Option<f32>,
    /// Validation allows 5.0 through 10.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub search_rng_epsilon: Option<f32>,
    /// Validation allows at most 8.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub nreplica_count: Option<u32>,
    /// Validation allows only 1.0 (min == max).
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub write_rng_factor: Option<f32>,
    /// Validation allows 5.0 through 10.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub write_rng_epsilon: Option<f32>,
    /// Validation allows 50 through 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 50, max = 200))]
    pub split_threshold: Option<u32>,
    /// Validation allows at most 1000.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 1000))]
    pub num_samples_kmeans: Option<usize>,
    /// Validation allows only 100.0 (min == max).
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 100.0, max = 100.0))]
    pub initial_lambda: Option<f32>,
    /// Validation allows at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub reassign_neighbor_count: Option<u32>,
    /// Validation allows 25 through 100.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 25, max = 100))]
    pub merge_threshold: Option<u32>,
    /// Validation allows at most 8.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub num_centers_to_merge_to: Option<u32>,
    /// Validation allows at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub write_nprobe: Option<u32>,
    /// Validation allows at most 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_construction: Option<usize>,
    /// Validation allows at most 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_search: Option<usize>,
    /// Validation allows at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub max_neighbors: Option<usize>,
}
2463
2464impl SpannIndexConfig {
2465    /// Check if this config has default values
2466    /// None values are considered default (not set by user)
2467    pub fn is_default(&self) -> bool {
2468        if let Some(search_nprobe) = self.search_nprobe {
2469            if search_nprobe != default_search_nprobe() {
2470                return false;
2471            }
2472        }
2473        if let Some(search_rng_factor) = self.search_rng_factor {
2474            if search_rng_factor != default_search_rng_factor() {
2475                return false;
2476            }
2477        }
2478        if let Some(search_rng_epsilon) = self.search_rng_epsilon {
2479            if search_rng_epsilon != default_search_rng_epsilon() {
2480                return false;
2481            }
2482        }
2483        if let Some(nreplica_count) = self.nreplica_count {
2484            if nreplica_count != default_nreplica_count() {
2485                return false;
2486            }
2487        }
2488        if let Some(write_rng_factor) = self.write_rng_factor {
2489            if write_rng_factor != default_write_rng_factor() {
2490                return false;
2491            }
2492        }
2493        if let Some(write_rng_epsilon) = self.write_rng_epsilon {
2494            if write_rng_epsilon != default_write_rng_epsilon() {
2495                return false;
2496            }
2497        }
2498        if let Some(split_threshold) = self.split_threshold {
2499            if split_threshold != default_split_threshold() {
2500                return false;
2501            }
2502        }
2503        if let Some(num_samples_kmeans) = self.num_samples_kmeans {
2504            if num_samples_kmeans != default_num_samples_kmeans() {
2505                return false;
2506            }
2507        }
2508        if let Some(initial_lambda) = self.initial_lambda {
2509            if initial_lambda != default_initial_lambda() {
2510                return false;
2511            }
2512        }
2513        if let Some(reassign_neighbor_count) = self.reassign_neighbor_count {
2514            if reassign_neighbor_count != default_reassign_neighbor_count() {
2515                return false;
2516            }
2517        }
2518        if let Some(merge_threshold) = self.merge_threshold {
2519            if merge_threshold != default_merge_threshold() {
2520                return false;
2521            }
2522        }
2523        if let Some(num_centers_to_merge_to) = self.num_centers_to_merge_to {
2524            if num_centers_to_merge_to != default_num_centers_to_merge_to() {
2525                return false;
2526            }
2527        }
2528        if let Some(write_nprobe) = self.write_nprobe {
2529            if write_nprobe != default_write_nprobe() {
2530                return false;
2531            }
2532        }
2533        if let Some(ef_construction) = self.ef_construction {
2534            if ef_construction != default_construction_ef_spann() {
2535                return false;
2536            }
2537        }
2538        if let Some(ef_search) = self.ef_search {
2539            if ef_search != default_search_ef_spann() {
2540                return false;
2541            }
2542        }
2543        if let Some(max_neighbors) = self.max_neighbors {
2544            if max_neighbors != default_m_spann() {
2545                return false;
2546            }
2547        }
2548        true
2549    }
2550}
2551
/// Configuration for a sparse vector index.
///
/// Every field is optional and is left out of the serialized form when it is `None`.
/// Because of `deny_unknown_fields`, deserialization fails on any unrecognized field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SparseVectorIndexConfig {
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the sparse vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// Whether this embedding is BM25
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bm25: Option<bool>,
}
2566
/// Configuration for an FTS index.
///
/// Currently has no fields; because of `deny_unknown_fields`, deserialization rejects any
/// input that carries a field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FtsIndexConfig {
    // FTS index typically has no additional parameters
}
2573
/// Configuration for a string inverted index.
///
/// Currently has no fields; because of `deny_unknown_fields`, deserialization rejects any
/// input that carries a field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct StringInvertedIndexConfig {
    // String inverted index typically has no additional parameters
}
2580
/// Configuration for an integer inverted index.
///
/// Currently has no fields; because of `deny_unknown_fields`, deserialization rejects any
/// input that carries a field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct IntInvertedIndexConfig {
    // Integer inverted index typically has no additional parameters
}
2587
/// Configuration for a float inverted index.
///
/// Currently has no fields; because of `deny_unknown_fields`, deserialization rejects any
/// input that carries a field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FloatInvertedIndexConfig {
    // Float inverted index typically has no additional parameters
}
2594
/// Configuration for a boolean inverted index.
///
/// Currently has no fields; because of `deny_unknown_fields`, deserialization rejects any
/// input that carries a field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct BoolInvertedIndexConfig {
    // Boolean inverted index typically has no additional parameters
}
2601
2602// ============================================================================
2603// BUILDER PATTERN SUPPORT
2604// ============================================================================
2605
/// Union type for all index configurations (used by builder pattern)
///
/// Each variant wraps the config struct of one index kind, and every config struct has a
/// `From` impl into this enum. The per-key and defaults setters of the schema builder
/// reject `Vector` and `Fts` with `VectorIndexMustBeGlobal` / `FtsIndexMustBeGlobal`.
#[derive(Clone, Debug)]
pub enum IndexConfig {
    /// Vector index configuration.
    Vector(VectorIndexConfig),
    /// Sparse vector index configuration.
    SparseVector(SparseVectorIndexConfig),
    /// FTS index configuration.
    Fts(FtsIndexConfig),
    /// String inverted index configuration.
    StringInverted(StringInvertedIndexConfig),
    /// Integer inverted index configuration.
    IntInverted(IntInvertedIndexConfig),
    /// Float inverted index configuration.
    FloatInverted(FloatInvertedIndexConfig),
    /// Boolean inverted index configuration.
    BoolInverted(BoolInvertedIndexConfig),
}
2617
2618// Convenience From implementations for ergonomic usage
2619impl From<VectorIndexConfig> for IndexConfig {
2620    fn from(config: VectorIndexConfig) -> Self {
2621        IndexConfig::Vector(config)
2622    }
2623}
2624
2625impl From<SparseVectorIndexConfig> for IndexConfig {
2626    fn from(config: SparseVectorIndexConfig) -> Self {
2627        IndexConfig::SparseVector(config)
2628    }
2629}
2630
2631impl From<FtsIndexConfig> for IndexConfig {
2632    fn from(config: FtsIndexConfig) -> Self {
2633        IndexConfig::Fts(config)
2634    }
2635}
2636
2637impl From<StringInvertedIndexConfig> for IndexConfig {
2638    fn from(config: StringInvertedIndexConfig) -> Self {
2639        IndexConfig::StringInverted(config)
2640    }
2641}
2642
2643impl From<IntInvertedIndexConfig> for IndexConfig {
2644    fn from(config: IntInvertedIndexConfig) -> Self {
2645        IndexConfig::IntInverted(config)
2646    }
2647}
2648
2649impl From<FloatInvertedIndexConfig> for IndexConfig {
2650    fn from(config: FloatInvertedIndexConfig) -> Self {
2651        IndexConfig::FloatInverted(config)
2652    }
2653}
2654
2655impl From<BoolInvertedIndexConfig> for IndexConfig {
2656    fn from(config: BoolInvertedIndexConfig) -> Self {
2657        IndexConfig::BoolInverted(config)
2658    }
2659}
2660
2661#[cfg(test)]
2662mod tests {
2663    use super::*;
2664    use crate::hnsw_configuration::Space;
2665    use crate::metadata::SparseVector;
2666    use crate::{
2667        EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
2668    };
2669    use serde_json::json;
2670
2671    #[test]
2672    fn test_reconcile_with_defaults_none_user_schema() {
2673        // Test that when no user schema is provided, we get the default schema
2674        let result = Schema::reconcile_with_defaults(None, KnnIndex::Spann).unwrap();
2675        let expected = Schema::new_default(KnnIndex::Spann);
2676        assert_eq!(result, expected);
2677    }
2678
2679    #[test]
2680    fn test_reconcile_with_defaults_empty_user_schema() {
2681        // Test merging with an empty user schema
2682        let user_schema = Schema {
2683            defaults: ValueTypes::default(),
2684            keys: HashMap::new(),
2685        };
2686
2687        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
2688        let expected = Schema::new_default(KnnIndex::Spann);
2689        assert_eq!(result, expected);
2690    }
2691
2692    #[test]
2693    fn test_reconcile_with_defaults_user_overrides_string_enabled() {
2694        // Test that user can override string inverted index enabled state
2695        let mut user_schema = Schema {
2696            defaults: ValueTypes::default(),
2697            keys: HashMap::new(),
2698        };
2699
2700        user_schema.defaults.string = Some(StringValueType {
2701            string_inverted_index: Some(StringInvertedIndexType {
2702                enabled: false, // Override default (true) to false
2703                config: StringInvertedIndexConfig {},
2704            }),
2705            fts_index: None,
2706        });
2707
2708        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
2709
2710        // Check that the user override took precedence
2711        assert!(
2712            !result
2713                .defaults
2714                .string
2715                .as_ref()
2716                .unwrap()
2717                .string_inverted_index
2718                .as_ref()
2719                .unwrap()
2720                .enabled
2721        );
2722        // Check that other defaults are still present
2723        assert!(result.defaults.float.is_some());
2724        assert!(result.defaults.int.is_some());
2725    }
2726
2727    #[test]
2728    fn test_reconcile_with_defaults_user_overrides_vector_config() {
2729        // Test field-level merging for vector configurations
2730        let mut user_schema = Schema {
2731            defaults: ValueTypes::default(),
2732            keys: HashMap::new(),
2733        };
2734
2735        user_schema.defaults.float_list = Some(FloatListValueType {
2736            vector_index: Some(VectorIndexType {
2737                enabled: true, // Enable vector index (default is false)
2738                config: VectorIndexConfig {
2739                    space: Some(Space::L2),                     // Override default space
2740                    embedding_function: None,                   // Will use default
2741                    source_key: Some("custom_key".to_string()), // Override default
2742                    hnsw: Some(HnswIndexConfig {
2743                        ef_construction: Some(500), // Override default
2744                        max_neighbors: None,        // Will use default
2745                        ef_search: None,            // Will use default
2746                        num_threads: None,
2747                        batch_size: None,
2748                        sync_threshold: None,
2749                        resize_factor: None,
2750                    }),
2751                    spann: None,
2752                },
2753            }),
2754        });
2755
2756        // Use HNSW defaults for this test so we have HNSW config to merge with
2757        let result = {
2758            let default_schema = Schema::new_default(KnnIndex::Hnsw);
2759            let merged_defaults = Schema::merge_value_types(
2760                &default_schema.defaults,
2761                &user_schema.defaults,
2762                KnnIndex::Hnsw,
2763            )
2764            .unwrap();
2765            let mut merged_keys = default_schema.keys.clone();
2766            for (key, user_value_types) in user_schema.keys {
2767                if let Some(default_value_types) = merged_keys.get(&key) {
2768                    let merged_value_types = Schema::merge_value_types(
2769                        default_value_types,
2770                        &user_value_types,
2771                        KnnIndex::Hnsw,
2772                    )
2773                    .unwrap();
2774                    merged_keys.insert(key, merged_value_types);
2775                } else {
2776                    merged_keys.insert(key, user_value_types);
2777                }
2778            }
2779            Schema {
2780                defaults: merged_defaults,
2781                keys: merged_keys,
2782            }
2783        };
2784
2785        let vector_config = &result
2786            .defaults
2787            .float_list
2788            .as_ref()
2789            .unwrap()
2790            .vector_index
2791            .as_ref()
2792            .unwrap()
2793            .config;
2794
2795        // Check user overrides took precedence
2796        assert_eq!(vector_config.space, Some(Space::L2));
2797        assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
2798        assert_eq!(
2799            vector_config.hnsw.as_ref().unwrap().ef_construction,
2800            Some(500)
2801        );
2802
2803        // Check defaults were preserved for unspecified fields
2804        assert_eq!(vector_config.embedding_function, None);
2805        // Since user provided HNSW config, the default max_neighbors should be merged in
2806        assert_eq!(
2807            vector_config.hnsw.as_ref().unwrap().max_neighbors,
2808            Some(default_m())
2809        );
2810    }
2811
2812    #[test]
2813    fn test_reconcile_with_defaults_keys() {
2814        // Test that key overrides are properly merged
2815        let mut user_schema = Schema {
2816            defaults: ValueTypes::default(),
2817            keys: HashMap::new(),
2818        };
2819
2820        // Add a custom key override
2821        let custom_key_types = ValueTypes {
2822            string: Some(StringValueType {
2823                fts_index: Some(FtsIndexType {
2824                    enabled: true,
2825                    config: FtsIndexConfig {},
2826                }),
2827                string_inverted_index: Some(StringInvertedIndexType {
2828                    enabled: false,
2829                    config: StringInvertedIndexConfig {},
2830                }),
2831            }),
2832            ..Default::default()
2833        };
2834        user_schema
2835            .keys
2836            .insert("custom_key".to_string(), custom_key_types);
2837
2838        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
2839
2840        // Check that default key overrides are preserved
2841        assert!(result.keys.contains_key(EMBEDDING_KEY));
2842        assert!(result.keys.contains_key(DOCUMENT_KEY));
2843
2844        // Check that user key override was added
2845        assert!(result.keys.contains_key("custom_key"));
2846        let custom_override = result.keys.get("custom_key").unwrap();
2847        assert!(
2848            custom_override
2849                .string
2850                .as_ref()
2851                .unwrap()
2852                .fts_index
2853                .as_ref()
2854                .unwrap()
2855                .enabled
2856        );
2857    }
2858
2859    #[test]
2860    fn test_reconcile_with_defaults_override_existing_key() {
2861        // Test overriding an existing key override (like #embedding)
2862        let mut user_schema = Schema {
2863            defaults: ValueTypes::default(),
2864            keys: HashMap::new(),
2865        };
2866
2867        // Override the #embedding key with custom settings
2868        let embedding_override = ValueTypes {
2869            float_list: Some(FloatListValueType {
2870                vector_index: Some(VectorIndexType {
2871                    enabled: false, // Override default enabled=true to false
2872                    config: VectorIndexConfig {
2873                        space: Some(Space::Ip), // Override default space
2874                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
2875                        source_key: Some("custom_embedding_key".to_string()),
2876                        hnsw: None,
2877                        spann: None,
2878                    },
2879                }),
2880            }),
2881            ..Default::default()
2882        };
2883        user_schema
2884            .keys
2885            .insert(EMBEDDING_KEY.to_string(), embedding_override);
2886
2887        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
2888
2889        let embedding_config = result.keys.get(EMBEDDING_KEY).unwrap();
2890        let vector_config = &embedding_config
2891            .float_list
2892            .as_ref()
2893            .unwrap()
2894            .vector_index
2895            .as_ref()
2896            .unwrap();
2897
2898        // Check user overrides took precedence
2899        assert!(!vector_config.enabled);
2900        assert_eq!(vector_config.config.space, Some(Space::Ip));
2901        assert_eq!(
2902            vector_config.config.source_key,
2903            Some("custom_embedding_key".to_string())
2904        );
2905    }
2906
2907    #[test]
2908    fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
2909        let collection_config = InternalCollectionConfiguration {
2910            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
2911                space: Space::Cosine,
2912                ef_construction: 128,
2913                ef_search: 96,
2914                max_neighbors: 42,
2915                num_threads: 8,
2916                resize_factor: 1.5,
2917                sync_threshold: 2_000,
2918                batch_size: 256,
2919            }),
2920            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
2921                EmbeddingFunctionNewConfiguration {
2922                    name: "custom".to_string(),
2923                    config: json!({"alpha": 1}),
2924                },
2925            )),
2926        };
2927
2928        let schema = Schema::convert_collection_config_to_schema(&collection_config).unwrap();
2929        let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
2930
2931        assert_eq!(reconstructed, collection_config);
2932    }
2933
2934    #[test]
2935    fn test_convert_schema_to_collection_config_spann_roundtrip() {
2936        let spann_config = InternalSpannConfiguration {
2937            space: Space::Cosine,
2938            search_nprobe: 11,
2939            search_rng_factor: 1.7,
2940            write_nprobe: 5,
2941            nreplica_count: 3,
2942            split_threshold: 150,
2943            merge_threshold: 80,
2944            ef_construction: 120,
2945            ef_search: 90,
2946            max_neighbors: 40,
2947            ..Default::default()
2948        };
2949
2950        let collection_config = InternalCollectionConfiguration {
2951            vector_index: VectorIndexConfiguration::Spann(spann_config.clone()),
2952            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
2953                EmbeddingFunctionNewConfiguration {
2954                    name: "custom".to_string(),
2955                    config: json!({"beta": true}),
2956                },
2957            )),
2958        };
2959
2960        let schema = Schema::convert_collection_config_to_schema(&collection_config).unwrap();
2961        let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
2962
2963        assert_eq!(reconstructed, collection_config);
2964    }
2965
2966    #[test]
2967    fn test_convert_schema_to_collection_config_rejects_mixed_index() {
2968        let mut schema = Schema::new_default(KnnIndex::Hnsw);
2969        if let Some(embedding) = schema.keys.get_mut(EMBEDDING_KEY) {
2970            if let Some(float_list) = &mut embedding.float_list {
2971                if let Some(vector_index) = &mut float_list.vector_index {
2972                    vector_index.config.spann = Some(SpannIndexConfig {
2973                        search_nprobe: Some(1),
2974                        search_rng_factor: Some(1.0),
2975                        search_rng_epsilon: Some(0.1),
2976                        nreplica_count: Some(1),
2977                        write_rng_factor: Some(1.0),
2978                        write_rng_epsilon: Some(0.1),
2979                        split_threshold: Some(100),
2980                        num_samples_kmeans: Some(10),
2981                        initial_lambda: Some(0.5),
2982                        reassign_neighbor_count: Some(10),
2983                        merge_threshold: Some(50),
2984                        num_centers_to_merge_to: Some(3),
2985                        write_nprobe: Some(1),
2986                        ef_construction: Some(50),
2987                        ef_search: Some(40),
2988                        max_neighbors: Some(20),
2989                    });
2990                }
2991            }
2992        }
2993
2994        let result = InternalCollectionConfiguration::try_from(&schema);
2995        assert!(result.is_err());
2996    }
2997
2998    #[test]
2999    fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
3000        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3001        let before = schema.clone();
3002        let modified = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);
3003        assert!(!modified);
3004        assert_eq!(schema, before);
3005    }
3006
3007    #[test]
3008    fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
3009        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3010        assert!(!schema.keys.contains_key("custom_field"));
3011
3012        let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3013
3014        assert!(modified);
3015        let entry = schema
3016            .keys
3017            .get("custom_field")
3018            .expect("expected new key override to be inserted");
3019        assert_eq!(entry.boolean, schema.defaults.boolean);
3020        assert!(entry.string.is_none());
3021        assert!(entry.int.is_none());
3022        assert!(entry.float.is_none());
3023        assert!(entry.float_list.is_none());
3024        assert!(entry.sparse_vector.is_none());
3025    }
3026
3027    #[test]
3028    fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
3029        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3030        let initial_len = schema.keys.len();
3031        schema.keys.insert(
3032            "custom_field".to_string(),
3033            ValueTypes {
3034                string: schema.defaults.string.clone(),
3035                ..Default::default()
3036            },
3037        );
3038
3039        let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3040
3041        assert!(modified);
3042        assert_eq!(schema.keys.len(), initial_len + 1);
3043        let entry = schema
3044            .keys
3045            .get("custom_field")
3046            .expect("expected key override to exist after ensure call");
3047        assert!(entry.string.is_some());
3048        assert_eq!(entry.boolean, schema.defaults.boolean);
3049    }
3050
3051    #[test]
3052    fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
3053        let schema = Schema::new_default(KnnIndex::Spann);
3054        let result = schema.is_knn_key_indexing_enabled(
3055            "custom_sparse",
3056            &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32])),
3057        );
3058
3059        let err = result.expect_err("expected indexing disabled error");
3060        match err {
3061            FilterValidationError::IndexingDisabled { key, value_type } => {
3062                assert_eq!(key, "custom_sparse");
3063                assert_eq!(value_type, crate::metadata::MetadataValueType::SparseVector);
3064            }
3065            other => panic!("unexpected error variant: {other:?}"),
3066        }
3067    }
3068
3069    #[test]
3070    fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
3071        let mut schema = Schema::new_default(KnnIndex::Spann);
3072        schema.keys.insert(
3073            "sparse_enabled".to_string(),
3074            ValueTypes {
3075                sparse_vector: Some(SparseVectorValueType {
3076                    sparse_vector_index: Some(SparseVectorIndexType {
3077                        enabled: true,
3078                        config: SparseVectorIndexConfig {
3079                            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3080                            source_key: None,
3081                            bm25: None,
3082                        },
3083                    }),
3084                }),
3085                ..Default::default()
3086            },
3087        );
3088
3089        let result = schema.is_knn_key_indexing_enabled(
3090            "sparse_enabled",
3091            &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32])),
3092        );
3093
3094        assert!(result.is_ok());
3095    }
3096
3097    #[test]
3098    fn test_is_knn_key_indexing_enabled_dense_succeeds() {
3099        let schema = Schema::new_default(KnnIndex::Spann);
3100        let result = schema.is_knn_key_indexing_enabled(
3101            EMBEDDING_KEY,
3102            &QueryVector::Dense(vec![0.1_f32, 0.2_f32]),
3103        );
3104
3105        assert!(result.is_ok());
3106    }
3107
3108    #[test]
3109    fn test_merge_hnsw_configs_field_level() {
3110        // Test field-level merging for HNSW configurations
3111        let default_hnsw = HnswIndexConfig {
3112            ef_construction: Some(200),
3113            max_neighbors: Some(16),
3114            ef_search: Some(10),
3115            num_threads: Some(4),
3116            batch_size: Some(100),
3117            sync_threshold: Some(1000),
3118            resize_factor: Some(1.2),
3119        };
3120
3121        let user_hnsw = HnswIndexConfig {
3122            ef_construction: Some(300), // Override
3123            max_neighbors: None,        // Will use default
3124            ef_search: Some(20),        // Override
3125            num_threads: None,          // Will use default
3126            batch_size: None,           // Will use default
3127            sync_threshold: Some(2000), // Override
3128            resize_factor: None,        // Will use default
3129        };
3130
3131        let result = Schema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();
3132
3133        // Check user overrides
3134        assert_eq!(result.ef_construction, Some(300));
3135        assert_eq!(result.ef_search, Some(20));
3136        assert_eq!(result.sync_threshold, Some(2000));
3137
3138        // Check defaults preserved
3139        assert_eq!(result.max_neighbors, Some(16));
3140        assert_eq!(result.num_threads, Some(4));
3141        assert_eq!(result.batch_size, Some(100));
3142        assert_eq!(result.resize_factor, Some(1.2));
3143    }
3144
3145    #[test]
3146    fn test_merge_spann_configs_field_level() {
3147        // Test field-level merging for SPANN configurations
3148        let default_spann = SpannIndexConfig {
3149            search_nprobe: Some(10),
3150            search_rng_factor: Some(1.0),  // Must be exactly 1.0
3151            search_rng_epsilon: Some(7.0), // Must be 5.0-10.0
3152            nreplica_count: Some(3),
3153            write_rng_factor: Some(1.0),  // Must be exactly 1.0
3154            write_rng_epsilon: Some(6.0), // Must be 5.0-10.0
3155            split_threshold: Some(100),   // Must be 50-200
3156            num_samples_kmeans: Some(100),
3157            initial_lambda: Some(100.0), // Must be exactly 100.0
3158            reassign_neighbor_count: Some(50),
3159            merge_threshold: Some(50),        // Must be 25-100
3160            num_centers_to_merge_to: Some(4), // Max is 8
3161            write_nprobe: Some(5),
3162            ef_construction: Some(100),
3163            ef_search: Some(10),
3164            max_neighbors: Some(16),
3165        };
3166
3167        let user_spann = SpannIndexConfig {
3168            search_nprobe: Some(20),       // Override
3169            search_rng_factor: None,       // Will use default
3170            search_rng_epsilon: Some(8.0), // Override (valid: 5.0-10.0)
3171            nreplica_count: None,          // Will use default
3172            write_rng_factor: None,
3173            write_rng_epsilon: None,
3174            split_threshold: Some(150), // Override (valid: 50-200)
3175            num_samples_kmeans: None,
3176            initial_lambda: None,
3177            reassign_neighbor_count: None,
3178            merge_threshold: None,
3179            num_centers_to_merge_to: None,
3180            write_nprobe: None,
3181            ef_construction: None,
3182            ef_search: None,
3183            max_neighbors: None,
3184        };
3185
3186        let result = Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann)).unwrap();
3187
3188        // Check user overrides
3189        assert_eq!(result.search_nprobe, Some(20));
3190        assert_eq!(result.search_rng_epsilon, Some(8.0));
3191        assert_eq!(result.split_threshold, Some(150));
3192
3193        // Check defaults preserved
3194        assert_eq!(result.search_rng_factor, Some(1.0));
3195        assert_eq!(result.nreplica_count, Some(3));
3196        assert_eq!(result.initial_lambda, Some(100.0));
3197    }
3198
3199    #[test]
3200    fn test_spann_index_config_into_internal_configuration() {
3201        let config = SpannIndexConfig {
3202            search_nprobe: Some(33),
3203            search_rng_factor: Some(1.2),
3204            search_rng_epsilon: None,
3205            nreplica_count: None,
3206            write_rng_factor: Some(1.5),
3207            write_rng_epsilon: None,
3208            split_threshold: Some(75),
3209            num_samples_kmeans: None,
3210            initial_lambda: Some(0.9),
3211            reassign_neighbor_count: Some(40),
3212            merge_threshold: None,
3213            num_centers_to_merge_to: Some(4),
3214            write_nprobe: Some(60),
3215            ef_construction: Some(180),
3216            ef_search: Some(170),
3217            max_neighbors: Some(32),
3218        };
3219
3220        let with_space: InternalSpannConfiguration = (Some(&Space::Cosine), &config).into();
3221        assert_eq!(with_space.space, Space::Cosine);
3222        assert_eq!(with_space.search_nprobe, 33);
3223        assert_eq!(with_space.search_rng_factor, 1.2);
3224        assert_eq!(with_space.search_rng_epsilon, default_search_rng_epsilon());
3225        assert_eq!(with_space.write_rng_factor, 1.5);
3226        assert_eq!(with_space.write_nprobe, 60);
3227        assert_eq!(with_space.ef_construction, 180);
3228        assert_eq!(with_space.ef_search, 170);
3229        assert_eq!(with_space.max_neighbors, 32);
3230        assert_eq!(with_space.merge_threshold, default_merge_threshold());
3231
3232        let default_space_config: InternalSpannConfiguration = (None, &config).into();
3233        assert_eq!(default_space_config.space, default_space());
3234    }
3235
3236    #[test]
3237    fn test_merge_string_type_combinations() {
3238        // Test all combinations of default and user StringValueType
3239
3240        // Both Some - should merge
3241        let default = StringValueType {
3242            string_inverted_index: Some(StringInvertedIndexType {
3243                enabled: true,
3244                config: StringInvertedIndexConfig {},
3245            }),
3246            fts_index: Some(FtsIndexType {
3247                enabled: false,
3248                config: FtsIndexConfig {},
3249            }),
3250        };
3251
3252        let user = StringValueType {
3253            string_inverted_index: Some(StringInvertedIndexType {
3254                enabled: false, // Override
3255                config: StringInvertedIndexConfig {},
3256            }),
3257            fts_index: None, // Will use default
3258        };
3259
3260        let result = Schema::merge_string_type(Some(&default), Some(&user))
3261            .unwrap()
3262            .unwrap();
3263        assert!(!result.string_inverted_index.as_ref().unwrap().enabled); // User override
3264        assert!(!result.fts_index.as_ref().unwrap().enabled); // Default preserved
3265
3266        // Default Some, User None - should return default
3267        let result = Schema::merge_string_type(Some(&default), None)
3268            .unwrap()
3269            .unwrap();
3270        assert!(result.string_inverted_index.as_ref().unwrap().enabled);
3271
3272        // Default None, User Some - should return user
3273        let result = Schema::merge_string_type(None, Some(&user))
3274            .unwrap()
3275            .unwrap();
3276        assert!(!result.string_inverted_index.as_ref().unwrap().enabled);
3277
3278        // Both None - should return None
3279        let result = Schema::merge_string_type(None, None).unwrap();
3280        assert!(result.is_none());
3281    }
3282
3283    #[test]
3284    fn test_merge_vector_index_config_comprehensive() {
3285        // Test comprehensive vector index config merging
3286        let default_config = VectorIndexConfig {
3287            space: Some(Space::Cosine),
3288            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3289            source_key: Some("default_key".to_string()),
3290            hnsw: Some(HnswIndexConfig {
3291                ef_construction: Some(200),
3292                max_neighbors: Some(16),
3293                ef_search: Some(10),
3294                num_threads: Some(4),
3295                batch_size: Some(100),
3296                sync_threshold: Some(1000),
3297                resize_factor: Some(1.2),
3298            }),
3299            spann: None,
3300        };
3301
3302        let user_config = VectorIndexConfig {
3303            space: Some(Space::L2),                   // Override
3304            embedding_function: None,                 // Will use default
3305            source_key: Some("user_key".to_string()), // Override
3306            hnsw: Some(HnswIndexConfig {
3307                ef_construction: Some(300), // Override
3308                max_neighbors: None,        // Will use default
3309                ef_search: None,            // Will use default
3310                num_threads: None,
3311                batch_size: None,
3312                sync_threshold: None,
3313                resize_factor: None,
3314            }),
3315            spann: Some(SpannIndexConfig {
3316                search_nprobe: Some(15),
3317                search_rng_factor: None,
3318                search_rng_epsilon: None,
3319                nreplica_count: None,
3320                write_rng_factor: None,
3321                write_rng_epsilon: None,
3322                split_threshold: None,
3323                num_samples_kmeans: None,
3324                initial_lambda: None,
3325                reassign_neighbor_count: None,
3326                merge_threshold: None,
3327                num_centers_to_merge_to: None,
3328                write_nprobe: None,
3329                ef_construction: None,
3330                ef_search: None,
3331                max_neighbors: None,
3332            }), // Add SPANN config
3333        };
3334
3335        let result =
3336            Schema::merge_vector_index_config(&default_config, &user_config, KnnIndex::Hnsw);
3337
3338        // Check field-level merging
3339        assert_eq!(result.space, Some(Space::L2)); // User override
3340        assert_eq!(
3341            result.embedding_function,
3342            Some(EmbeddingFunctionConfiguration::Legacy)
3343        ); // Default preserved
3344        assert_eq!(result.source_key, Some("user_key".to_string())); // User override
3345
3346        // Check HNSW merging
3347        assert_eq!(result.hnsw.as_ref().unwrap().ef_construction, Some(300)); // User override
3348        assert_eq!(result.hnsw.as_ref().unwrap().max_neighbors, Some(16)); // Default preserved
3349
3350        // Check SPANN is not present, since merging in the context of HNSW
3351        assert!(result.spann.is_none());
3352    }
3353
3354    #[test]
3355    fn test_merge_sparse_vector_index_config() {
3356        // Test sparse vector index config merging
3357        let default_config = SparseVectorIndexConfig {
3358            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3359            source_key: Some("default_sparse_key".to_string()),
3360            bm25: None,
3361        };
3362
3363        let user_config = SparseVectorIndexConfig {
3364            embedding_function: None,                        // Will use default
3365            source_key: Some("user_sparse_key".to_string()), // Override
3366            bm25: None,
3367        };
3368
3369        let result = Schema::merge_sparse_vector_index_config(&default_config, &user_config);
3370
3371        // Check user override
3372        assert_eq!(result.source_key, Some("user_sparse_key".to_string()));
3373        // Check default preserved
3374        assert_eq!(
3375            result.embedding_function,
3376            Some(EmbeddingFunctionConfiguration::Legacy)
3377        );
3378    }
3379
3380    #[test]
3381    fn test_complex_nested_merging_scenario() {
3382        // Test a complex scenario with multiple levels of merging
3383        let mut user_schema = Schema {
3384            defaults: ValueTypes::default(),
3385            keys: HashMap::new(),
3386        };
3387
3388        // Set up complex user defaults
3389        user_schema.defaults.string = Some(StringValueType {
3390            string_inverted_index: Some(StringInvertedIndexType {
3391                enabled: false,
3392                config: StringInvertedIndexConfig {},
3393            }),
3394            fts_index: Some(FtsIndexType {
3395                enabled: true,
3396                config: FtsIndexConfig {},
3397            }),
3398        });
3399
3400        user_schema.defaults.float_list = Some(FloatListValueType {
3401            vector_index: Some(VectorIndexType {
3402                enabled: true,
3403                config: VectorIndexConfig {
3404                    space: Some(Space::Ip),
3405                    embedding_function: None, // Will use default
3406                    source_key: Some("custom_vector_key".to_string()),
3407                    hnsw: Some(HnswIndexConfig {
3408                        ef_construction: Some(400),
3409                        max_neighbors: Some(32),
3410                        ef_search: None, // Will use default
3411                        num_threads: None,
3412                        batch_size: None,
3413                        sync_threshold: None,
3414                        resize_factor: None,
3415                    }),
3416                    spann: None,
3417                },
3418            }),
3419        });
3420
3421        // Set up key overrides
3422        let custom_key_override = ValueTypes {
3423            string: Some(StringValueType {
3424                fts_index: Some(FtsIndexType {
3425                    enabled: true,
3426                    config: FtsIndexConfig {},
3427                }),
3428                string_inverted_index: None,
3429            }),
3430            ..Default::default()
3431        };
3432        user_schema
3433            .keys
3434            .insert("custom_field".to_string(), custom_key_override);
3435
3436        // Use HNSW defaults for this test so we have HNSW config to merge with
3437        let result = {
3438            let default_schema = Schema::new_default(KnnIndex::Hnsw);
3439            let merged_defaults = Schema::merge_value_types(
3440                &default_schema.defaults,
3441                &user_schema.defaults,
3442                KnnIndex::Hnsw,
3443            )
3444            .unwrap();
3445            let mut merged_keys = default_schema.keys.clone();
3446            for (key, user_value_types) in user_schema.keys {
3447                if let Some(default_value_types) = merged_keys.get(&key) {
3448                    let merged_value_types = Schema::merge_value_types(
3449                        default_value_types,
3450                        &user_value_types,
3451                        KnnIndex::Hnsw,
3452                    )
3453                    .unwrap();
3454                    merged_keys.insert(key, merged_value_types);
3455                } else {
3456                    merged_keys.insert(key, user_value_types);
3457                }
3458            }
3459            Schema {
3460                defaults: merged_defaults,
3461                keys: merged_keys,
3462            }
3463        };
3464
3465        // Verify complex merging worked correctly
3466
3467        // Check defaults merging
3468        assert!(
3469            !result
3470                .defaults
3471                .string
3472                .as_ref()
3473                .unwrap()
3474                .string_inverted_index
3475                .as_ref()
3476                .unwrap()
3477                .enabled
3478        );
3479        assert!(
3480            result
3481                .defaults
3482                .string
3483                .as_ref()
3484                .unwrap()
3485                .fts_index
3486                .as_ref()
3487                .unwrap()
3488                .enabled
3489        );
3490
3491        let vector_config = &result
3492            .defaults
3493            .float_list
3494            .as_ref()
3495            .unwrap()
3496            .vector_index
3497            .as_ref()
3498            .unwrap()
3499            .config;
3500        assert_eq!(vector_config.space, Some(Space::Ip));
3501        assert_eq!(vector_config.embedding_function, None); // Default preserved
3502        assert_eq!(
3503            vector_config.source_key,
3504            Some("custom_vector_key".to_string())
3505        );
3506        assert_eq!(
3507            vector_config.hnsw.as_ref().unwrap().ef_construction,
3508            Some(400)
3509        );
3510        assert_eq!(vector_config.hnsw.as_ref().unwrap().max_neighbors, Some(32));
3511        assert_eq!(
3512            vector_config.hnsw.as_ref().unwrap().ef_search,
3513            Some(default_search_ef())
3514        ); // Default preserved
3515
3516        // Check key overrides
3517        assert!(result.keys.contains_key(EMBEDDING_KEY)); // Default preserved
3518        assert!(result.keys.contains_key(DOCUMENT_KEY)); // Default preserved
3519        assert!(result.keys.contains_key("custom_field")); // User added
3520
3521        let custom_override = result.keys.get("custom_field").unwrap();
3522        assert!(
3523            custom_override
3524                .string
3525                .as_ref()
3526                .unwrap()
3527                .fts_index
3528                .as_ref()
3529                .unwrap()
3530                .enabled
3531        );
3532        assert!(custom_override
3533            .string
3534            .as_ref()
3535            .unwrap()
3536            .string_inverted_index
3537            .is_none());
3538    }
3539
3540    #[test]
3541    fn test_reconcile_with_collection_config_default_config() {
3542        // Test that when collection config is default, schema is returned as-is
3543        let collection_config = InternalCollectionConfiguration::default_hnsw();
3544        let schema = Schema::convert_collection_config_to_schema(&collection_config).unwrap();
3545
3546        let result = Schema::reconcile_with_collection_config(&schema, &collection_config).unwrap();
3547        assert_eq!(result, schema);
3548    }
3549
3550    #[test]
3551    fn test_reconcile_with_collection_config_both_non_default() {
3552        // Test that when both schema and collection config are non-default, it returns an error
3553        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3554        schema.defaults.string = Some(StringValueType {
3555            fts_index: Some(FtsIndexType {
3556                enabled: true,
3557                config: FtsIndexConfig {},
3558            }),
3559            string_inverted_index: None,
3560        });
3561
3562        let mut collection_config = InternalCollectionConfiguration::default_hnsw();
3563        // Make collection config non-default by changing a parameter
3564        if let VectorIndexConfiguration::Hnsw(ref mut hnsw_config) = collection_config.vector_index
3565        {
3566            hnsw_config.ef_construction = 500; // Non-default value
3567        }
3568
3569        // Use reconcile_schema_and_config which has the early validation
3570        let result = Schema::reconcile_schema_and_config(
3571            Some(&schema),
3572            Some(&collection_config),
3573            KnnIndex::Spann,
3574        );
3575        assert!(result.is_err());
3576        assert!(matches!(
3577            result.unwrap_err(),
3578            SchemaError::ConfigAndSchemaConflict
3579        ));
3580    }
3581
3582    #[test]
3583    fn test_reconcile_with_collection_config_hnsw_override() {
3584        // Test that non-default HNSW collection config overrides default schema
3585        let schema = Schema::new_default(KnnIndex::Hnsw); // Use actual default schema
3586
3587        let collection_config = InternalCollectionConfiguration {
3588            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
3589                ef_construction: 300,
3590                max_neighbors: 32,
3591                ef_search: 50,
3592                num_threads: 8,
3593                batch_size: 200,
3594                sync_threshold: 2000,
3595                resize_factor: 1.5,
3596                space: Space::L2,
3597            }),
3598            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3599        };
3600
3601        let result = Schema::reconcile_with_collection_config(&schema, &collection_config).unwrap();
3602
3603        // Check that #embedding key override was created with the collection config settings
3604        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
3605        let vector_index = embedding_override
3606            .float_list
3607            .as_ref()
3608            .unwrap()
3609            .vector_index
3610            .as_ref()
3611            .unwrap();
3612
3613        assert!(vector_index.enabled);
3614        assert_eq!(vector_index.config.space, Some(Space::L2));
3615        assert_eq!(
3616            vector_index.config.embedding_function,
3617            Some(EmbeddingFunctionConfiguration::Legacy)
3618        );
3619        assert_eq!(
3620            vector_index.config.source_key,
3621            Some(DOCUMENT_KEY.to_string())
3622        );
3623
3624        let hnsw_config = vector_index.config.hnsw.as_ref().unwrap();
3625        assert_eq!(hnsw_config.ef_construction, Some(300));
3626        assert_eq!(hnsw_config.max_neighbors, Some(32));
3627        assert_eq!(hnsw_config.ef_search, Some(50));
3628        assert_eq!(hnsw_config.num_threads, Some(8));
3629        assert_eq!(hnsw_config.batch_size, Some(200));
3630        assert_eq!(hnsw_config.sync_threshold, Some(2000));
3631        assert_eq!(hnsw_config.resize_factor, Some(1.5));
3632
3633        assert!(vector_index.config.spann.is_none());
3634    }
3635
3636    #[test]
3637    fn test_reconcile_with_collection_config_spann_override() {
3638        // Test that non-default SPANN collection config overrides default schema
3639        let schema = Schema::new_default(KnnIndex::Spann); // Use actual default schema
3640
3641        let collection_config = InternalCollectionConfiguration {
3642            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
3643                search_nprobe: 20,
3644                search_rng_factor: 3.0,
3645                search_rng_epsilon: 0.2,
3646                nreplica_count: 5,
3647                write_rng_factor: 2.0,
3648                write_rng_epsilon: 0.1,
3649                split_threshold: 2000,
3650                num_samples_kmeans: 200,
3651                initial_lambda: 0.8,
3652                reassign_neighbor_count: 100,
3653                merge_threshold: 800,
3654                num_centers_to_merge_to: 20,
3655                write_nprobe: 10,
3656                ef_construction: 400,
3657                ef_search: 60,
3658                max_neighbors: 24,
3659                space: Space::Cosine,
3660            }),
3661            embedding_function: None,
3662        };
3663
3664        let result = Schema::reconcile_with_collection_config(&schema, &collection_config).unwrap();
3665
3666        // Check that #embedding key override was created with the collection config settings
3667        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
3668        let vector_index = embedding_override
3669            .float_list
3670            .as_ref()
3671            .unwrap()
3672            .vector_index
3673            .as_ref()
3674            .unwrap();
3675
3676        assert!(vector_index.enabled);
3677        assert_eq!(vector_index.config.space, Some(Space::Cosine));
3678        assert_eq!(vector_index.config.embedding_function, None);
3679        assert_eq!(
3680            vector_index.config.source_key,
3681            Some(DOCUMENT_KEY.to_string())
3682        );
3683
3684        assert!(vector_index.config.hnsw.is_none());
3685
3686        let spann_config = vector_index.config.spann.as_ref().unwrap();
3687        assert_eq!(spann_config.search_nprobe, Some(20));
3688        assert_eq!(spann_config.search_rng_factor, Some(3.0));
3689        assert_eq!(spann_config.search_rng_epsilon, Some(0.2));
3690        assert_eq!(spann_config.nreplica_count, Some(5));
3691        assert_eq!(spann_config.write_rng_factor, Some(2.0));
3692        assert_eq!(spann_config.write_rng_epsilon, Some(0.1));
3693        assert_eq!(spann_config.split_threshold, Some(2000));
3694        assert_eq!(spann_config.num_samples_kmeans, Some(200));
3695        assert_eq!(spann_config.initial_lambda, Some(0.8));
3696        assert_eq!(spann_config.reassign_neighbor_count, Some(100));
3697        assert_eq!(spann_config.merge_threshold, Some(800));
3698        assert_eq!(spann_config.num_centers_to_merge_to, Some(20));
3699        assert_eq!(spann_config.write_nprobe, Some(10));
3700        assert_eq!(spann_config.ef_construction, Some(400));
3701        assert_eq!(spann_config.ef_search, Some(60));
3702        assert_eq!(spann_config.max_neighbors, Some(24));
3703    }
3704
3705    #[test]
3706    fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
3707        // Test that collection config updates BOTH defaults.float_list.vector_index
3708        // AND keys["embedding"].float_list.vector_index
3709        let schema = Schema::new_default(KnnIndex::Hnsw);
3710
3711        let collection_config = InternalCollectionConfiguration {
3712            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
3713                ef_construction: 300,
3714                max_neighbors: 32,
3715                ef_search: 50,
3716                num_threads: 8,
3717                batch_size: 200,
3718                sync_threshold: 2000,
3719                resize_factor: 1.5,
3720                space: Space::L2,
3721            }),
3722            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3723        };
3724
3725        let result = Schema::reconcile_with_collection_config(&schema, &collection_config).unwrap();
3726
3727        // Check that defaults.float_list.vector_index was updated
3728        let defaults_vector_index = result
3729            .defaults
3730            .float_list
3731            .as_ref()
3732            .unwrap()
3733            .vector_index
3734            .as_ref()
3735            .unwrap();
3736
3737        // Should be disabled in defaults (template for new keys)
3738        assert!(!defaults_vector_index.enabled);
3739        // But config should be updated
3740        assert_eq!(defaults_vector_index.config.space, Some(Space::L2));
3741        assert_eq!(
3742            defaults_vector_index.config.embedding_function,
3743            Some(EmbeddingFunctionConfiguration::Legacy)
3744        );
3745        assert_eq!(
3746            defaults_vector_index.config.source_key,
3747            Some(DOCUMENT_KEY.to_string())
3748        );
3749        let defaults_hnsw = defaults_vector_index.config.hnsw.as_ref().unwrap();
3750        assert_eq!(defaults_hnsw.ef_construction, Some(300));
3751        assert_eq!(defaults_hnsw.max_neighbors, Some(32));
3752
3753        // Check that #embedding key override was also updated
3754        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
3755        let embedding_vector_index = embedding_override
3756            .float_list
3757            .as_ref()
3758            .unwrap()
3759            .vector_index
3760            .as_ref()
3761            .unwrap();
3762
3763        // Should be enabled on #embedding
3764        assert!(embedding_vector_index.enabled);
3765        // Config should match defaults
3766        assert_eq!(embedding_vector_index.config.space, Some(Space::L2));
3767        assert_eq!(
3768            embedding_vector_index.config.embedding_function,
3769            Some(EmbeddingFunctionConfiguration::Legacy)
3770        );
3771        assert_eq!(
3772            embedding_vector_index.config.source_key,
3773            Some(DOCUMENT_KEY.to_string())
3774        );
3775        let embedding_hnsw = embedding_vector_index.config.hnsw.as_ref().unwrap();
3776        assert_eq!(embedding_hnsw.ef_construction, Some(300));
3777        assert_eq!(embedding_hnsw.max_neighbors, Some(32));
3778    }
3779
3780    #[test]
3781    fn test_is_schema_default() {
3782        // Test that actual default schemas are correctly identified
3783        let default_hnsw_schema = Schema::new_default(KnnIndex::Hnsw);
3784        assert!(default_hnsw_schema.is_default());
3785
3786        let default_spann_schema = Schema::new_default(KnnIndex::Spann);
3787        assert!(default_spann_schema.is_default());
3788
3789        // Test that a modified default schema is not considered default
3790        let mut modified_schema = Schema::new_default(KnnIndex::Hnsw);
3791        // Make a clear modification - change the string inverted index enabled state
3792        if let Some(ref mut string_type) = modified_schema.defaults.string {
3793            if let Some(ref mut string_inverted) = string_type.string_inverted_index {
3794                string_inverted.enabled = false; // Default is true, so this should make it non-default
3795            }
3796        }
3797        assert!(!modified_schema.is_default());
3798
3799        // Test that schema with additional key overrides is not default
3800        let mut schema_with_extra_overrides = Schema::new_default(KnnIndex::Hnsw);
3801        schema_with_extra_overrides
3802            .keys
3803            .insert("custom_key".to_string(), ValueTypes::default());
3804        assert!(!schema_with_extra_overrides.is_default());
3805    }
3806
3807    #[test]
3808    fn test_add_merges_keys_by_value_type() {
3809        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
3810        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
3811
3812        let string_override = ValueTypes {
3813            string: Some(StringValueType {
3814                string_inverted_index: Some(StringInvertedIndexType {
3815                    enabled: true,
3816                    config: StringInvertedIndexConfig {},
3817                }),
3818                fts_index: None,
3819            }),
3820            ..Default::default()
3821        };
3822        schema_a
3823            .keys
3824            .insert("custom_field".to_string(), string_override);
3825
3826        let float_override = ValueTypes {
3827            float: Some(FloatValueType {
3828                float_inverted_index: Some(FloatInvertedIndexType {
3829                    enabled: true,
3830                    config: FloatInvertedIndexConfig {},
3831                }),
3832            }),
3833            ..Default::default()
3834        };
3835        schema_b
3836            .keys
3837            .insert("custom_field".to_string(), float_override);
3838
3839        let merged = schema_a.merge(&schema_b).unwrap();
3840        let merged_override = merged.keys.get("custom_field").unwrap();
3841
3842        assert!(merged_override.string.is_some());
3843        assert!(merged_override.float.is_some());
3844        assert!(
3845            merged_override
3846                .string
3847                .as_ref()
3848                .unwrap()
3849                .string_inverted_index
3850                .as_ref()
3851                .unwrap()
3852                .enabled
3853        );
3854        assert!(
3855            merged_override
3856                .float
3857                .as_ref()
3858                .unwrap()
3859                .float_inverted_index
3860                .as_ref()
3861                .unwrap()
3862                .enabled
3863        );
3864    }
3865
3866    #[test]
3867    fn test_add_rejects_different_defaults() {
3868        let schema_a = Schema::new_default(KnnIndex::Hnsw);
3869        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
3870
3871        if let Some(string_type) = schema_b.defaults.string.as_mut() {
3872            if let Some(string_index) = string_type.string_inverted_index.as_mut() {
3873                string_index.enabled = false;
3874            }
3875        }
3876
3877        let err = schema_a.merge(&schema_b).unwrap_err();
3878        assert!(matches!(err, SchemaError::DefaultsMismatch));
3879    }
3880
3881    #[test]
3882    fn test_add_detects_conflicting_value_type_configuration() {
3883        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
3884        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
3885
3886        let string_override_enabled = ValueTypes {
3887            string: Some(StringValueType {
3888                string_inverted_index: Some(StringInvertedIndexType {
3889                    enabled: true,
3890                    config: StringInvertedIndexConfig {},
3891                }),
3892                fts_index: None,
3893            }),
3894            ..Default::default()
3895        };
3896        schema_a
3897            .keys
3898            .insert("custom_field".to_string(), string_override_enabled);
3899
3900        let string_override_disabled = ValueTypes {
3901            string: Some(StringValueType {
3902                string_inverted_index: Some(StringInvertedIndexType {
3903                    enabled: false,
3904                    config: StringInvertedIndexConfig {},
3905                }),
3906                fts_index: None,
3907            }),
3908            ..Default::default()
3909        };
3910        schema_b
3911            .keys
3912            .insert("custom_field".to_string(), string_override_disabled);
3913
3914        let err = schema_a.merge(&schema_b).unwrap_err();
3915        assert!(matches!(err, SchemaError::ConfigurationConflict { .. }));
3916    }
3917
3918    // TODO(Sanket): Remove this test once deployed
3919    #[test]
3920    fn test_backward_compatibility_aliases() {
3921        // Test that old format with # and $ prefixes and key_overrides can be deserialized
3922        let old_format_json = r###"{
3923            "defaults": {
3924                "#string": {
3925                    "$fts_index": {
3926                        "enabled": true,
3927                        "config": {}
3928                    }
3929                },
3930                "#int": {
3931                    "$int_inverted_index": {
3932                        "enabled": true,
3933                        "config": {}
3934                    }
3935                },
3936                "#float_list": {
3937                    "$vector_index": {
3938                        "enabled": true,
3939                        "config": {
3940                            "spann": {
3941                                "search_nprobe": 10
3942                            }
3943                        }
3944                    }
3945                }
3946            },
3947            "key_overrides": {
3948                "#document": {
3949                    "#string": {
3950                        "$fts_index": {
3951                            "enabled": false,
3952                            "config": {}
3953                        }
3954                    }
3955                }
3956            }
3957        }"###;
3958
3959        let schema_from_old: Schema = serde_json::from_str(old_format_json).unwrap();
3960
3961        // Test that new format without prefixes and keys can be deserialized
3962        let new_format_json = r###"{
3963            "defaults": {
3964                "string": {
3965                    "fts_index": {
3966                        "enabled": true,
3967                        "config": {}
3968                    }
3969                },
3970                "int": {
3971                    "int_inverted_index": {
3972                        "enabled": true,
3973                        "config": {}
3974                    }
3975                },
3976                "float_list": {
3977                    "vector_index": {
3978                        "enabled": true,
3979                        "config": {
3980                            "spann": {
3981                                "search_nprobe": 10
3982                            }
3983                        }
3984                    }
3985                }
3986            },
3987            "keys": {
3988                "#document": {
3989                    "string": {
3990                        "fts_index": {
3991                            "enabled": false,
3992                            "config": {}
3993                        }
3994                    }
3995                }
3996            }
3997        }"###;
3998
3999        let schema_from_new: Schema = serde_json::from_str(new_format_json).unwrap();
4000
4001        // Both should deserialize to the same structure
4002        assert_eq!(schema_from_old, schema_from_new);
4003
4004        // Verify the deserialized content is correct
4005        assert!(schema_from_old.defaults.string.is_some());
4006        assert!(schema_from_old
4007            .defaults
4008            .string
4009            .as_ref()
4010            .unwrap()
4011            .fts_index
4012            .is_some());
4013        assert!(
4014            schema_from_old
4015                .defaults
4016                .string
4017                .as_ref()
4018                .unwrap()
4019                .fts_index
4020                .as_ref()
4021                .unwrap()
4022                .enabled
4023        );
4024
4025        assert!(schema_from_old.defaults.int.is_some());
4026        assert!(schema_from_old
4027            .defaults
4028            .int
4029            .as_ref()
4030            .unwrap()
4031            .int_inverted_index
4032            .is_some());
4033
4034        assert!(schema_from_old.defaults.float_list.is_some());
4035        assert!(schema_from_old
4036            .defaults
4037            .float_list
4038            .as_ref()
4039            .unwrap()
4040            .vector_index
4041            .is_some());
4042
4043        assert!(schema_from_old.keys.contains_key(DOCUMENT_KEY));
4044        let doc_override = schema_from_old.keys.get(DOCUMENT_KEY).unwrap();
4045        assert!(doc_override.string.is_some());
4046        assert!(
4047            !doc_override
4048                .string
4049                .as_ref()
4050                .unwrap()
4051                .fts_index
4052                .as_ref()
4053                .unwrap()
4054                .enabled
4055        );
4056
4057        // Test that serialization always outputs the new format (without prefixes)
4058        let serialized = serde_json::to_string(&schema_from_old).unwrap();
4059
4060        // Should contain new format keys
4061        assert!(serialized.contains(r#""keys":"#));
4062        assert!(serialized.contains(r#""string":"#));
4063        assert!(serialized.contains(r#""fts_index":"#));
4064        assert!(serialized.contains(r#""int_inverted_index":"#));
4065        assert!(serialized.contains(r#""vector_index":"#));
4066
4067        // Should NOT contain old format keys
4068        assert!(!serialized.contains(r#""key_overrides":"#));
4069        assert!(!serialized.contains(r###""#string":"###));
4070        assert!(!serialized.contains(r###""$fts_index":"###));
4071        assert!(!serialized.contains(r###""$int_inverted_index":"###));
4072        assert!(!serialized.contains(r###""$vector_index":"###));
4073    }
4074
4075    #[test]
4076    fn test_hnsw_index_config_validation() {
4077        use validator::Validate;
4078
4079        // Valid configuration - should pass
4080        let valid_config = HnswIndexConfig {
4081            batch_size: Some(10),
4082            sync_threshold: Some(100),
4083            ef_construction: Some(100),
4084            max_neighbors: Some(16),
4085            ..Default::default()
4086        };
4087        assert!(valid_config.validate().is_ok());
4088
4089        // Invalid: batch_size too small (min 2)
4090        let invalid_batch_size = HnswIndexConfig {
4091            batch_size: Some(1),
4092            ..Default::default()
4093        };
4094        assert!(invalid_batch_size.validate().is_err());
4095
4096        // Invalid: sync_threshold too small (min 2)
4097        let invalid_sync_threshold = HnswIndexConfig {
4098            sync_threshold: Some(1),
4099            ..Default::default()
4100        };
4101        assert!(invalid_sync_threshold.validate().is_err());
4102
4103        // Valid: boundary values (exactly 2) should pass
4104        let boundary_config = HnswIndexConfig {
4105            batch_size: Some(2),
4106            sync_threshold: Some(2),
4107            ..Default::default()
4108        };
4109        assert!(boundary_config.validate().is_ok());
4110
4111        // Valid: None values should pass validation
4112        let all_none_config = HnswIndexConfig {
4113            ..Default::default()
4114        };
4115        assert!(all_none_config.validate().is_ok());
4116
4117        // Valid: fields without validation can be any value
4118        let other_fields_config = HnswIndexConfig {
4119            ef_construction: Some(1),
4120            max_neighbors: Some(1),
4121            ef_search: Some(1),
4122            num_threads: Some(1),
4123            resize_factor: Some(0.1),
4124            ..Default::default()
4125        };
4126        assert!(other_fields_config.validate().is_ok());
4127    }
4128
4129    #[test]
4130    fn test_spann_index_config_validation() {
4131        use validator::Validate;
4132
4133        // Valid configuration - should pass
4134        let valid_config = SpannIndexConfig {
4135            write_nprobe: Some(32),
4136            nreplica_count: Some(4),
4137            split_threshold: Some(100),
4138            merge_threshold: Some(50),
4139            reassign_neighbor_count: Some(32),
4140            num_centers_to_merge_to: Some(4),
4141            ef_construction: Some(100),
4142            ef_search: Some(100),
4143            max_neighbors: Some(32),
4144            search_rng_factor: Some(1.0),
4145            write_rng_factor: Some(1.0),
4146            search_rng_epsilon: Some(7.5),
4147            write_rng_epsilon: Some(7.5),
4148            ..Default::default()
4149        };
4150        assert!(valid_config.validate().is_ok());
4151
4152        // Invalid: write_nprobe too large (max 64)
4153        let invalid_write_nprobe = SpannIndexConfig {
4154            write_nprobe: Some(200),
4155            ..Default::default()
4156        };
4157        assert!(invalid_write_nprobe.validate().is_err());
4158
4159        // Invalid: split_threshold too small (min 50)
4160        let invalid_split_threshold = SpannIndexConfig {
4161            split_threshold: Some(10),
4162            ..Default::default()
4163        };
4164        assert!(invalid_split_threshold.validate().is_err());
4165
4166        // Invalid: split_threshold too large (max 200)
4167        let invalid_split_threshold_high = SpannIndexConfig {
4168            split_threshold: Some(250),
4169            ..Default::default()
4170        };
4171        assert!(invalid_split_threshold_high.validate().is_err());
4172
4173        // Invalid: nreplica_count too large (max 8)
4174        let invalid_nreplica = SpannIndexConfig {
4175            nreplica_count: Some(10),
4176            ..Default::default()
4177        };
4178        assert!(invalid_nreplica.validate().is_err());
4179
4180        // Invalid: reassign_neighbor_count too large (max 64)
4181        let invalid_reassign = SpannIndexConfig {
4182            reassign_neighbor_count: Some(100),
4183            ..Default::default()
4184        };
4185        assert!(invalid_reassign.validate().is_err());
4186
4187        // Invalid: merge_threshold out of range (min 25, max 100)
4188        let invalid_merge_threshold_low = SpannIndexConfig {
4189            merge_threshold: Some(5),
4190            ..Default::default()
4191        };
4192        assert!(invalid_merge_threshold_low.validate().is_err());
4193
4194        let invalid_merge_threshold_high = SpannIndexConfig {
4195            merge_threshold: Some(150),
4196            ..Default::default()
4197        };
4198        assert!(invalid_merge_threshold_high.validate().is_err());
4199
4200        // Invalid: num_centers_to_merge_to too large (max 8)
4201        let invalid_num_centers = SpannIndexConfig {
4202            num_centers_to_merge_to: Some(10),
4203            ..Default::default()
4204        };
4205        assert!(invalid_num_centers.validate().is_err());
4206
4207        // Invalid: ef_construction too large (max 200)
4208        let invalid_ef_construction = SpannIndexConfig {
4209            ef_construction: Some(300),
4210            ..Default::default()
4211        };
4212        assert!(invalid_ef_construction.validate().is_err());
4213
4214        // Invalid: ef_search too large (max 200)
4215        let invalid_ef_search = SpannIndexConfig {
4216            ef_search: Some(300),
4217            ..Default::default()
4218        };
4219        assert!(invalid_ef_search.validate().is_err());
4220
4221        // Invalid: max_neighbors too large (max 64)
4222        let invalid_max_neighbors = SpannIndexConfig {
4223            max_neighbors: Some(100),
4224            ..Default::default()
4225        };
4226        assert!(invalid_max_neighbors.validate().is_err());
4227
4228        // Invalid: search_nprobe too large (max 128)
4229        let invalid_search_nprobe = SpannIndexConfig {
4230            search_nprobe: Some(200),
4231            ..Default::default()
4232        };
4233        assert!(invalid_search_nprobe.validate().is_err());
4234
4235        // Invalid: search_rng_factor not exactly 1.0 (min 1.0, max 1.0)
4236        let invalid_search_rng_factor_low = SpannIndexConfig {
4237            search_rng_factor: Some(0.9),
4238            ..Default::default()
4239        };
4240        assert!(invalid_search_rng_factor_low.validate().is_err());
4241
4242        let invalid_search_rng_factor_high = SpannIndexConfig {
4243            search_rng_factor: Some(1.1),
4244            ..Default::default()
4245        };
4246        assert!(invalid_search_rng_factor_high.validate().is_err());
4247
4248        // Valid: search_rng_factor exactly 1.0
4249        let valid_search_rng_factor = SpannIndexConfig {
4250            search_rng_factor: Some(1.0),
4251            ..Default::default()
4252        };
4253        assert!(valid_search_rng_factor.validate().is_ok());
4254
4255        // Invalid: search_rng_epsilon out of range (min 5.0, max 10.0)
4256        let invalid_search_rng_epsilon_low = SpannIndexConfig {
4257            search_rng_epsilon: Some(4.0),
4258            ..Default::default()
4259        };
4260        assert!(invalid_search_rng_epsilon_low.validate().is_err());
4261
4262        let invalid_search_rng_epsilon_high = SpannIndexConfig {
4263            search_rng_epsilon: Some(11.0),
4264            ..Default::default()
4265        };
4266        assert!(invalid_search_rng_epsilon_high.validate().is_err());
4267
4268        // Valid: search_rng_epsilon within range
4269        let valid_search_rng_epsilon = SpannIndexConfig {
4270            search_rng_epsilon: Some(7.5),
4271            ..Default::default()
4272        };
4273        assert!(valid_search_rng_epsilon.validate().is_ok());
4274
4275        // Invalid: write_rng_factor not exactly 1.0 (min 1.0, max 1.0)
4276        let invalid_write_rng_factor_low = SpannIndexConfig {
4277            write_rng_factor: Some(0.9),
4278            ..Default::default()
4279        };
4280        assert!(invalid_write_rng_factor_low.validate().is_err());
4281
4282        let invalid_write_rng_factor_high = SpannIndexConfig {
4283            write_rng_factor: Some(1.1),
4284            ..Default::default()
4285        };
4286        assert!(invalid_write_rng_factor_high.validate().is_err());
4287
4288        // Valid: write_rng_factor exactly 1.0
4289        let valid_write_rng_factor = SpannIndexConfig {
4290            write_rng_factor: Some(1.0),
4291            ..Default::default()
4292        };
4293        assert!(valid_write_rng_factor.validate().is_ok());
4294
4295        // Invalid: write_rng_epsilon out of range (min 5.0, max 10.0)
4296        let invalid_write_rng_epsilon_low = SpannIndexConfig {
4297            write_rng_epsilon: Some(4.0),
4298            ..Default::default()
4299        };
4300        assert!(invalid_write_rng_epsilon_low.validate().is_err());
4301
4302        let invalid_write_rng_epsilon_high = SpannIndexConfig {
4303            write_rng_epsilon: Some(11.0),
4304            ..Default::default()
4305        };
4306        assert!(invalid_write_rng_epsilon_high.validate().is_err());
4307
4308        // Valid: write_rng_epsilon within range
4309        let valid_write_rng_epsilon = SpannIndexConfig {
4310            write_rng_epsilon: Some(7.5),
4311            ..Default::default()
4312        };
4313        assert!(valid_write_rng_epsilon.validate().is_ok());
4314
4315        // Invalid: num_samples_kmeans too large (max 1000)
4316        let invalid_num_samples_kmeans = SpannIndexConfig {
4317            num_samples_kmeans: Some(1500),
4318            ..Default::default()
4319        };
4320        assert!(invalid_num_samples_kmeans.validate().is_err());
4321
4322        // Valid: num_samples_kmeans within range
4323        let valid_num_samples_kmeans = SpannIndexConfig {
4324            num_samples_kmeans: Some(500),
4325            ..Default::default()
4326        };
4327        assert!(valid_num_samples_kmeans.validate().is_ok());
4328
4329        // Invalid: initial_lambda not exactly 100.0 (min 100.0, max 100.0)
4330        let invalid_initial_lambda_high = SpannIndexConfig {
4331            initial_lambda: Some(150.0),
4332            ..Default::default()
4333        };
4334        assert!(invalid_initial_lambda_high.validate().is_err());
4335
4336        let invalid_initial_lambda_low = SpannIndexConfig {
4337            initial_lambda: Some(50.0),
4338            ..Default::default()
4339        };
4340        assert!(invalid_initial_lambda_low.validate().is_err());
4341
4342        // Valid: initial_lambda exactly 100.0
4343        let valid_initial_lambda = SpannIndexConfig {
4344            initial_lambda: Some(100.0),
4345            ..Default::default()
4346        };
4347        assert!(valid_initial_lambda.validate().is_ok());
4348
4349        // Valid: None values should pass validation
4350        let all_none_config = SpannIndexConfig {
4351            ..Default::default()
4352        };
4353        assert!(all_none_config.validate().is_ok());
4354    }
4355
4356    #[test]
4357    fn test_builder_pattern_crud_workflow() {
4358        // Test comprehensive CRUD workflow using the builder pattern
4359
4360        // CREATE: Build a schema with multiple indexes
4361        let schema = Schema::new_default(KnnIndex::Hnsw)
4362            .create_index(
4363                None,
4364                IndexConfig::Vector(VectorIndexConfig {
4365                    space: Some(Space::Cosine),
4366                    embedding_function: None,
4367                    source_key: None,
4368                    hnsw: Some(HnswIndexConfig {
4369                        ef_construction: Some(200),
4370                        max_neighbors: Some(32),
4371                        ef_search: Some(50),
4372                        num_threads: None,
4373                        batch_size: None,
4374                        sync_threshold: None,
4375                        resize_factor: None,
4376                    }),
4377                    spann: None,
4378                }),
4379            )
4380            .expect("vector config should succeed")
4381            .create_index(
4382                Some("category"),
4383                IndexConfig::StringInverted(StringInvertedIndexConfig {}),
4384            )
4385            .expect("string inverted on key should succeed")
4386            .create_index(
4387                Some("year"),
4388                IndexConfig::IntInverted(IntInvertedIndexConfig {}),
4389            )
4390            .expect("int inverted on key should succeed")
4391            .create_index(
4392                Some("rating"),
4393                IndexConfig::FloatInverted(FloatInvertedIndexConfig {}),
4394            )
4395            .expect("float inverted on key should succeed")
4396            .create_index(
4397                Some("is_active"),
4398                IndexConfig::BoolInverted(BoolInvertedIndexConfig {}),
4399            )
4400            .expect("bool inverted on key should succeed");
4401
4402        // READ: Verify the schema was built correctly
4403        // Check vector config
4404        assert!(schema.keys.contains_key(EMBEDDING_KEY));
4405        let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
4406        assert!(embedding.float_list.is_some());
4407        let vector_index = embedding
4408            .float_list
4409            .as_ref()
4410            .unwrap()
4411            .vector_index
4412            .as_ref()
4413            .unwrap();
4414        assert!(vector_index.enabled);
4415        assert_eq!(vector_index.config.space, Some(Space::Cosine));
4416        assert_eq!(
4417            vector_index.config.hnsw.as_ref().unwrap().ef_construction,
4418            Some(200)
4419        );
4420
4421        // Check per-key indexes
4422        assert!(schema.keys.contains_key("category"));
4423        assert!(schema.keys.contains_key("year"));
4424        assert!(schema.keys.contains_key("rating"));
4425        assert!(schema.keys.contains_key("is_active"));
4426
4427        // Verify category string inverted index
4428        let category = schema.keys.get("category").unwrap();
4429        assert!(category.string.is_some());
4430        let string_idx = category
4431            .string
4432            .as_ref()
4433            .unwrap()
4434            .string_inverted_index
4435            .as_ref()
4436            .unwrap();
4437        assert!(string_idx.enabled);
4438
4439        // Verify year int inverted index
4440        let year = schema.keys.get("year").unwrap();
4441        assert!(year.int.is_some());
4442        let int_idx = year
4443            .int
4444            .as_ref()
4445            .unwrap()
4446            .int_inverted_index
4447            .as_ref()
4448            .unwrap();
4449        assert!(int_idx.enabled);
4450
4451        // UPDATE/DELETE: Disable some indexes
4452        let schema = schema
4453            .delete_index(
4454                Some("category"),
4455                IndexConfig::StringInverted(StringInvertedIndexConfig {}),
4456            )
4457            .expect("delete string inverted should succeed")
4458            .delete_index(
4459                Some("year"),
4460                IndexConfig::IntInverted(IntInvertedIndexConfig {}),
4461            )
4462            .expect("delete int inverted should succeed");
4463
4464        // VERIFY DELETE: Check that indexes were disabled
4465        let category = schema.keys.get("category").unwrap();
4466        let string_idx = category
4467            .string
4468            .as_ref()
4469            .unwrap()
4470            .string_inverted_index
4471            .as_ref()
4472            .unwrap();
4473        assert!(!string_idx.enabled); // Should be disabled now
4474
4475        let year = schema.keys.get("year").unwrap();
4476        let int_idx = year
4477            .int
4478            .as_ref()
4479            .unwrap()
4480            .int_inverted_index
4481            .as_ref()
4482            .unwrap();
4483        assert!(!int_idx.enabled); // Should be disabled now
4484
4485        // Verify other indexes still enabled
4486        let rating = schema.keys.get("rating").unwrap();
4487        let float_idx = rating
4488            .float
4489            .as_ref()
4490            .unwrap()
4491            .float_inverted_index
4492            .as_ref()
4493            .unwrap();
4494        assert!(float_idx.enabled); // Should still be enabled
4495
4496        let is_active = schema.keys.get("is_active").unwrap();
4497        let bool_idx = is_active
4498            .boolean
4499            .as_ref()
4500            .unwrap()
4501            .bool_inverted_index
4502            .as_ref()
4503            .unwrap();
4504        assert!(bool_idx.enabled); // Should still be enabled
4505    }
4506
4507    #[test]
4508    fn test_builder_create_index_validation_errors() {
4509        // Test all validation errors for create_index() as documented in the docstring:
4510        // - Attempting to create index on special keys (#document, #embedding)
4511        // - Invalid configuration (e.g., vector index on non-embedding key)
4512        // - Conflicting with existing indexes (e.g., multiple sparse vector indexes)
4513
4514        // Error: Vector index on specific key (must be global)
4515        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
4516            Some("my_vectors"),
4517            IndexConfig::Vector(VectorIndexConfig {
4518                space: Some(Space::L2),
4519                embedding_function: None,
4520                source_key: None,
4521                hnsw: None,
4522                spann: None,
4523            }),
4524        );
4525        assert!(result.is_err());
4526        assert!(matches!(
4527            result.unwrap_err(),
4528            SchemaBuilderError::VectorIndexMustBeGlobal { key } if key == "my_vectors"
4529        ));
4530
4531        // Error: FTS index on specific key (must be global)
4532        let result = Schema::new_default(KnnIndex::Hnsw)
4533            .create_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
4534        assert!(result.is_err());
4535        assert!(matches!(
4536            result.unwrap_err(),
4537            SchemaBuilderError::FtsIndexMustBeGlobal { key } if key == "my_text"
4538        ));
4539
4540        // Error: Cannot create index on special key #document
4541        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
4542            Some(DOCUMENT_KEY),
4543            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
4544        );
4545        assert!(result.is_err());
4546        assert!(matches!(
4547            result.unwrap_err(),
4548            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
4549        ));
4550
4551        // Error: Cannot create index on special key #embedding
4552        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
4553            Some(EMBEDDING_KEY),
4554            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
4555        );
4556        assert!(result.is_err());
4557        assert!(matches!(
4558            result.unwrap_err(),
4559            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
4560        ));
4561
4562        // Error: Sparse vector without key (must specify key)
4563        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
4564            None,
4565            IndexConfig::SparseVector(SparseVectorIndexConfig {
4566                embedding_function: None,
4567                source_key: None,
4568                bm25: None,
4569            }),
4570        );
4571        assert!(result.is_err());
4572        assert!(matches!(
4573            result.unwrap_err(),
4574            SchemaBuilderError::SparseVectorRequiresKey
4575        ));
4576
4577        // Error: Multiple sparse vector indexes (only one allowed per collection)
4578        let result = Schema::new_default(KnnIndex::Hnsw)
4579            .create_index(
4580                Some("sparse1"),
4581                IndexConfig::SparseVector(SparseVectorIndexConfig {
4582                    embedding_function: None,
4583                    source_key: None,
4584                    bm25: None,
4585                }),
4586            )
4587            .expect("first sparse should succeed")
4588            .create_index(
4589                Some("sparse2"),
4590                IndexConfig::SparseVector(SparseVectorIndexConfig {
4591                    embedding_function: None,
4592                    source_key: None,
4593                    bm25: None,
4594                }),
4595            );
4596        assert!(result.is_err());
4597        assert!(matches!(
4598            result.unwrap_err(),
4599            SchemaBuilderError::MultipleSparseVectorIndexes { existing_key } if existing_key == "sparse1"
4600        ));
4601    }
4602
4603    #[test]
4604    fn test_builder_delete_index_validation_errors() {
4605        // Test all validation errors for delete_index() as documented in the docstring:
4606        // - Attempting to delete index on special keys (#document, #embedding)
4607        // - Attempting to delete vector, FTS, or sparse vector indexes (not currently supported)
4608
4609        // Error: Delete on special key #embedding
4610        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
4611            Some(EMBEDDING_KEY),
4612            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
4613        );
4614        assert!(result.is_err());
4615        assert!(matches!(
4616            result.unwrap_err(),
4617            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
4618        ));
4619
4620        // Error: Delete on special key #document
4621        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
4622            Some(DOCUMENT_KEY),
4623            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
4624        );
4625        assert!(result.is_err());
4626        assert!(matches!(
4627            result.unwrap_err(),
4628            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
4629        ));
4630
4631        // Error: Delete vector index (not currently supported)
4632        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
4633            None,
4634            IndexConfig::Vector(VectorIndexConfig {
4635                space: None,
4636                embedding_function: None,
4637                source_key: None,
4638                hnsw: None,
4639                spann: None,
4640            }),
4641        );
4642        assert!(result.is_err());
4643        assert!(matches!(
4644            result.unwrap_err(),
4645            SchemaBuilderError::VectorIndexDeletionNotSupported
4646        ));
4647
4648        // Error: Delete FTS index (not currently supported)
4649        let result = Schema::new_default(KnnIndex::Hnsw)
4650            .delete_index(None, IndexConfig::Fts(FtsIndexConfig {}));
4651        assert!(result.is_err());
4652        assert!(matches!(
4653            result.unwrap_err(),
4654            SchemaBuilderError::FtsIndexDeletionNotSupported
4655        ));
4656
4657        // Error: Delete sparse vector index (not currently supported)
4658        let result = Schema::new_default(KnnIndex::Hnsw)
4659            .create_index(
4660                Some("sparse"),
4661                IndexConfig::SparseVector(SparseVectorIndexConfig {
4662                    embedding_function: None,
4663                    source_key: None,
4664                    bm25: None,
4665                }),
4666            )
4667            .expect("create should succeed")
4668            .delete_index(
4669                Some("sparse"),
4670                IndexConfig::SparseVector(SparseVectorIndexConfig {
4671                    embedding_function: None,
4672                    source_key: None,
4673                    bm25: None,
4674                }),
4675            );
4676        assert!(result.is_err());
4677        assert!(matches!(
4678            result.unwrap_err(),
4679            SchemaBuilderError::SparseVectorIndexDeletionNotSupported
4680        ));
4681    }
4682
4683    #[test]
4684    fn test_builder_pattern_chaining() {
4685        // Test complex chaining scenario
4686        let schema = Schema::new_default(KnnIndex::Hnsw)
4687            .create_index(Some("tag1"), StringInvertedIndexConfig {}.into())
4688            .unwrap()
4689            .create_index(Some("tag2"), StringInvertedIndexConfig {}.into())
4690            .unwrap()
4691            .create_index(Some("tag3"), StringInvertedIndexConfig {}.into())
4692            .unwrap()
4693            .create_index(Some("count"), IntInvertedIndexConfig {}.into())
4694            .unwrap()
4695            .delete_index(Some("tag2"), StringInvertedIndexConfig {}.into())
4696            .unwrap()
4697            .create_index(Some("score"), FloatInvertedIndexConfig {}.into())
4698            .unwrap();
4699
4700        // Verify tag1 is enabled
4701        assert!(
4702            schema
4703                .keys
4704                .get("tag1")
4705                .unwrap()
4706                .string
4707                .as_ref()
4708                .unwrap()
4709                .string_inverted_index
4710                .as_ref()
4711                .unwrap()
4712                .enabled
4713        );
4714
4715        // Verify tag2 is disabled
4716        assert!(
4717            !schema
4718                .keys
4719                .get("tag2")
4720                .unwrap()
4721                .string
4722                .as_ref()
4723                .unwrap()
4724                .string_inverted_index
4725                .as_ref()
4726                .unwrap()
4727                .enabled
4728        );
4729
4730        // Verify tag3 is enabled
4731        assert!(
4732            schema
4733                .keys
4734                .get("tag3")
4735                .unwrap()
4736                .string
4737                .as_ref()
4738                .unwrap()
4739                .string_inverted_index
4740                .as_ref()
4741                .unwrap()
4742                .enabled
4743        );
4744
4745        // Verify count is enabled
4746        assert!(
4747            schema
4748                .keys
4749                .get("count")
4750                .unwrap()
4751                .int
4752                .as_ref()
4753                .unwrap()
4754                .int_inverted_index
4755                .as_ref()
4756                .unwrap()
4757                .enabled
4758        );
4759
4760        // Verify score is enabled
4761        assert!(
4762            schema
4763                .keys
4764                .get("score")
4765                .unwrap()
4766                .float
4767                .as_ref()
4768                .unwrap()
4769                .float_inverted_index
4770                .as_ref()
4771                .unwrap()
4772                .enabled
4773        );
4774    }
4775
4776    #[test]
4777    fn test_schema_default_matches_python() {
4778        // Test that Schema::default() matches Python's Schema() behavior exactly
4779        let schema = Schema::default();
4780
4781        // ============================================================================
4782        // VERIFY DEFAULTS (match Python's _initialize_defaults)
4783        // ============================================================================
4784
4785        // String defaults: FTS disabled, string inverted enabled
4786        assert!(schema.defaults.string.is_some());
4787        let string = schema.defaults.string.as_ref().unwrap();
4788        assert!(!string.fts_index.as_ref().unwrap().enabled);
4789        assert!(string.string_inverted_index.as_ref().unwrap().enabled);
4790
4791        // Float list defaults: vector index disabled
4792        assert!(schema.defaults.float_list.is_some());
4793        let float_list = schema.defaults.float_list.as_ref().unwrap();
4794        assert!(!float_list.vector_index.as_ref().unwrap().enabled);
4795        let vector_config = &float_list.vector_index.as_ref().unwrap().config;
4796        assert_eq!(vector_config.space, None); // Python leaves as None
4797        assert_eq!(vector_config.hnsw, None); // Python doesn't specify
4798        assert_eq!(vector_config.spann, None); // Python doesn't specify
4799        assert_eq!(vector_config.source_key, None);
4800
4801        // Sparse vector defaults: disabled
4802        assert!(schema.defaults.sparse_vector.is_some());
4803        let sparse = schema.defaults.sparse_vector.as_ref().unwrap();
4804        assert!(!sparse.sparse_vector_index.as_ref().unwrap().enabled);
4805
4806        // Int defaults: inverted index enabled
4807        assert!(schema.defaults.int.is_some());
4808        assert!(
4809            schema
4810                .defaults
4811                .int
4812                .as_ref()
4813                .unwrap()
4814                .int_inverted_index
4815                .as_ref()
4816                .unwrap()
4817                .enabled
4818        );
4819
4820        // Float defaults: inverted index enabled
4821        assert!(schema.defaults.float.is_some());
4822        assert!(
4823            schema
4824                .defaults
4825                .float
4826                .as_ref()
4827                .unwrap()
4828                .float_inverted_index
4829                .as_ref()
4830                .unwrap()
4831                .enabled
4832        );
4833
4834        // Bool defaults: inverted index enabled
4835        assert!(schema.defaults.boolean.is_some());
4836        assert!(
4837            schema
4838                .defaults
4839                .boolean
4840                .as_ref()
4841                .unwrap()
4842                .bool_inverted_index
4843                .as_ref()
4844                .unwrap()
4845                .enabled
4846        );
4847
4848        // ============================================================================
4849        // VERIFY SPECIAL KEYS (match Python's _initialize_keys)
4850        // ============================================================================
4851
4852        // #document: FTS enabled, string inverted disabled
4853        assert!(schema.keys.contains_key(DOCUMENT_KEY));
4854        let doc = schema.keys.get(DOCUMENT_KEY).unwrap();
4855        assert!(doc.string.is_some());
4856        assert!(
4857            doc.string
4858                .as_ref()
4859                .unwrap()
4860                .fts_index
4861                .as_ref()
4862                .unwrap()
4863                .enabled
4864        );
4865        assert!(
4866            !doc.string
4867                .as_ref()
4868                .unwrap()
4869                .string_inverted_index
4870                .as_ref()
4871                .unwrap()
4872                .enabled
4873        );
4874
4875        // #embedding: vector index enabled with source_key=#document
4876        assert!(schema.keys.contains_key(EMBEDDING_KEY));
4877        let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
4878        assert!(embedding.float_list.is_some());
4879        let vec_idx = embedding
4880            .float_list
4881            .as_ref()
4882            .unwrap()
4883            .vector_index
4884            .as_ref()
4885            .unwrap();
4886        assert!(vec_idx.enabled);
4887        assert_eq!(vec_idx.config.source_key, Some(DOCUMENT_KEY.to_string()));
4888        assert_eq!(vec_idx.config.space, None); // Python leaves as None
4889        assert_eq!(vec_idx.config.hnsw, None); // Python doesn't specify
4890        assert_eq!(vec_idx.config.spann, None); // Python doesn't specify
4891
4892        // Verify only these two special keys exist
4893        assert_eq!(schema.keys.len(), 2);
4894    }
4895
4896    #[test]
4897    fn test_schema_default_works_with_builder() {
4898        // Test that Schema::default() can be used with builder pattern
4899        let schema = Schema::default()
4900            .create_index(Some("category"), StringInvertedIndexConfig {}.into())
4901            .expect("should succeed");
4902
4903        // Verify the new index was added
4904        assert!(schema.keys.contains_key("category"));
4905        assert!(schema.keys.contains_key(DOCUMENT_KEY));
4906        assert!(schema.keys.contains_key(EMBEDDING_KEY));
4907        assert_eq!(schema.keys.len(), 3);
4908    }
4909}