chroma_types/
collection_schema.rs

1use chroma_error::{ChromaError, ErrorCodes};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::sync::{Arc, LazyLock};
6use thiserror::Error;
7use validator::Validate;
8
9use crate::chroma_proto;
10use crate::collection_configuration::{
11    EmbeddingFunctionConfiguration, InternalCollectionConfiguration,
12    UpdateVectorIndexConfiguration, VectorIndexConfiguration,
13};
14use crate::hnsw_configuration::Space;
15use crate::metadata::{MetadataComparison, MetadataValueType, Where};
16use crate::operator::QueryVector;
17use crate::{
18    default_batch_size, default_construction_ef, default_construction_ef_spann,
19    default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
20    default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
21    default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
22    default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
23    default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
24    default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor, ConversionError,
25    HnswParametersFromSegmentError, InternalHnswConfiguration, InternalSpannConfiguration,
26    InternalUpdateCollectionConfiguration, KnnIndex, Segment, CHROMA_KEY,
27};
28
29impl ChromaError for SchemaError {
30    fn code(&self) -> ErrorCodes {
31        match self {
32            // Internal errors (500)
33            // These indicate system/internal issues during schema operations
34            SchemaError::MissingIndexConfiguration { .. } => ErrorCodes::Internal,
35            SchemaError::InvalidSchema { .. } => ErrorCodes::Internal,
36            // DefaultsMismatch and ConfigurationConflict only occur during schema merge()
37            // which happens internally during compaction, not from user input
38            SchemaError::DefaultsMismatch => ErrorCodes::Internal,
39            SchemaError::ConfigurationConflict { .. } => ErrorCodes::Internal,
40
41            // User/External errors (400)
42            // These indicate user-provided invalid input
43            SchemaError::InvalidUserInput { .. } => ErrorCodes::InvalidArgument,
44            SchemaError::ConfigAndSchemaConflict => ErrorCodes::InvalidArgument,
45            SchemaError::InvalidHnswConfig(_) => ErrorCodes::InvalidArgument,
46            SchemaError::InvalidSpannConfig(_) => ErrorCodes::InvalidArgument,
47            SchemaError::Builder(e) => e.code(),
48        }
49    }
50}
51
/// Errors raised while validating, reconciling, or merging collection schemas.
///
/// The error code associated with each variant is assigned in the
/// `ChromaError` implementation for this type.
#[derive(Debug, Error)]
pub enum SchemaError {
    /// A metadata key / value-type pair has no index configuration in the schema.
    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
    MissingIndexConfiguration { key: String, value_type: String },
    /// Schema reconciliation failed; `reason` carries the details.
    #[error("Schema reconciliation failed: {reason}")]
    InvalidSchema { reason: String },
    /// Both a collection configuration and a schema were supplied at once.
    #[error("Cannot set both collection config and schema simultaneously")]
    ConfigAndSchemaConflict,
    /// Two schemas being merged disagree on their defaults.
    #[error("Cannot merge schemas with differing defaults")]
    DefaultsMismatch,
    /// Two configurations conflict; `context` names where the conflict was found.
    #[error("Conflicting configuration for {context}")]
    ConfigurationConflict { context: String },
    /// HNSW configuration failed validation.
    #[error("Invalid HNSW configuration: {0}")]
    InvalidHnswConfig(validator::ValidationErrors),
    /// SPANN configuration failed validation.
    #[error("Invalid SPANN configuration: {0}")]
    InvalidSpannConfig(validator::ValidationErrors),
    /// The schema input is invalid; `reason` carries the details.
    #[error("Invalid schema input: {reason}")]
    InvalidUserInput { reason: String },
    /// A schema-builder error, displayed transparently.
    #[error(transparent)]
    Builder(#[from] SchemaBuilderError),
}
73
/// Errors describing invalid index create/delete requests made against a schema.
///
/// The messages refer to a `create_index(key, config)` style API.
#[derive(Debug, Error)]
pub enum SchemaBuilderError {
    /// A vector index was requested on a specific key instead of globally.
    #[error("Vector index must be configured globally using create_index(None, config), not on specific key '{key}'")]
    VectorIndexMustBeGlobal { key: String },
    /// An FTS index was requested on a specific key instead of globally.
    #[error("FTS index must be configured globally using create_index(None, config), not on specific key '{key}'")]
    FtsIndexMustBeGlobal { key: String },
    /// An attempt was made to modify a system-managed special key.
    #[error("Cannot modify special key '{key}' - it is managed automatically by the system. To customize vector search, modify the global vector config instead.")]
    SpecialKeyModificationNotAllowed { key: String },
    /// A sparse vector index was requested without naming a key.
    #[error("Sparse vector index requires a specific key. Use create_index(Some(\"key_name\"), config) instead of create_index(None, config)")]
    SparseVectorRequiresKey,
    /// A second sparse vector index was requested; `existing_key` already has one.
    #[error("Only one sparse vector index allowed per collection. Key '{existing_key}' already has a sparse vector index. Remove it first or use that key.")]
    MultipleSparseVectorIndexes { existing_key: String },
    /// Deleting the vector index is not supported.
    #[error("Vector index deletion not supported. The vector index is always enabled on #embedding. To disable vector search, disable the collection instead.")]
    VectorIndexDeletionNotSupported,
    /// Deleting the FTS index is not supported.
    #[error("FTS index deletion not supported. The FTS index is always enabled on #document. To disable full-text search, use a different collection without FTS.")]
    FtsIndexDeletionNotSupported,
    /// Deleting a sparse vector index is not supported yet.
    #[error("Sparse vector index deletion not supported yet. Sparse vector indexes cannot be removed once created.")]
    SparseVectorIndexDeletionNotSupported,
}
93
/// Errors raised when a metadata filter is validated against a schema.
#[derive(Debug, Error)]
pub enum FilterValidationError {
    /// The filter references a key whose index for the given value type is disabled.
    #[error(
        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
    )]
    IndexingDisabled {
        /// Metadata key referenced by the filter.
        key: String,
        /// Value type the filter compares against.
        value_type: MetadataValueType,
    },
    /// A schema error, displayed transparently.
    #[error(transparent)]
    Schema(#[from] SchemaError),
}
106
107impl ChromaError for SchemaBuilderError {
108    fn code(&self) -> ErrorCodes {
109        ErrorCodes::InvalidArgument
110    }
111}
112
113impl ChromaError for FilterValidationError {
114    fn code(&self) -> ErrorCodes {
115        match self {
116            FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
117            FilterValidationError::Schema(_) => ErrorCodes::Internal,
118        }
119    }
120}
121
122// ============================================================================
123// SCHEMA CONSTANTS
124// ============================================================================
125// These constants must match the Python constants in chromadb/api/types.py
126
// Value type name constants
/// Serialized name of the string value type.
pub const STRING_VALUE_NAME: &str = "string";
/// Serialized name of the integer value type.
pub const INT_VALUE_NAME: &str = "int";
/// Serialized name of the boolean value type.
pub const BOOL_VALUE_NAME: &str = "bool";
/// Serialized name of the float value type.
pub const FLOAT_VALUE_NAME: &str = "float";
/// Serialized name of the float-list value type.
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
/// Serialized name of the sparse-vector value type.
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// Index type name constants
/// Serialized name of the full-text search index.
pub const FTS_INDEX_NAME: &str = "fts_index";
/// Serialized name of the vector index.
pub const VECTOR_INDEX_NAME: &str = "vector_index";
/// Serialized name of the sparse vector index.
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
/// Serialized name of the string inverted index.
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
/// Serialized name of the integer inverted index.
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
/// Serialized name of the float inverted index.
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
/// Serialized name of the boolean inverted index.
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

// Special metadata keys - must match Python constants in chromadb/api/types.py
/// Special schema key under which the document's index overrides are stored.
pub const DOCUMENT_KEY: &str = "#document";
/// Special schema key under which the embedding's index overrides are stored.
pub const EMBEDDING_KEY: &str = "#embedding";
147
148// Static regex pattern to validate CMEK for GCP
149static CMEK_GCP_RE: LazyLock<Regex> = LazyLock::new(|| {
150    Regex::new(r"^projects/.+/locations/.+/keyRings/.+/cryptoKeys/.+$")
151        .expect("The CMEK pattern for GCP should be valid")
152});
153
/// Customer-managed encryption key for storage encryption.
///
/// CMEK allows you to use your own encryption keys managed by cloud providers'
/// key management services (KMS) instead of default provider-managed keys.
///
/// Variants are serialized using their snake_case names.
#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case")]
pub enum Cmek {
    /// Google Cloud Platform KMS key resource name.
    ///
    /// Format: `projects/{project}/locations/{location}/keyRings/{keyRing}/cryptoKeys/{cryptoKey}`
    ///
    /// The name is held behind an `Arc`, so cloning a `Cmek` does not copy the string.
    Gcp(Arc<String>),
}
166
167impl Cmek {
168    /// Create a GCP CMEK from a KMS resource name
169    ///
170    /// # Example
171    /// ```
172    /// use chroma_types::Cmek;
173    /// let cmek = Cmek::gcp(
174    ///     "projects/my-project/locations/us-central1/keyRings/my-ring/cryptoKeys/my-key".to_string()
175    /// );
176    /// ```
177    pub fn gcp(resource: String) -> Self {
178        Cmek::Gcp(Arc::new(resource))
179    }
180
181    /// Validates that the CMEK resource name matches the expected pattern.
182    ///
183    /// Returns `true` if the resource name is well-formed according to the
184    /// provider's format requirements. Does not verify that the key exists
185    /// or is accessible.
186    pub fn validate_pattern(&self) -> bool {
187        match self {
188            Cmek::Gcp(resource) => CMEK_GCP_RE.is_match(resource),
189        }
190    }
191}
192
193impl TryFrom<chroma_proto::Cmek> for Cmek {
194    type Error = ConversionError;
195
196    fn try_from(proto: chroma_proto::Cmek) -> Result<Self, Self::Error> {
197        match proto.provider {
198            Some(chroma_proto::cmek::Provider::Gcp(resource)) => Ok(Cmek::gcp(resource)),
199            None => Err(ConversionError::DecodeError),
200        }
201    }
202}
203
204impl From<Cmek> for chroma_proto::Cmek {
205    fn from(cmek: Cmek) -> Self {
206        match cmek {
207            Cmek::Gcp(resource) => chroma_proto::Cmek {
208                provider: Some(chroma_proto::cmek::Provider::Gcp((*resource).clone())),
209            },
210        }
211    }
212}
213
214// ============================================================================
215// SCHEMA STRUCTURES
216// ============================================================================
217
218/// Schema representation for collection index configurations
219///
220/// This represents the server-side schema structure used for index management
221
222#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
223#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
224pub struct Schema {
225    /// Default index configurations for each value type
226    pub defaults: ValueTypes,
227    /// Key-specific index overrides
228    /// TODO(Sanket): Needed for backwards compatibility. Should remove after deploy.
229    #[serde(rename = "keys", alias = "key_overrides")]
230    pub keys: HashMap<String, ValueTypes>,
231    /// Customer-managed encryption key for collection data
232    #[serde(skip_serializing_if = "Option::is_none")]
233    #[cfg_attr(feature = "utoipa", schema(value_type = Option<Object>))]
234    pub cmek: Option<Cmek>,
235    /// ID of the attached function that created this output collection (if applicable)
236    #[serde(skip_serializing_if = "Option::is_none")]
237    pub source_attached_function_id: Option<String>,
238}
239
240impl Schema {
241    pub fn update(&mut self, configuration: &InternalUpdateCollectionConfiguration) {
242        if let Some(vector_update) = &configuration.vector_index {
243            if let Some(default_vector_index) = self.defaults_vector_index_mut() {
244                Self::apply_vector_index_update(default_vector_index, vector_update);
245            }
246            if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
247                Self::apply_vector_index_update(embedding_vector_index, vector_update);
248            }
249        }
250
251        if let Some(embedding_function) = configuration.embedding_function.as_ref() {
252            if let Some(default_vector_index) = self.defaults_vector_index_mut() {
253                default_vector_index.config.embedding_function = Some(embedding_function.clone());
254            }
255            if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
256                embedding_vector_index.config.embedding_function = Some(embedding_function.clone());
257            }
258        }
259    }
260
261    fn defaults_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
262        self.defaults
263            .float_list
264            .as_mut()
265            .and_then(|float_list| float_list.vector_index.as_mut())
266    }
267
268    fn embedding_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
269        self.keys
270            .get_mut(EMBEDDING_KEY)
271            .and_then(|value_types| value_types.float_list.as_mut())
272            .and_then(|float_list| float_list.vector_index.as_mut())
273    }
274
275    fn apply_vector_index_update(
276        vector_index: &mut VectorIndexType,
277        update: &UpdateVectorIndexConfiguration,
278    ) {
279        match update {
280            UpdateVectorIndexConfiguration::Hnsw(Some(hnsw_update)) => {
281                if let Some(hnsw_config) = vector_index.config.hnsw.as_mut() {
282                    if let Some(ef_search) = hnsw_update.ef_search {
283                        hnsw_config.ef_search = Some(ef_search);
284                    }
285                    if let Some(max_neighbors) = hnsw_update.max_neighbors {
286                        hnsw_config.max_neighbors = Some(max_neighbors);
287                    }
288                    if let Some(num_threads) = hnsw_update.num_threads {
289                        hnsw_config.num_threads = Some(num_threads);
290                    }
291                    if let Some(resize_factor) = hnsw_update.resize_factor {
292                        hnsw_config.resize_factor = Some(resize_factor);
293                    }
294                    if let Some(sync_threshold) = hnsw_update.sync_threshold {
295                        hnsw_config.sync_threshold = Some(sync_threshold);
296                    }
297                    if let Some(batch_size) = hnsw_update.batch_size {
298                        hnsw_config.batch_size = Some(batch_size);
299                    }
300                }
301            }
302            UpdateVectorIndexConfiguration::Hnsw(None) => {}
303            UpdateVectorIndexConfiguration::Spann(Some(spann_update)) => {
304                if let Some(spann_config) = vector_index.config.spann.as_mut() {
305                    if let Some(search_nprobe) = spann_update.search_nprobe {
306                        spann_config.search_nprobe = Some(search_nprobe);
307                    }
308                    if let Some(ef_search) = spann_update.ef_search {
309                        spann_config.ef_search = Some(ef_search);
310                    }
311                }
312            }
313            UpdateVectorIndexConfiguration::Spann(None) => {}
314        }
315    }
316
317    pub fn is_sparse_index_enabled(&self) -> bool {
318        let defaults_enabled = self
319            .defaults
320            .sparse_vector
321            .as_ref()
322            .and_then(|sv| sv.sparse_vector_index.as_ref())
323            .is_some_and(|idx| idx.enabled);
324        let key_enabled = self.keys.values().any(|value_types| {
325            value_types
326                .sparse_vector
327                .as_ref()
328                .and_then(|sv| sv.sparse_vector_index.as_ref())
329                .is_some_and(|idx| idx.enabled)
330        });
331        defaults_enabled || key_enabled
332    }
333}
334
335impl Default for Schema {
336    /// Create a default Schema that matches Python's behavior exactly.
337    ///
338    /// Python creates a Schema with:
339    /// - All inverted indexes enabled by default (string, int, float, bool)
340    /// - Vector and FTS indexes disabled in defaults
341    /// - Special keys configured: #document (FTS enabled) and #embedding (vector enabled)
342    /// - Vector config has space=None, hnsw=None, spann=None (deferred to backend)
343    ///
344    /// # Examples
345    /// ```
346    /// use chroma_types::Schema;
347    ///
348    /// let schema = Schema::default();
349    /// assert!(schema.keys.contains_key("#document"));
350    /// assert!(schema.keys.contains_key("#embedding"));
351    /// ```
352    fn default() -> Self {
353        // Initialize defaults - match Python's _initialize_defaults()
354        let defaults = ValueTypes {
355            string: Some(StringValueType {
356                fts_index: Some(FtsIndexType {
357                    enabled: false,
358                    config: FtsIndexConfig {},
359                }),
360                string_inverted_index: Some(StringInvertedIndexType {
361                    enabled: true,
362                    config: StringInvertedIndexConfig {},
363                }),
364            }),
365            float_list: Some(FloatListValueType {
366                vector_index: Some(VectorIndexType {
367                    enabled: false,
368                    config: VectorIndexConfig {
369                        space: None, // Python leaves as None (resolved on serialization)
370                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
371                        source_key: None,
372                        hnsw: None,  // Python doesn't specify
373                        spann: None, // Python doesn't specify
374                    },
375                }),
376            }),
377            sparse_vector: Some(SparseVectorValueType {
378                sparse_vector_index: Some(SparseVectorIndexType {
379                    enabled: false,
380                    config: SparseVectorIndexConfig {
381                        embedding_function: None,
382                        source_key: None,
383                        bm25: None,
384                    },
385                }),
386            }),
387            int: Some(IntValueType {
388                int_inverted_index: Some(IntInvertedIndexType {
389                    enabled: true,
390                    config: IntInvertedIndexConfig {},
391                }),
392            }),
393            float: Some(FloatValueType {
394                float_inverted_index: Some(FloatInvertedIndexType {
395                    enabled: true,
396                    config: FloatInvertedIndexConfig {},
397                }),
398            }),
399            boolean: Some(BoolValueType {
400                bool_inverted_index: Some(BoolInvertedIndexType {
401                    enabled: true,
402                    config: BoolInvertedIndexConfig {},
403                }),
404            }),
405        };
406
407        // Initialize key-specific overrides - match Python's _initialize_keys()
408        let mut keys = HashMap::new();
409
410        // #document: FTS enabled, string inverted disabled
411        keys.insert(
412            DOCUMENT_KEY.to_string(),
413            ValueTypes {
414                string: Some(StringValueType {
415                    fts_index: Some(FtsIndexType {
416                        enabled: true,
417                        config: FtsIndexConfig {},
418                    }),
419                    string_inverted_index: Some(StringInvertedIndexType {
420                        enabled: false,
421                        config: StringInvertedIndexConfig {},
422                    }),
423                }),
424                ..Default::default()
425            },
426        );
427
428        // #embedding: Vector index enabled with source_key=#document
429        keys.insert(
430            EMBEDDING_KEY.to_string(),
431            ValueTypes {
432                float_list: Some(FloatListValueType {
433                    vector_index: Some(VectorIndexType {
434                        enabled: true,
435                        config: VectorIndexConfig {
436                            space: None, // Python leaves as None (resolved on serialization)
437                            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
438                            source_key: Some(DOCUMENT_KEY.to_string()),
439                            hnsw: None,  // Python doesn't specify
440                            spann: None, // Python doesn't specify
441                        },
442                    }),
443                }),
444                ..Default::default()
445            },
446        );
447
448        Schema {
449            defaults,
450            keys,
451            cmek: None,
452            source_attached_function_id: None,
453        }
454    }
455}
456
457pub fn is_embedding_function_default(
458    embedding_function: &Option<EmbeddingFunctionConfiguration>,
459) -> bool {
460    match embedding_function {
461        None => true,
462        Some(embedding_function) => embedding_function.is_default(),
463    }
464}
465
466/// Check if space is default (None means default, or if present, should be default space)
467pub fn is_space_default(space: &Option<Space>) -> bool {
468    match space {
469        None => true,                     // None means default
470        Some(s) => *s == default_space(), // If present, check if it's the default space
471    }
472}
473
474/// Check if HNSW config is default
475pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
476    hnsw_config.ef_construction == Some(default_construction_ef())
477        && hnsw_config.ef_search == Some(default_search_ef())
478        && hnsw_config.max_neighbors == Some(default_m())
479        && hnsw_config.num_threads == Some(default_num_threads())
480        && hnsw_config.batch_size == Some(default_batch_size())
481        && hnsw_config.sync_threshold == Some(default_sync_threshold())
482        && hnsw_config.resize_factor == Some(default_resize_factor())
483}
484
485// ============================================================================
486// NEW STRONGLY-TYPED SCHEMA STRUCTURES
487// ============================================================================
488
/// Strongly-typed value type configurations
/// Contains optional configurations for each supported value type
///
/// Each field is omitted from serialized output when it is `None`, and each
/// also accepts a `#`-prefixed alias on input.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct ValueTypes {
    /// Index configuration for string values.
    #[serde(
        rename = "string",
        alias = "#string",
        skip_serializing_if = "Option::is_none"
    )] // STRING_VALUE_NAME
    pub string: Option<StringValueType>,

    /// Index configuration for float-list values (dense vectors).
    #[serde(
        rename = "float_list",
        alias = "#float_list",
        skip_serializing_if = "Option::is_none"
    )]
    // FLOAT_LIST_VALUE_NAME
    pub float_list: Option<FloatListValueType>,

    /// Index configuration for sparse-vector values.
    #[serde(
        rename = "sparse_vector",
        alias = "#sparse_vector",
        skip_serializing_if = "Option::is_none"
    )]
    // SPARSE_VECTOR_VALUE_NAME
    pub sparse_vector: Option<SparseVectorValueType>,

    /// Index configuration for integer values.
    #[serde(
        rename = "int",
        alias = "#int",
        skip_serializing_if = "Option::is_none"
    )] // INT_VALUE_NAME
    pub int: Option<IntValueType>,

    /// Index configuration for float values.
    #[serde(
        rename = "float",
        alias = "#float",
        skip_serializing_if = "Option::is_none"
    )] // FLOAT_VALUE_NAME
    pub float: Option<FloatValueType>,

    /// Index configuration for boolean values (serialized as `bool`).
    #[serde(
        rename = "bool",
        alias = "#bool",
        skip_serializing_if = "Option::is_none"
    )] // BOOL_VALUE_NAME
    pub boolean: Option<BoolValueType>,
}
538
/// String value type index configurations
///
/// Fields are omitted from serialized output when `None` and also accept a
/// `$`-prefixed alias on input.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringValueType {
    /// Full-text search index settings.
    #[serde(
        rename = "fts_index",
        alias = "$fts_index",
        skip_serializing_if = "Option::is_none"
    )] // FTS_INDEX_NAME
    pub fts_index: Option<FtsIndexType>,

    /// String inverted index settings.
    #[serde(
        rename = "string_inverted_index", // STRING_INVERTED_INDEX_NAME
        alias = "$string_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub string_inverted_index: Option<StringInvertedIndexType>,
}
557
/// Float list value type index configurations (for vectors)
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatListValueType {
    /// Vector index settings; omitted from serialized output when `None`.
    #[serde(
        rename = "vector_index",
        alias = "$vector_index",
        skip_serializing_if = "Option::is_none"
    )] // VECTOR_INDEX_NAME
    pub vector_index: Option<VectorIndexType>,
}
569
/// Sparse vector value type index configurations
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorValueType {
    /// Sparse vector index settings; omitted from serialized output when `None`.
    #[serde(
        rename = "sparse_vector_index", // SPARSE_VECTOR_INDEX_NAME
        alias = "$sparse_vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector_index: Option<SparseVectorIndexType>,
}
581
/// Integer value type index configurations
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntValueType {
    /// Integer inverted index settings; omitted from serialized output when `None`.
    #[serde(
        rename = "int_inverted_index",
        alias = "$int_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    // INT_INVERTED_INDEX_NAME
    pub int_inverted_index: Option<IntInvertedIndexType>,
}
594
/// Float value type index configurations
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatValueType {
    /// Float inverted index settings; omitted from serialized output when `None`.
    #[serde(
        rename = "float_inverted_index", // FLOAT_INVERTED_INDEX_NAME
        alias = "$float_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_inverted_index: Option<FloatInvertedIndexType>,
}
606
/// Boolean value type index configurations
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolValueType {
    /// Boolean inverted index settings; omitted from serialized output when `None`.
    #[serde(
        rename = "bool_inverted_index", // BOOL_INVERTED_INDEX_NAME
        alias = "$bool_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub bool_inverted_index: Option<BoolInvertedIndexType>,
}
618
619// Individual index type structs with enabled status and config
/// Full-text search index entry: an enabled flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FtsIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: FtsIndexConfig,
}
626
/// Vector index entry: an enabled flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct VectorIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: VectorIndexConfig,
}
633
/// Sparse vector index entry: an enabled flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: SparseVectorIndexConfig,
}
640
/// String inverted index entry: an enabled flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: StringInvertedIndexConfig,
}
647
/// Integer inverted index entry: an enabled flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: IntInvertedIndexConfig,
}
654
/// Float inverted index entry: an enabled flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: FloatInvertedIndexConfig,
}
661
/// Boolean inverted index entry: an enabled flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: BoolInvertedIndexConfig,
}
668
669impl Schema {
670    /// Create a new Schema with strongly-typed default configurations
671    pub fn new_default(default_knn_index: KnnIndex) -> Self {
672        // Vector index disabled on all keys except #embedding.
673        let vector_config = VectorIndexType {
674            enabled: false,
675            config: VectorIndexConfig {
676                space: Some(default_space()),
677                embedding_function: None,
678                source_key: None,
679                hnsw: match default_knn_index {
680                    KnnIndex::Hnsw => Some(HnswIndexConfig {
681                        ef_construction: Some(default_construction_ef()),
682                        max_neighbors: Some(default_m()),
683                        ef_search: Some(default_search_ef()),
684                        num_threads: Some(default_num_threads()),
685                        batch_size: Some(default_batch_size()),
686                        sync_threshold: Some(default_sync_threshold()),
687                        resize_factor: Some(default_resize_factor()),
688                    }),
689                    KnnIndex::Spann => None,
690                },
691                spann: match default_knn_index {
692                    KnnIndex::Hnsw => None,
693                    KnnIndex::Spann => Some(SpannIndexConfig {
694                        search_nprobe: Some(default_search_nprobe()),
695                        search_rng_factor: Some(default_search_rng_factor()),
696                        search_rng_epsilon: Some(default_search_rng_epsilon()),
697                        nreplica_count: Some(default_nreplica_count()),
698                        write_rng_factor: Some(default_write_rng_factor()),
699                        write_rng_epsilon: Some(default_write_rng_epsilon()),
700                        split_threshold: Some(default_split_threshold()),
701                        num_samples_kmeans: Some(default_num_samples_kmeans()),
702                        initial_lambda: Some(default_initial_lambda()),
703                        reassign_neighbor_count: Some(default_reassign_neighbor_count()),
704                        merge_threshold: Some(default_merge_threshold()),
705                        num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
706                        write_nprobe: Some(default_write_nprobe()),
707                        ef_construction: Some(default_construction_ef_spann()),
708                        ef_search: Some(default_search_ef_spann()),
709                        max_neighbors: Some(default_m_spann()),
710                    }),
711                },
712            },
713        };
714
715        // Initialize defaults struct directly instead of using Default::default() + field assignments
716        let defaults = ValueTypes {
717            string: Some(StringValueType {
718                string_inverted_index: Some(StringInvertedIndexType {
719                    enabled: true,
720                    config: StringInvertedIndexConfig {},
721                }),
722                fts_index: Some(FtsIndexType {
723                    enabled: false,
724                    config: FtsIndexConfig {},
725                }),
726            }),
727            float: Some(FloatValueType {
728                float_inverted_index: Some(FloatInvertedIndexType {
729                    enabled: true,
730                    config: FloatInvertedIndexConfig {},
731                }),
732            }),
733            int: Some(IntValueType {
734                int_inverted_index: Some(IntInvertedIndexType {
735                    enabled: true,
736                    config: IntInvertedIndexConfig {},
737                }),
738            }),
739            boolean: Some(BoolValueType {
740                bool_inverted_index: Some(BoolInvertedIndexType {
741                    enabled: true,
742                    config: BoolInvertedIndexConfig {},
743                }),
744            }),
745            float_list: Some(FloatListValueType {
746                vector_index: Some(vector_config),
747            }),
748            sparse_vector: Some(SparseVectorValueType {
749                sparse_vector_index: Some(SparseVectorIndexType {
750                    enabled: false,
751                    config: SparseVectorIndexConfig {
752                        embedding_function: Some(EmbeddingFunctionConfiguration::Unknown),
753                        source_key: None,
754                        bm25: Some(false),
755                    },
756                }),
757            }),
758        };
759
760        // Set up key overrides
761        let mut keys = HashMap::new();
762
763        // Enable vector index for #embedding.
764        let embedding_defaults = ValueTypes {
765            float_list: Some(FloatListValueType {
766                vector_index: Some(VectorIndexType {
767                    enabled: true,
768                    config: VectorIndexConfig {
769                        space: Some(default_space()),
770                        embedding_function: None,
771                        source_key: Some(DOCUMENT_KEY.to_string()),
772                        hnsw: match default_knn_index {
773                            KnnIndex::Hnsw => Some(HnswIndexConfig {
774                                ef_construction: Some(default_construction_ef()),
775                                max_neighbors: Some(default_m()),
776                                ef_search: Some(default_search_ef()),
777                                num_threads: Some(default_num_threads()),
778                                batch_size: Some(default_batch_size()),
779                                sync_threshold: Some(default_sync_threshold()),
780                                resize_factor: Some(default_resize_factor()),
781                            }),
782                            KnnIndex::Spann => None,
783                        },
784                        spann: match default_knn_index {
785                            KnnIndex::Hnsw => None,
786                            KnnIndex::Spann => Some(SpannIndexConfig {
787                                search_nprobe: Some(default_search_nprobe()),
788                                search_rng_factor: Some(default_search_rng_factor()),
789                                search_rng_epsilon: Some(default_search_rng_epsilon()),
790                                nreplica_count: Some(default_nreplica_count()),
791                                write_rng_factor: Some(default_write_rng_factor()),
792                                write_rng_epsilon: Some(default_write_rng_epsilon()),
793                                split_threshold: Some(default_split_threshold()),
794                                num_samples_kmeans: Some(default_num_samples_kmeans()),
795                                initial_lambda: Some(default_initial_lambda()),
796                                reassign_neighbor_count: Some(default_reassign_neighbor_count()),
797                                merge_threshold: Some(default_merge_threshold()),
798                                num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
799                                write_nprobe: Some(default_write_nprobe()),
800                                ef_construction: Some(default_construction_ef_spann()),
801                                ef_search: Some(default_search_ef_spann()),
802                                max_neighbors: Some(default_m_spann()),
803                            }),
804                        },
805                    },
806                }),
807            }),
808            ..Default::default()
809        };
810        keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
811
812        // Document defaults - initialize directly instead of Default::default() + field assignment
813        let document_defaults = ValueTypes {
814            string: Some(StringValueType {
815                fts_index: Some(FtsIndexType {
816                    enabled: true,
817                    config: FtsIndexConfig {},
818                }),
819                string_inverted_index: Some(StringInvertedIndexType {
820                    enabled: false,
821                    config: StringInvertedIndexConfig {},
822                }),
823            }),
824            ..Default::default()
825        };
826        keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
827
828        Schema {
829            defaults,
830            keys,
831            cmek: None,
832            source_attached_function_id: None,
833        }
834    }
835
836    pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
837        let to_internal = |vector_index: &VectorIndexType| {
838            let space = vector_index.config.space.clone();
839            vector_index
840                .config
841                .spann
842                .clone()
843                .map(|config| (space.as_ref(), &config).into())
844        };
845
846        self.keys
847            .get(EMBEDDING_KEY)
848            .and_then(|value_types| value_types.float_list.as_ref())
849            .and_then(|float_list| float_list.vector_index.as_ref())
850            .and_then(to_internal)
851            .or_else(|| {
852                self.defaults
853                    .float_list
854                    .as_ref()
855                    .and_then(|float_list| float_list.vector_index.as_ref())
856                    .and_then(to_internal)
857            })
858    }
859
860    pub fn get_internal_hnsw_config(&self) -> Option<InternalHnswConfiguration> {
861        let to_internal = |vector_index: &VectorIndexType| {
862            if vector_index.config.spann.is_some() {
863                return None;
864            }
865            let space = vector_index.config.space.as_ref();
866            let hnsw_config = vector_index.config.hnsw.as_ref();
867            Some((space, hnsw_config).into())
868        };
869
870        self.keys
871            .get(EMBEDDING_KEY)
872            .and_then(|value_types| value_types.float_list.as_ref())
873            .and_then(|float_list| float_list.vector_index.as_ref())
874            .and_then(to_internal)
875            .or_else(|| {
876                self.defaults
877                    .float_list
878                    .as_ref()
879                    .and_then(|float_list| float_list.vector_index.as_ref())
880                    .and_then(to_internal)
881            })
882    }
883
884    pub fn get_internal_hnsw_config_with_legacy_fallback(
885        &self,
886        segment: &Segment,
887    ) -> Result<Option<InternalHnswConfiguration>, HnswParametersFromSegmentError> {
888        if let Some(config) = self.get_internal_hnsw_config() {
889            let config_from_metadata =
890                InternalHnswConfiguration::from_legacy_segment_metadata(&segment.metadata)?;
891
892            if config == InternalHnswConfiguration::default() && config != config_from_metadata {
893                return Ok(Some(config_from_metadata));
894            }
895
896            return Ok(Some(config));
897        }
898
899        Ok(None)
900    }
901
902    /// Reconcile user-provided schema with system defaults
903    ///
904    /// This method merges user configurations with system defaults, ensuring that:
905    /// - User overrides take precedence over defaults
906    /// - Missing user configurations fall back to system defaults
907    /// - Field-level merging for complex configurations (Vector, HNSW, SPANN, etc.)
908    pub fn reconcile_with_defaults(
909        user_schema: Option<&Schema>,
910        knn_index: KnnIndex,
911    ) -> Result<Self, SchemaError> {
912        let default_schema = Schema::new_default(knn_index);
913
914        match user_schema {
915            Some(user) => {
916                // Merge defaults with user overrides
917                let merged_defaults =
918                    Self::merge_value_types(&default_schema.defaults, &user.defaults, knn_index)?;
919
920                // Merge key overrides
921                let mut merged_keys = default_schema.keys.clone();
922                for (key, user_value_types) in &user.keys {
923                    if let Some(default_value_types) = merged_keys.get(key) {
924                        // Merge with existing default key override
925                        let merged_value_types = Self::merge_value_types(
926                            default_value_types,
927                            user_value_types,
928                            knn_index,
929                        )?;
930                        merged_keys.insert(key.clone(), merged_value_types);
931                    } else {
932                        // New key override from user
933                        merged_keys.insert(key.clone(), user_value_types.clone());
934                    }
935                }
936
937                Ok(Schema {
938                    defaults: merged_defaults,
939                    keys: merged_keys,
940                    cmek: user.cmek.clone().or(default_schema.cmek.clone()),
941                    source_attached_function_id: user
942                        .source_attached_function_id
943                        .clone()
944                        .or(default_schema.source_attached_function_id.clone()),
945                })
946            }
947            None => Ok(default_schema),
948        }
949    }
950
951    /// Merge two schemas together, combining key overrides when possible.
952    pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
953        if self.defaults != other.defaults {
954            return Err(SchemaError::DefaultsMismatch);
955        }
956
957        let mut keys = self.keys.clone();
958
959        for (key, other_value_types) in &other.keys {
960            if let Some(existing) = keys.get(key).cloned() {
961                let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
962                keys.insert(key.clone(), merged);
963            } else {
964                keys.insert(key.clone(), other_value_types.clone());
965            }
966        }
967
968        Ok(Schema {
969            defaults: self.defaults.clone(),
970            keys,
971            cmek: other.cmek.clone().or(self.cmek.clone()),
972            source_attached_function_id: other
973                .source_attached_function_id
974                .clone()
975                .or(self.source_attached_function_id.clone()),
976        })
977    }
978
979    fn merge_override_value_types(
980        key: &str,
981        left: &ValueTypes,
982        right: &ValueTypes,
983    ) -> Result<ValueTypes, SchemaError> {
984        Ok(ValueTypes {
985            string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
986            float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
987            int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
988            boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
989            float_list: Self::merge_float_list_override(
990                key,
991                left.float_list.as_ref(),
992                right.float_list.as_ref(),
993            )?,
994            sparse_vector: Self::merge_sparse_vector_override(
995                key,
996                left.sparse_vector.as_ref(),
997                right.sparse_vector.as_ref(),
998            )?,
999        })
1000    }
1001
1002    fn merge_string_override(
1003        key: &str,
1004        left: Option<&StringValueType>,
1005        right: Option<&StringValueType>,
1006    ) -> Result<Option<StringValueType>, SchemaError> {
1007        match (left, right) {
1008            (Some(l), Some(r)) => Ok(Some(StringValueType {
1009                string_inverted_index: Self::merge_index_or_error(
1010                    l.string_inverted_index.as_ref(),
1011                    r.string_inverted_index.as_ref(),
1012                    &format!("key '{key}' string.string_inverted_index"),
1013                )?,
1014                fts_index: Self::merge_index_or_error(
1015                    l.fts_index.as_ref(),
1016                    r.fts_index.as_ref(),
1017                    &format!("key '{key}' string.fts_index"),
1018                )?,
1019            })),
1020            (Some(l), None) => Ok(Some(l.clone())),
1021            (None, Some(r)) => Ok(Some(r.clone())),
1022            (None, None) => Ok(None),
1023        }
1024    }
1025
1026    fn merge_float_override(
1027        key: &str,
1028        left: Option<&FloatValueType>,
1029        right: Option<&FloatValueType>,
1030    ) -> Result<Option<FloatValueType>, SchemaError> {
1031        match (left, right) {
1032            (Some(l), Some(r)) => Ok(Some(FloatValueType {
1033                float_inverted_index: Self::merge_index_or_error(
1034                    l.float_inverted_index.as_ref(),
1035                    r.float_inverted_index.as_ref(),
1036                    &format!("key '{key}' float.float_inverted_index"),
1037                )?,
1038            })),
1039            (Some(l), None) => Ok(Some(l.clone())),
1040            (None, Some(r)) => Ok(Some(r.clone())),
1041            (None, None) => Ok(None),
1042        }
1043    }
1044
1045    fn merge_int_override(
1046        key: &str,
1047        left: Option<&IntValueType>,
1048        right: Option<&IntValueType>,
1049    ) -> Result<Option<IntValueType>, SchemaError> {
1050        match (left, right) {
1051            (Some(l), Some(r)) => Ok(Some(IntValueType {
1052                int_inverted_index: Self::merge_index_or_error(
1053                    l.int_inverted_index.as_ref(),
1054                    r.int_inverted_index.as_ref(),
1055                    &format!("key '{key}' int.int_inverted_index"),
1056                )?,
1057            })),
1058            (Some(l), None) => Ok(Some(l.clone())),
1059            (None, Some(r)) => Ok(Some(r.clone())),
1060            (None, None) => Ok(None),
1061        }
1062    }
1063
1064    fn merge_bool_override(
1065        key: &str,
1066        left: Option<&BoolValueType>,
1067        right: Option<&BoolValueType>,
1068    ) -> Result<Option<BoolValueType>, SchemaError> {
1069        match (left, right) {
1070            (Some(l), Some(r)) => Ok(Some(BoolValueType {
1071                bool_inverted_index: Self::merge_index_or_error(
1072                    l.bool_inverted_index.as_ref(),
1073                    r.bool_inverted_index.as_ref(),
1074                    &format!("key '{key}' bool.bool_inverted_index"),
1075                )?,
1076            })),
1077            (Some(l), None) => Ok(Some(l.clone())),
1078            (None, Some(r)) => Ok(Some(r.clone())),
1079            (None, None) => Ok(None),
1080        }
1081    }
1082
1083    fn merge_float_list_override(
1084        key: &str,
1085        left: Option<&FloatListValueType>,
1086        right: Option<&FloatListValueType>,
1087    ) -> Result<Option<FloatListValueType>, SchemaError> {
1088        match (left, right) {
1089            (Some(l), Some(r)) => Ok(Some(FloatListValueType {
1090                vector_index: Self::merge_index_or_error(
1091                    l.vector_index.as_ref(),
1092                    r.vector_index.as_ref(),
1093                    &format!("key '{key}' float_list.vector_index"),
1094                )?,
1095            })),
1096            (Some(l), None) => Ok(Some(l.clone())),
1097            (None, Some(r)) => Ok(Some(r.clone())),
1098            (None, None) => Ok(None),
1099        }
1100    }
1101
1102    fn merge_sparse_vector_override(
1103        key: &str,
1104        left: Option<&SparseVectorValueType>,
1105        right: Option<&SparseVectorValueType>,
1106    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1107        match (left, right) {
1108            (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
1109                sparse_vector_index: Self::merge_index_or_error(
1110                    l.sparse_vector_index.as_ref(),
1111                    r.sparse_vector_index.as_ref(),
1112                    &format!("key '{key}' sparse_vector.sparse_vector_index"),
1113                )?,
1114            })),
1115            (Some(l), None) => Ok(Some(l.clone())),
1116            (None, Some(r)) => Ok(Some(r.clone())),
1117            (None, None) => Ok(None),
1118        }
1119    }
1120
1121    fn merge_index_or_error<T: Clone + PartialEq>(
1122        left: Option<&T>,
1123        right: Option<&T>,
1124        context: &str,
1125    ) -> Result<Option<T>, SchemaError> {
1126        match (left, right) {
1127            (Some(l), Some(r)) => {
1128                if l == r {
1129                    Ok(Some(l.clone()))
1130                } else {
1131                    Err(SchemaError::ConfigurationConflict {
1132                        context: context.to_string(),
1133                    })
1134                }
1135            }
1136            (Some(l), None) => Ok(Some(l.clone())),
1137            (None, Some(r)) => Ok(Some(r.clone())),
1138            (None, None) => Ok(None),
1139        }
1140    }
1141
1142    /// Merge two ValueTypes with field-level merging
1143    /// User values take precedence over default values
1144    fn merge_value_types(
1145        default: &ValueTypes,
1146        user: &ValueTypes,
1147        knn_index: KnnIndex,
1148    ) -> Result<ValueTypes, SchemaError> {
1149        // Merge float_list first
1150        let float_list = Self::merge_float_list_type(
1151            default.float_list.as_ref(),
1152            user.float_list.as_ref(),
1153            knn_index,
1154        );
1155
1156        // Validate the merged float_list (covers all merge cases)
1157        if let Some(ref fl) = float_list {
1158            Self::validate_float_list_value_type(fl)?;
1159        }
1160
1161        Ok(ValueTypes {
1162            string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
1163            float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
1164            int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
1165            boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
1166            float_list,
1167            sparse_vector: Self::merge_sparse_vector_type(
1168                default.sparse_vector.as_ref(),
1169                user.sparse_vector.as_ref(),
1170            )?,
1171        })
1172    }
1173
1174    /// Merge StringValueType configurations
1175    fn merge_string_type(
1176        default: Option<&StringValueType>,
1177        user: Option<&StringValueType>,
1178    ) -> Result<Option<StringValueType>, SchemaError> {
1179        match (default, user) {
1180            (Some(default), Some(user)) => Ok(Some(StringValueType {
1181                string_inverted_index: Self::merge_string_inverted_index_type(
1182                    default.string_inverted_index.as_ref(),
1183                    user.string_inverted_index.as_ref(),
1184                )?,
1185                fts_index: Self::merge_fts_index_type(
1186                    default.fts_index.as_ref(),
1187                    user.fts_index.as_ref(),
1188                )?,
1189            })),
1190            (Some(default), None) => Ok(Some(default.clone())),
1191            (None, Some(user)) => Ok(Some(user.clone())),
1192            (None, None) => Ok(None),
1193        }
1194    }
1195
1196    /// Merge FloatValueType configurations
1197    fn merge_float_type(
1198        default: Option<&FloatValueType>,
1199        user: Option<&FloatValueType>,
1200    ) -> Result<Option<FloatValueType>, SchemaError> {
1201        match (default, user) {
1202            (Some(default), Some(user)) => Ok(Some(FloatValueType {
1203                float_inverted_index: Self::merge_float_inverted_index_type(
1204                    default.float_inverted_index.as_ref(),
1205                    user.float_inverted_index.as_ref(),
1206                )?,
1207            })),
1208            (Some(default), None) => Ok(Some(default.clone())),
1209            (None, Some(user)) => Ok(Some(user.clone())),
1210            (None, None) => Ok(None),
1211        }
1212    }
1213
1214    /// Merge IntValueType configurations
1215    fn merge_int_type(
1216        default: Option<&IntValueType>,
1217        user: Option<&IntValueType>,
1218    ) -> Result<Option<IntValueType>, SchemaError> {
1219        match (default, user) {
1220            (Some(default), Some(user)) => Ok(Some(IntValueType {
1221                int_inverted_index: Self::merge_int_inverted_index_type(
1222                    default.int_inverted_index.as_ref(),
1223                    user.int_inverted_index.as_ref(),
1224                )?,
1225            })),
1226            (Some(default), None) => Ok(Some(default.clone())),
1227            (None, Some(user)) => Ok(Some(user.clone())),
1228            (None, None) => Ok(None),
1229        }
1230    }
1231
1232    /// Merge BoolValueType configurations
1233    fn merge_bool_type(
1234        default: Option<&BoolValueType>,
1235        user: Option<&BoolValueType>,
1236    ) -> Result<Option<BoolValueType>, SchemaError> {
1237        match (default, user) {
1238            (Some(default), Some(user)) => Ok(Some(BoolValueType {
1239                bool_inverted_index: Self::merge_bool_inverted_index_type(
1240                    default.bool_inverted_index.as_ref(),
1241                    user.bool_inverted_index.as_ref(),
1242                )?,
1243            })),
1244            (Some(default), None) => Ok(Some(default.clone())),
1245            (None, Some(user)) => Ok(Some(user.clone())),
1246            (None, None) => Ok(None),
1247        }
1248    }
1249
1250    /// Merge FloatListValueType configurations
1251    fn merge_float_list_type(
1252        default: Option<&FloatListValueType>,
1253        user: Option<&FloatListValueType>,
1254        knn_index: KnnIndex,
1255    ) -> Option<FloatListValueType> {
1256        match (default, user) {
1257            (Some(default), Some(user)) => Some(FloatListValueType {
1258                vector_index: Self::merge_vector_index_type(
1259                    default.vector_index.as_ref(),
1260                    user.vector_index.as_ref(),
1261                    knn_index,
1262                ),
1263            }),
1264            (Some(default), None) => Some(default.clone()),
1265            (None, Some(user)) => Some(user.clone()),
1266            (None, None) => None,
1267        }
1268    }
1269
1270    /// Merge SparseVectorValueType configurations
1271    fn merge_sparse_vector_type(
1272        default: Option<&SparseVectorValueType>,
1273        user: Option<&SparseVectorValueType>,
1274    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1275        match (default, user) {
1276            (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
1277                sparse_vector_index: Self::merge_sparse_vector_index_type(
1278                    default.sparse_vector_index.as_ref(),
1279                    user.sparse_vector_index.as_ref(),
1280                )?,
1281            })),
1282            (Some(default), None) => Ok(Some(default.clone())),
1283            (None, Some(user)) => Ok(Some(user.clone())),
1284            (None, None) => Ok(None),
1285        }
1286    }
1287
1288    /// Merge individual index type configurations
1289    fn merge_string_inverted_index_type(
1290        default: Option<&StringInvertedIndexType>,
1291        user: Option<&StringInvertedIndexType>,
1292    ) -> Result<Option<StringInvertedIndexType>, SchemaError> {
1293        match (default, user) {
1294            (Some(_default), Some(user)) => {
1295                Ok(Some(StringInvertedIndexType {
1296                    enabled: user.enabled,       // User enabled state takes precedence
1297                    config: user.config.clone(), // User config takes precedence
1298                }))
1299            }
1300            (Some(default), None) => Ok(Some(default.clone())),
1301            (None, Some(user)) => Ok(Some(user.clone())),
1302            (None, None) => Ok(None),
1303        }
1304    }
1305
1306    fn merge_fts_index_type(
1307        default: Option<&FtsIndexType>,
1308        user: Option<&FtsIndexType>,
1309    ) -> Result<Option<FtsIndexType>, SchemaError> {
1310        match (default, user) {
1311            (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
1312                enabled: user.enabled,
1313                config: user.config.clone(),
1314            })),
1315            (Some(default), None) => Ok(Some(default.clone())),
1316            (None, Some(user)) => Ok(Some(user.clone())),
1317            (None, None) => Ok(None),
1318        }
1319    }
1320
1321    fn merge_float_inverted_index_type(
1322        default: Option<&FloatInvertedIndexType>,
1323        user: Option<&FloatInvertedIndexType>,
1324    ) -> Result<Option<FloatInvertedIndexType>, SchemaError> {
1325        match (default, user) {
1326            (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
1327                enabled: user.enabled,
1328                config: user.config.clone(),
1329            })),
1330            (Some(default), None) => Ok(Some(default.clone())),
1331            (None, Some(user)) => Ok(Some(user.clone())),
1332            (None, None) => Ok(None),
1333        }
1334    }
1335
1336    fn merge_int_inverted_index_type(
1337        default: Option<&IntInvertedIndexType>,
1338        user: Option<&IntInvertedIndexType>,
1339    ) -> Result<Option<IntInvertedIndexType>, SchemaError> {
1340        match (default, user) {
1341            (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
1342                enabled: user.enabled,
1343                config: user.config.clone(),
1344            })),
1345            (Some(default), None) => Ok(Some(default.clone())),
1346            (None, Some(user)) => Ok(Some(user.clone())),
1347            (None, None) => Ok(None),
1348        }
1349    }
1350
1351    fn merge_bool_inverted_index_type(
1352        default: Option<&BoolInvertedIndexType>,
1353        user: Option<&BoolInvertedIndexType>,
1354    ) -> Result<Option<BoolInvertedIndexType>, SchemaError> {
1355        match (default, user) {
1356            (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
1357                enabled: user.enabled,
1358                config: user.config.clone(),
1359            })),
1360            (Some(default), None) => Ok(Some(default.clone())),
1361            (None, Some(user)) => Ok(Some(user.clone())),
1362            (None, None) => Ok(None),
1363        }
1364    }
1365
1366    fn merge_vector_index_type(
1367        default: Option<&VectorIndexType>,
1368        user: Option<&VectorIndexType>,
1369        knn_index: KnnIndex,
1370    ) -> Option<VectorIndexType> {
1371        match (default, user) {
1372            (Some(default), Some(user)) => Some(VectorIndexType {
1373                enabled: user.enabled,
1374                config: Self::merge_vector_index_config(&default.config, &user.config, knn_index),
1375            }),
1376            (Some(default), None) => Some(default.clone()),
1377            (None, Some(user)) => Some(user.clone()),
1378            (None, None) => None,
1379        }
1380    }
1381
1382    fn merge_sparse_vector_index_type(
1383        default: Option<&SparseVectorIndexType>,
1384        user: Option<&SparseVectorIndexType>,
1385    ) -> Result<Option<SparseVectorIndexType>, SchemaError> {
1386        match (default, user) {
1387            (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
1388                enabled: user.enabled,
1389                config: Self::merge_sparse_vector_index_config(&default.config, &user.config),
1390            })),
1391            (Some(default), None) => Ok(Some(default.clone())),
1392            (None, Some(user)) => Ok(Some(user.clone())),
1393            (None, None) => Ok(None),
1394        }
1395    }
1396
1397    /// Validate FloatListValueType vector index configurations
1398    /// This validates HNSW and SPANN configs within the merged float_list
1399    fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), SchemaError> {
1400        if let Some(vector_index) = &float_list.vector_index {
1401            if let Some(hnsw) = &vector_index.config.hnsw {
1402                hnsw.validate().map_err(SchemaError::InvalidHnswConfig)?;
1403            }
1404            if let Some(spann) = &vector_index.config.spann {
1405                spann.validate().map_err(SchemaError::InvalidSpannConfig)?;
1406            }
1407        }
1408        Ok(())
1409    }
1410
1411    /// Merge VectorIndexConfig with field-level merging
1412    fn merge_vector_index_config(
1413        default: &VectorIndexConfig,
1414        user: &VectorIndexConfig,
1415        knn_index: KnnIndex,
1416    ) -> VectorIndexConfig {
1417        match knn_index {
1418            KnnIndex::Hnsw => VectorIndexConfig {
1419                space: user.space.clone().or(default.space.clone()),
1420                embedding_function: user
1421                    .embedding_function
1422                    .clone()
1423                    .or(default.embedding_function.clone()),
1424                source_key: user.source_key.clone().or(default.source_key.clone()),
1425                hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1426                spann: None,
1427            },
1428            KnnIndex::Spann => VectorIndexConfig {
1429                space: user.space.clone().or(default.space.clone()),
1430                embedding_function: user
1431                    .embedding_function
1432                    .clone()
1433                    .or(default.embedding_function.clone()),
1434                source_key: user.source_key.clone().or(default.source_key.clone()),
1435                hnsw: None,
1436                spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
1437            },
1438        }
1439    }
1440
1441    /// Merge SparseVectorIndexConfig with field-level merging
1442    fn merge_sparse_vector_index_config(
1443        default: &SparseVectorIndexConfig,
1444        user: &SparseVectorIndexConfig,
1445    ) -> SparseVectorIndexConfig {
1446        SparseVectorIndexConfig {
1447            embedding_function: user
1448                .embedding_function
1449                .clone()
1450                .or(default.embedding_function.clone()),
1451            source_key: user.source_key.clone().or(default.source_key.clone()),
1452            bm25: user.bm25.or(default.bm25),
1453        }
1454    }
1455
1456    /// Merge HNSW configurations with field-level merging
1457    fn merge_hnsw_configs(
1458        default_hnsw: Option<&HnswIndexConfig>,
1459        user_hnsw: Option<&HnswIndexConfig>,
1460    ) -> Option<HnswIndexConfig> {
1461        match (default_hnsw, user_hnsw) {
1462            (Some(default), Some(user)) => Some(HnswIndexConfig {
1463                ef_construction: user.ef_construction.or(default.ef_construction),
1464                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1465                ef_search: user.ef_search.or(default.ef_search),
1466                num_threads: user.num_threads.or(default.num_threads),
1467                batch_size: user.batch_size.or(default.batch_size),
1468                sync_threshold: user.sync_threshold.or(default.sync_threshold),
1469                resize_factor: user.resize_factor.or(default.resize_factor),
1470            }),
1471            (Some(default), None) => Some(default.clone()),
1472            (None, Some(user)) => Some(user.clone()),
1473            (None, None) => None,
1474        }
1475    }
1476
1477    /// Merge SPANN configurations with field-level merging
1478    fn merge_spann_configs(
1479        default_spann: Option<&SpannIndexConfig>,
1480        user_spann: Option<&SpannIndexConfig>,
1481    ) -> Option<SpannIndexConfig> {
1482        match (default_spann, user_spann) {
1483            (Some(default), Some(user)) => Some(SpannIndexConfig {
1484                search_nprobe: user.search_nprobe.or(default.search_nprobe),
1485                search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1486                search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1487                nreplica_count: user.nreplica_count.or(default.nreplica_count),
1488                write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1489                write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1490                split_threshold: user.split_threshold.or(default.split_threshold),
1491                num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1492                initial_lambda: user.initial_lambda.or(default.initial_lambda),
1493                reassign_neighbor_count: user
1494                    .reassign_neighbor_count
1495                    .or(default.reassign_neighbor_count),
1496                merge_threshold: user.merge_threshold.or(default.merge_threshold),
1497                num_centers_to_merge_to: user
1498                    .num_centers_to_merge_to
1499                    .or(default.num_centers_to_merge_to),
1500                write_nprobe: user.write_nprobe.or(default.write_nprobe),
1501                ef_construction: user.ef_construction.or(default.ef_construction),
1502                ef_search: user.ef_search.or(default.ef_search),
1503                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1504            }),
1505            (Some(default), None) => Some(default.clone()),
1506            (None, Some(user)) => Some(user.clone()),
1507            (None, None) => None,
1508        }
1509    }
1510
1511    /// Reconcile Schema with InternalCollectionConfiguration
1512    ///
1513    /// Simple reconciliation logic:
1514    /// 1. If collection config is default → return schema (schema is source of truth)
1515    /// 2. If collection config is non-default and schema is default → override schema with collection config
1516    ///
1517    /// Note: The case where both are non-default is validated earlier in reconcile_schema_and_config
1518    pub fn reconcile_with_collection_config(
1519        schema: &Schema,
1520        collection_config: &InternalCollectionConfiguration,
1521        default_knn_index: KnnIndex,
1522    ) -> Result<Schema, SchemaError> {
1523        // 1. Check if collection config is default
1524        if collection_config.is_default() {
1525            if schema.is_default() {
1526                // if both are default, use the schema, and apply the ef from config if available
1527                // for both defaults and #embedding key
1528                let mut new_schema = Schema::new_default(default_knn_index);
1529
1530                if collection_config.embedding_function.is_some() {
1531                    if let Some(float_list) = &mut new_schema.defaults.float_list {
1532                        if let Some(vector_index) = &mut float_list.vector_index {
1533                            vector_index.config.embedding_function =
1534                                collection_config.embedding_function.clone();
1535                        }
1536                    }
1537                    if let Some(embedding_types) = new_schema.keys.get_mut(EMBEDDING_KEY) {
1538                        if let Some(float_list) = &mut embedding_types.float_list {
1539                            if let Some(vector_index) = &mut float_list.vector_index {
1540                                vector_index.config.embedding_function =
1541                                    collection_config.embedding_function.clone();
1542                            }
1543                        }
1544                    }
1545                }
1546                return Ok(new_schema);
1547            } else {
1548                // Collection config is default and schema is non-default → schema is source of truth
1549                return Ok(schema.clone());
1550            }
1551        }
1552
1553        // 2. Collection config is non-default, schema must be default (already validated earlier)
1554        // Convert collection config to schema
1555        Self::try_from(collection_config)
1556    }
1557
1558    pub fn reconcile_schema_and_config(
1559        schema: Option<&Schema>,
1560        configuration: Option<&InternalCollectionConfiguration>,
1561        knn_index: KnnIndex,
1562    ) -> Result<Schema, SchemaError> {
1563        // Early validation: check if both user-provided schema and config are non-default
1564        if let (Some(user_schema), Some(config)) = (schema, configuration) {
1565            if !user_schema.is_default() && !config.is_default() {
1566                return Err(SchemaError::ConfigAndSchemaConflict);
1567            }
1568        }
1569
1570        let reconciled_schema = Self::reconcile_with_defaults(schema, knn_index)?;
1571        if let Some(config) = configuration {
1572            Self::reconcile_with_collection_config(&reconciled_schema, config, knn_index)
1573        } else {
1574            Ok(reconciled_schema)
1575        }
1576    }
1577
1578    pub fn default_with_embedding_function(
1579        embedding_function: EmbeddingFunctionConfiguration,
1580    ) -> Schema {
1581        let mut schema = Schema::new_default(KnnIndex::Spann);
1582        if let Some(float_list) = &mut schema.defaults.float_list {
1583            if let Some(vector_index) = &mut float_list.vector_index {
1584                vector_index.config.embedding_function = Some(embedding_function.clone());
1585            }
1586        }
1587        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1588            if let Some(float_list) = &mut embedding_types.float_list {
1589                if let Some(vector_index) = &mut float_list.vector_index {
1590                    vector_index.config.embedding_function = Some(embedding_function);
1591                }
1592            }
1593        }
1594        schema
1595    }
1596
1597    /// Check if schema is default by checking each field individually
1598    pub fn is_default(&self) -> bool {
1599        // Check if defaults are default (field by field)
1600        if !Self::is_value_types_default(&self.defaults) {
1601            return false;
1602        }
1603
1604        for key in self.keys.keys() {
1605            if key != EMBEDDING_KEY && key != DOCUMENT_KEY {
1606                return false;
1607            }
1608        }
1609
1610        // Check #embedding key
1611        if let Some(embedding_value) = self.keys.get(EMBEDDING_KEY) {
1612            if !Self::is_embedding_value_types_default(embedding_value) {
1613                return false;
1614            }
1615        }
1616
1617        // Check #document key
1618        if let Some(document_value) = self.keys.get(DOCUMENT_KEY) {
1619            if !Self::is_document_value_types_default(document_value) {
1620                return false;
1621            }
1622        }
1623
1624        // Check CMEK is None (default)
1625        if self.cmek.is_some() {
1626            return false;
1627        }
1628
1629        true
1630    }
1631
1632    /// Check if ValueTypes (defaults) are in default state
1633    fn is_value_types_default(value_types: &ValueTypes) -> bool {
1634        // Check string field
1635        if let Some(string) = &value_types.string {
1636            if let Some(string_inverted) = &string.string_inverted_index {
1637                if !string_inverted.enabled {
1638                    return false;
1639                }
1640                // Config is an empty struct, so no need to check it
1641            }
1642            if let Some(fts) = &string.fts_index {
1643                if fts.enabled {
1644                    return false;
1645                }
1646                // Config is an empty struct, so no need to check it
1647            }
1648        }
1649
1650        // Check float field
1651        if let Some(float) = &value_types.float {
1652            if let Some(float_inverted) = &float.float_inverted_index {
1653                if !float_inverted.enabled {
1654                    return false;
1655                }
1656                // Config is an empty struct, so no need to check it
1657            }
1658        }
1659
1660        // Check int field
1661        if let Some(int) = &value_types.int {
1662            if let Some(int_inverted) = &int.int_inverted_index {
1663                if !int_inverted.enabled {
1664                    return false;
1665                }
1666                // Config is an empty struct, so no need to check it
1667            }
1668        }
1669
1670        // Check boolean field
1671        if let Some(boolean) = &value_types.boolean {
1672            if let Some(bool_inverted) = &boolean.bool_inverted_index {
1673                if !bool_inverted.enabled {
1674                    return false;
1675                }
1676                // Config is an empty struct, so no need to check it
1677            }
1678        }
1679
1680        // Check float_list field (vector index should be disabled)
1681        if let Some(float_list) = &value_types.float_list {
1682            if let Some(vector_index) = &float_list.vector_index {
1683                if vector_index.enabled {
1684                    return false;
1685                }
1686                if !is_embedding_function_default(&vector_index.config.embedding_function) {
1687                    return false;
1688                }
1689                if !is_space_default(&vector_index.config.space) {
1690                    return false;
1691                }
1692                // Check that the config has default structure
1693                if vector_index.config.source_key.is_some() {
1694                    return false;
1695                }
1696                // Check that either hnsw or spann config is present (not both, not neither)
1697                // and that the config values are default
1698                match (&vector_index.config.hnsw, &vector_index.config.spann) {
1699                    (Some(hnsw_config), None) => {
1700                        if !hnsw_config.is_default() {
1701                            return false;
1702                        }
1703                    }
1704                    (None, Some(spann_config)) => {
1705                        if !spann_config.is_default() {
1706                            return false;
1707                        }
1708                    }
1709                    (Some(_), Some(_)) => return false, // Both present
1710                    (None, None) => {}
1711                }
1712            }
1713        }
1714
1715        // Check sparse_vector field (should be disabled)
1716        if let Some(sparse_vector) = &value_types.sparse_vector {
1717            if let Some(sparse_index) = &sparse_vector.sparse_vector_index {
1718                if sparse_index.enabled {
1719                    return false;
1720                }
1721                // Check config structure
1722                if !is_embedding_function_default(&sparse_index.config.embedding_function) {
1723                    return false;
1724                }
1725                if sparse_index.config.source_key.is_some() {
1726                    return false;
1727                }
1728                if let Some(bm25) = &sparse_index.config.bm25 {
1729                    if bm25 != &false {
1730                        return false;
1731                    }
1732                }
1733            }
1734        }
1735
1736        true
1737    }
1738
1739    /// Check if ValueTypes for #embedding key are in default state
1740    fn is_embedding_value_types_default(value_types: &ValueTypes) -> bool {
1741        // For #embedding, only float_list should be set
1742        if value_types.string.is_some()
1743            || value_types.float.is_some()
1744            || value_types.int.is_some()
1745            || value_types.boolean.is_some()
1746            || value_types.sparse_vector.is_some()
1747        {
1748            return false;
1749        }
1750
1751        // Check float_list field (vector index should be enabled)
1752        if let Some(float_list) = &value_types.float_list {
1753            if let Some(vector_index) = &float_list.vector_index {
1754                if !vector_index.enabled {
1755                    return false;
1756                }
1757                if !is_space_default(&vector_index.config.space) {
1758                    return false;
1759                }
1760                // Check that embedding_function is default
1761                if !is_embedding_function_default(&vector_index.config.embedding_function) {
1762                    return false;
1763                }
1764                // Check that source_key is #document
1765                if vector_index.config.source_key.as_deref() != Some(DOCUMENT_KEY) {
1766                    return false;
1767                }
1768                // Check that either hnsw or spann config is present (not both, not neither)
1769                // and that the config values are default
1770                match (&vector_index.config.hnsw, &vector_index.config.spann) {
1771                    (Some(hnsw_config), None) => {
1772                        if !hnsw_config.is_default() {
1773                            return false;
1774                        }
1775                    }
1776                    (None, Some(spann_config)) => {
1777                        if !spann_config.is_default() {
1778                            return false;
1779                        }
1780                    }
1781                    (Some(_), Some(_)) => return false, // Both present
1782                    (None, None) => {}
1783                }
1784            }
1785        }
1786
1787        true
1788    }
1789
1790    /// Check if ValueTypes for #document key are in default state
1791    fn is_document_value_types_default(value_types: &ValueTypes) -> bool {
1792        // For #document, only string should be set
1793        if value_types.float_list.is_some()
1794            || value_types.float.is_some()
1795            || value_types.int.is_some()
1796            || value_types.boolean.is_some()
1797            || value_types.sparse_vector.is_some()
1798        {
1799            return false;
1800        }
1801
1802        // Check string field
1803        if let Some(string) = &value_types.string {
1804            if let Some(fts) = &string.fts_index {
1805                if !fts.enabled {
1806                    return false;
1807                }
1808                // Config is an empty struct, so no need to check it
1809            }
1810            if let Some(string_inverted) = &string.string_inverted_index {
1811                if string_inverted.enabled {
1812                    return false;
1813                }
1814                // Config is an empty struct, so no need to check it
1815            }
1816        }
1817
1818        true
1819    }
1820
1821    /// Check if a specific metadata key-value should be indexed based on schema configuration
1822    pub fn is_metadata_type_index_enabled(
1823        &self,
1824        key: &str,
1825        value_type: MetadataValueType,
1826    ) -> Result<bool, SchemaError> {
1827        let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1828
1829        match value_type {
1830            MetadataValueType::Bool => match &v_type.boolean {
1831                Some(bool_type) => match &bool_type.bool_inverted_index {
1832                    Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1833                    None => Err(SchemaError::MissingIndexConfiguration {
1834                        key: key.to_string(),
1835                        value_type: "bool".to_string(),
1836                    }),
1837                },
1838                None => match &self.defaults.boolean {
1839                    Some(bool_type) => match &bool_type.bool_inverted_index {
1840                        Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1841                        None => Err(SchemaError::MissingIndexConfiguration {
1842                            key: key.to_string(),
1843                            value_type: "bool".to_string(),
1844                        }),
1845                    },
1846                    None => Err(SchemaError::MissingIndexConfiguration {
1847                        key: key.to_string(),
1848                        value_type: "bool".to_string(),
1849                    }),
1850                },
1851            },
1852            MetadataValueType::Int => match &v_type.int {
1853                Some(int_type) => match &int_type.int_inverted_index {
1854                    Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1855                    None => Err(SchemaError::MissingIndexConfiguration {
1856                        key: key.to_string(),
1857                        value_type: "int".to_string(),
1858                    }),
1859                },
1860                None => match &self.defaults.int {
1861                    Some(int_type) => match &int_type.int_inverted_index {
1862                        Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1863                        None => Err(SchemaError::MissingIndexConfiguration {
1864                            key: key.to_string(),
1865                            value_type: "int".to_string(),
1866                        }),
1867                    },
1868                    None => Err(SchemaError::MissingIndexConfiguration {
1869                        key: key.to_string(),
1870                        value_type: "int".to_string(),
1871                    }),
1872                },
1873            },
1874            MetadataValueType::Float => match &v_type.float {
1875                Some(float_type) => match &float_type.float_inverted_index {
1876                    Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1877                    None => Err(SchemaError::MissingIndexConfiguration {
1878                        key: key.to_string(),
1879                        value_type: "float".to_string(),
1880                    }),
1881                },
1882                None => match &self.defaults.float {
1883                    Some(float_type) => match &float_type.float_inverted_index {
1884                        Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1885                        None => Err(SchemaError::MissingIndexConfiguration {
1886                            key: key.to_string(),
1887                            value_type: "float".to_string(),
1888                        }),
1889                    },
1890                    None => Err(SchemaError::MissingIndexConfiguration {
1891                        key: key.to_string(),
1892                        value_type: "float".to_string(),
1893                    }),
1894                },
1895            },
1896            MetadataValueType::Str => match &v_type.string {
1897                Some(string_type) => match &string_type.string_inverted_index {
1898                    Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1899                    None => Err(SchemaError::MissingIndexConfiguration {
1900                        key: key.to_string(),
1901                        value_type: "string".to_string(),
1902                    }),
1903                },
1904                None => match &self.defaults.string {
1905                    Some(string_type) => match &string_type.string_inverted_index {
1906                        Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1907                        None => Err(SchemaError::MissingIndexConfiguration {
1908                            key: key.to_string(),
1909                            value_type: "string".to_string(),
1910                        }),
1911                    },
1912                    None => Err(SchemaError::MissingIndexConfiguration {
1913                        key: key.to_string(),
1914                        value_type: "string".to_string(),
1915                    }),
1916                },
1917            },
1918            MetadataValueType::SparseVector => match &v_type.sparse_vector {
1919                Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1920                    Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1921                    None => Err(SchemaError::MissingIndexConfiguration {
1922                        key: key.to_string(),
1923                        value_type: "sparse_vector".to_string(),
1924                    }),
1925                },
1926                None => match &self.defaults.sparse_vector {
1927                    Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1928                        Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1929                        None => Err(SchemaError::MissingIndexConfiguration {
1930                            key: key.to_string(),
1931                            value_type: "sparse_vector".to_string(),
1932                        }),
1933                    },
1934                    None => Err(SchemaError::MissingIndexConfiguration {
1935                        key: key.to_string(),
1936                        value_type: "sparse_vector".to_string(),
1937                    }),
1938                },
1939            },
1940        }
1941    }
1942
1943    pub fn is_metadata_where_indexing_enabled(
1944        &self,
1945        where_clause: &Where,
1946    ) -> Result<(), FilterValidationError> {
1947        match where_clause {
1948            Where::Composite(composite) => {
1949                for child in &composite.children {
1950                    self.is_metadata_where_indexing_enabled(child)?;
1951                }
1952                Ok(())
1953            }
1954            Where::Document(_) => Ok(()),
1955            Where::Metadata(expression) => {
1956                let value_type = match &expression.comparison {
1957                    MetadataComparison::Primitive(_, value) => value.value_type(),
1958                    MetadataComparison::Set(_, set_value) => set_value.value_type(),
1959                };
1960                let is_enabled = self
1961                    .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1962                    .map_err(FilterValidationError::Schema)?;
1963                if !is_enabled {
1964                    return Err(FilterValidationError::IndexingDisabled {
1965                        key: expression.key.clone(),
1966                        value_type,
1967                    });
1968                }
1969                Ok(())
1970            }
1971        }
1972    }
1973
1974    pub fn is_knn_key_indexing_enabled(
1975        &self,
1976        key: &str,
1977        query: &QueryVector,
1978    ) -> Result<(), FilterValidationError> {
1979        match query {
1980            QueryVector::Sparse(_) => {
1981                let is_enabled = self
1982                    .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1983                    .map_err(FilterValidationError::Schema)?;
1984                if !is_enabled {
1985                    return Err(FilterValidationError::IndexingDisabled {
1986                        key: key.to_string(),
1987                        value_type: MetadataValueType::SparseVector,
1988                    });
1989                }
1990                Ok(())
1991            }
1992            QueryVector::Dense(_) => {
1993                // TODO: once we allow turning off dense vector indexing, we need to check if the key is enabled
1994                // Dense vectors are always indexed
1995                Ok(())
1996            }
1997        }
1998    }
1999
2000    pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
2001        if key.starts_with(CHROMA_KEY) {
2002            return false;
2003        }
2004        let value_types = self.keys.entry(key.to_string()).or_default();
2005        match value_type {
2006            MetadataValueType::Bool => {
2007                if value_types.boolean.is_none() {
2008                    value_types.boolean = self.defaults.boolean.clone();
2009                    return true;
2010                }
2011            }
2012            MetadataValueType::Int => {
2013                if value_types.int.is_none() {
2014                    value_types.int = self.defaults.int.clone();
2015                    return true;
2016                }
2017            }
2018            MetadataValueType::Float => {
2019                if value_types.float.is_none() {
2020                    value_types.float = self.defaults.float.clone();
2021                    return true;
2022                }
2023            }
2024            MetadataValueType::Str => {
2025                if value_types.string.is_none() {
2026                    value_types.string = self.defaults.string.clone();
2027                    return true;
2028                }
2029            }
2030            MetadataValueType::SparseVector => {
2031                if value_types.sparse_vector.is_none() {
2032                    value_types.sparse_vector = self.defaults.sparse_vector.clone();
2033                    return true;
2034                }
2035            }
2036        }
2037        false
2038    }
2039
2040    // ========================================================================
2041    // BUILDER PATTERN METHODS
2042    // ========================================================================
2043
2044    /// Create an index configuration (builder pattern)
2045    ///
2046    /// This method allows fluent, chainable configuration of indexes on a schema.
2047    /// It matches the Python API's `.create_index()` method.
2048    ///
2049    /// # Arguments
2050    /// * `key` - Optional key name for per-key index. `None` applies to defaults/special keys
2051    /// * `config` - Index configuration to create
2052    ///
2053    /// # Returns
2054    /// `Self` for method chaining
2055    ///
2056    /// # Errors
2057    /// Returns error if:
2058    /// - Attempting to create index on special keys (`#document`, `#embedding`)
2059    /// - Invalid configuration (e.g., vector index on non-embedding key)
2060    /// - Conflicting with existing indexes (e.g., multiple sparse vector indexes)
2061    ///
2062    /// # Examples
2063    /// ```
2064    /// use chroma_types::{Schema, VectorIndexConfig, StringInvertedIndexConfig, Space, SchemaBuilderError};
2065    ///
2066    /// # fn main() -> Result<(), SchemaBuilderError> {
2067    /// let schema = Schema::default()
2068    ///     .create_index(None, VectorIndexConfig {
2069    ///         space: Some(Space::Cosine),
2070    ///         embedding_function: None,
2071    ///         source_key: None,
2072    ///         hnsw: None,
2073    ///         spann: None,
2074    ///     }.into())?
2075    ///     .create_index(Some("category"), StringInvertedIndexConfig {}.into())?;
2076    /// # Ok(())
2077    /// # }
2078    /// ```
2079    pub fn create_index(
2080        mut self,
2081        key: Option<&str>,
2082        config: IndexConfig,
2083    ) -> Result<Self, SchemaBuilderError> {
2084        // Handle special cases: Vector and FTS (global configs only)
2085        match (&key, &config) {
2086            (None, IndexConfig::Vector(cfg)) => {
2087                self._set_vector_index_config_builder(cfg.clone());
2088                return Ok(self);
2089            }
2090            (None, IndexConfig::Fts(cfg)) => {
2091                self._set_fts_index_config_builder(cfg.clone());
2092                return Ok(self);
2093            }
2094            (Some(k), IndexConfig::Vector(_)) => {
2095                return Err(SchemaBuilderError::VectorIndexMustBeGlobal { key: k.to_string() });
2096            }
2097            (Some(k), IndexConfig::Fts(_)) => {
2098                return Err(SchemaBuilderError::FtsIndexMustBeGlobal { key: k.to_string() });
2099            }
2100            _ => {}
2101        }
2102
2103        // Validate special keys
2104        if let Some(k) = key {
2105            if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2106                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2107                    key: k.to_string(),
2108                });
2109            }
2110        }
2111
2112        // Validate sparse vector requires key
2113        if key.is_none() && matches!(config, IndexConfig::SparseVector(_)) {
2114            return Err(SchemaBuilderError::SparseVectorRequiresKey);
2115        }
2116
2117        // Dispatch to appropriate helper
2118        match key {
2119            Some(k) => self._set_index_for_key_builder(k, config, true)?,
2120            None => self._set_index_in_defaults_builder(config, true)?,
2121        }
2122
2123        Ok(self)
2124    }
2125
2126    /// Delete/disable an index configuration (builder pattern)
2127    ///
2128    /// This method allows disabling indexes on a schema.
2129    /// It matches the Python API's `.delete_index()` method.
2130    ///
2131    /// # Arguments
2132    /// * `key` - Optional key name for per-key index. `None` applies to defaults
2133    /// * `config` - Index configuration to disable
2134    ///
2135    /// # Returns
2136    /// `Self` for method chaining
2137    ///
2138    /// # Errors
2139    /// Returns error if:
2140    /// - Attempting to delete index on special keys (`#document`, `#embedding`)
2141    /// - Attempting to delete vector, FTS, or sparse vector indexes (not currently supported)
2142    ///
2143    /// # Examples
2144    /// ```
2145    /// use chroma_types::{Schema, StringInvertedIndexConfig, SchemaBuilderError};
2146    ///
2147    /// # fn main() -> Result<(), SchemaBuilderError> {
2148    /// let schema = Schema::default()
2149    ///     .delete_index(Some("category"), StringInvertedIndexConfig {}.into())?;
2150    /// # Ok(())
2151    /// # }
2152    /// ```
2153    pub fn delete_index(
2154        mut self,
2155        key: Option<&str>,
2156        config: IndexConfig,
2157    ) -> Result<Self, SchemaBuilderError> {
2158        // Validate special keys
2159        if let Some(k) = key {
2160            if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2161                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2162                    key: k.to_string(),
2163                });
2164            }
2165        }
2166
2167        // Disallow deleting vector, FTS, and sparse vector indexes (match Python restrictions)
2168        match &config {
2169            IndexConfig::Vector(_) => {
2170                return Err(SchemaBuilderError::VectorIndexDeletionNotSupported);
2171            }
2172            IndexConfig::Fts(_) => {
2173                return Err(SchemaBuilderError::FtsIndexDeletionNotSupported);
2174            }
2175            IndexConfig::SparseVector(_) => {
2176                return Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported);
2177            }
2178            _ => {}
2179        }
2180
2181        // Dispatch to appropriate helper (enabled=false)
2182        match key {
2183            Some(k) => self._set_index_for_key_builder(k, config, false)?,
2184            None => self._set_index_in_defaults_builder(config, false)?,
2185        }
2186
2187        Ok(self)
2188    }
2189
2190    /// Set customer-managed encryption key for the collection (builder pattern)
2191    ///
2192    /// This method allows setting CMEK on a schema for fluent, chainable configuration.
2193    ///
2194    /// # Arguments
2195    /// * `cmek` - Customer-managed encryption key configuration
2196    ///
2197    /// # Returns
2198    /// `Self` for method chaining
2199    ///
2200    /// # Examples
2201    /// ```
2202    /// use chroma_types::{Schema, Cmek};
2203    ///
2204    /// let schema = Schema::default()
2205    ///     .with_cmek(Cmek::gcp("projects/my-project/locations/us/keyRings/my-ring/cryptoKeys/my-key".to_string()));
2206    /// ```
2207    pub fn with_cmek(mut self, cmek: Cmek) -> Self {
2208        self.cmek = Some(cmek);
2209        self
2210    }
2211
2212    /// Set vector index config globally (applies to #embedding)
2213    fn _set_vector_index_config_builder(&mut self, config: VectorIndexConfig) {
2214        // Update defaults (disabled, just config update)
2215        if let Some(float_list) = &mut self.defaults.float_list {
2216            if let Some(vector_index) = &mut float_list.vector_index {
2217                vector_index.config = config.clone();
2218            }
2219        }
2220
2221        // Update #embedding key (enabled, config update, preserve source_key=#document)
2222        if let Some(embedding_types) = self.keys.get_mut(EMBEDDING_KEY) {
2223            if let Some(float_list) = &mut embedding_types.float_list {
2224                if let Some(vector_index) = &mut float_list.vector_index {
2225                    let mut updated_config = config;
2226                    // Preserve source_key as #document
2227                    updated_config.source_key = Some(DOCUMENT_KEY.to_string());
2228                    vector_index.config = updated_config;
2229                }
2230            }
2231        }
2232    }
2233
2234    /// Set FTS index config globally (applies to #document)
2235    fn _set_fts_index_config_builder(&mut self, config: FtsIndexConfig) {
2236        // Update defaults (disabled, just config update)
2237        if let Some(string) = &mut self.defaults.string {
2238            if let Some(fts_index) = &mut string.fts_index {
2239                fts_index.config = config.clone();
2240            }
2241        }
2242
2243        // Update #document key (enabled, config update)
2244        if let Some(document_types) = self.keys.get_mut(DOCUMENT_KEY) {
2245            if let Some(string) = &mut document_types.string {
2246                if let Some(fts_index) = &mut string.fts_index {
2247                    fts_index.config = config;
2248                }
2249            }
2250        }
2251    }
2252
2253    /// Set index configuration for a specific key
2254    fn _set_index_for_key_builder(
2255        &mut self,
2256        key: &str,
2257        config: IndexConfig,
2258        enabled: bool,
2259    ) -> Result<(), SchemaBuilderError> {
2260        // Check for multiple sparse vector indexes BEFORE getting mutable reference
2261        if enabled && matches!(config, IndexConfig::SparseVector(_)) {
2262            // Find existing sparse vector index
2263            let existing_key = self
2264                .keys
2265                .iter()
2266                .find(|(k, v)| {
2267                    k.as_str() != key
2268                        && v.sparse_vector
2269                            .as_ref()
2270                            .and_then(|sv| sv.sparse_vector_index.as_ref())
2271                            .map(|idx| idx.enabled)
2272                            .unwrap_or(false)
2273                })
2274                .map(|(k, _)| k.clone());
2275
2276            if let Some(existing_key) = existing_key {
2277                return Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key });
2278            }
2279        }
2280
2281        // Get or create ValueTypes for this key
2282        let value_types = self.keys.entry(key.to_string()).or_default();
2283
2284        // Set the appropriate index based on config type
2285        match config {
2286            IndexConfig::Vector(_) => {
2287                return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2288                    key: key.to_string(),
2289                });
2290            }
2291            IndexConfig::Fts(_) => {
2292                return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2293                    key: key.to_string(),
2294                });
2295            }
2296            IndexConfig::SparseVector(cfg) => {
2297                value_types.sparse_vector = Some(SparseVectorValueType {
2298                    sparse_vector_index: Some(SparseVectorIndexType {
2299                        enabled,
2300                        config: cfg,
2301                    }),
2302                });
2303            }
2304            IndexConfig::StringInverted(cfg) => {
2305                if value_types.string.is_none() {
2306                    value_types.string = Some(StringValueType {
2307                        fts_index: None,
2308                        string_inverted_index: None,
2309                    });
2310                }
2311                if let Some(string) = &mut value_types.string {
2312                    string.string_inverted_index = Some(StringInvertedIndexType {
2313                        enabled,
2314                        config: cfg,
2315                    });
2316                }
2317            }
2318            IndexConfig::IntInverted(cfg) => {
2319                value_types.int = Some(IntValueType {
2320                    int_inverted_index: Some(IntInvertedIndexType {
2321                        enabled,
2322                        config: cfg,
2323                    }),
2324                });
2325            }
2326            IndexConfig::FloatInverted(cfg) => {
2327                value_types.float = Some(FloatValueType {
2328                    float_inverted_index: Some(FloatInvertedIndexType {
2329                        enabled,
2330                        config: cfg,
2331                    }),
2332                });
2333            }
2334            IndexConfig::BoolInverted(cfg) => {
2335                value_types.boolean = Some(BoolValueType {
2336                    bool_inverted_index: Some(BoolInvertedIndexType {
2337                        enabled,
2338                        config: cfg,
2339                    }),
2340                });
2341            }
2342        }
2343
2344        Ok(())
2345    }
2346
2347    /// Set index configuration in defaults
2348    fn _set_index_in_defaults_builder(
2349        &mut self,
2350        config: IndexConfig,
2351        enabled: bool,
2352    ) -> Result<(), SchemaBuilderError> {
2353        match config {
2354            IndexConfig::Vector(_) => {
2355                return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2356                    key: "defaults".to_string(),
2357                });
2358            }
2359            IndexConfig::Fts(_) => {
2360                return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2361                    key: "defaults".to_string(),
2362                });
2363            }
2364            IndexConfig::SparseVector(cfg) => {
2365                self.defaults.sparse_vector = Some(SparseVectorValueType {
2366                    sparse_vector_index: Some(SparseVectorIndexType {
2367                        enabled,
2368                        config: cfg,
2369                    }),
2370                });
2371            }
2372            IndexConfig::StringInverted(cfg) => {
2373                if self.defaults.string.is_none() {
2374                    self.defaults.string = Some(StringValueType {
2375                        fts_index: None,
2376                        string_inverted_index: None,
2377                    });
2378                }
2379                if let Some(string) = &mut self.defaults.string {
2380                    string.string_inverted_index = Some(StringInvertedIndexType {
2381                        enabled,
2382                        config: cfg,
2383                    });
2384                }
2385            }
2386            IndexConfig::IntInverted(cfg) => {
2387                self.defaults.int = Some(IntValueType {
2388                    int_inverted_index: Some(IntInvertedIndexType {
2389                        enabled,
2390                        config: cfg,
2391                    }),
2392                });
2393            }
2394            IndexConfig::FloatInverted(cfg) => {
2395                self.defaults.float = Some(FloatValueType {
2396                    float_inverted_index: Some(FloatInvertedIndexType {
2397                        enabled,
2398                        config: cfg,
2399                    }),
2400                });
2401            }
2402            IndexConfig::BoolInverted(cfg) => {
2403                self.defaults.boolean = Some(BoolValueType {
2404                    bool_inverted_index: Some(BoolInvertedIndexType {
2405                        enabled,
2406                        config: cfg,
2407                    }),
2408                });
2409            }
2410        }
2411
2412        Ok(())
2413    }
2414}
2415
2416// ============================================================================
2417// INDEX CONFIGURATION STRUCTURES
2418// ============================================================================
2419
/// Configuration for a vector index.
///
/// Every field is optional: a `None` field is omitted from the serialized form,
/// and unknown fields are rejected on deserialization (`deny_unknown_fields`).
///
/// NOTE(review): nothing in this type prevents `hnsw` and `spann` from both
/// being set; confirm whether exclusivity is enforced elsewhere.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct VectorIndexConfig {
    /// Vector space for similarity calculation (cosine, l2, ip)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub space: Option<Space>,
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// HNSW algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw: Option<HnswIndexConfig>,
    /// SPANN algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub spann: Option<SpannIndexConfig>,
}
2440
/// Configuration for HNSW vector index algorithm parameters
///
/// Every field is optional. A `None` field is omitted from the serialized form
/// and is treated by `is_default` as "not set by the user". Unknown fields are
/// rejected on deserialization (`deny_unknown_fields`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct HnswIndexConfig {
    // Default value: `default_construction_ef()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_construction: Option<usize>,
    // Default value: `default_m()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_neighbors: Option<usize>,
    // Default value: `default_search_ef()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_search: Option<usize>,
    // Not considered by `is_default`, because the value is system-dependent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_threads: Option<usize>,
    // Validated to be at least 2 when set. Default value: `default_batch_size()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub batch_size: Option<usize>,
    // Validated to be at least 2 when set. Default value: `default_sync_threshold()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub sync_threshold: Option<usize>,
    // Default value: `default_resize_factor()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resize_factor: Option<f64>,
}
2463
2464impl HnswIndexConfig {
2465    /// Check if this config has default values
2466    /// None values are considered default (not set by user)
2467    /// Note: We skip num_threads as it's variable based on available_parallelism
2468    pub fn is_default(&self) -> bool {
2469        if let Some(ef_construction) = self.ef_construction {
2470            if ef_construction != default_construction_ef() {
2471                return false;
2472            }
2473        }
2474        if let Some(max_neighbors) = self.max_neighbors {
2475            if max_neighbors != default_m() {
2476                return false;
2477            }
2478        }
2479        if let Some(ef_search) = self.ef_search {
2480            if ef_search != default_search_ef() {
2481                return false;
2482            }
2483        }
2484        if let Some(batch_size) = self.batch_size {
2485            if batch_size != default_batch_size() {
2486                return false;
2487            }
2488        }
2489        if let Some(sync_threshold) = self.sync_threshold {
2490            if sync_threshold != default_sync_threshold() {
2491                return false;
2492            }
2493        }
2494        if let Some(resize_factor) = self.resize_factor {
2495            if resize_factor != default_resize_factor() {
2496                return false;
2497            }
2498        }
2499        // Skip num_threads check as it's system-dependent
2500        true
2501    }
2502}
2503
/// Configuration for SPANN vector index algorithm parameters
///
/// Every field is optional. A `None` field is omitted from the serialized form
/// and is treated by `is_default` as "not set by the user". Unknown fields are
/// rejected on deserialization (`deny_unknown_fields`).
///
/// The `validate` ranges below are inclusive and apply only to fields that are
/// set. Ranges whose `min` equals `max` accept exactly one value.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SpannIndexConfig {
    // At most 128.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 128))]
    pub search_nprobe: Option<u32>,
    // Only 1.0 passes validation.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub search_rng_factor: Option<f32>,
    // Between 5.0 and 10.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub search_rng_epsilon: Option<f32>,
    // At most 8.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub nreplica_count: Option<u32>,
    // Only 1.0 passes validation.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub write_rng_factor: Option<f32>,
    // Between 5.0 and 10.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub write_rng_epsilon: Option<f32>,
    // Between 50 and 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 50, max = 200))]
    pub split_threshold: Option<u32>,
    // At most 1000.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 1000))]
    pub num_samples_kmeans: Option<usize>,
    // Only 100.0 passes validation.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 100.0, max = 100.0))]
    pub initial_lambda: Option<f32>,
    // At most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub reassign_neighbor_count: Option<u32>,
    // Between 25 and 100.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 25, max = 100))]
    pub merge_threshold: Option<u32>,
    // At most 8.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub num_centers_to_merge_to: Option<u32>,
    // At most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub write_nprobe: Option<u32>,
    // At most 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_construction: Option<usize>,
    // At most 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_search: Option<usize>,
    // At most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub max_neighbors: Option<usize>,
}
2558
2559impl SpannIndexConfig {
2560    /// Check if this config has default values
2561    /// None values are considered default (not set by user)
2562    pub fn is_default(&self) -> bool {
2563        if let Some(search_nprobe) = self.search_nprobe {
2564            if search_nprobe != default_search_nprobe() {
2565                return false;
2566            }
2567        }
2568        if let Some(search_rng_factor) = self.search_rng_factor {
2569            if search_rng_factor != default_search_rng_factor() {
2570                return false;
2571            }
2572        }
2573        if let Some(search_rng_epsilon) = self.search_rng_epsilon {
2574            if search_rng_epsilon != default_search_rng_epsilon() {
2575                return false;
2576            }
2577        }
2578        if let Some(nreplica_count) = self.nreplica_count {
2579            if nreplica_count != default_nreplica_count() {
2580                return false;
2581            }
2582        }
2583        if let Some(write_rng_factor) = self.write_rng_factor {
2584            if write_rng_factor != default_write_rng_factor() {
2585                return false;
2586            }
2587        }
2588        if let Some(write_rng_epsilon) = self.write_rng_epsilon {
2589            if write_rng_epsilon != default_write_rng_epsilon() {
2590                return false;
2591            }
2592        }
2593        if let Some(split_threshold) = self.split_threshold {
2594            if split_threshold != default_split_threshold() {
2595                return false;
2596            }
2597        }
2598        if let Some(num_samples_kmeans) = self.num_samples_kmeans {
2599            if num_samples_kmeans != default_num_samples_kmeans() {
2600                return false;
2601            }
2602        }
2603        if let Some(initial_lambda) = self.initial_lambda {
2604            if initial_lambda != default_initial_lambda() {
2605                return false;
2606            }
2607        }
2608        if let Some(reassign_neighbor_count) = self.reassign_neighbor_count {
2609            if reassign_neighbor_count != default_reassign_neighbor_count() {
2610                return false;
2611            }
2612        }
2613        if let Some(merge_threshold) = self.merge_threshold {
2614            if merge_threshold != default_merge_threshold() {
2615                return false;
2616            }
2617        }
2618        if let Some(num_centers_to_merge_to) = self.num_centers_to_merge_to {
2619            if num_centers_to_merge_to != default_num_centers_to_merge_to() {
2620                return false;
2621            }
2622        }
2623        if let Some(write_nprobe) = self.write_nprobe {
2624            if write_nprobe != default_write_nprobe() {
2625                return false;
2626            }
2627        }
2628        if let Some(ef_construction) = self.ef_construction {
2629            if ef_construction != default_construction_ef_spann() {
2630                return false;
2631            }
2632        }
2633        if let Some(ef_search) = self.ef_search {
2634            if ef_search != default_search_ef_spann() {
2635                return false;
2636            }
2637        }
2638        if let Some(max_neighbors) = self.max_neighbors {
2639            if max_neighbors != default_m_spann() {
2640                return false;
2641            }
2642        }
2643        true
2644    }
2645}
2646
/// Configuration for a sparse vector index.
///
/// Every field is optional: a `None` field is omitted from the serialized form,
/// and unknown fields are rejected on deserialization (`deny_unknown_fields`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SparseVectorIndexConfig {
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the sparse vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// Whether this embedding is BM25
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bm25: Option<bool>,
}
2661
/// Configuration for the full-text search (FTS) index.
///
/// Currently has no fields; because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FtsIndexConfig {
    // FTS index typically has no additional parameters
}
2668
/// Configuration for the inverted index on string values.
///
/// Currently has no fields; because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct StringInvertedIndexConfig {
    // String inverted index typically has no additional parameters
}
2675
/// Configuration for the inverted index on integer values.
///
/// Currently has no fields; because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct IntInvertedIndexConfig {
    // Integer inverted index typically has no additional parameters
}
2682
/// Configuration for the inverted index on float values.
///
/// Currently has no fields; because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FloatInvertedIndexConfig {
    // Float inverted index typically has no additional parameters
}
2689
/// Configuration for the inverted index on boolean values.
///
/// Currently has no fields; because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct BoolInvertedIndexConfig {
    // Boolean inverted index typically has no additional parameters
}
2696
2697// ============================================================================
2698// BUILDER PATTERN SUPPORT
2699// ============================================================================
2700
/// Union type for all index configurations (used by builder pattern)
///
/// Each variant wraps the configuration struct of one index kind. Every
/// wrapped type has a `From` implementation, so callers can pass a concrete
/// config with `.into()`.
// NOTE(review): the lint is presumably silenced because `VectorIndexConfig` is
// much larger than the field-less inverted index configs; confirm before
// boxing any variant.
#[derive(Clone, Debug)]
#[allow(clippy::large_enum_variant)]
pub enum IndexConfig {
    /// Vector index configuration
    Vector(VectorIndexConfig),
    /// Sparse vector index configuration
    SparseVector(SparseVectorIndexConfig),
    /// Full-text search index configuration
    Fts(FtsIndexConfig),
    /// Inverted index configuration for string values
    StringInverted(StringInvertedIndexConfig),
    /// Inverted index configuration for integer values
    IntInverted(IntInvertedIndexConfig),
    /// Inverted index configuration for float values
    FloatInverted(FloatInvertedIndexConfig),
    /// Inverted index configuration for boolean values
    BoolInverted(BoolInvertedIndexConfig),
}
2713
2714// Convenience From implementations for ergonomic usage
2715impl From<VectorIndexConfig> for IndexConfig {
2716    fn from(config: VectorIndexConfig) -> Self {
2717        IndexConfig::Vector(config)
2718    }
2719}
2720
2721impl From<SparseVectorIndexConfig> for IndexConfig {
2722    fn from(config: SparseVectorIndexConfig) -> Self {
2723        IndexConfig::SparseVector(config)
2724    }
2725}
2726
2727impl From<FtsIndexConfig> for IndexConfig {
2728    fn from(config: FtsIndexConfig) -> Self {
2729        IndexConfig::Fts(config)
2730    }
2731}
2732
2733impl From<StringInvertedIndexConfig> for IndexConfig {
2734    fn from(config: StringInvertedIndexConfig) -> Self {
2735        IndexConfig::StringInverted(config)
2736    }
2737}
2738
2739impl From<IntInvertedIndexConfig> for IndexConfig {
2740    fn from(config: IntInvertedIndexConfig) -> Self {
2741        IndexConfig::IntInverted(config)
2742    }
2743}
2744
2745impl From<FloatInvertedIndexConfig> for IndexConfig {
2746    fn from(config: FloatInvertedIndexConfig) -> Self {
2747        IndexConfig::FloatInverted(config)
2748    }
2749}
2750
2751impl From<BoolInvertedIndexConfig> for IndexConfig {
2752    fn from(config: BoolInvertedIndexConfig) -> Self {
2753        IndexConfig::BoolInverted(config)
2754    }
2755}
2756
2757impl TryFrom<&InternalCollectionConfiguration> for Schema {
2758    type Error = SchemaError;
2759
2760    fn try_from(config: &InternalCollectionConfiguration) -> Result<Self, Self::Error> {
2761        // Start with a default schema structure
2762        let mut schema = match &config.vector_index {
2763            VectorIndexConfiguration::Hnsw(_) => Schema::new_default(KnnIndex::Hnsw),
2764            VectorIndexConfiguration::Spann(_) => Schema::new_default(KnnIndex::Spann),
2765        };
2766        // Convert vector index configuration
2767        let vector_config = match &config.vector_index {
2768            VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
2769                space: Some(hnsw_config.space.clone()),
2770                embedding_function: config.embedding_function.clone(),
2771                source_key: None,
2772                hnsw: Some(HnswIndexConfig {
2773                    ef_construction: Some(hnsw_config.ef_construction),
2774                    max_neighbors: Some(hnsw_config.max_neighbors),
2775                    ef_search: Some(hnsw_config.ef_search),
2776                    num_threads: Some(hnsw_config.num_threads),
2777                    batch_size: Some(hnsw_config.batch_size),
2778                    sync_threshold: Some(hnsw_config.sync_threshold),
2779                    resize_factor: Some(hnsw_config.resize_factor),
2780                }),
2781                spann: None,
2782            },
2783            VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
2784                space: Some(spann_config.space.clone()),
2785                embedding_function: config.embedding_function.clone(),
2786                source_key: None,
2787                hnsw: None,
2788                spann: Some(SpannIndexConfig {
2789                    search_nprobe: Some(spann_config.search_nprobe),
2790                    search_rng_factor: Some(spann_config.search_rng_factor),
2791                    search_rng_epsilon: Some(spann_config.search_rng_epsilon),
2792                    nreplica_count: Some(spann_config.nreplica_count),
2793                    write_rng_factor: Some(spann_config.write_rng_factor),
2794                    write_rng_epsilon: Some(spann_config.write_rng_epsilon),
2795                    split_threshold: Some(spann_config.split_threshold),
2796                    num_samples_kmeans: Some(spann_config.num_samples_kmeans),
2797                    initial_lambda: Some(spann_config.initial_lambda),
2798                    reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
2799                    merge_threshold: Some(spann_config.merge_threshold),
2800                    num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
2801                    write_nprobe: Some(spann_config.write_nprobe),
2802                    ef_construction: Some(spann_config.ef_construction),
2803                    ef_search: Some(spann_config.ef_search),
2804                    max_neighbors: Some(spann_config.max_neighbors),
2805                }),
2806            },
2807        };
2808
2809        // Update defaults (keep enabled=false, just update the config)
2810        // This serves as the template for any new float_list fields
2811        if let Some(float_list) = &mut schema.defaults.float_list {
2812            if let Some(vector_index) = &mut float_list.vector_index {
2813                vector_index.config = vector_config.clone();
2814            }
2815        }
2816
2817        // Update the vector_index in the existing #embedding key override
2818        // Keep enabled=true (already set by new_default) and update the config
2819        // Set source_key to DOCUMENT_KEY for the embedding key
2820        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
2821            if let Some(float_list) = &mut embedding_types.float_list {
2822                if let Some(vector_index) = &mut float_list.vector_index {
2823                    let mut vector_config = vector_config;
2824                    vector_config.source_key = Some(DOCUMENT_KEY.to_string());
2825                    vector_index.config = vector_config;
2826                }
2827            }
2828        }
2829
2830        Ok(schema)
2831    }
2832}
2833
2834#[cfg(test)]
2835mod tests {
2836    use super::*;
2837    use crate::hnsw_configuration::Space;
2838    use crate::metadata::SparseVector;
2839    use crate::{
2840        EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
2841    };
2842    use serde_json::json;
2843
2844    #[test]
2845    fn test_reconcile_with_defaults_none_user_schema() {
2846        // Test that when no user schema is provided, we get the default schema
2847        let result = Schema::reconcile_with_defaults(None, KnnIndex::Spann).unwrap();
2848        let expected = Schema::new_default(KnnIndex::Spann);
2849        assert_eq!(result, expected);
2850    }
2851
2852    #[test]
2853    fn test_reconcile_with_defaults_empty_user_schema() {
2854        // Test merging with an empty user schema
2855        let user_schema = Schema {
2856            defaults: ValueTypes::default(),
2857            keys: HashMap::new(),
2858            cmek: None,
2859            source_attached_function_id: None,
2860        };
2861
2862        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
2863        let expected = Schema::new_default(KnnIndex::Spann);
2864        assert_eq!(result, expected);
2865    }
2866
2867    #[test]
2868    fn test_reconcile_with_defaults_user_overrides_string_enabled() {
2869        // Test that user can override string inverted index enabled state
2870        let mut user_schema = Schema {
2871            defaults: ValueTypes::default(),
2872            keys: HashMap::new(),
2873            cmek: None,
2874            source_attached_function_id: None,
2875        };
2876
2877        user_schema.defaults.string = Some(StringValueType {
2878            string_inverted_index: Some(StringInvertedIndexType {
2879                enabled: false, // Override default (true) to false
2880                config: StringInvertedIndexConfig {},
2881            }),
2882            fts_index: None,
2883        });
2884
2885        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
2886
2887        // Check that the user override took precedence
2888        assert!(
2889            !result
2890                .defaults
2891                .string
2892                .as_ref()
2893                .unwrap()
2894                .string_inverted_index
2895                .as_ref()
2896                .unwrap()
2897                .enabled
2898        );
2899        // Check that other defaults are still present
2900        assert!(result.defaults.float.is_some());
2901        assert!(result.defaults.int.is_some());
2902    }
2903
2904    #[test]
2905    fn test_reconcile_with_defaults_user_overrides_vector_config() {
2906        // Test field-level merging for vector configurations
2907        let mut user_schema = Schema {
2908            defaults: ValueTypes::default(),
2909            keys: HashMap::new(),
2910            cmek: None,
2911            source_attached_function_id: None,
2912        };
2913
2914        user_schema.defaults.float_list = Some(FloatListValueType {
2915            vector_index: Some(VectorIndexType {
2916                enabled: true, // Enable vector index (default is false)
2917                config: VectorIndexConfig {
2918                    space: Some(Space::L2),                     // Override default space
2919                    embedding_function: None,                   // Will use default
2920                    source_key: Some("custom_key".to_string()), // Override default
2921                    hnsw: Some(HnswIndexConfig {
2922                        ef_construction: Some(500), // Override default
2923                        max_neighbors: None,        // Will use default
2924                        ef_search: None,            // Will use default
2925                        num_threads: None,
2926                        batch_size: None,
2927                        sync_threshold: None,
2928                        resize_factor: None,
2929                    }),
2930                    spann: None,
2931                },
2932            }),
2933        });
2934
2935        // Use HNSW defaults for this test so we have HNSW config to merge with
2936        let result = {
2937            let default_schema = Schema::new_default(KnnIndex::Hnsw);
2938            let merged_defaults = Schema::merge_value_types(
2939                &default_schema.defaults,
2940                &user_schema.defaults,
2941                KnnIndex::Hnsw,
2942            )
2943            .unwrap();
2944            let mut merged_keys = default_schema.keys.clone();
2945            for (key, user_value_types) in user_schema.keys {
2946                if let Some(default_value_types) = merged_keys.get(&key) {
2947                    let merged_value_types = Schema::merge_value_types(
2948                        default_value_types,
2949                        &user_value_types,
2950                        KnnIndex::Hnsw,
2951                    )
2952                    .unwrap();
2953                    merged_keys.insert(key, merged_value_types);
2954                } else {
2955                    merged_keys.insert(key, user_value_types);
2956                }
2957            }
2958            Schema {
2959                defaults: merged_defaults,
2960                keys: merged_keys,
2961                cmek: None,
2962                source_attached_function_id: None,
2963            }
2964        };
2965
2966        let vector_config = &result
2967            .defaults
2968            .float_list
2969            .as_ref()
2970            .unwrap()
2971            .vector_index
2972            .as_ref()
2973            .unwrap()
2974            .config;
2975
2976        // Check user overrides took precedence
2977        assert_eq!(vector_config.space, Some(Space::L2));
2978        assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
2979        assert_eq!(
2980            vector_config.hnsw.as_ref().unwrap().ef_construction,
2981            Some(500)
2982        );
2983
2984        // Check defaults were preserved for unspecified fields
2985        assert_eq!(vector_config.embedding_function, None);
2986        // Since user provided HNSW config, the default max_neighbors should be merged in
2987        assert_eq!(
2988            vector_config.hnsw.as_ref().unwrap().max_neighbors,
2989            Some(default_m())
2990        );
2991    }
2992
2993    #[test]
2994    fn test_reconcile_with_defaults_keys() {
2995        // Test that key overrides are properly merged
2996        let mut user_schema = Schema {
2997            defaults: ValueTypes::default(),
2998            keys: HashMap::new(),
2999            cmek: None,
3000            source_attached_function_id: None,
3001        };
3002
3003        // Add a custom key override
3004        let custom_key_types = ValueTypes {
3005            string: Some(StringValueType {
3006                fts_index: Some(FtsIndexType {
3007                    enabled: true,
3008                    config: FtsIndexConfig {},
3009                }),
3010                string_inverted_index: Some(StringInvertedIndexType {
3011                    enabled: false,
3012                    config: StringInvertedIndexConfig {},
3013                }),
3014            }),
3015            ..Default::default()
3016        };
3017        user_schema
3018            .keys
3019            .insert("custom_key".to_string(), custom_key_types);
3020
3021        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3022
3023        // Check that default key overrides are preserved
3024        assert!(result.keys.contains_key(EMBEDDING_KEY));
3025        assert!(result.keys.contains_key(DOCUMENT_KEY));
3026
3027        // Check that user key override was added
3028        assert!(result.keys.contains_key("custom_key"));
3029        let custom_override = result.keys.get("custom_key").unwrap();
3030        assert!(
3031            custom_override
3032                .string
3033                .as_ref()
3034                .unwrap()
3035                .fts_index
3036                .as_ref()
3037                .unwrap()
3038                .enabled
3039        );
3040    }
3041
3042    #[test]
3043    fn test_reconcile_with_defaults_override_existing_key() {
3044        // Test overriding an existing key override (like #embedding)
3045        let mut user_schema = Schema {
3046            defaults: ValueTypes::default(),
3047            keys: HashMap::new(),
3048            cmek: None,
3049            source_attached_function_id: None,
3050        };
3051
3052        // Override the #embedding key with custom settings
3053        let embedding_override = ValueTypes {
3054            float_list: Some(FloatListValueType {
3055                vector_index: Some(VectorIndexType {
3056                    enabled: false, // Override default enabled=true to false
3057                    config: VectorIndexConfig {
3058                        space: Some(Space::Ip), // Override default space
3059                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3060                        source_key: Some("custom_embedding_key".to_string()),
3061                        hnsw: None,
3062                        spann: None,
3063                    },
3064                }),
3065            }),
3066            ..Default::default()
3067        };
3068        user_schema
3069            .keys
3070            .insert(EMBEDDING_KEY.to_string(), embedding_override);
3071
3072        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3073
3074        let embedding_config = result.keys.get(EMBEDDING_KEY).unwrap();
3075        let vector_config = &embedding_config
3076            .float_list
3077            .as_ref()
3078            .unwrap()
3079            .vector_index
3080            .as_ref()
3081            .unwrap();
3082
3083        // Check user overrides took precedence
3084        assert!(!vector_config.enabled);
3085        assert_eq!(vector_config.config.space, Some(Space::Ip));
3086        assert_eq!(
3087            vector_config.config.source_key,
3088            Some("custom_embedding_key".to_string())
3089        );
3090    }
3091
3092    #[test]
3093    fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
3094        let collection_config = InternalCollectionConfiguration {
3095            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
3096                space: Space::Cosine,
3097                ef_construction: 128,
3098                ef_search: 96,
3099                max_neighbors: 42,
3100                num_threads: 8,
3101                resize_factor: 1.5,
3102                sync_threshold: 2_000,
3103                batch_size: 256,
3104            }),
3105            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
3106                EmbeddingFunctionNewConfiguration {
3107                    name: "custom".to_string(),
3108                    config: json!({"alpha": 1}),
3109                },
3110            )),
3111        };
3112
3113        let schema = Schema::try_from(&collection_config).unwrap();
3114        let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
3115
3116        assert_eq!(reconstructed, collection_config);
3117    }
3118
3119    #[test]
3120    fn test_convert_schema_to_collection_config_spann_roundtrip() {
3121        let spann_config = InternalSpannConfiguration {
3122            space: Space::Cosine,
3123            search_nprobe: 11,
3124            search_rng_factor: 1.7,
3125            write_nprobe: 5,
3126            nreplica_count: 3,
3127            split_threshold: 150,
3128            merge_threshold: 80,
3129            ef_construction: 120,
3130            ef_search: 90,
3131            max_neighbors: 40,
3132            ..Default::default()
3133        };
3134
3135        let collection_config = InternalCollectionConfiguration {
3136            vector_index: VectorIndexConfiguration::Spann(spann_config.clone()),
3137            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
3138                EmbeddingFunctionNewConfiguration {
3139                    name: "custom".to_string(),
3140                    config: json!({"beta": true}),
3141                },
3142            )),
3143        };
3144
3145        let schema = Schema::try_from(&collection_config).unwrap();
3146        let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
3147
3148        assert_eq!(reconstructed, collection_config);
3149    }
3150
3151    #[test]
3152    fn test_convert_schema_to_collection_config_rejects_mixed_index() {
3153        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3154        if let Some(embedding) = schema.keys.get_mut(EMBEDDING_KEY) {
3155            if let Some(float_list) = &mut embedding.float_list {
3156                if let Some(vector_index) = &mut float_list.vector_index {
3157                    vector_index.config.spann = Some(SpannIndexConfig {
3158                        search_nprobe: Some(1),
3159                        search_rng_factor: Some(1.0),
3160                        search_rng_epsilon: Some(0.1),
3161                        nreplica_count: Some(1),
3162                        write_rng_factor: Some(1.0),
3163                        write_rng_epsilon: Some(0.1),
3164                        split_threshold: Some(100),
3165                        num_samples_kmeans: Some(10),
3166                        initial_lambda: Some(0.5),
3167                        reassign_neighbor_count: Some(10),
3168                        merge_threshold: Some(50),
3169                        num_centers_to_merge_to: Some(3),
3170                        write_nprobe: Some(1),
3171                        ef_construction: Some(50),
3172                        ef_search: Some(40),
3173                        max_neighbors: Some(20),
3174                    });
3175                }
3176            }
3177        }
3178
3179        let result = InternalCollectionConfiguration::try_from(&schema);
3180        assert!(result.is_err());
3181    }
3182
3183    #[test]
3184    fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
3185        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3186        let before = schema.clone();
3187        let modified = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);
3188        assert!(!modified);
3189        assert_eq!(schema, before);
3190    }
3191
3192    #[test]
3193    fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
3194        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3195        assert!(!schema.keys.contains_key("custom_field"));
3196
3197        let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3198
3199        assert!(modified);
3200        let entry = schema
3201            .keys
3202            .get("custom_field")
3203            .expect("expected new key override to be inserted");
3204        assert_eq!(entry.boolean, schema.defaults.boolean);
3205        assert!(entry.string.is_none());
3206        assert!(entry.int.is_none());
3207        assert!(entry.float.is_none());
3208        assert!(entry.float_list.is_none());
3209        assert!(entry.sparse_vector.is_none());
3210    }
3211
3212    #[test]
3213    fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
3214        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3215        let initial_len = schema.keys.len();
3216        schema.keys.insert(
3217            "custom_field".to_string(),
3218            ValueTypes {
3219                string: schema.defaults.string.clone(),
3220                ..Default::default()
3221            },
3222        );
3223
3224        let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3225
3226        assert!(modified);
3227        assert_eq!(schema.keys.len(), initial_len + 1);
3228        let entry = schema
3229            .keys
3230            .get("custom_field")
3231            .expect("expected key override to exist after ensure call");
3232        assert!(entry.string.is_some());
3233        assert_eq!(entry.boolean, schema.defaults.boolean);
3234    }
3235
3236    #[test]
3237    fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
3238        let schema = Schema::new_default(KnnIndex::Spann);
3239        let result = schema.is_knn_key_indexing_enabled(
3240            "custom_sparse",
3241            &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap()),
3242        );
3243
3244        let err = result.expect_err("expected indexing disabled error");
3245        match err {
3246            FilterValidationError::IndexingDisabled { key, value_type } => {
3247                assert_eq!(key, "custom_sparse");
3248                assert_eq!(value_type, crate::metadata::MetadataValueType::SparseVector);
3249            }
3250            other => panic!("unexpected error variant: {other:?}"),
3251        }
3252    }
3253
3254    #[test]
3255    fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
3256        let mut schema = Schema::new_default(KnnIndex::Spann);
3257        schema.keys.insert(
3258            "sparse_enabled".to_string(),
3259            ValueTypes {
3260                sparse_vector: Some(SparseVectorValueType {
3261                    sparse_vector_index: Some(SparseVectorIndexType {
3262                        enabled: true,
3263                        config: SparseVectorIndexConfig {
3264                            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3265                            source_key: None,
3266                            bm25: None,
3267                        },
3268                    }),
3269                }),
3270                ..Default::default()
3271            },
3272        );
3273
3274        let result = schema.is_knn_key_indexing_enabled(
3275            "sparse_enabled",
3276            &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap()),
3277        );
3278
3279        assert!(result.is_ok());
3280    }
3281
3282    #[test]
3283    fn test_is_knn_key_indexing_enabled_dense_succeeds() {
3284        let schema = Schema::new_default(KnnIndex::Spann);
3285        let result = schema.is_knn_key_indexing_enabled(
3286            EMBEDDING_KEY,
3287            &QueryVector::Dense(vec![0.1_f32, 0.2_f32]),
3288        );
3289
3290        assert!(result.is_ok());
3291    }
3292
3293    #[test]
3294    fn test_merge_hnsw_configs_field_level() {
3295        // Test field-level merging for HNSW configurations
3296        let default_hnsw = HnswIndexConfig {
3297            ef_construction: Some(200),
3298            max_neighbors: Some(16),
3299            ef_search: Some(10),
3300            num_threads: Some(4),
3301            batch_size: Some(100),
3302            sync_threshold: Some(1000),
3303            resize_factor: Some(1.2),
3304        };
3305
3306        let user_hnsw = HnswIndexConfig {
3307            ef_construction: Some(300), // Override
3308            max_neighbors: None,        // Will use default
3309            ef_search: Some(20),        // Override
3310            num_threads: None,          // Will use default
3311            batch_size: None,           // Will use default
3312            sync_threshold: Some(2000), // Override
3313            resize_factor: None,        // Will use default
3314        };
3315
3316        let result = Schema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();
3317
3318        // Check user overrides
3319        assert_eq!(result.ef_construction, Some(300));
3320        assert_eq!(result.ef_search, Some(20));
3321        assert_eq!(result.sync_threshold, Some(2000));
3322
3323        // Check defaults preserved
3324        assert_eq!(result.max_neighbors, Some(16));
3325        assert_eq!(result.num_threads, Some(4));
3326        assert_eq!(result.batch_size, Some(100));
3327        assert_eq!(result.resize_factor, Some(1.2));
3328    }
3329
3330    #[test]
3331    fn test_merge_spann_configs_field_level() {
3332        // Test field-level merging for SPANN configurations
3333        let default_spann = SpannIndexConfig {
3334            search_nprobe: Some(10),
3335            search_rng_factor: Some(1.0),  // Must be exactly 1.0
3336            search_rng_epsilon: Some(7.0), // Must be 5.0-10.0
3337            nreplica_count: Some(3),
3338            write_rng_factor: Some(1.0),  // Must be exactly 1.0
3339            write_rng_epsilon: Some(6.0), // Must be 5.0-10.0
3340            split_threshold: Some(100),   // Must be 50-200
3341            num_samples_kmeans: Some(100),
3342            initial_lambda: Some(100.0), // Must be exactly 100.0
3343            reassign_neighbor_count: Some(50),
3344            merge_threshold: Some(50),        // Must be 25-100
3345            num_centers_to_merge_to: Some(4), // Max is 8
3346            write_nprobe: Some(5),
3347            ef_construction: Some(100),
3348            ef_search: Some(10),
3349            max_neighbors: Some(16),
3350        };
3351
3352        let user_spann = SpannIndexConfig {
3353            search_nprobe: Some(20),       // Override
3354            search_rng_factor: None,       // Will use default
3355            search_rng_epsilon: Some(8.0), // Override (valid: 5.0-10.0)
3356            nreplica_count: None,          // Will use default
3357            write_rng_factor: None,
3358            write_rng_epsilon: None,
3359            split_threshold: Some(150), // Override (valid: 50-200)
3360            num_samples_kmeans: None,
3361            initial_lambda: None,
3362            reassign_neighbor_count: None,
3363            merge_threshold: None,
3364            num_centers_to_merge_to: None,
3365            write_nprobe: None,
3366            ef_construction: None,
3367            ef_search: None,
3368            max_neighbors: None,
3369        };
3370
3371        let result = Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann)).unwrap();
3372
3373        // Check user overrides
3374        assert_eq!(result.search_nprobe, Some(20));
3375        assert_eq!(result.search_rng_epsilon, Some(8.0));
3376        assert_eq!(result.split_threshold, Some(150));
3377
3378        // Check defaults preserved
3379        assert_eq!(result.search_rng_factor, Some(1.0));
3380        assert_eq!(result.nreplica_count, Some(3));
3381        assert_eq!(result.initial_lambda, Some(100.0));
3382    }
3383
3384    #[test]
3385    fn test_spann_index_config_into_internal_configuration() {
3386        let config = SpannIndexConfig {
3387            search_nprobe: Some(33),
3388            search_rng_factor: Some(1.2),
3389            search_rng_epsilon: None,
3390            nreplica_count: None,
3391            write_rng_factor: Some(1.5),
3392            write_rng_epsilon: None,
3393            split_threshold: Some(75),
3394            num_samples_kmeans: None,
3395            initial_lambda: Some(0.9),
3396            reassign_neighbor_count: Some(40),
3397            merge_threshold: None,
3398            num_centers_to_merge_to: Some(4),
3399            write_nprobe: Some(60),
3400            ef_construction: Some(180),
3401            ef_search: Some(170),
3402            max_neighbors: Some(32),
3403        };
3404
3405        let with_space: InternalSpannConfiguration = (Some(&Space::Cosine), &config).into();
3406        assert_eq!(with_space.space, Space::Cosine);
3407        assert_eq!(with_space.search_nprobe, 33);
3408        assert_eq!(with_space.search_rng_factor, 1.2);
3409        assert_eq!(with_space.search_rng_epsilon, default_search_rng_epsilon());
3410        assert_eq!(with_space.write_rng_factor, 1.5);
3411        assert_eq!(with_space.write_nprobe, 60);
3412        assert_eq!(with_space.ef_construction, 180);
3413        assert_eq!(with_space.ef_search, 170);
3414        assert_eq!(with_space.max_neighbors, 32);
3415        assert_eq!(with_space.merge_threshold, default_merge_threshold());
3416
3417        let default_space_config: InternalSpannConfiguration = (None, &config).into();
3418        assert_eq!(default_space_config.space, default_space());
3419    }
3420
3421    #[test]
3422    fn test_merge_string_type_combinations() {
3423        // Test all combinations of default and user StringValueType
3424
3425        // Both Some - should merge
3426        let default = StringValueType {
3427            string_inverted_index: Some(StringInvertedIndexType {
3428                enabled: true,
3429                config: StringInvertedIndexConfig {},
3430            }),
3431            fts_index: Some(FtsIndexType {
3432                enabled: false,
3433                config: FtsIndexConfig {},
3434            }),
3435        };
3436
3437        let user = StringValueType {
3438            string_inverted_index: Some(StringInvertedIndexType {
3439                enabled: false, // Override
3440                config: StringInvertedIndexConfig {},
3441            }),
3442            fts_index: None, // Will use default
3443        };
3444
3445        let result = Schema::merge_string_type(Some(&default), Some(&user))
3446            .unwrap()
3447            .unwrap();
3448        assert!(!result.string_inverted_index.as_ref().unwrap().enabled); // User override
3449        assert!(!result.fts_index.as_ref().unwrap().enabled); // Default preserved
3450
3451        // Default Some, User None - should return default
3452        let result = Schema::merge_string_type(Some(&default), None)
3453            .unwrap()
3454            .unwrap();
3455        assert!(result.string_inverted_index.as_ref().unwrap().enabled);
3456
3457        // Default None, User Some - should return user
3458        let result = Schema::merge_string_type(None, Some(&user))
3459            .unwrap()
3460            .unwrap();
3461        assert!(!result.string_inverted_index.as_ref().unwrap().enabled);
3462
3463        // Both None - should return None
3464        let result = Schema::merge_string_type(None, None).unwrap();
3465        assert!(result.is_none());
3466    }
3467
3468    #[test]
3469    fn test_merge_vector_index_config_comprehensive() {
3470        // Test comprehensive vector index config merging
3471        let default_config = VectorIndexConfig {
3472            space: Some(Space::Cosine),
3473            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3474            source_key: Some("default_key".to_string()),
3475            hnsw: Some(HnswIndexConfig {
3476                ef_construction: Some(200),
3477                max_neighbors: Some(16),
3478                ef_search: Some(10),
3479                num_threads: Some(4),
3480                batch_size: Some(100),
3481                sync_threshold: Some(1000),
3482                resize_factor: Some(1.2),
3483            }),
3484            spann: None,
3485        };
3486
3487        let user_config = VectorIndexConfig {
3488            space: Some(Space::L2),                   // Override
3489            embedding_function: None,                 // Will use default
3490            source_key: Some("user_key".to_string()), // Override
3491            hnsw: Some(HnswIndexConfig {
3492                ef_construction: Some(300), // Override
3493                max_neighbors: None,        // Will use default
3494                ef_search: None,            // Will use default
3495                num_threads: None,
3496                batch_size: None,
3497                sync_threshold: None,
3498                resize_factor: None,
3499            }),
3500            spann: Some(SpannIndexConfig {
3501                search_nprobe: Some(15),
3502                search_rng_factor: None,
3503                search_rng_epsilon: None,
3504                nreplica_count: None,
3505                write_rng_factor: None,
3506                write_rng_epsilon: None,
3507                split_threshold: None,
3508                num_samples_kmeans: None,
3509                initial_lambda: None,
3510                reassign_neighbor_count: None,
3511                merge_threshold: None,
3512                num_centers_to_merge_to: None,
3513                write_nprobe: None,
3514                ef_construction: None,
3515                ef_search: None,
3516                max_neighbors: None,
3517            }), // Add SPANN config
3518        };
3519
3520        let result =
3521            Schema::merge_vector_index_config(&default_config, &user_config, KnnIndex::Hnsw);
3522
3523        // Check field-level merging
3524        assert_eq!(result.space, Some(Space::L2)); // User override
3525        assert_eq!(
3526            result.embedding_function,
3527            Some(EmbeddingFunctionConfiguration::Legacy)
3528        ); // Default preserved
3529        assert_eq!(result.source_key, Some("user_key".to_string())); // User override
3530
3531        // Check HNSW merging
3532        assert_eq!(result.hnsw.as_ref().unwrap().ef_construction, Some(300)); // User override
3533        assert_eq!(result.hnsw.as_ref().unwrap().max_neighbors, Some(16)); // Default preserved
3534
3535        // Check SPANN is not present, since merging in the context of HNSW
3536        assert!(result.spann.is_none());
3537    }
3538
3539    #[test]
3540    fn test_merge_sparse_vector_index_config() {
3541        // Test sparse vector index config merging
3542        let default_config = SparseVectorIndexConfig {
3543            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3544            source_key: Some("default_sparse_key".to_string()),
3545            bm25: None,
3546        };
3547
3548        let user_config = SparseVectorIndexConfig {
3549            embedding_function: None,                        // Will use default
3550            source_key: Some("user_sparse_key".to_string()), // Override
3551            bm25: None,
3552        };
3553
3554        let result = Schema::merge_sparse_vector_index_config(&default_config, &user_config);
3555
3556        // Check user override
3557        assert_eq!(result.source_key, Some("user_sparse_key".to_string()));
3558        // Check default preserved
3559        assert_eq!(
3560            result.embedding_function,
3561            Some(EmbeddingFunctionConfiguration::Legacy)
3562        );
3563    }
3564
3565    #[test]
3566    fn test_complex_nested_merging_scenario() {
3567        // Test a complex scenario with multiple levels of merging
3568        let mut user_schema = Schema {
3569            defaults: ValueTypes::default(),
3570            keys: HashMap::new(),
3571            cmek: None,
3572            source_attached_function_id: None,
3573        };
3574
3575        // Set up complex user defaults
3576        user_schema.defaults.string = Some(StringValueType {
3577            string_inverted_index: Some(StringInvertedIndexType {
3578                enabled: false,
3579                config: StringInvertedIndexConfig {},
3580            }),
3581            fts_index: Some(FtsIndexType {
3582                enabled: true,
3583                config: FtsIndexConfig {},
3584            }),
3585        });
3586
3587        user_schema.defaults.float_list = Some(FloatListValueType {
3588            vector_index: Some(VectorIndexType {
3589                enabled: true,
3590                config: VectorIndexConfig {
3591                    space: Some(Space::Ip),
3592                    embedding_function: None, // Will use default
3593                    source_key: Some("custom_vector_key".to_string()),
3594                    hnsw: Some(HnswIndexConfig {
3595                        ef_construction: Some(400),
3596                        max_neighbors: Some(32),
3597                        ef_search: None, // Will use default
3598                        num_threads: None,
3599                        batch_size: None,
3600                        sync_threshold: None,
3601                        resize_factor: None,
3602                    }),
3603                    spann: None,
3604                },
3605            }),
3606        });
3607
3608        // Set up key overrides
3609        let custom_key_override = ValueTypes {
3610            string: Some(StringValueType {
3611                fts_index: Some(FtsIndexType {
3612                    enabled: true,
3613                    config: FtsIndexConfig {},
3614                }),
3615                string_inverted_index: None,
3616            }),
3617            ..Default::default()
3618        };
3619        user_schema
3620            .keys
3621            .insert("custom_field".to_string(), custom_key_override);
3622
3623        // Use HNSW defaults for this test so we have HNSW config to merge with
3624        let result = {
3625            let default_schema = Schema::new_default(KnnIndex::Hnsw);
3626            let merged_defaults = Schema::merge_value_types(
3627                &default_schema.defaults,
3628                &user_schema.defaults,
3629                KnnIndex::Hnsw,
3630            )
3631            .unwrap();
3632            let mut merged_keys = default_schema.keys.clone();
3633            for (key, user_value_types) in user_schema.keys {
3634                if let Some(default_value_types) = merged_keys.get(&key) {
3635                    let merged_value_types = Schema::merge_value_types(
3636                        default_value_types,
3637                        &user_value_types,
3638                        KnnIndex::Hnsw,
3639                    )
3640                    .unwrap();
3641                    merged_keys.insert(key, merged_value_types);
3642                } else {
3643                    merged_keys.insert(key, user_value_types);
3644                }
3645            }
3646            Schema {
3647                defaults: merged_defaults,
3648                keys: merged_keys,
3649                cmek: None,
3650                source_attached_function_id: None,
3651            }
3652        };
3653
3654        // Verify complex merging worked correctly
3655
3656        // Check defaults merging
3657        assert!(
3658            !result
3659                .defaults
3660                .string
3661                .as_ref()
3662                .unwrap()
3663                .string_inverted_index
3664                .as_ref()
3665                .unwrap()
3666                .enabled
3667        );
3668        assert!(
3669            result
3670                .defaults
3671                .string
3672                .as_ref()
3673                .unwrap()
3674                .fts_index
3675                .as_ref()
3676                .unwrap()
3677                .enabled
3678        );
3679
3680        let vector_config = &result
3681            .defaults
3682            .float_list
3683            .as_ref()
3684            .unwrap()
3685            .vector_index
3686            .as_ref()
3687            .unwrap()
3688            .config;
3689        assert_eq!(vector_config.space, Some(Space::Ip));
3690        assert_eq!(vector_config.embedding_function, None); // Default preserved
3691        assert_eq!(
3692            vector_config.source_key,
3693            Some("custom_vector_key".to_string())
3694        );
3695        assert_eq!(
3696            vector_config.hnsw.as_ref().unwrap().ef_construction,
3697            Some(400)
3698        );
3699        assert_eq!(vector_config.hnsw.as_ref().unwrap().max_neighbors, Some(32));
3700        assert_eq!(
3701            vector_config.hnsw.as_ref().unwrap().ef_search,
3702            Some(default_search_ef())
3703        ); // Default preserved
3704
3705        // Check key overrides
3706        assert!(result.keys.contains_key(EMBEDDING_KEY)); // Default preserved
3707        assert!(result.keys.contains_key(DOCUMENT_KEY)); // Default preserved
3708        assert!(result.keys.contains_key("custom_field")); // User added
3709
3710        let custom_override = result.keys.get("custom_field").unwrap();
3711        assert!(
3712            custom_override
3713                .string
3714                .as_ref()
3715                .unwrap()
3716                .fts_index
3717                .as_ref()
3718                .unwrap()
3719                .enabled
3720        );
3721        assert!(custom_override
3722            .string
3723            .as_ref()
3724            .unwrap()
3725            .string_inverted_index
3726            .is_none());
3727    }
3728
3729    #[test]
3730    fn test_reconcile_with_collection_config_default_config() {
3731        // Test that when collection config is default, schema is returned as-is
3732        let collection_config = InternalCollectionConfiguration::default_hnsw();
3733        let schema = Schema::try_from(&collection_config).unwrap();
3734
3735        let result =
3736            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
3737                .unwrap();
3738        assert_eq!(result, schema);
3739    }
3740
3741    // Test all 8 cases of double default scenarios
3742    #[test]
3743    fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_hnsw() {
3744        let collection_config = InternalCollectionConfiguration::default_hnsw();
3745        let schema = Schema::new_default(KnnIndex::Hnsw);
3746        let result =
3747            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
3748                .unwrap();
3749
3750        // Should create new schema with default_knn_index (Hnsw)
3751        assert!(result.defaults.float_list.is_some());
3752        assert!(result
3753            .defaults
3754            .float_list
3755            .as_ref()
3756            .unwrap()
3757            .vector_index
3758            .as_ref()
3759            .unwrap()
3760            .config
3761            .hnsw
3762            .is_some());
3763        assert!(result
3764            .defaults
3765            .float_list
3766            .as_ref()
3767            .unwrap()
3768            .vector_index
3769            .as_ref()
3770            .unwrap()
3771            .config
3772            .spann
3773            .is_none());
3774    }
3775
3776    #[test]
3777    fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_spann() {
3778        let collection_config = InternalCollectionConfiguration::default_hnsw();
3779        let schema = Schema::new_default(KnnIndex::Hnsw);
3780        let result =
3781            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
3782                .unwrap();
3783
3784        // Should create new schema with default_knn_index (Spann)
3785        assert!(result.defaults.float_list.is_some());
3786        assert!(result
3787            .defaults
3788            .float_list
3789            .as_ref()
3790            .unwrap()
3791            .vector_index
3792            .as_ref()
3793            .unwrap()
3794            .config
3795            .spann
3796            .is_some());
3797        assert!(result
3798            .defaults
3799            .float_list
3800            .as_ref()
3801            .unwrap()
3802            .vector_index
3803            .as_ref()
3804            .unwrap()
3805            .config
3806            .hnsw
3807            .is_none());
3808    }
3809
3810    #[test]
3811    fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_hnsw() {
3812        let collection_config = InternalCollectionConfiguration::default_hnsw();
3813        let schema = Schema::new_default(KnnIndex::Spann);
3814        let result =
3815            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
3816                .unwrap();
3817
3818        // Should create new schema with default_knn_index (Hnsw)
3819        assert!(result.defaults.float_list.is_some());
3820        assert!(result
3821            .defaults
3822            .float_list
3823            .as_ref()
3824            .unwrap()
3825            .vector_index
3826            .as_ref()
3827            .unwrap()
3828            .config
3829            .hnsw
3830            .is_some());
3831        assert!(result
3832            .defaults
3833            .float_list
3834            .as_ref()
3835            .unwrap()
3836            .vector_index
3837            .as_ref()
3838            .unwrap()
3839            .config
3840            .spann
3841            .is_none());
3842    }
3843
3844    #[test]
3845    fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_spann() {
3846        let collection_config = InternalCollectionConfiguration::default_hnsw();
3847        let schema = Schema::new_default(KnnIndex::Spann);
3848        let result =
3849            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
3850                .unwrap();
3851
3852        // Should create new schema with default_knn_index (Spann)
3853        assert!(result.defaults.float_list.is_some());
3854        assert!(result
3855            .defaults
3856            .float_list
3857            .as_ref()
3858            .unwrap()
3859            .vector_index
3860            .as_ref()
3861            .unwrap()
3862            .config
3863            .spann
3864            .is_some());
3865        assert!(result
3866            .defaults
3867            .float_list
3868            .as_ref()
3869            .unwrap()
3870            .vector_index
3871            .as_ref()
3872            .unwrap()
3873            .config
3874            .hnsw
3875            .is_none());
3876    }
3877
3878    #[test]
3879    fn test_reconcile_double_default_spann_config_spann_schema_default_knn_hnsw() {
3880        let collection_config = InternalCollectionConfiguration::default_spann();
3881        let schema = Schema::new_default(KnnIndex::Spann);
3882        let result =
3883            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
3884                .unwrap();
3885
3886        // Should create new schema with default_knn_index (Hnsw)
3887        assert!(result.defaults.float_list.is_some());
3888        assert!(result
3889            .defaults
3890            .float_list
3891            .as_ref()
3892            .unwrap()
3893            .vector_index
3894            .as_ref()
3895            .unwrap()
3896            .config
3897            .hnsw
3898            .is_some());
3899        assert!(result
3900            .defaults
3901            .float_list
3902            .as_ref()
3903            .unwrap()
3904            .vector_index
3905            .as_ref()
3906            .unwrap()
3907            .config
3908            .spann
3909            .is_none());
3910    }
3911
3912    #[test]
3913    fn test_reconcile_double_default_spann_config_spann_schema_default_knn_spann() {
3914        let collection_config = InternalCollectionConfiguration::default_spann();
3915        let schema = Schema::new_default(KnnIndex::Spann);
3916        let result =
3917            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
3918                .unwrap();
3919
3920        // Should create new schema with default_knn_index (Spann)
3921        assert!(result.defaults.float_list.is_some());
3922        assert!(result
3923            .defaults
3924            .float_list
3925            .as_ref()
3926            .unwrap()
3927            .vector_index
3928            .as_ref()
3929            .unwrap()
3930            .config
3931            .spann
3932            .is_some());
3933        assert!(result
3934            .defaults
3935            .float_list
3936            .as_ref()
3937            .unwrap()
3938            .vector_index
3939            .as_ref()
3940            .unwrap()
3941            .config
3942            .hnsw
3943            .is_none());
3944        // Defaults should have source_key=None
3945        assert_eq!(
3946            result
3947                .defaults
3948                .float_list
3949                .as_ref()
3950                .unwrap()
3951                .vector_index
3952                .as_ref()
3953                .unwrap()
3954                .config
3955                .source_key,
3956            None
3957        );
3958    }
3959
3960    #[test]
3961    fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_hnsw() {
3962        let collection_config = InternalCollectionConfiguration::default_spann();
3963        let schema = Schema::new_default(KnnIndex::Hnsw);
3964        let result =
3965            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
3966                .unwrap();
3967
3968        // Should create new schema with default_knn_index (Hnsw)
3969        assert!(result.defaults.float_list.is_some());
3970        assert!(result
3971            .defaults
3972            .float_list
3973            .as_ref()
3974            .unwrap()
3975            .vector_index
3976            .as_ref()
3977            .unwrap()
3978            .config
3979            .hnsw
3980            .is_some());
3981        assert!(result
3982            .defaults
3983            .float_list
3984            .as_ref()
3985            .unwrap()
3986            .vector_index
3987            .as_ref()
3988            .unwrap()
3989            .config
3990            .spann
3991            .is_none());
3992    }
3993
3994    #[test]
3995    fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_spann() {
3996        let collection_config = InternalCollectionConfiguration::default_spann();
3997        let schema = Schema::new_default(KnnIndex::Hnsw);
3998        let result =
3999            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4000                .unwrap();
4001
4002        // Should create new schema with default_knn_index (Spann)
4003        assert!(result.defaults.float_list.is_some());
4004        assert!(result
4005            .defaults
4006            .float_list
4007            .as_ref()
4008            .unwrap()
4009            .vector_index
4010            .as_ref()
4011            .unwrap()
4012            .config
4013            .spann
4014            .is_some());
4015        assert!(result
4016            .defaults
4017            .float_list
4018            .as_ref()
4019            .unwrap()
4020            .vector_index
4021            .as_ref()
4022            .unwrap()
4023            .config
4024            .hnsw
4025            .is_none());
4026    }
4027
4028    #[test]
4029    fn test_defaults_source_key_not_document() {
4030        // Test that defaults.float_list.vector_index.config.source_key is None, not DOCUMENT_KEY
4031        let schema_hnsw = Schema::new_default(KnnIndex::Hnsw);
4032        let schema_spann = Schema::new_default(KnnIndex::Spann);
4033
4034        // Check HNSW default schema
4035        let defaults_hnsw = schema_hnsw
4036            .defaults
4037            .float_list
4038            .as_ref()
4039            .unwrap()
4040            .vector_index
4041            .as_ref()
4042            .unwrap();
4043        assert_eq!(defaults_hnsw.config.source_key, None);
4044
4045        // Check Spann default schema
4046        let defaults_spann = schema_spann
4047            .defaults
4048            .float_list
4049            .as_ref()
4050            .unwrap()
4051            .vector_index
4052            .as_ref()
4053            .unwrap();
4054        assert_eq!(defaults_spann.config.source_key, None);
4055
4056        // Test after reconcile with NON-default collection config
4057        // This path calls try_from where our fix is
4058        let collection_config_hnsw = InternalCollectionConfiguration {
4059            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4060                ef_construction: 300,
4061                max_neighbors: 32,
4062                ef_search: 50,
4063                num_threads: 8,
4064                batch_size: 200,
4065                sync_threshold: 2000,
4066                resize_factor: 1.5,
4067                space: Space::L2,
4068            }),
4069            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4070        };
4071        let result_hnsw = Schema::reconcile_with_collection_config(
4072            &schema_hnsw,
4073            &collection_config_hnsw,
4074            KnnIndex::Hnsw,
4075        )
4076        .unwrap();
4077        let reconciled_defaults_hnsw = result_hnsw
4078            .defaults
4079            .float_list
4080            .as_ref()
4081            .unwrap()
4082            .vector_index
4083            .as_ref()
4084            .unwrap();
4085        assert_eq!(reconciled_defaults_hnsw.config.source_key, None);
4086
4087        let collection_config_spann = InternalCollectionConfiguration {
4088            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4089                search_nprobe: 20,
4090                search_rng_factor: 3.0,
4091                search_rng_epsilon: 0.2,
4092                nreplica_count: 5,
4093                write_rng_factor: 2.0,
4094                write_rng_epsilon: 0.1,
4095                split_threshold: 2000,
4096                num_samples_kmeans: 200,
4097                initial_lambda: 0.8,
4098                reassign_neighbor_count: 100,
4099                merge_threshold: 800,
4100                num_centers_to_merge_to: 20,
4101                write_nprobe: 10,
4102                ef_construction: 400,
4103                ef_search: 60,
4104                max_neighbors: 24,
4105                space: Space::Cosine,
4106            }),
4107            embedding_function: None,
4108        };
4109        let result_spann = Schema::reconcile_with_collection_config(
4110            &schema_spann,
4111            &collection_config_spann,
4112            KnnIndex::Spann,
4113        )
4114        .unwrap();
4115        let reconciled_defaults_spann = result_spann
4116            .defaults
4117            .float_list
4118            .as_ref()
4119            .unwrap()
4120            .vector_index
4121            .as_ref()
4122            .unwrap();
4123        assert_eq!(reconciled_defaults_spann.config.source_key, None);
4124
4125        // Verify that #embedding key DOES have source_key set to DOCUMENT_KEY
4126        let embedding_hnsw = result_hnsw.keys.get(EMBEDDING_KEY).unwrap();
4127        let embedding_vector_index_hnsw = embedding_hnsw
4128            .float_list
4129            .as_ref()
4130            .unwrap()
4131            .vector_index
4132            .as_ref()
4133            .unwrap();
4134        assert_eq!(
4135            embedding_vector_index_hnsw.config.source_key,
4136            Some(DOCUMENT_KEY.to_string())
4137        );
4138
4139        let embedding_spann = result_spann.keys.get(EMBEDDING_KEY).unwrap();
4140        let embedding_vector_index_spann = embedding_spann
4141            .float_list
4142            .as_ref()
4143            .unwrap()
4144            .vector_index
4145            .as_ref()
4146            .unwrap();
4147        assert_eq!(
4148            embedding_vector_index_spann.config.source_key,
4149            Some(DOCUMENT_KEY.to_string())
4150        );
4151    }
4152
4153    #[test]
4154    fn test_try_from_source_key() {
4155        // Direct test of try_from to verify source_key behavior
4156        // Defaults should have source_key=None, #embedding should have source_key=DOCUMENT_KEY
4157
4158        // Test with HNSW config
4159        let collection_config_hnsw = InternalCollectionConfiguration {
4160            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4161                ef_construction: 300,
4162                max_neighbors: 32,
4163                ef_search: 50,
4164                num_threads: 8,
4165                batch_size: 200,
4166                sync_threshold: 2000,
4167                resize_factor: 1.5,
4168                space: Space::L2,
4169            }),
4170            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4171        };
4172        let schema_hnsw = Schema::try_from(&collection_config_hnsw).unwrap();
4173
4174        // Check defaults have source_key=None
4175        let defaults_hnsw = schema_hnsw
4176            .defaults
4177            .float_list
4178            .as_ref()
4179            .unwrap()
4180            .vector_index
4181            .as_ref()
4182            .unwrap();
4183        assert_eq!(defaults_hnsw.config.source_key, None);
4184
4185        // Check #embedding has source_key=DOCUMENT_KEY
4186        let embedding_hnsw = schema_hnsw.keys.get(EMBEDDING_KEY).unwrap();
4187        let embedding_vector_index_hnsw = embedding_hnsw
4188            .float_list
4189            .as_ref()
4190            .unwrap()
4191            .vector_index
4192            .as_ref()
4193            .unwrap();
4194        assert_eq!(
4195            embedding_vector_index_hnsw.config.source_key,
4196            Some(DOCUMENT_KEY.to_string())
4197        );
4198
4199        // Test with Spann config
4200        let collection_config_spann = InternalCollectionConfiguration {
4201            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4202                search_nprobe: 20,
4203                search_rng_factor: 3.0,
4204                search_rng_epsilon: 0.2,
4205                nreplica_count: 5,
4206                write_rng_factor: 2.0,
4207                write_rng_epsilon: 0.1,
4208                split_threshold: 2000,
4209                num_samples_kmeans: 200,
4210                initial_lambda: 0.8,
4211                reassign_neighbor_count: 100,
4212                merge_threshold: 800,
4213                num_centers_to_merge_to: 20,
4214                write_nprobe: 10,
4215                ef_construction: 400,
4216                ef_search: 60,
4217                max_neighbors: 24,
4218                space: Space::Cosine,
4219            }),
4220            embedding_function: None,
4221        };
4222        let schema_spann = Schema::try_from(&collection_config_spann).unwrap();
4223
4224        // Check defaults have source_key=None
4225        let defaults_spann = schema_spann
4226            .defaults
4227            .float_list
4228            .as_ref()
4229            .unwrap()
4230            .vector_index
4231            .as_ref()
4232            .unwrap();
4233        assert_eq!(defaults_spann.config.source_key, None);
4234
4235        // Check #embedding has source_key=DOCUMENT_KEY
4236        let embedding_spann = schema_spann.keys.get(EMBEDDING_KEY).unwrap();
4237        let embedding_vector_index_spann = embedding_spann
4238            .float_list
4239            .as_ref()
4240            .unwrap()
4241            .vector_index
4242            .as_ref()
4243            .unwrap();
4244        assert_eq!(
4245            embedding_vector_index_spann.config.source_key,
4246            Some(DOCUMENT_KEY.to_string())
4247        );
4248    }
4249
4250    #[test]
4251    fn test_default_hnsw_with_default_embedding_function() {
4252        // Test that when InternalCollectionConfiguration is default HNSW but has
4253        // an embedding function with name "default" and config as {}, it still
4254        // goes through the double default path and preserves source_key behavior
4255        use crate::collection_configuration::EmbeddingFunctionNewConfiguration;
4256
4257        let collection_config = InternalCollectionConfiguration {
4258            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default()),
4259            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
4260                EmbeddingFunctionNewConfiguration {
4261                    name: "default".to_string(),
4262                    config: serde_json::json!({}),
4263                },
4264            )),
4265        };
4266
4267        // Verify it's still considered default
4268        assert!(collection_config.is_default());
4269
4270        let schema = Schema::new_default(KnnIndex::Hnsw);
4271        let result =
4272            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4273                .unwrap();
4274
4275        // Check that defaults have source_key=None
4276        let defaults = result
4277            .defaults
4278            .float_list
4279            .as_ref()
4280            .unwrap()
4281            .vector_index
4282            .as_ref()
4283            .unwrap();
4284        assert_eq!(defaults.config.source_key, None);
4285
4286        // Check that #embedding has source_key=DOCUMENT_KEY
4287        let embedding = result.keys.get(EMBEDDING_KEY).unwrap();
4288        let embedding_vector_index = embedding
4289            .float_list
4290            .as_ref()
4291            .unwrap()
4292            .vector_index
4293            .as_ref()
4294            .unwrap();
4295        assert_eq!(
4296            embedding_vector_index.config.source_key,
4297            Some(DOCUMENT_KEY.to_string())
4298        );
4299
4300        // verify vector index config is set to spann
4301        let vector_index_config = defaults.config.clone();
4302        assert!(vector_index_config.spann.is_some());
4303        assert!(vector_index_config.hnsw.is_none());
4304
4305        // Verify embedding function was set correctly
4306        assert_eq!(
4307            embedding_vector_index.config.embedding_function,
4308            Some(EmbeddingFunctionConfiguration::Known(
4309                EmbeddingFunctionNewConfiguration {
4310                    name: "default".to_string(),
4311                    config: serde_json::json!({}),
4312                },
4313            ))
4314        );
4315        assert_eq!(
4316            defaults.config.embedding_function,
4317            Some(EmbeddingFunctionConfiguration::Known(
4318                EmbeddingFunctionNewConfiguration {
4319                    name: "default".to_string(),
4320                    config: serde_json::json!({}),
4321                },
4322            ))
4323        );
4324    }
4325
4326    #[test]
4327    fn test_reconcile_with_collection_config_both_non_default() {
4328        // Test that when both schema and collection config are non-default, it returns an error
4329        let mut schema = Schema::new_default(KnnIndex::Hnsw);
4330        schema.defaults.string = Some(StringValueType {
4331            fts_index: Some(FtsIndexType {
4332                enabled: true,
4333                config: FtsIndexConfig {},
4334            }),
4335            string_inverted_index: None,
4336        });
4337
4338        let mut collection_config = InternalCollectionConfiguration::default_hnsw();
4339        // Make collection config non-default by changing a parameter
4340        if let VectorIndexConfiguration::Hnsw(ref mut hnsw_config) = collection_config.vector_index
4341        {
4342            hnsw_config.ef_construction = 500; // Non-default value
4343        }
4344
4345        // Use reconcile_schema_and_config which has the early validation
4346        let result = Schema::reconcile_schema_and_config(
4347            Some(&schema),
4348            Some(&collection_config),
4349            KnnIndex::Spann,
4350        );
4351        assert!(result.is_err());
4352        assert!(matches!(
4353            result.unwrap_err(),
4354            SchemaError::ConfigAndSchemaConflict
4355        ));
4356    }
4357
4358    #[test]
4359    fn test_reconcile_with_collection_config_hnsw_override() {
4360        // Test that non-default HNSW collection config overrides default schema
4361        let schema = Schema::new_default(KnnIndex::Hnsw); // Use actual default schema
4362
4363        let collection_config = InternalCollectionConfiguration {
4364            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4365                ef_construction: 300,
4366                max_neighbors: 32,
4367                ef_search: 50,
4368                num_threads: 8,
4369                batch_size: 200,
4370                sync_threshold: 2000,
4371                resize_factor: 1.5,
4372                space: Space::L2,
4373            }),
4374            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4375        };
4376
4377        let result =
4378            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4379                .unwrap();
4380
4381        // Check that #embedding key override was created with the collection config settings
4382        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4383        let vector_index = embedding_override
4384            .float_list
4385            .as_ref()
4386            .unwrap()
4387            .vector_index
4388            .as_ref()
4389            .unwrap();
4390
4391        assert!(vector_index.enabled);
4392        assert_eq!(vector_index.config.space, Some(Space::L2));
4393        assert_eq!(
4394            vector_index.config.embedding_function,
4395            Some(EmbeddingFunctionConfiguration::Legacy)
4396        );
4397        assert_eq!(
4398            vector_index.config.source_key,
4399            Some(DOCUMENT_KEY.to_string())
4400        );
4401
4402        let hnsw_config = vector_index.config.hnsw.as_ref().unwrap();
4403        assert_eq!(hnsw_config.ef_construction, Some(300));
4404        assert_eq!(hnsw_config.max_neighbors, Some(32));
4405        assert_eq!(hnsw_config.ef_search, Some(50));
4406        assert_eq!(hnsw_config.num_threads, Some(8));
4407        assert_eq!(hnsw_config.batch_size, Some(200));
4408        assert_eq!(hnsw_config.sync_threshold, Some(2000));
4409        assert_eq!(hnsw_config.resize_factor, Some(1.5));
4410
4411        assert!(vector_index.config.spann.is_none());
4412    }
4413
4414    #[test]
4415    fn test_reconcile_with_collection_config_spann_override() {
4416        // Test that non-default SPANN collection config overrides default schema
4417        let schema = Schema::new_default(KnnIndex::Spann); // Use actual default schema
4418
4419        let collection_config = InternalCollectionConfiguration {
4420            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4421                search_nprobe: 20,
4422                search_rng_factor: 3.0,
4423                search_rng_epsilon: 0.2,
4424                nreplica_count: 5,
4425                write_rng_factor: 2.0,
4426                write_rng_epsilon: 0.1,
4427                split_threshold: 2000,
4428                num_samples_kmeans: 200,
4429                initial_lambda: 0.8,
4430                reassign_neighbor_count: 100,
4431                merge_threshold: 800,
4432                num_centers_to_merge_to: 20,
4433                write_nprobe: 10,
4434                ef_construction: 400,
4435                ef_search: 60,
4436                max_neighbors: 24,
4437                space: Space::Cosine,
4438            }),
4439            embedding_function: None,
4440        };
4441
4442        let result =
4443            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4444                .unwrap();
4445
4446        // Check that #embedding key override was created with the collection config settings
4447        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4448        let vector_index = embedding_override
4449            .float_list
4450            .as_ref()
4451            .unwrap()
4452            .vector_index
4453            .as_ref()
4454            .unwrap();
4455
4456        assert!(vector_index.enabled);
4457        assert_eq!(vector_index.config.space, Some(Space::Cosine));
4458        assert_eq!(vector_index.config.embedding_function, None);
4459        assert_eq!(
4460            vector_index.config.source_key,
4461            Some(DOCUMENT_KEY.to_string())
4462        );
4463
4464        assert!(vector_index.config.hnsw.is_none());
4465
4466        let spann_config = vector_index.config.spann.as_ref().unwrap();
4467        assert_eq!(spann_config.search_nprobe, Some(20));
4468        assert_eq!(spann_config.search_rng_factor, Some(3.0));
4469        assert_eq!(spann_config.search_rng_epsilon, Some(0.2));
4470        assert_eq!(spann_config.nreplica_count, Some(5));
4471        assert_eq!(spann_config.write_rng_factor, Some(2.0));
4472        assert_eq!(spann_config.write_rng_epsilon, Some(0.1));
4473        assert_eq!(spann_config.split_threshold, Some(2000));
4474        assert_eq!(spann_config.num_samples_kmeans, Some(200));
4475        assert_eq!(spann_config.initial_lambda, Some(0.8));
4476        assert_eq!(spann_config.reassign_neighbor_count, Some(100));
4477        assert_eq!(spann_config.merge_threshold, Some(800));
4478        assert_eq!(spann_config.num_centers_to_merge_to, Some(20));
4479        assert_eq!(spann_config.write_nprobe, Some(10));
4480        assert_eq!(spann_config.ef_construction, Some(400));
4481        assert_eq!(spann_config.ef_search, Some(60));
4482        assert_eq!(spann_config.max_neighbors, Some(24));
4483    }
4484
4485    #[test]
4486    fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
4487        // Test that collection config updates BOTH defaults.float_list.vector_index
4488        // AND keys["embedding"].float_list.vector_index
4489        let schema = Schema::new_default(KnnIndex::Hnsw);
4490
4491        let collection_config = InternalCollectionConfiguration {
4492            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4493                ef_construction: 300,
4494                max_neighbors: 32,
4495                ef_search: 50,
4496                num_threads: 8,
4497                batch_size: 200,
4498                sync_threshold: 2000,
4499                resize_factor: 1.5,
4500                space: Space::L2,
4501            }),
4502            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4503        };
4504
4505        let result =
4506            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4507                .unwrap();
4508
4509        // Check that defaults.float_list.vector_index was updated
4510        let defaults_vector_index = result
4511            .defaults
4512            .float_list
4513            .as_ref()
4514            .unwrap()
4515            .vector_index
4516            .as_ref()
4517            .unwrap();
4518
4519        // Should be disabled in defaults (template for new keys)
4520        assert!(!defaults_vector_index.enabled);
4521        // But config should be updated
4522        assert_eq!(defaults_vector_index.config.space, Some(Space::L2));
4523        assert_eq!(
4524            defaults_vector_index.config.embedding_function,
4525            Some(EmbeddingFunctionConfiguration::Legacy)
4526        );
4527        assert_eq!(defaults_vector_index.config.source_key, None);
4528        let defaults_hnsw = defaults_vector_index.config.hnsw.as_ref().unwrap();
4529        assert_eq!(defaults_hnsw.ef_construction, Some(300));
4530        assert_eq!(defaults_hnsw.max_neighbors, Some(32));
4531
4532        // Check that #embedding key override was also updated
4533        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4534        let embedding_vector_index = embedding_override
4535            .float_list
4536            .as_ref()
4537            .unwrap()
4538            .vector_index
4539            .as_ref()
4540            .unwrap();
4541
4542        // Should be enabled on #embedding
4543        assert!(embedding_vector_index.enabled);
4544        // Config should match defaults
4545        assert_eq!(embedding_vector_index.config.space, Some(Space::L2));
4546        assert_eq!(
4547            embedding_vector_index.config.embedding_function,
4548            Some(EmbeddingFunctionConfiguration::Legacy)
4549        );
4550        assert_eq!(
4551            embedding_vector_index.config.source_key,
4552            Some(DOCUMENT_KEY.to_string())
4553        );
4554        let embedding_hnsw = embedding_vector_index.config.hnsw.as_ref().unwrap();
4555        assert_eq!(embedding_hnsw.ef_construction, Some(300));
4556        assert_eq!(embedding_hnsw.max_neighbors, Some(32));
4557    }
4558
4559    #[test]
4560    fn test_is_schema_default() {
4561        // Test that actual default schemas are correctly identified
4562        let default_hnsw_schema = Schema::new_default(KnnIndex::Hnsw);
4563        assert!(default_hnsw_schema.is_default());
4564
4565        let default_spann_schema = Schema::new_default(KnnIndex::Spann);
4566        assert!(default_spann_schema.is_default());
4567
4568        // Test that a modified default schema is not considered default
4569        let mut modified_schema = Schema::new_default(KnnIndex::Hnsw);
4570        // Make a clear modification - change the string inverted index enabled state
4571        if let Some(ref mut string_type) = modified_schema.defaults.string {
4572            if let Some(ref mut string_inverted) = string_type.string_inverted_index {
4573                string_inverted.enabled = false; // Default is true, so this should make it non-default
4574            }
4575        }
4576        assert!(!modified_schema.is_default());
4577
4578        // Test that schema with additional key overrides is not default
4579        let mut schema_with_extra_overrides = Schema::new_default(KnnIndex::Hnsw);
4580        schema_with_extra_overrides
4581            .keys
4582            .insert("custom_key".to_string(), ValueTypes::default());
4583        assert!(!schema_with_extra_overrides.is_default());
4584    }
4585
4586    #[test]
4587    fn test_is_schema_default_with_space() {
4588        let schema = Schema::new_default(KnnIndex::Hnsw);
4589        assert!(schema.is_default());
4590
4591        let mut schema_with_space = Schema::new_default(KnnIndex::Hnsw);
4592        if let Some(ref mut float_list) = schema_with_space.defaults.float_list {
4593            if let Some(ref mut vector_index) = float_list.vector_index {
4594                vector_index.config.space = Some(Space::Cosine);
4595            }
4596        }
4597        assert!(!schema_with_space.is_default());
4598
4599        let mut schema_with_space_in_embedding_key = Schema::new_default(KnnIndex::Spann);
4600        if let Some(ref mut embedding_key) = schema_with_space_in_embedding_key
4601            .keys
4602            .get_mut(EMBEDDING_KEY)
4603        {
4604            if let Some(ref mut float_list) = embedding_key.float_list {
4605                if let Some(ref mut vector_index) = float_list.vector_index {
4606                    vector_index.config.space = Some(Space::Cosine);
4607                }
4608            }
4609        }
4610        assert!(!schema_with_space_in_embedding_key.is_default());
4611    }
4612
4613    #[test]
4614    fn test_is_schema_default_with_embedding_function() {
4615        let schema = Schema::new_default(KnnIndex::Hnsw);
4616        assert!(schema.is_default());
4617
4618        let mut schema_with_embedding_function = Schema::new_default(KnnIndex::Hnsw);
4619        if let Some(ref mut float_list) = schema_with_embedding_function.defaults.float_list {
4620            if let Some(ref mut vector_index) = float_list.vector_index {
4621                vector_index.config.embedding_function =
4622                    Some(EmbeddingFunctionConfiguration::Legacy);
4623            }
4624        }
4625        assert!(!schema_with_embedding_function.is_default());
4626
4627        let mut schema_with_embedding_function_in_embedding_key =
4628            Schema::new_default(KnnIndex::Spann);
4629        if let Some(ref mut embedding_key) = schema_with_embedding_function_in_embedding_key
4630            .keys
4631            .get_mut(EMBEDDING_KEY)
4632        {
4633            if let Some(ref mut float_list) = embedding_key.float_list {
4634                if let Some(ref mut vector_index) = float_list.vector_index {
4635                    vector_index.config.embedding_function =
4636                        Some(EmbeddingFunctionConfiguration::Legacy);
4637                }
4638            }
4639        }
4640        assert!(!schema_with_embedding_function_in_embedding_key.is_default());
4641    }
4642
4643    #[test]
4644    fn test_add_merges_keys_by_value_type() {
4645        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
4646        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
4647
4648        let string_override = ValueTypes {
4649            string: Some(StringValueType {
4650                string_inverted_index: Some(StringInvertedIndexType {
4651                    enabled: true,
4652                    config: StringInvertedIndexConfig {},
4653                }),
4654                fts_index: None,
4655            }),
4656            ..Default::default()
4657        };
4658        schema_a
4659            .keys
4660            .insert("custom_field".to_string(), string_override);
4661
4662        let float_override = ValueTypes {
4663            float: Some(FloatValueType {
4664                float_inverted_index: Some(FloatInvertedIndexType {
4665                    enabled: true,
4666                    config: FloatInvertedIndexConfig {},
4667                }),
4668            }),
4669            ..Default::default()
4670        };
4671        schema_b
4672            .keys
4673            .insert("custom_field".to_string(), float_override);
4674
4675        let merged = schema_a.merge(&schema_b).unwrap();
4676        let merged_override = merged.keys.get("custom_field").unwrap();
4677
4678        assert!(merged_override.string.is_some());
4679        assert!(merged_override.float.is_some());
4680        assert!(
4681            merged_override
4682                .string
4683                .as_ref()
4684                .unwrap()
4685                .string_inverted_index
4686                .as_ref()
4687                .unwrap()
4688                .enabled
4689        );
4690        assert!(
4691            merged_override
4692                .float
4693                .as_ref()
4694                .unwrap()
4695                .float_inverted_index
4696                .as_ref()
4697                .unwrap()
4698                .enabled
4699        );
4700    }
4701
4702    #[test]
4703    fn test_add_rejects_different_defaults() {
4704        let schema_a = Schema::new_default(KnnIndex::Hnsw);
4705        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
4706
4707        if let Some(string_type) = schema_b.defaults.string.as_mut() {
4708            if let Some(string_index) = string_type.string_inverted_index.as_mut() {
4709                string_index.enabled = false;
4710            }
4711        }
4712
4713        let err = schema_a.merge(&schema_b).unwrap_err();
4714        assert!(matches!(err, SchemaError::DefaultsMismatch));
4715    }
4716
4717    #[test]
4718    fn test_add_detects_conflicting_value_type_configuration() {
4719        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
4720        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
4721
4722        let string_override_enabled = ValueTypes {
4723            string: Some(StringValueType {
4724                string_inverted_index: Some(StringInvertedIndexType {
4725                    enabled: true,
4726                    config: StringInvertedIndexConfig {},
4727                }),
4728                fts_index: None,
4729            }),
4730            ..Default::default()
4731        };
4732        schema_a
4733            .keys
4734            .insert("custom_field".to_string(), string_override_enabled);
4735
4736        let string_override_disabled = ValueTypes {
4737            string: Some(StringValueType {
4738                string_inverted_index: Some(StringInvertedIndexType {
4739                    enabled: false,
4740                    config: StringInvertedIndexConfig {},
4741                }),
4742                fts_index: None,
4743            }),
4744            ..Default::default()
4745        };
4746        schema_b
4747            .keys
4748            .insert("custom_field".to_string(), string_override_disabled);
4749
4750        let err = schema_a.merge(&schema_b).unwrap_err();
4751        assert!(matches!(err, SchemaError::ConfigurationConflict { .. }));
4752    }
4753
4754    // TODO(Sanket): Remove this test once deployed
4755    #[test]
4756    fn test_backward_compatibility_aliases() {
4757        // Test that old format with # and $ prefixes and key_overrides can be deserialized
4758        let old_format_json = r###"{
4759            "defaults": {
4760                "#string": {
4761                    "$fts_index": {
4762                        "enabled": true,
4763                        "config": {}
4764                    }
4765                },
4766                "#int": {
4767                    "$int_inverted_index": {
4768                        "enabled": true,
4769                        "config": {}
4770                    }
4771                },
4772                "#float_list": {
4773                    "$vector_index": {
4774                        "enabled": true,
4775                        "config": {
4776                            "spann": {
4777                                "search_nprobe": 10
4778                            }
4779                        }
4780                    }
4781                }
4782            },
4783            "key_overrides": {
4784                "#document": {
4785                    "#string": {
4786                        "$fts_index": {
4787                            "enabled": false,
4788                            "config": {}
4789                        }
4790                    }
4791                }
4792            }
4793        }"###;
4794
4795        let schema_from_old: Schema = serde_json::from_str(old_format_json).unwrap();
4796
4797        // Test that new format without prefixes and keys can be deserialized
4798        let new_format_json = r###"{
4799            "defaults": {
4800                "string": {
4801                    "fts_index": {
4802                        "enabled": true,
4803                        "config": {}
4804                    }
4805                },
4806                "int": {
4807                    "int_inverted_index": {
4808                        "enabled": true,
4809                        "config": {}
4810                    }
4811                },
4812                "float_list": {
4813                    "vector_index": {
4814                        "enabled": true,
4815                        "config": {
4816                            "spann": {
4817                                "search_nprobe": 10
4818                            }
4819                        }
4820                    }
4821                }
4822            },
4823            "keys": {
4824                "#document": {
4825                    "string": {
4826                        "fts_index": {
4827                            "enabled": false,
4828                            "config": {}
4829                        }
4830                    }
4831                }
4832            }
4833        }"###;
4834
4835        let schema_from_new: Schema = serde_json::from_str(new_format_json).unwrap();
4836
4837        // Both should deserialize to the same structure
4838        assert_eq!(schema_from_old, schema_from_new);
4839
4840        // Verify the deserialized content is correct
4841        assert!(schema_from_old.defaults.string.is_some());
4842        assert!(schema_from_old
4843            .defaults
4844            .string
4845            .as_ref()
4846            .unwrap()
4847            .fts_index
4848            .is_some());
4849        assert!(
4850            schema_from_old
4851                .defaults
4852                .string
4853                .as_ref()
4854                .unwrap()
4855                .fts_index
4856                .as_ref()
4857                .unwrap()
4858                .enabled
4859        );
4860
4861        assert!(schema_from_old.defaults.int.is_some());
4862        assert!(schema_from_old
4863            .defaults
4864            .int
4865            .as_ref()
4866            .unwrap()
4867            .int_inverted_index
4868            .is_some());
4869
4870        assert!(schema_from_old.defaults.float_list.is_some());
4871        assert!(schema_from_old
4872            .defaults
4873            .float_list
4874            .as_ref()
4875            .unwrap()
4876            .vector_index
4877            .is_some());
4878
4879        assert!(schema_from_old.keys.contains_key(DOCUMENT_KEY));
4880        let doc_override = schema_from_old.keys.get(DOCUMENT_KEY).unwrap();
4881        assert!(doc_override.string.is_some());
4882        assert!(
4883            !doc_override
4884                .string
4885                .as_ref()
4886                .unwrap()
4887                .fts_index
4888                .as_ref()
4889                .unwrap()
4890                .enabled
4891        );
4892
4893        // Test that serialization always outputs the new format (without prefixes)
4894        let serialized = serde_json::to_string(&schema_from_old).unwrap();
4895
4896        // Should contain new format keys
4897        assert!(serialized.contains(r#""keys":"#));
4898        assert!(serialized.contains(r#""string":"#));
4899        assert!(serialized.contains(r#""fts_index":"#));
4900        assert!(serialized.contains(r#""int_inverted_index":"#));
4901        assert!(serialized.contains(r#""vector_index":"#));
4902
4903        // Should NOT contain old format keys
4904        assert!(!serialized.contains(r#""key_overrides":"#));
4905        assert!(!serialized.contains(r###""#string":"###));
4906        assert!(!serialized.contains(r###""$fts_index":"###));
4907        assert!(!serialized.contains(r###""$int_inverted_index":"###));
4908        assert!(!serialized.contains(r###""$vector_index":"###));
4909    }
4910
4911    #[test]
4912    fn test_hnsw_index_config_validation() {
4913        use validator::Validate;
4914
4915        // Valid configuration - should pass
4916        let valid_config = HnswIndexConfig {
4917            batch_size: Some(10),
4918            sync_threshold: Some(100),
4919            ef_construction: Some(100),
4920            max_neighbors: Some(16),
4921            ..Default::default()
4922        };
4923        assert!(valid_config.validate().is_ok());
4924
4925        // Invalid: batch_size too small (min 2)
4926        let invalid_batch_size = HnswIndexConfig {
4927            batch_size: Some(1),
4928            ..Default::default()
4929        };
4930        assert!(invalid_batch_size.validate().is_err());
4931
4932        // Invalid: sync_threshold too small (min 2)
4933        let invalid_sync_threshold = HnswIndexConfig {
4934            sync_threshold: Some(1),
4935            ..Default::default()
4936        };
4937        assert!(invalid_sync_threshold.validate().is_err());
4938
4939        // Valid: boundary values (exactly 2) should pass
4940        let boundary_config = HnswIndexConfig {
4941            batch_size: Some(2),
4942            sync_threshold: Some(2),
4943            ..Default::default()
4944        };
4945        assert!(boundary_config.validate().is_ok());
4946
4947        // Valid: None values should pass validation
4948        let all_none_config = HnswIndexConfig {
4949            ..Default::default()
4950        };
4951        assert!(all_none_config.validate().is_ok());
4952
4953        // Valid: fields without validation can be any value
4954        let other_fields_config = HnswIndexConfig {
4955            ef_construction: Some(1),
4956            max_neighbors: Some(1),
4957            ef_search: Some(1),
4958            num_threads: Some(1),
4959            resize_factor: Some(0.1),
4960            ..Default::default()
4961        };
4962        assert!(other_fields_config.validate().is_ok());
4963    }
4964
4965    #[test]
4966    fn test_spann_index_config_validation() {
4967        use validator::Validate;
4968
4969        // Valid configuration - should pass
4970        let valid_config = SpannIndexConfig {
4971            write_nprobe: Some(32),
4972            nreplica_count: Some(4),
4973            split_threshold: Some(100),
4974            merge_threshold: Some(50),
4975            reassign_neighbor_count: Some(32),
4976            num_centers_to_merge_to: Some(4),
4977            ef_construction: Some(100),
4978            ef_search: Some(100),
4979            max_neighbors: Some(32),
4980            search_rng_factor: Some(1.0),
4981            write_rng_factor: Some(1.0),
4982            search_rng_epsilon: Some(7.5),
4983            write_rng_epsilon: Some(7.5),
4984            ..Default::default()
4985        };
4986        assert!(valid_config.validate().is_ok());
4987
4988        // Invalid: write_nprobe too large (max 64)
4989        let invalid_write_nprobe = SpannIndexConfig {
4990            write_nprobe: Some(200),
4991            ..Default::default()
4992        };
4993        assert!(invalid_write_nprobe.validate().is_err());
4994
4995        // Invalid: split_threshold too small (min 50)
4996        let invalid_split_threshold = SpannIndexConfig {
4997            split_threshold: Some(10),
4998            ..Default::default()
4999        };
5000        assert!(invalid_split_threshold.validate().is_err());
5001
5002        // Invalid: split_threshold too large (max 200)
5003        let invalid_split_threshold_high = SpannIndexConfig {
5004            split_threshold: Some(250),
5005            ..Default::default()
5006        };
5007        assert!(invalid_split_threshold_high.validate().is_err());
5008
5009        // Invalid: nreplica_count too large (max 8)
5010        let invalid_nreplica = SpannIndexConfig {
5011            nreplica_count: Some(10),
5012            ..Default::default()
5013        };
5014        assert!(invalid_nreplica.validate().is_err());
5015
5016        // Invalid: reassign_neighbor_count too large (max 64)
5017        let invalid_reassign = SpannIndexConfig {
5018            reassign_neighbor_count: Some(100),
5019            ..Default::default()
5020        };
5021        assert!(invalid_reassign.validate().is_err());
5022
5023        // Invalid: merge_threshold out of range (min 25, max 100)
5024        let invalid_merge_threshold_low = SpannIndexConfig {
5025            merge_threshold: Some(5),
5026            ..Default::default()
5027        };
5028        assert!(invalid_merge_threshold_low.validate().is_err());
5029
5030        let invalid_merge_threshold_high = SpannIndexConfig {
5031            merge_threshold: Some(150),
5032            ..Default::default()
5033        };
5034        assert!(invalid_merge_threshold_high.validate().is_err());
5035
5036        // Invalid: num_centers_to_merge_to too large (max 8)
5037        let invalid_num_centers = SpannIndexConfig {
5038            num_centers_to_merge_to: Some(10),
5039            ..Default::default()
5040        };
5041        assert!(invalid_num_centers.validate().is_err());
5042
5043        // Invalid: ef_construction too large (max 200)
5044        let invalid_ef_construction = SpannIndexConfig {
5045            ef_construction: Some(300),
5046            ..Default::default()
5047        };
5048        assert!(invalid_ef_construction.validate().is_err());
5049
5050        // Invalid: ef_search too large (max 200)
5051        let invalid_ef_search = SpannIndexConfig {
5052            ef_search: Some(300),
5053            ..Default::default()
5054        };
5055        assert!(invalid_ef_search.validate().is_err());
5056
5057        // Invalid: max_neighbors too large (max 64)
5058        let invalid_max_neighbors = SpannIndexConfig {
5059            max_neighbors: Some(100),
5060            ..Default::default()
5061        };
5062        assert!(invalid_max_neighbors.validate().is_err());
5063
5064        // Invalid: search_nprobe too large (max 128)
5065        let invalid_search_nprobe = SpannIndexConfig {
5066            search_nprobe: Some(200),
5067            ..Default::default()
5068        };
5069        assert!(invalid_search_nprobe.validate().is_err());
5070
5071        // Invalid: search_rng_factor not exactly 1.0 (min 1.0, max 1.0)
5072        let invalid_search_rng_factor_low = SpannIndexConfig {
5073            search_rng_factor: Some(0.9),
5074            ..Default::default()
5075        };
5076        assert!(invalid_search_rng_factor_low.validate().is_err());
5077
5078        let invalid_search_rng_factor_high = SpannIndexConfig {
5079            search_rng_factor: Some(1.1),
5080            ..Default::default()
5081        };
5082        assert!(invalid_search_rng_factor_high.validate().is_err());
5083
5084        // Valid: search_rng_factor exactly 1.0
5085        let valid_search_rng_factor = SpannIndexConfig {
5086            search_rng_factor: Some(1.0),
5087            ..Default::default()
5088        };
5089        assert!(valid_search_rng_factor.validate().is_ok());
5090
5091        // Invalid: search_rng_epsilon out of range (min 5.0, max 10.0)
5092        let invalid_search_rng_epsilon_low = SpannIndexConfig {
5093            search_rng_epsilon: Some(4.0),
5094            ..Default::default()
5095        };
5096        assert!(invalid_search_rng_epsilon_low.validate().is_err());
5097
5098        let invalid_search_rng_epsilon_high = SpannIndexConfig {
5099            search_rng_epsilon: Some(11.0),
5100            ..Default::default()
5101        };
5102        assert!(invalid_search_rng_epsilon_high.validate().is_err());
5103
5104        // Valid: search_rng_epsilon within range
5105        let valid_search_rng_epsilon = SpannIndexConfig {
5106            search_rng_epsilon: Some(7.5),
5107            ..Default::default()
5108        };
5109        assert!(valid_search_rng_epsilon.validate().is_ok());
5110
5111        // Invalid: write_rng_factor not exactly 1.0 (min 1.0, max 1.0)
5112        let invalid_write_rng_factor_low = SpannIndexConfig {
5113            write_rng_factor: Some(0.9),
5114            ..Default::default()
5115        };
5116        assert!(invalid_write_rng_factor_low.validate().is_err());
5117
5118        let invalid_write_rng_factor_high = SpannIndexConfig {
5119            write_rng_factor: Some(1.1),
5120            ..Default::default()
5121        };
5122        assert!(invalid_write_rng_factor_high.validate().is_err());
5123
5124        // Valid: write_rng_factor exactly 1.0
5125        let valid_write_rng_factor = SpannIndexConfig {
5126            write_rng_factor: Some(1.0),
5127            ..Default::default()
5128        };
5129        assert!(valid_write_rng_factor.validate().is_ok());
5130
5131        // Invalid: write_rng_epsilon out of range (min 5.0, max 10.0)
5132        let invalid_write_rng_epsilon_low = SpannIndexConfig {
5133            write_rng_epsilon: Some(4.0),
5134            ..Default::default()
5135        };
5136        assert!(invalid_write_rng_epsilon_low.validate().is_err());
5137
5138        let invalid_write_rng_epsilon_high = SpannIndexConfig {
5139            write_rng_epsilon: Some(11.0),
5140            ..Default::default()
5141        };
5142        assert!(invalid_write_rng_epsilon_high.validate().is_err());
5143
5144        // Valid: write_rng_epsilon within range
5145        let valid_write_rng_epsilon = SpannIndexConfig {
5146            write_rng_epsilon: Some(7.5),
5147            ..Default::default()
5148        };
5149        assert!(valid_write_rng_epsilon.validate().is_ok());
5150
5151        // Invalid: num_samples_kmeans too large (max 1000)
5152        let invalid_num_samples_kmeans = SpannIndexConfig {
5153            num_samples_kmeans: Some(1500),
5154            ..Default::default()
5155        };
5156        assert!(invalid_num_samples_kmeans.validate().is_err());
5157
5158        // Valid: num_samples_kmeans within range
5159        let valid_num_samples_kmeans = SpannIndexConfig {
5160            num_samples_kmeans: Some(500),
5161            ..Default::default()
5162        };
5163        assert!(valid_num_samples_kmeans.validate().is_ok());
5164
5165        // Invalid: initial_lambda not exactly 100.0 (min 100.0, max 100.0)
5166        let invalid_initial_lambda_high = SpannIndexConfig {
5167            initial_lambda: Some(150.0),
5168            ..Default::default()
5169        };
5170        assert!(invalid_initial_lambda_high.validate().is_err());
5171
5172        let invalid_initial_lambda_low = SpannIndexConfig {
5173            initial_lambda: Some(50.0),
5174            ..Default::default()
5175        };
5176        assert!(invalid_initial_lambda_low.validate().is_err());
5177
5178        // Valid: initial_lambda exactly 100.0
5179        let valid_initial_lambda = SpannIndexConfig {
5180            initial_lambda: Some(100.0),
5181            ..Default::default()
5182        };
5183        assert!(valid_initial_lambda.validate().is_ok());
5184
5185        // Valid: None values should pass validation
5186        let all_none_config = SpannIndexConfig {
5187            ..Default::default()
5188        };
5189        assert!(all_none_config.validate().is_ok());
5190    }
5191
5192    #[test]
5193    fn test_builder_pattern_crud_workflow() {
5194        // Test comprehensive CRUD workflow using the builder pattern
5195
5196        // CREATE: Build a schema with multiple indexes
5197        let schema = Schema::new_default(KnnIndex::Hnsw)
5198            .create_index(
5199                None,
5200                IndexConfig::Vector(VectorIndexConfig {
5201                    space: Some(Space::Cosine),
5202                    embedding_function: None,
5203                    source_key: None,
5204                    hnsw: Some(HnswIndexConfig {
5205                        ef_construction: Some(200),
5206                        max_neighbors: Some(32),
5207                        ef_search: Some(50),
5208                        num_threads: None,
5209                        batch_size: None,
5210                        sync_threshold: None,
5211                        resize_factor: None,
5212                    }),
5213                    spann: None,
5214                }),
5215            )
5216            .expect("vector config should succeed")
5217            .create_index(
5218                Some("category"),
5219                IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5220            )
5221            .expect("string inverted on key should succeed")
5222            .create_index(
5223                Some("year"),
5224                IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5225            )
5226            .expect("int inverted on key should succeed")
5227            .create_index(
5228                Some("rating"),
5229                IndexConfig::FloatInverted(FloatInvertedIndexConfig {}),
5230            )
5231            .expect("float inverted on key should succeed")
5232            .create_index(
5233                Some("is_active"),
5234                IndexConfig::BoolInverted(BoolInvertedIndexConfig {}),
5235            )
5236            .expect("bool inverted on key should succeed");
5237
5238        // READ: Verify the schema was built correctly
5239        // Check vector config
5240        assert!(schema.keys.contains_key(EMBEDDING_KEY));
5241        let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
5242        assert!(embedding.float_list.is_some());
5243        let vector_index = embedding
5244            .float_list
5245            .as_ref()
5246            .unwrap()
5247            .vector_index
5248            .as_ref()
5249            .unwrap();
5250        assert!(vector_index.enabled);
5251        assert_eq!(vector_index.config.space, Some(Space::Cosine));
5252        assert_eq!(
5253            vector_index.config.hnsw.as_ref().unwrap().ef_construction,
5254            Some(200)
5255        );
5256
5257        // Check per-key indexes
5258        assert!(schema.keys.contains_key("category"));
5259        assert!(schema.keys.contains_key("year"));
5260        assert!(schema.keys.contains_key("rating"));
5261        assert!(schema.keys.contains_key("is_active"));
5262
5263        // Verify category string inverted index
5264        let category = schema.keys.get("category").unwrap();
5265        assert!(category.string.is_some());
5266        let string_idx = category
5267            .string
5268            .as_ref()
5269            .unwrap()
5270            .string_inverted_index
5271            .as_ref()
5272            .unwrap();
5273        assert!(string_idx.enabled);
5274
5275        // Verify year int inverted index
5276        let year = schema.keys.get("year").unwrap();
5277        assert!(year.int.is_some());
5278        let int_idx = year
5279            .int
5280            .as_ref()
5281            .unwrap()
5282            .int_inverted_index
5283            .as_ref()
5284            .unwrap();
5285        assert!(int_idx.enabled);
5286
5287        // UPDATE/DELETE: Disable some indexes
5288        let schema = schema
5289            .delete_index(
5290                Some("category"),
5291                IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5292            )
5293            .expect("delete string inverted should succeed")
5294            .delete_index(
5295                Some("year"),
5296                IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5297            )
5298            .expect("delete int inverted should succeed");
5299
5300        // VERIFY DELETE: Check that indexes were disabled
5301        let category = schema.keys.get("category").unwrap();
5302        let string_idx = category
5303            .string
5304            .as_ref()
5305            .unwrap()
5306            .string_inverted_index
5307            .as_ref()
5308            .unwrap();
5309        assert!(!string_idx.enabled); // Should be disabled now
5310
5311        let year = schema.keys.get("year").unwrap();
5312        let int_idx = year
5313            .int
5314            .as_ref()
5315            .unwrap()
5316            .int_inverted_index
5317            .as_ref()
5318            .unwrap();
5319        assert!(!int_idx.enabled); // Should be disabled now
5320
5321        // Verify other indexes still enabled
5322        let rating = schema.keys.get("rating").unwrap();
5323        let float_idx = rating
5324            .float
5325            .as_ref()
5326            .unwrap()
5327            .float_inverted_index
5328            .as_ref()
5329            .unwrap();
5330        assert!(float_idx.enabled); // Should still be enabled
5331
5332        let is_active = schema.keys.get("is_active").unwrap();
5333        let bool_idx = is_active
5334            .boolean
5335            .as_ref()
5336            .unwrap()
5337            .bool_inverted_index
5338            .as_ref()
5339            .unwrap();
5340        assert!(bool_idx.enabled); // Should still be enabled
5341    }
5342
5343    #[test]
5344    fn test_builder_create_index_validation_errors() {
5345        // Test all validation errors for create_index() as documented in the docstring:
5346        // - Attempting to create index on special keys (#document, #embedding)
5347        // - Invalid configuration (e.g., vector index on non-embedding key)
5348        // - Conflicting with existing indexes (e.g., multiple sparse vector indexes)
5349
5350        // Error: Vector index on specific key (must be global)
5351        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5352            Some("my_vectors"),
5353            IndexConfig::Vector(VectorIndexConfig {
5354                space: Some(Space::L2),
5355                embedding_function: None,
5356                source_key: None,
5357                hnsw: None,
5358                spann: None,
5359            }),
5360        );
5361        assert!(result.is_err());
5362        assert!(matches!(
5363            result.unwrap_err(),
5364            SchemaBuilderError::VectorIndexMustBeGlobal { key } if key == "my_vectors"
5365        ));
5366
5367        // Error: FTS index on specific key (must be global)
5368        let result = Schema::new_default(KnnIndex::Hnsw)
5369            .create_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
5370        assert!(result.is_err());
5371        assert!(matches!(
5372            result.unwrap_err(),
5373            SchemaBuilderError::FtsIndexMustBeGlobal { key } if key == "my_text"
5374        ));
5375
5376        // Error: Cannot create index on special key #document
5377        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5378            Some(DOCUMENT_KEY),
5379            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5380        );
5381        assert!(result.is_err());
5382        assert!(matches!(
5383            result.unwrap_err(),
5384            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5385        ));
5386
5387        // Error: Cannot create index on special key #embedding
5388        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5389            Some(EMBEDDING_KEY),
5390            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5391        );
5392        assert!(result.is_err());
5393        assert!(matches!(
5394            result.unwrap_err(),
5395            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5396        ));
5397
5398        // Error: Sparse vector without key (must specify key)
5399        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5400            None,
5401            IndexConfig::SparseVector(SparseVectorIndexConfig {
5402                embedding_function: None,
5403                source_key: None,
5404                bm25: None,
5405            }),
5406        );
5407        assert!(result.is_err());
5408        assert!(matches!(
5409            result.unwrap_err(),
5410            SchemaBuilderError::SparseVectorRequiresKey
5411        ));
5412
5413        // Error: Multiple sparse vector indexes (only one allowed per collection)
5414        let result = Schema::new_default(KnnIndex::Hnsw)
5415            .create_index(
5416                Some("sparse1"),
5417                IndexConfig::SparseVector(SparseVectorIndexConfig {
5418                    embedding_function: None,
5419                    source_key: None,
5420                    bm25: None,
5421                }),
5422            )
5423            .expect("first sparse should succeed")
5424            .create_index(
5425                Some("sparse2"),
5426                IndexConfig::SparseVector(SparseVectorIndexConfig {
5427                    embedding_function: None,
5428                    source_key: None,
5429                    bm25: None,
5430                }),
5431            );
5432        assert!(result.is_err());
5433        assert!(matches!(
5434            result.unwrap_err(),
5435            SchemaBuilderError::MultipleSparseVectorIndexes { existing_key } if existing_key == "sparse1"
5436        ));
5437    }
5438
5439    #[test]
5440    fn test_builder_delete_index_validation_errors() {
5441        // Test all validation errors for delete_index() as documented in the docstring:
5442        // - Attempting to delete index on special keys (#document, #embedding)
5443        // - Attempting to delete vector, FTS, or sparse vector indexes (not currently supported)
5444
5445        // Error: Delete on special key #embedding
5446        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5447            Some(EMBEDDING_KEY),
5448            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5449        );
5450        assert!(result.is_err());
5451        assert!(matches!(
5452            result.unwrap_err(),
5453            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5454        ));
5455
5456        // Error: Delete on special key #document
5457        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5458            Some(DOCUMENT_KEY),
5459            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5460        );
5461        assert!(result.is_err());
5462        assert!(matches!(
5463            result.unwrap_err(),
5464            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5465        ));
5466
5467        // Error: Delete vector index (not currently supported)
5468        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5469            None,
5470            IndexConfig::Vector(VectorIndexConfig {
5471                space: None,
5472                embedding_function: None,
5473                source_key: None,
5474                hnsw: None,
5475                spann: None,
5476            }),
5477        );
5478        assert!(result.is_err());
5479        assert!(matches!(
5480            result.unwrap_err(),
5481            SchemaBuilderError::VectorIndexDeletionNotSupported
5482        ));
5483
5484        // Error: Delete FTS index (not currently supported)
5485        let result = Schema::new_default(KnnIndex::Hnsw)
5486            .delete_index(None, IndexConfig::Fts(FtsIndexConfig {}));
5487        assert!(result.is_err());
5488        assert!(matches!(
5489            result.unwrap_err(),
5490            SchemaBuilderError::FtsIndexDeletionNotSupported
5491        ));
5492
5493        // Error: Delete sparse vector index (not currently supported)
5494        let result = Schema::new_default(KnnIndex::Hnsw)
5495            .create_index(
5496                Some("sparse"),
5497                IndexConfig::SparseVector(SparseVectorIndexConfig {
5498                    embedding_function: None,
5499                    source_key: None,
5500                    bm25: None,
5501                }),
5502            )
5503            .expect("create should succeed")
5504            .delete_index(
5505                Some("sparse"),
5506                IndexConfig::SparseVector(SparseVectorIndexConfig {
5507                    embedding_function: None,
5508                    source_key: None,
5509                    bm25: None,
5510                }),
5511            );
5512        assert!(result.is_err());
5513        assert!(matches!(
5514            result.unwrap_err(),
5515            SchemaBuilderError::SparseVectorIndexDeletionNotSupported
5516        ));
5517    }
5518
5519    #[test]
5520    fn test_builder_pattern_chaining() {
5521        // Test complex chaining scenario
5522        let schema = Schema::new_default(KnnIndex::Hnsw)
5523            .create_index(Some("tag1"), StringInvertedIndexConfig {}.into())
5524            .unwrap()
5525            .create_index(Some("tag2"), StringInvertedIndexConfig {}.into())
5526            .unwrap()
5527            .create_index(Some("tag3"), StringInvertedIndexConfig {}.into())
5528            .unwrap()
5529            .create_index(Some("count"), IntInvertedIndexConfig {}.into())
5530            .unwrap()
5531            .delete_index(Some("tag2"), StringInvertedIndexConfig {}.into())
5532            .unwrap()
5533            .create_index(Some("score"), FloatInvertedIndexConfig {}.into())
5534            .unwrap();
5535
5536        // Verify tag1 is enabled
5537        assert!(
5538            schema
5539                .keys
5540                .get("tag1")
5541                .unwrap()
5542                .string
5543                .as_ref()
5544                .unwrap()
5545                .string_inverted_index
5546                .as_ref()
5547                .unwrap()
5548                .enabled
5549        );
5550
5551        // Verify tag2 is disabled
5552        assert!(
5553            !schema
5554                .keys
5555                .get("tag2")
5556                .unwrap()
5557                .string
5558                .as_ref()
5559                .unwrap()
5560                .string_inverted_index
5561                .as_ref()
5562                .unwrap()
5563                .enabled
5564        );
5565
5566        // Verify tag3 is enabled
5567        assert!(
5568            schema
5569                .keys
5570                .get("tag3")
5571                .unwrap()
5572                .string
5573                .as_ref()
5574                .unwrap()
5575                .string_inverted_index
5576                .as_ref()
5577                .unwrap()
5578                .enabled
5579        );
5580
5581        // Verify count is enabled
5582        assert!(
5583            schema
5584                .keys
5585                .get("count")
5586                .unwrap()
5587                .int
5588                .as_ref()
5589                .unwrap()
5590                .int_inverted_index
5591                .as_ref()
5592                .unwrap()
5593                .enabled
5594        );
5595
5596        // Verify score is enabled
5597        assert!(
5598            schema
5599                .keys
5600                .get("score")
5601                .unwrap()
5602                .float
5603                .as_ref()
5604                .unwrap()
5605                .float_inverted_index
5606                .as_ref()
5607                .unwrap()
5608                .enabled
5609        );
5610    }
5611
5612    #[test]
5613    fn test_schema_default_matches_python() {
5614        // Test that Schema::default() matches Python's Schema() behavior exactly
5615        let schema = Schema::default();
5616
5617        // ============================================================================
5618        // VERIFY DEFAULTS (match Python's _initialize_defaults)
5619        // ============================================================================
5620
5621        // String defaults: FTS disabled, string inverted enabled
5622        assert!(schema.defaults.string.is_some());
5623        let string = schema.defaults.string.as_ref().unwrap();
5624        assert!(!string.fts_index.as_ref().unwrap().enabled);
5625        assert!(string.string_inverted_index.as_ref().unwrap().enabled);
5626
5627        // Float list defaults: vector index disabled
5628        assert!(schema.defaults.float_list.is_some());
5629        let float_list = schema.defaults.float_list.as_ref().unwrap();
5630        assert!(!float_list.vector_index.as_ref().unwrap().enabled);
5631        let vector_config = &float_list.vector_index.as_ref().unwrap().config;
5632        assert_eq!(vector_config.space, None); // Python leaves as None
5633        assert_eq!(vector_config.hnsw, None); // Python doesn't specify
5634        assert_eq!(vector_config.spann, None); // Python doesn't specify
5635        assert_eq!(vector_config.source_key, None);
5636
5637        // Sparse vector defaults: disabled
5638        assert!(schema.defaults.sparse_vector.is_some());
5639        let sparse = schema.defaults.sparse_vector.as_ref().unwrap();
5640        assert!(!sparse.sparse_vector_index.as_ref().unwrap().enabled);
5641
5642        // Int defaults: inverted index enabled
5643        assert!(schema.defaults.int.is_some());
5644        assert!(
5645            schema
5646                .defaults
5647                .int
5648                .as_ref()
5649                .unwrap()
5650                .int_inverted_index
5651                .as_ref()
5652                .unwrap()
5653                .enabled
5654        );
5655
5656        // Float defaults: inverted index enabled
5657        assert!(schema.defaults.float.is_some());
5658        assert!(
5659            schema
5660                .defaults
5661                .float
5662                .as_ref()
5663                .unwrap()
5664                .float_inverted_index
5665                .as_ref()
5666                .unwrap()
5667                .enabled
5668        );
5669
5670        // Bool defaults: inverted index enabled
5671        assert!(schema.defaults.boolean.is_some());
5672        assert!(
5673            schema
5674                .defaults
5675                .boolean
5676                .as_ref()
5677                .unwrap()
5678                .bool_inverted_index
5679                .as_ref()
5680                .unwrap()
5681                .enabled
5682        );
5683
5684        // ============================================================================
5685        // VERIFY SPECIAL KEYS (match Python's _initialize_keys)
5686        // ============================================================================
5687
5688        // #document: FTS enabled, string inverted disabled
5689        assert!(schema.keys.contains_key(DOCUMENT_KEY));
5690        let doc = schema.keys.get(DOCUMENT_KEY).unwrap();
5691        assert!(doc.string.is_some());
5692        assert!(
5693            doc.string
5694                .as_ref()
5695                .unwrap()
5696                .fts_index
5697                .as_ref()
5698                .unwrap()
5699                .enabled
5700        );
5701        assert!(
5702            !doc.string
5703                .as_ref()
5704                .unwrap()
5705                .string_inverted_index
5706                .as_ref()
5707                .unwrap()
5708                .enabled
5709        );
5710
5711        // #embedding: vector index enabled with source_key=#document
5712        assert!(schema.keys.contains_key(EMBEDDING_KEY));
5713        let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
5714        assert!(embedding.float_list.is_some());
5715        let vec_idx = embedding
5716            .float_list
5717            .as_ref()
5718            .unwrap()
5719            .vector_index
5720            .as_ref()
5721            .unwrap();
5722        assert!(vec_idx.enabled);
5723        assert_eq!(vec_idx.config.source_key, Some(DOCUMENT_KEY.to_string()));
5724        assert_eq!(vec_idx.config.space, None); // Python leaves as None
5725        assert_eq!(vec_idx.config.hnsw, None); // Python doesn't specify
5726        assert_eq!(vec_idx.config.spann, None); // Python doesn't specify
5727
5728        // Verify only these two special keys exist
5729        assert_eq!(schema.keys.len(), 2);
5730    }
5731
5732    #[test]
5733    fn test_schema_default_works_with_builder() {
5734        // Test that Schema::default() can be used with builder pattern
5735        let schema = Schema::default()
5736            .create_index(Some("category"), StringInvertedIndexConfig {}.into())
5737            .expect("should succeed");
5738
5739        // Verify the new index was added
5740        assert!(schema.keys.contains_key("category"));
5741        assert!(schema.keys.contains_key(DOCUMENT_KEY));
5742        assert!(schema.keys.contains_key(EMBEDDING_KEY));
5743        assert_eq!(schema.keys.len(), 3);
5744    }
5745
5746    #[cfg(feature = "testing")]
5747    mod proptests {
5748        use super::*;
5749        use crate::strategies::{
5750            embedding_function_strategy, internal_collection_configuration_strategy,
5751            internal_hnsw_configuration_strategy, internal_spann_configuration_strategy,
5752            knn_index_strategy, space_strategy, TEST_NAME_PATTERN,
5753        };
5754        use crate::{
5755            HnswIndexConfig, SpannIndexConfig, VectorIndexConfig, DOCUMENT_KEY, EMBEDDING_KEY,
5756        };
5757        use proptest::prelude::*;
5758        use proptest::strategy::BoxedStrategy;
5759        use proptest::string::string_regex;
5760        use serde_json::json;
5761
5762        fn default_embedding_function_strategy(
5763        ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
5764            proptest::option::of(prop_oneof![
5765                Just(EmbeddingFunctionConfiguration::Unknown),
5766                Just(EmbeddingFunctionConfiguration::Known(
5767                    EmbeddingFunctionNewConfiguration {
5768                        name: "default".to_string(),
5769                        config: json!({ "alpha": 1 }),
5770                    }
5771                )),
5772            ])
5773        }
5774
5775        fn sparse_embedding_function_strategy(
5776        ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
5777            let known_strategy = string_regex(TEST_NAME_PATTERN).unwrap().prop_map(|name| {
5778                EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
5779                    name,
5780                    config: json!({ "alpha": 1 }),
5781                })
5782            });
5783
5784            proptest::option::of(prop_oneof![
5785                Just(EmbeddingFunctionConfiguration::Unknown),
5786                known_strategy,
5787            ])
5788        }
5789
5790        fn non_default_internal_collection_configuration_strategy(
5791        ) -> impl Strategy<Value = InternalCollectionConfiguration> {
5792            internal_collection_configuration_strategy()
5793                .prop_filter("non-default configuration", |config| !config.is_default())
5794        }
5795
5796        fn partial_hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
5797            (
5798                proptest::option::of(1usize..=512),
5799                proptest::option::of(1usize..=128),
5800                proptest::option::of(1usize..=512),
5801                proptest::option::of(1usize..=64),
5802                proptest::option::of(2usize..=4096),
5803                proptest::option::of(2usize..=4096),
5804                proptest::option::of(prop_oneof![
5805                    Just(0.5f64),
5806                    Just(1.0f64),
5807                    Just(1.5f64),
5808                    Just(2.0f64)
5809                ]),
5810            )
5811                .prop_map(
5812                    |(
5813                        ef_construction,
5814                        max_neighbors,
5815                        ef_search,
5816                        num_threads,
5817                        batch_size,
5818                        sync_threshold,
5819                        resize_factor,
5820                    )| HnswIndexConfig {
5821                        ef_construction,
5822                        max_neighbors,
5823                        ef_search,
5824                        num_threads,
5825                        batch_size,
5826                        sync_threshold,
5827                        resize_factor,
5828                    },
5829                )
5830        }
5831
5832        fn partial_spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
5833            let epsilon_strategy = prop_oneof![Just(5.0f32), Just(7.5f32), Just(10.0f32)];
5834            (
5835                (
5836                    proptest::option::of(1u32..=128),               // search_nprobe
5837                    proptest::option::of(Just(1.0f32)), // search_rng_factor (must be 1.0)
5838                    proptest::option::of(epsilon_strategy.clone()), // search_rng_epsilon
5839                    proptest::option::of(1u32..=8),     // nreplica_count
5840                    proptest::option::of(Just(1.0f32)), // write_rng_factor (must be 1.0)
5841                    proptest::option::of(epsilon_strategy), // write_rng_epsilon
5842                    proptest::option::of(50u32..=200),  // split_threshold
5843                    proptest::option::of(1usize..=1000), // num_samples_kmeans
5844                ),
5845                (
5846                    proptest::option::of(Just(100.0f32)), // initial_lambda (must be 100.0)
5847                    proptest::option::of(1u32..=64),      // reassign_neighbor_count
5848                    proptest::option::of(25u32..=100),    // merge_threshold
5849                    proptest::option::of(1u32..=8),       // num_centers_to_merge_to
5850                    proptest::option::of(1u32..=64),      // write_nprobe
5851                    proptest::option::of(1usize..=200),   // ef_construction
5852                    proptest::option::of(1usize..=200),   // ef_search
5853                    proptest::option::of(1usize..=64),    // max_neighbors
5854                ),
5855            )
5856                .prop_map(
5857                    |(
5858                        (
5859                            search_nprobe,
5860                            search_rng_factor,
5861                            search_rng_epsilon,
5862                            nreplica_count,
5863                            write_rng_factor,
5864                            write_rng_epsilon,
5865                            split_threshold,
5866                            num_samples_kmeans,
5867                        ),
5868                        (
5869                            initial_lambda,
5870                            reassign_neighbor_count,
5871                            merge_threshold,
5872                            num_centers_to_merge_to,
5873                            write_nprobe,
5874                            ef_construction,
5875                            ef_search,
5876                            max_neighbors,
5877                        ),
5878                    )| SpannIndexConfig {
5879                        search_nprobe,
5880                        search_rng_factor,
5881                        search_rng_epsilon,
5882                        nreplica_count,
5883                        write_rng_factor,
5884                        write_rng_epsilon,
5885                        split_threshold,
5886                        num_samples_kmeans,
5887                        initial_lambda,
5888                        reassign_neighbor_count,
5889                        merge_threshold,
5890                        num_centers_to_merge_to,
5891                        write_nprobe,
5892                        ef_construction,
5893                        ef_search,
5894                        max_neighbors,
5895                    },
5896                )
5897        }
5898
5899        proptest! {
5900            #[test]
5901            fn merge_hnsw_configs_preserves_user_overrides(
5902                base in partial_hnsw_index_config_strategy(),
5903                user in partial_hnsw_index_config_strategy(),
5904            ) {
5905                let merged = Schema::merge_hnsw_configs(Some(&base), Some(&user))
5906                    .expect("merge should return Some when both are Some");
5907
5908                // Property: user values always take precedence when Some
5909                if user.ef_construction.is_some() {
5910                    prop_assert_eq!(merged.ef_construction, user.ef_construction);
5911                }
5912                if user.max_neighbors.is_some() {
5913                    prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
5914                }
5915                if user.ef_search.is_some() {
5916                    prop_assert_eq!(merged.ef_search, user.ef_search);
5917                }
5918                if user.num_threads.is_some() {
5919                    prop_assert_eq!(merged.num_threads, user.num_threads);
5920                }
5921                if user.batch_size.is_some() {
5922                    prop_assert_eq!(merged.batch_size, user.batch_size);
5923                }
5924                if user.sync_threshold.is_some() {
5925                    prop_assert_eq!(merged.sync_threshold, user.sync_threshold);
5926                }
5927                if user.resize_factor.is_some() {
5928                    prop_assert_eq!(merged.resize_factor, user.resize_factor);
5929                }
5930            }
5931
5932            #[test]
5933            fn merge_hnsw_configs_falls_back_to_base_when_user_is_none(
5934                base in partial_hnsw_index_config_strategy(),
5935            ) {
5936                let merged = Schema::merge_hnsw_configs(Some(&base), None)
5937                    .expect("merge should return Some when base is Some");
5938
5939                // Property: when user is None, base values are preserved
5940                prop_assert_eq!(merged, base);
5941            }
5942
5943            #[test]
5944            fn merge_hnsw_configs_returns_user_when_base_is_none(
5945                user in partial_hnsw_index_config_strategy(),
5946            ) {
5947                let merged = Schema::merge_hnsw_configs(None, Some(&user))
5948                    .expect("merge should return Some when user is Some");
5949
5950                // Property: when base is None, user values are preserved
5951                prop_assert_eq!(merged, user);
5952            }
5953
5954            #[test]
5955            fn merge_spann_configs_preserves_user_overrides(
5956                base in partial_spann_index_config_strategy(),
5957                user in partial_spann_index_config_strategy(),
5958            ) {
5959                let merged = Schema::merge_spann_configs(Some(&base), Some(&user))
5960                    .expect("merge should return Some when both are Some");
5961
5962                // Property: user values always take precedence when Some
5963                if user.search_nprobe.is_some() {
5964                    prop_assert_eq!(merged.search_nprobe, user.search_nprobe);
5965                }
5966                if user.search_rng_epsilon.is_some() {
5967                    prop_assert_eq!(merged.search_rng_epsilon, user.search_rng_epsilon);
5968                }
5969                if user.split_threshold.is_some() {
5970                    prop_assert_eq!(merged.split_threshold, user.split_threshold);
5971                }
5972                if user.ef_construction.is_some() {
5973                    prop_assert_eq!(merged.ef_construction, user.ef_construction);
5974                }
5975                if user.ef_search.is_some() {
5976                    prop_assert_eq!(merged.ef_search, user.ef_search);
5977                }
5978                if user.max_neighbors.is_some() {
5979                    prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
5980                }
5981            }
5982
5983            #[test]
5984            fn merge_spann_configs_falls_back_to_base_when_user_is_none(
5985                base in partial_spann_index_config_strategy(),
5986            ) {
5987                let merged = Schema::merge_spann_configs(Some(&base), None)
5988                    .expect("merge should return Some when base is Some");
5989
5990                // Property: when user is None, base values are preserved
5991                prop_assert_eq!(merged, base);
5992            }
5993
5994            #[test]
5995            fn merge_vector_index_config_preserves_user_overrides(
5996                base in vector_index_config_strategy(),
5997                user in vector_index_config_strategy(),
5998                knn in knn_index_strategy(),
5999            ) {
6000                let merged = Schema::merge_vector_index_config(&base, &user, knn);
6001
6002                // Property: user values take precedence for top-level fields
6003                if user.space.is_some() {
6004                    prop_assert_eq!(merged.space, user.space);
6005                }
6006                if user.embedding_function.is_some() {
6007                    prop_assert_eq!(merged.embedding_function, user.embedding_function);
6008                }
6009                if user.source_key.is_some() {
6010                    prop_assert_eq!(merged.source_key, user.source_key);
6011                }
6012
6013                // Property: nested configs are merged according to merge rules
6014                match knn {
6015                    KnnIndex::Hnsw => {
6016                        if let (Some(_base_hnsw), Some(user_hnsw)) = (&base.hnsw, &user.hnsw) {
6017                            let merged_hnsw = merged.hnsw.as_ref().expect("hnsw should be Some");
6018                            if user_hnsw.ef_construction.is_some() {
6019                                prop_assert_eq!(merged_hnsw.ef_construction, user_hnsw.ef_construction);
6020                            }
6021                        }
6022                    }
6023                    KnnIndex::Spann => {
6024                        if let (Some(_base_spann), Some(user_spann)) = (&base.spann, &user.spann) {
6025                            let merged_spann = merged.spann.as_ref().expect("spann should be Some");
6026                            if user_spann.search_nprobe.is_some() {
6027                                prop_assert_eq!(merged_spann.search_nprobe, user_spann.search_nprobe);
6028                            }
6029                        }
6030                    }
6031                }
6032            }
6033        }
6034
6035        fn expected_vector_index_config(
6036            config: &InternalCollectionConfiguration,
6037        ) -> VectorIndexConfig {
6038            match &config.vector_index {
6039                VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
6040                    space: Some(hnsw_config.space.clone()),
6041                    embedding_function: config.embedding_function.clone(),
6042                    source_key: None,
6043                    hnsw: Some(HnswIndexConfig {
6044                        ef_construction: Some(hnsw_config.ef_construction),
6045                        max_neighbors: Some(hnsw_config.max_neighbors),
6046                        ef_search: Some(hnsw_config.ef_search),
6047                        num_threads: Some(hnsw_config.num_threads),
6048                        batch_size: Some(hnsw_config.batch_size),
6049                        sync_threshold: Some(hnsw_config.sync_threshold),
6050                        resize_factor: Some(hnsw_config.resize_factor),
6051                    }),
6052                    spann: None,
6053                },
6054                VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
6055                    space: Some(spann_config.space.clone()),
6056                    embedding_function: config.embedding_function.clone(),
6057                    source_key: None,
6058                    hnsw: None,
6059                    spann: Some(SpannIndexConfig {
6060                        search_nprobe: Some(spann_config.search_nprobe),
6061                        search_rng_factor: Some(spann_config.search_rng_factor),
6062                        search_rng_epsilon: Some(spann_config.search_rng_epsilon),
6063                        nreplica_count: Some(spann_config.nreplica_count),
6064                        write_rng_factor: Some(spann_config.write_rng_factor),
6065                        write_rng_epsilon: Some(spann_config.write_rng_epsilon),
6066                        split_threshold: Some(spann_config.split_threshold),
6067                        num_samples_kmeans: Some(spann_config.num_samples_kmeans),
6068                        initial_lambda: Some(spann_config.initial_lambda),
6069                        reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
6070                        merge_threshold: Some(spann_config.merge_threshold),
6071                        num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
6072                        write_nprobe: Some(spann_config.write_nprobe),
6073                        ef_construction: Some(spann_config.ef_construction),
6074                        ef_search: Some(spann_config.ef_search),
6075                        max_neighbors: Some(spann_config.max_neighbors),
6076                    }),
6077                },
6078            }
6079        }
6080
6081        fn non_special_key_strategy() -> BoxedStrategy<String> {
6082            string_regex(TEST_NAME_PATTERN)
6083                .unwrap()
6084                .prop_filter("exclude special keys", |key| {
6085                    key != DOCUMENT_KEY && key != EMBEDDING_KEY
6086                })
6087                .boxed()
6088        }
6089
6090        fn source_key_strategy() -> BoxedStrategy<Option<String>> {
6091            proptest::option::of(prop_oneof![
6092                Just(DOCUMENT_KEY.to_string()),
6093                string_regex(TEST_NAME_PATTERN).unwrap(),
6094            ])
6095            .boxed()
6096        }
6097
6098        fn fts_index_type_strategy() -> impl Strategy<Value = FtsIndexType> {
6099            any::<bool>().prop_map(|enabled| FtsIndexType {
6100                enabled,
6101                config: FtsIndexConfig {},
6102            })
6103        }
6104
6105        fn string_inverted_index_type_strategy() -> impl Strategy<Value = StringInvertedIndexType> {
6106            any::<bool>().prop_map(|enabled| StringInvertedIndexType {
6107                enabled,
6108                config: StringInvertedIndexConfig {},
6109            })
6110        }
6111
6112        fn string_value_type_strategy() -> BoxedStrategy<Option<StringValueType>> {
6113            proptest::option::of(
6114                (
6115                    proptest::option::of(string_inverted_index_type_strategy()),
6116                    proptest::option::of(fts_index_type_strategy()),
6117                )
6118                    .prop_map(|(string_inverted_index, fts_index)| {
6119                        StringValueType {
6120                            string_inverted_index,
6121                            fts_index,
6122                        }
6123                    }),
6124            )
6125            .boxed()
6126        }
6127
6128        fn float_inverted_index_type_strategy() -> impl Strategy<Value = FloatInvertedIndexType> {
6129            any::<bool>().prop_map(|enabled| FloatInvertedIndexType {
6130                enabled,
6131                config: FloatInvertedIndexConfig {},
6132            })
6133        }
6134
6135        fn float_value_type_strategy() -> BoxedStrategy<Option<FloatValueType>> {
6136            proptest::option::of(
6137                proptest::option::of(float_inverted_index_type_strategy()).prop_map(
6138                    |float_inverted_index| FloatValueType {
6139                        float_inverted_index,
6140                    },
6141                ),
6142            )
6143            .boxed()
6144        }
6145
6146        fn int_inverted_index_type_strategy() -> impl Strategy<Value = IntInvertedIndexType> {
6147            any::<bool>().prop_map(|enabled| IntInvertedIndexType {
6148                enabled,
6149                config: IntInvertedIndexConfig {},
6150            })
6151        }
6152
6153        fn int_value_type_strategy() -> BoxedStrategy<Option<IntValueType>> {
6154            proptest::option::of(
6155                proptest::option::of(int_inverted_index_type_strategy())
6156                    .prop_map(|int_inverted_index| IntValueType { int_inverted_index }),
6157            )
6158            .boxed()
6159        }
6160
6161        fn bool_inverted_index_type_strategy() -> impl Strategy<Value = BoolInvertedIndexType> {
6162            any::<bool>().prop_map(|enabled| BoolInvertedIndexType {
6163                enabled,
6164                config: BoolInvertedIndexConfig {},
6165            })
6166        }
6167
6168        fn bool_value_type_strategy() -> BoxedStrategy<Option<BoolValueType>> {
6169            proptest::option::of(
6170                proptest::option::of(bool_inverted_index_type_strategy()).prop_map(
6171                    |bool_inverted_index| BoolValueType {
6172                        bool_inverted_index,
6173                    },
6174                ),
6175            )
6176            .boxed()
6177        }
6178
6179        fn sparse_vector_index_config_strategy() -> impl Strategy<Value = SparseVectorIndexConfig> {
6180            (
6181                sparse_embedding_function_strategy(),
6182                source_key_strategy(),
6183                proptest::option::of(any::<bool>()),
6184            )
6185                .prop_map(|(embedding_function, source_key, bm25)| {
6186                    SparseVectorIndexConfig {
6187                        embedding_function,
6188                        source_key,
6189                        bm25,
6190                    }
6191                })
6192        }
6193
6194        fn sparse_vector_value_type_strategy() -> BoxedStrategy<Option<SparseVectorValueType>> {
6195            proptest::option::of(
6196                (
6197                    any::<bool>(),
6198                    proptest::option::of(sparse_vector_index_config_strategy()),
6199                )
6200                    .prop_map(|(enabled, config)| SparseVectorValueType {
6201                        sparse_vector_index: config.map(|cfg| SparseVectorIndexType {
6202                            enabled,
6203                            config: cfg,
6204                        }),
6205                    }),
6206            )
6207            .boxed()
6208        }
6209
6210        fn hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
6211            internal_hnsw_configuration_strategy().prop_map(|config| HnswIndexConfig {
6212                ef_construction: Some(config.ef_construction),
6213                max_neighbors: Some(config.max_neighbors),
6214                ef_search: Some(config.ef_search),
6215                num_threads: Some(config.num_threads),
6216                batch_size: Some(config.batch_size),
6217                sync_threshold: Some(config.sync_threshold),
6218                resize_factor: Some(config.resize_factor),
6219            })
6220        }
6221
6222        fn spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
6223            internal_spann_configuration_strategy().prop_map(|config| SpannIndexConfig {
6224                search_nprobe: Some(config.search_nprobe),
6225                search_rng_factor: Some(config.search_rng_factor),
6226                search_rng_epsilon: Some(config.search_rng_epsilon),
6227                nreplica_count: Some(config.nreplica_count),
6228                write_rng_factor: Some(config.write_rng_factor),
6229                write_rng_epsilon: Some(config.write_rng_epsilon),
6230                split_threshold: Some(config.split_threshold),
6231                num_samples_kmeans: Some(config.num_samples_kmeans),
6232                initial_lambda: Some(config.initial_lambda),
6233                reassign_neighbor_count: Some(config.reassign_neighbor_count),
6234                merge_threshold: Some(config.merge_threshold),
6235                num_centers_to_merge_to: Some(config.num_centers_to_merge_to),
6236                write_nprobe: Some(config.write_nprobe),
6237                ef_construction: Some(config.ef_construction),
6238                ef_search: Some(config.ef_search),
6239                max_neighbors: Some(config.max_neighbors),
6240            })
6241        }
6242
6243        fn vector_index_config_strategy() -> impl Strategy<Value = VectorIndexConfig> {
6244            (
6245                proptest::option::of(space_strategy()),
6246                embedding_function_strategy(),
6247                source_key_strategy(),
6248                proptest::option::of(hnsw_index_config_strategy()),
6249                proptest::option::of(spann_index_config_strategy()),
6250            )
6251                .prop_map(|(space, embedding_function, source_key, hnsw, spann)| {
6252                    VectorIndexConfig {
6253                        space,
6254                        embedding_function,
6255                        source_key,
6256                        hnsw,
6257                        spann,
6258                    }
6259                })
6260        }
6261
6262        fn vector_index_type_strategy() -> impl Strategy<Value = VectorIndexType> {
6263            (any::<bool>(), vector_index_config_strategy())
6264                .prop_map(|(enabled, config)| VectorIndexType { enabled, config })
6265        }
6266
6267        fn float_list_value_type_strategy() -> BoxedStrategy<Option<FloatListValueType>> {
6268            proptest::option::of(
6269                proptest::option::of(vector_index_type_strategy())
6270                    .prop_map(|vector_index| FloatListValueType { vector_index }),
6271            )
6272            .boxed()
6273        }
6274
6275        fn value_types_strategy() -> BoxedStrategy<ValueTypes> {
6276            (
6277                string_value_type_strategy(),
6278                float_list_value_type_strategy(),
6279                sparse_vector_value_type_strategy(),
6280                int_value_type_strategy(),
6281                float_value_type_strategy(),
6282                bool_value_type_strategy(),
6283            )
6284                .prop_map(
6285                    |(string, float_list, sparse_vector, int, float, boolean)| ValueTypes {
6286                        string,
6287                        float_list,
6288                        sparse_vector,
6289                        int,
6290                        float,
6291                        boolean,
6292                    },
6293                )
6294                .boxed()
6295        }
6296
6297        fn schema_strategy() -> BoxedStrategy<Schema> {
6298            (
6299                value_types_strategy(),
6300                proptest::collection::hash_map(
6301                    non_special_key_strategy(),
6302                    value_types_strategy(),
6303                    0..=3,
6304                ),
6305                proptest::option::of(value_types_strategy()),
6306                proptest::option::of(value_types_strategy()),
6307            )
6308                .prop_map(
6309                    |(defaults, mut extra_keys, document_override, embedding_override)| {
6310                        if let Some(doc) = document_override {
6311                            extra_keys.insert(DOCUMENT_KEY.to_string(), doc);
6312                        }
6313                        if let Some(embed) = embedding_override {
6314                            extra_keys.insert(EMBEDDING_KEY.to_string(), embed);
6315                        }
6316                        Schema {
6317                            defaults,
6318                            keys: extra_keys,
6319                            cmek: None,
6320                            source_attached_function_id: None,
6321                        }
6322                    },
6323                )
6324                .boxed()
6325        }
6326
6327        fn force_non_default_schema(mut schema: Schema) -> Schema {
6328            if schema.is_default() {
6329                if let Some(string_value) = schema
6330                    .defaults
6331                    .string
6332                    .as_mut()
6333                    .and_then(|string_value| string_value.string_inverted_index.as_mut())
6334                {
6335                    string_value.enabled = !string_value.enabled;
6336                } else {
6337                    schema.defaults.string = Some(StringValueType {
6338                        string_inverted_index: Some(StringInvertedIndexType {
6339                            enabled: false,
6340                            config: StringInvertedIndexConfig {},
6341                        }),
6342                        fts_index: None,
6343                    });
6344                }
6345            }
6346            schema
6347        }
6348
6349        fn non_default_schema_strategy() -> BoxedStrategy<Schema> {
6350            schema_strategy().prop_map(force_non_default_schema).boxed()
6351        }
6352
6353        fn extract_vector_configs(schema: &Schema) -> (VectorIndexConfig, VectorIndexConfig) {
6354            let defaults = schema
6355                .defaults
6356                .float_list
6357                .as_ref()
6358                .and_then(|fl| fl.vector_index.as_ref())
6359                .map(|vi| vi.config.clone())
6360                .expect("defaults vector index missing");
6361
6362            let embedding = schema
6363                .keys
6364                .get(EMBEDDING_KEY)
6365                .and_then(|value_types| value_types.float_list.as_ref())
6366                .and_then(|fl| fl.vector_index.as_ref())
6367                .map(|vi| vi.config.clone())
6368                .expect("#embedding vector index missing");
6369
6370            (defaults, embedding)
6371        }
6372
6373        proptest! {
6374            #[test]
6375            fn reconcile_schema_and_config_matches_convert_for_config_only(
6376                config in internal_collection_configuration_strategy(),
6377                knn in knn_index_strategy(),
6378            ) {
6379                let result = Schema::reconcile_schema_and_config(None, Some(&config), knn)
6380                    .expect("reconciliation should succeed");
6381
6382                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6383                let expected_config = expected_vector_index_config(&config);
6384
6385                prop_assert_eq!(defaults_vi, expected_config.clone());
6386
6387                let mut expected_embedding_config = expected_config;
6388                expected_embedding_config.source_key = Some(DOCUMENT_KEY.to_string());
6389                prop_assert_eq!(embedding_vi, expected_embedding_config);
6390
6391                prop_assert_eq!(result.keys.len(), 2);
6392            }
6393        }
6394
6395        proptest! {
6396            #[test]
6397            fn reconcile_schema_and_config_errors_when_both_non_default(
6398                config in non_default_internal_collection_configuration_strategy(),
6399                knn in knn_index_strategy(),
6400            ) {
6401                let schema = Schema::try_from(&config)
6402                    .expect("conversion should succeed");
6403                prop_assume!(!schema.is_default());
6404
6405                let result = Schema::reconcile_schema_and_config(Some(&schema), Some(&config), knn);
6406
6407                prop_assert!(matches!(result, Err(SchemaError::ConfigAndSchemaConflict)));
6408            }
6409        }
6410
6411        proptest! {
6412            #[test]
6413            fn reconcile_schema_and_config_matches_schema_only_path(
6414                schema in schema_strategy(),
6415                knn in knn_index_strategy(),
6416            ) {
6417                let result = Schema::reconcile_schema_and_config(Some(&schema), None, knn)
6418                    .expect("reconciliation should succeed");
6419
6420                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6421
6422                // Property: schema defaults.float_list vector_index config should be merged into defaults
6423                if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
6424                    if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
6425                        // Property: schema values take precedence over defaults
6426                        if let Some(schema_space) = &schema_vi.config.space {
6427                            prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
6428                        }
6429                        if let Some(schema_ef) = &schema_vi.config.embedding_function {
6430                            prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
6431                        }
6432                        // Test nested config merging properties
6433                        match knn {
6434                            KnnIndex::Hnsw => {
6435                                if let Some(schema_hnsw) = &schema_vi.config.hnsw {
6436                                    if let Some(merged_hnsw) = &defaults_vi.hnsw {
6437                                        if let Some(schema_ef_construction) = schema_hnsw.ef_construction {
6438                                            prop_assert_eq!(merged_hnsw.ef_construction, Some(schema_ef_construction));
6439                                        }
6440                                    }
6441                                }
6442                            }
6443                            KnnIndex::Spann => {
6444                                if let Some(schema_spann) = &schema_vi.config.spann {
6445                                    if let Some(merged_spann) = &defaults_vi.spann {
6446                                        if let Some(schema_search_nprobe) = schema_spann.search_nprobe {
6447                                            prop_assert_eq!(merged_spann.search_nprobe, Some(schema_search_nprobe));
6448                                        }
6449                                    }
6450                                }
6451                            }
6452                        }
6453                    }
6454                }
6455
6456                // Property: schema #embedding float_list vector_index config should be merged into embedding
6457                if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
6458                    if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
6459                        if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
6460                            if let Some(schema_space) = &embedding_vi_type.config.space {
6461                                prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
6462                            }
6463                        }
6464                    }
6465                }
6466            }
6467        }
6468
6469        proptest! {
6470            #[test]
6471            fn reconcile_schema_and_config_with_default_schema_and_default_config_applies_embedding_function(
6472                embedding_function in default_embedding_function_strategy(),
6473                knn in knn_index_strategy(),
6474            ) {
6475                let schema = Schema::new_default(knn);
6476                let mut config = match knn {
6477                    KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
6478                    KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
6479                };
6480                config.embedding_function = embedding_function.clone();
6481
6482                let result = Schema::reconcile_schema_and_config(
6483                    Some(&schema),
6484                    Some(&config),
6485                    knn,
6486                )
6487                .expect("reconciliation should succeed");
6488
6489                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6490
6491                // Property: embedding function from config should be applied to both defaults and embedding
6492                if let Some(ef) = embedding_function {
6493                    prop_assert_eq!(defaults_vi.embedding_function, Some(ef.clone()));
6494                    prop_assert_eq!(embedding_vi.embedding_function, Some(ef));
6495                } else {
6496                    // Property: when embedding function is None, it should remain None
6497                    prop_assert_eq!(defaults_vi.embedding_function, None);
6498                    prop_assert_eq!(embedding_vi.embedding_function, None);
6499                }
6500            }
6501        }
6502
6503        proptest! {
6504            #[test]
6505            fn reconcile_schema_and_config_with_default_config_keeps_non_default_schema(
6506                schema in non_default_schema_strategy(),
6507                knn in knn_index_strategy(),
6508            ) {
6509                let default_config = match knn {
6510                    KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
6511                    KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
6512                };
6513
6514                let result = Schema::reconcile_schema_and_config(
6515                    Some(&schema),
6516                    Some(&default_config),
6517                    knn,
6518                )
6519                .expect("reconciliation should succeed");
6520
6521                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6522
6523                // Property: when config is default, schema values should be preserved
6524                // Test that schema defaults.float_list vector_index config is applied
6525                if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
6526                    if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
6527                        if let Some(schema_space) = &schema_vi.config.space {
6528                            prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
6529                        }
6530                        if let Some(schema_ef) = &schema_vi.config.embedding_function {
6531                            prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
6532                        }
6533                    }
6534                }
6535
6536                // Property: schema #embedding float_list vector_index config should be applied
6537                if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
6538                    if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
6539                        if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
6540                            if let Some(schema_space) = &embedding_vi_type.config.space {
6541                                prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
6542                            }
6543                        }
6544                    }
6545                }
6546            }
6547        }
6548    }
6549}