chroma_types/
collection_schema.rs

1use chroma_error::{ChromaError, ErrorCodes};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::sync::{Arc, LazyLock};
6use thiserror::Error;
7use validator::Validate;
8
9use crate::chroma_proto;
10use crate::collection_configuration::{
11    EmbeddingFunctionConfiguration, InternalCollectionConfiguration,
12    UpdateVectorIndexConfiguration, VectorIndexConfiguration,
13};
14use crate::hnsw_configuration::Space;
15use crate::metadata::{MetadataComparison, MetadataValueType, Where};
16use crate::operator::QueryVector;
17use crate::{
18    default_batch_size, default_construction_ef, default_construction_ef_spann,
19    default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
20    default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
21    default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
22    default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
23    default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
24    default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor, ConversionError,
25    HnswParametersFromSegmentError, InternalHnswConfiguration, InternalSpannConfiguration,
26    InternalUpdateCollectionConfiguration, KnnIndex, Segment, CHROMA_KEY,
27};
28
29impl ChromaError for SchemaError {
30    fn code(&self) -> ErrorCodes {
31        match self {
32            // Internal errors (500)
33            // These indicate system/internal issues during schema operations
34            SchemaError::MissingIndexConfiguration { .. } => ErrorCodes::Internal,
35            SchemaError::InvalidSchema { .. } => ErrorCodes::Internal,
36            // DefaultsMismatch and ConfigurationConflict only occur during schema merge()
37            // which happens internally during compaction, not from user input
38            SchemaError::DefaultsMismatch => ErrorCodes::Internal,
39            SchemaError::ConfigurationConflict { .. } => ErrorCodes::Internal,
40
41            // User/External errors (400)
42            // These indicate user-provided invalid input
43            SchemaError::InvalidUserInput { .. } => ErrorCodes::InvalidArgument,
44            SchemaError::ConfigAndSchemaConflict => ErrorCodes::InvalidArgument,
45            SchemaError::InvalidHnswConfig(_) => ErrorCodes::InvalidArgument,
46            SchemaError::InvalidSpannConfig(_) => ErrorCodes::InvalidArgument,
47            SchemaError::Builder(e) => e.code(),
48        }
49    }
50}
51
52#[derive(Debug, Error)]
53pub enum SchemaError {
54    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
55    MissingIndexConfiguration { key: String, value_type: String },
56    #[error("Schema reconciliation failed: {reason}")]
57    InvalidSchema { reason: String },
58    #[error("Cannot set both collection config and schema simultaneously")]
59    ConfigAndSchemaConflict,
60    #[error("Cannot merge schemas with differing defaults")]
61    DefaultsMismatch,
62    #[error("Conflicting configuration for {context}")]
63    ConfigurationConflict { context: String },
64    #[error("Invalid HNSW configuration: {0}")]
65    InvalidHnswConfig(validator::ValidationErrors),
66    #[error("Invalid SPANN configuration: {0}")]
67    InvalidSpannConfig(validator::ValidationErrors),
68    #[error("Invalid schema input: {reason}")]
69    InvalidUserInput { reason: String },
70    #[error(transparent)]
71    Builder(#[from] SchemaBuilderError),
72}
73
74#[derive(Debug, Error)]
75pub enum SchemaBuilderError {
76    #[error("Vector index must be configured globally using create_index(None, config), not on specific key '{key}'")]
77    VectorIndexMustBeGlobal { key: String },
78    #[error("FTS index must be configured globally using create_index(None, config), not on specific key '{key}'")]
79    FtsIndexMustBeGlobal { key: String },
80    #[error("Cannot modify special key '{key}' - it is managed automatically by the system. To customize vector search, modify the global vector config instead.")]
81    SpecialKeyModificationNotAllowed { key: String },
82    #[error("Sparse vector index requires a specific key. Use create_index(Some(\"key_name\"), config) instead of create_index(None, config)")]
83    SparseVectorRequiresKey,
84    #[error("Only one sparse vector index allowed per collection. Key '{existing_key}' already has a sparse vector index. Remove it first or use that key.")]
85    MultipleSparseVectorIndexes { existing_key: String },
86    #[error("Vector index deletion not supported. The vector index is always enabled on #embedding. To disable vector search, disable the collection instead.")]
87    VectorIndexDeletionNotSupported,
88    #[error("FTS index deletion not supported. The FTS index is always enabled on #document. To disable full-text search, use a different collection without FTS.")]
89    FtsIndexDeletionNotSupported,
90    #[error("Sparse vector index deletion not supported yet. Sparse vector indexes cannot be removed once created.")]
91    SparseVectorIndexDeletionNotSupported,
92}
93
94#[derive(Debug, Error)]
95pub enum FilterValidationError {
96    #[error(
97        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
98    )]
99    IndexingDisabled {
100        key: String,
101        value_type: MetadataValueType,
102    },
103    #[error(transparent)]
104    Schema(#[from] SchemaError),
105}
106
107impl ChromaError for SchemaBuilderError {
108    fn code(&self) -> ErrorCodes {
109        ErrorCodes::InvalidArgument
110    }
111}
112
113impl ChromaError for FilterValidationError {
114    fn code(&self) -> ErrorCodes {
115        match self {
116            FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
117            FilterValidationError::Schema(_) => ErrorCodes::Internal,
118        }
119    }
120}
121
// ============================================================================
// SCHEMA CONSTANTS
// ============================================================================
// These constants must match the Python constants in chromadb/api/types.py

// Value type name constants

/// Name of the string value type.
pub const STRING_VALUE_NAME: &str = "string";
/// Name of the integer value type.
pub const INT_VALUE_NAME: &str = "int";
/// Name of the boolean value type.
pub const BOOL_VALUE_NAME: &str = "bool";
/// Name of the float value type.
pub const FLOAT_VALUE_NAME: &str = "float";
/// Name of the float-list value type.
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
/// Name of the sparse-vector value type.
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// Index type name constants

/// Name of the full-text search index type.
pub const FTS_INDEX_NAME: &str = "fts_index";
/// Name of the vector index type.
pub const VECTOR_INDEX_NAME: &str = "vector_index";
/// Name of the sparse vector index type.
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
/// Name of the string inverted index type.
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
/// Name of the integer inverted index type.
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
/// Name of the float inverted index type.
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
/// Name of the boolean inverted index type.
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

// Special metadata keys - must match Python constants in chromadb/api/types.py

/// Special key under which the document's index configuration is stored.
pub const DOCUMENT_KEY: &str = "#document";
/// Special key under which the embedding's index configuration is stored.
pub const EMBEDDING_KEY: &str = "#embedding";
147
148// Static regex pattern to validate CMEK for GCP
149static CMEK_GCP_RE: LazyLock<Regex> = LazyLock::new(|| {
150    Regex::new(r"^projects/.+/locations/.+/keyRings/.+/cryptoKeys/.+$")
151        .expect("The CMEK pattern for GCP should be valid")
152});
153
154/// Customer-managed encryption key for storage encryption.
155///
156/// CMEK allows you to use your own encryption keys managed by cloud providers'
157/// key management services (KMS) instead of default provider-managed keys.
158#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
159#[serde(rename_all = "snake_case")]
160pub enum Cmek {
161    /// Google Cloud Platform KMS key resource name.
162    ///
163    /// Format: `projects/{project}/locations/{location}/keyRings/{keyRing}/cryptoKeys/{cryptoKey}`
164    Gcp(Arc<String>),
165}
166
167impl Cmek {
168    /// Create a GCP CMEK from a KMS resource name
169    ///
170    /// # Example
171    /// ```
172    /// use chroma_types::Cmek;
173    /// let cmek = Cmek::gcp(
174    ///     "projects/my-project/locations/us-central1/keyRings/my-ring/cryptoKeys/my-key".to_string()
175    /// );
176    /// ```
177    pub fn gcp(resource: String) -> Self {
178        Cmek::Gcp(Arc::new(resource))
179    }
180
181    /// Validates that the CMEK resource name matches the expected pattern.
182    ///
183    /// Returns `true` if the resource name is well-formed according to the
184    /// provider's format requirements. Does not verify that the key exists
185    /// or is accessible.
186    pub fn validate_pattern(&self) -> bool {
187        match self {
188            Cmek::Gcp(resource) => CMEK_GCP_RE.is_match(resource),
189        }
190    }
191}
192
193impl TryFrom<chroma_proto::Cmek> for Cmek {
194    type Error = ConversionError;
195
196    fn try_from(proto: chroma_proto::Cmek) -> Result<Self, Self::Error> {
197        match proto.provider {
198            Some(chroma_proto::cmek::Provider::Gcp(resource)) => Ok(Cmek::gcp(resource)),
199            None => Err(ConversionError::DecodeError),
200        }
201    }
202}
203
204impl From<Cmek> for chroma_proto::Cmek {
205    fn from(cmek: Cmek) -> Self {
206        match cmek {
207            Cmek::Gcp(resource) => chroma_proto::Cmek {
208                provider: Some(chroma_proto::cmek::Provider::Gcp((*resource).clone())),
209            },
210        }
211    }
212}
213
214// ============================================================================
215// SCHEMA STRUCTURES
216// ============================================================================
217
218/// Schema representation for collection index configurations
219///
220/// This represents the server-side schema structure used for index management
221
222#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
223#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
224pub struct Schema {
225    /// Default index configurations for each value type
226    pub defaults: ValueTypes,
227    /// Key-specific index overrides
228    /// TODO(Sanket): Needed for backwards compatibility. Should remove after deploy.
229    #[serde(rename = "keys", alias = "key_overrides")]
230    pub keys: HashMap<String, ValueTypes>,
231    /// Customer-managed encryption key for collection data
232    #[serde(skip_serializing_if = "Option::is_none")]
233    #[cfg_attr(feature = "utoipa", schema(value_type = Option<Object>))]
234    pub cmek: Option<Cmek>,
235}
236
237impl Schema {
238    pub fn update(&mut self, configuration: &InternalUpdateCollectionConfiguration) {
239        if let Some(vector_update) = &configuration.vector_index {
240            if let Some(default_vector_index) = self.defaults_vector_index_mut() {
241                Self::apply_vector_index_update(default_vector_index, vector_update);
242            }
243            if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
244                Self::apply_vector_index_update(embedding_vector_index, vector_update);
245            }
246        }
247
248        if let Some(embedding_function) = configuration.embedding_function.as_ref() {
249            if let Some(default_vector_index) = self.defaults_vector_index_mut() {
250                default_vector_index.config.embedding_function = Some(embedding_function.clone());
251            }
252            if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
253                embedding_vector_index.config.embedding_function = Some(embedding_function.clone());
254            }
255        }
256    }
257
258    fn defaults_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
259        self.defaults
260            .float_list
261            .as_mut()
262            .and_then(|float_list| float_list.vector_index.as_mut())
263    }
264
265    fn embedding_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
266        self.keys
267            .get_mut(EMBEDDING_KEY)
268            .and_then(|value_types| value_types.float_list.as_mut())
269            .and_then(|float_list| float_list.vector_index.as_mut())
270    }
271
272    fn apply_vector_index_update(
273        vector_index: &mut VectorIndexType,
274        update: &UpdateVectorIndexConfiguration,
275    ) {
276        match update {
277            UpdateVectorIndexConfiguration::Hnsw(Some(hnsw_update)) => {
278                if let Some(hnsw_config) = vector_index.config.hnsw.as_mut() {
279                    if let Some(ef_search) = hnsw_update.ef_search {
280                        hnsw_config.ef_search = Some(ef_search);
281                    }
282                    if let Some(max_neighbors) = hnsw_update.max_neighbors {
283                        hnsw_config.max_neighbors = Some(max_neighbors);
284                    }
285                    if let Some(num_threads) = hnsw_update.num_threads {
286                        hnsw_config.num_threads = Some(num_threads);
287                    }
288                    if let Some(resize_factor) = hnsw_update.resize_factor {
289                        hnsw_config.resize_factor = Some(resize_factor);
290                    }
291                    if let Some(sync_threshold) = hnsw_update.sync_threshold {
292                        hnsw_config.sync_threshold = Some(sync_threshold);
293                    }
294                    if let Some(batch_size) = hnsw_update.batch_size {
295                        hnsw_config.batch_size = Some(batch_size);
296                    }
297                }
298            }
299            UpdateVectorIndexConfiguration::Hnsw(None) => {}
300            UpdateVectorIndexConfiguration::Spann(Some(spann_update)) => {
301                if let Some(spann_config) = vector_index.config.spann.as_mut() {
302                    if let Some(search_nprobe) = spann_update.search_nprobe {
303                        spann_config.search_nprobe = Some(search_nprobe);
304                    }
305                    if let Some(ef_search) = spann_update.ef_search {
306                        spann_config.ef_search = Some(ef_search);
307                    }
308                }
309            }
310            UpdateVectorIndexConfiguration::Spann(None) => {}
311        }
312    }
313
314    pub fn is_sparse_index_enabled(&self) -> bool {
315        let defaults_enabled = self
316            .defaults
317            .sparse_vector
318            .as_ref()
319            .and_then(|sv| sv.sparse_vector_index.as_ref())
320            .is_some_and(|idx| idx.enabled);
321        let key_enabled = self.keys.values().any(|value_types| {
322            value_types
323                .sparse_vector
324                .as_ref()
325                .and_then(|sv| sv.sparse_vector_index.as_ref())
326                .is_some_and(|idx| idx.enabled)
327        });
328        defaults_enabled || key_enabled
329    }
330}
331
332impl Default for Schema {
333    /// Create a default Schema that matches Python's behavior exactly.
334    ///
335    /// Python creates a Schema with:
336    /// - All inverted indexes enabled by default (string, int, float, bool)
337    /// - Vector and FTS indexes disabled in defaults
338    /// - Special keys configured: #document (FTS enabled) and #embedding (vector enabled)
339    /// - Vector config has space=None, hnsw=None, spann=None (deferred to backend)
340    ///
341    /// # Examples
342    /// ```
343    /// use chroma_types::Schema;
344    ///
345    /// let schema = Schema::default();
346    /// assert!(schema.keys.contains_key("#document"));
347    /// assert!(schema.keys.contains_key("#embedding"));
348    /// ```
349    fn default() -> Self {
350        // Initialize defaults - match Python's _initialize_defaults()
351        let defaults = ValueTypes {
352            string: Some(StringValueType {
353                fts_index: Some(FtsIndexType {
354                    enabled: false,
355                    config: FtsIndexConfig {},
356                }),
357                string_inverted_index: Some(StringInvertedIndexType {
358                    enabled: true,
359                    config: StringInvertedIndexConfig {},
360                }),
361            }),
362            float_list: Some(FloatListValueType {
363                vector_index: Some(VectorIndexType {
364                    enabled: false,
365                    config: VectorIndexConfig {
366                        space: None, // Python leaves as None (resolved on serialization)
367                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
368                        source_key: None,
369                        hnsw: None,  // Python doesn't specify
370                        spann: None, // Python doesn't specify
371                    },
372                }),
373            }),
374            sparse_vector: Some(SparseVectorValueType {
375                sparse_vector_index: Some(SparseVectorIndexType {
376                    enabled: false,
377                    config: SparseVectorIndexConfig {
378                        embedding_function: None,
379                        source_key: None,
380                        bm25: None,
381                    },
382                }),
383            }),
384            int: Some(IntValueType {
385                int_inverted_index: Some(IntInvertedIndexType {
386                    enabled: true,
387                    config: IntInvertedIndexConfig {},
388                }),
389            }),
390            float: Some(FloatValueType {
391                float_inverted_index: Some(FloatInvertedIndexType {
392                    enabled: true,
393                    config: FloatInvertedIndexConfig {},
394                }),
395            }),
396            boolean: Some(BoolValueType {
397                bool_inverted_index: Some(BoolInvertedIndexType {
398                    enabled: true,
399                    config: BoolInvertedIndexConfig {},
400                }),
401            }),
402        };
403
404        // Initialize key-specific overrides - match Python's _initialize_keys()
405        let mut keys = HashMap::new();
406
407        // #document: FTS enabled, string inverted disabled
408        keys.insert(
409            DOCUMENT_KEY.to_string(),
410            ValueTypes {
411                string: Some(StringValueType {
412                    fts_index: Some(FtsIndexType {
413                        enabled: true,
414                        config: FtsIndexConfig {},
415                    }),
416                    string_inverted_index: Some(StringInvertedIndexType {
417                        enabled: false,
418                        config: StringInvertedIndexConfig {},
419                    }),
420                }),
421                ..Default::default()
422            },
423        );
424
425        // #embedding: Vector index enabled with source_key=#document
426        keys.insert(
427            EMBEDDING_KEY.to_string(),
428            ValueTypes {
429                float_list: Some(FloatListValueType {
430                    vector_index: Some(VectorIndexType {
431                        enabled: true,
432                        config: VectorIndexConfig {
433                            space: None, // Python leaves as None (resolved on serialization)
434                            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
435                            source_key: Some(DOCUMENT_KEY.to_string()),
436                            hnsw: None,  // Python doesn't specify
437                            spann: None, // Python doesn't specify
438                        },
439                    }),
440                }),
441                ..Default::default()
442            },
443        );
444
445        Schema {
446            defaults,
447            keys,
448            cmek: None,
449        }
450    }
451}
452
453pub fn is_embedding_function_default(
454    embedding_function: &Option<EmbeddingFunctionConfiguration>,
455) -> bool {
456    match embedding_function {
457        None => true,
458        Some(embedding_function) => embedding_function.is_default(),
459    }
460}
461
462/// Check if space is default (None means default, or if present, should be default space)
463pub fn is_space_default(space: &Option<Space>) -> bool {
464    match space {
465        None => true,                     // None means default
466        Some(s) => *s == default_space(), // If present, check if it's the default space
467    }
468}
469
470/// Check if HNSW config is default
471pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
472    hnsw_config.ef_construction == Some(default_construction_ef())
473        && hnsw_config.ef_search == Some(default_search_ef())
474        && hnsw_config.max_neighbors == Some(default_m())
475        && hnsw_config.num_threads == Some(default_num_threads())
476        && hnsw_config.batch_size == Some(default_batch_size())
477        && hnsw_config.sync_threshold == Some(default_sync_threshold())
478        && hnsw_config.resize_factor == Some(default_resize_factor())
479}
480
481// ============================================================================
482// NEW STRONGLY-TYPED SCHEMA STRUCTURES
483// ============================================================================
484
485/// Strongly-typed value type configurations
486/// Contains optional configurations for each supported value type
487#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
488#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
489pub struct ValueTypes {
490    #[serde(
491        rename = "string",
492        alias = "#string",
493        skip_serializing_if = "Option::is_none"
494    )] // STRING_VALUE_NAME
495    pub string: Option<StringValueType>,
496
497    #[serde(
498        rename = "float_list",
499        alias = "#float_list",
500        skip_serializing_if = "Option::is_none"
501    )]
502    // FLOAT_LIST_VALUE_NAME
503    pub float_list: Option<FloatListValueType>,
504
505    #[serde(
506        rename = "sparse_vector",
507        alias = "#sparse_vector",
508        skip_serializing_if = "Option::is_none"
509    )]
510    // SPARSE_VECTOR_VALUE_NAME
511    pub sparse_vector: Option<SparseVectorValueType>,
512
513    #[serde(
514        rename = "int",
515        alias = "#int",
516        skip_serializing_if = "Option::is_none"
517    )] // INT_VALUE_NAME
518    pub int: Option<IntValueType>,
519
520    #[serde(
521        rename = "float",
522        alias = "#float",
523        skip_serializing_if = "Option::is_none"
524    )] // FLOAT_VALUE_NAME
525    pub float: Option<FloatValueType>,
526
527    #[serde(
528        rename = "bool",
529        alias = "#bool",
530        skip_serializing_if = "Option::is_none"
531    )] // BOOL_VALUE_NAME
532    pub boolean: Option<BoolValueType>,
533}
534
535/// String value type index configurations
536#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
537#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
538pub struct StringValueType {
539    #[serde(
540        rename = "fts_index",
541        alias = "$fts_index",
542        skip_serializing_if = "Option::is_none"
543    )] // FTS_INDEX_NAME
544    pub fts_index: Option<FtsIndexType>,
545
546    #[serde(
547        rename = "string_inverted_index", // STRING_INVERTED_INDEX_NAME
548        alias = "$string_inverted_index",
549        skip_serializing_if = "Option::is_none"
550    )]
551    pub string_inverted_index: Option<StringInvertedIndexType>,
552}
553
554/// Float list value type index configurations (for vectors)
555#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
556#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
557pub struct FloatListValueType {
558    #[serde(
559        rename = "vector_index",
560        alias = "$vector_index",
561        skip_serializing_if = "Option::is_none"
562    )] // VECTOR_INDEX_NAME
563    pub vector_index: Option<VectorIndexType>,
564}
565
566/// Sparse vector value type index configurations
567#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
568#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
569pub struct SparseVectorValueType {
570    #[serde(
571        rename = "sparse_vector_index", // SPARSE_VECTOR_INDEX_NAME
572        alias = "$sparse_vector_index",
573        skip_serializing_if = "Option::is_none"
574    )]
575    pub sparse_vector_index: Option<SparseVectorIndexType>,
576}
577
578/// Integer value type index configurations
579#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
580#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
581pub struct IntValueType {
582    #[serde(
583        rename = "int_inverted_index",
584        alias = "$int_inverted_index",
585        skip_serializing_if = "Option::is_none"
586    )]
587    // INT_INVERTED_INDEX_NAME
588    pub int_inverted_index: Option<IntInvertedIndexType>,
589}
590
591/// Float value type index configurations
592#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
593#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
594pub struct FloatValueType {
595    #[serde(
596        rename = "float_inverted_index", // FLOAT_INVERTED_INDEX_NAME
597        alias = "$float_inverted_index",
598        skip_serializing_if = "Option::is_none"
599    )]
600    pub float_inverted_index: Option<FloatInvertedIndexType>,
601}
602
603/// Boolean value type index configurations
604#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
605#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
606pub struct BoolValueType {
607    #[serde(
608        rename = "bool_inverted_index", // BOOL_INVERTED_INDEX_NAME
609        alias = "$bool_inverted_index",
610        skip_serializing_if = "Option::is_none"
611    )]
612    pub bool_inverted_index: Option<BoolInvertedIndexType>,
613}
614
615// Individual index type structs with enabled status and config
616#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
617#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
618pub struct FtsIndexType {
619    pub enabled: bool,
620    pub config: FtsIndexConfig,
621}
622
623#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
624#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
625pub struct VectorIndexType {
626    pub enabled: bool,
627    pub config: VectorIndexConfig,
628}
629
630#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
631#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
632pub struct SparseVectorIndexType {
633    pub enabled: bool,
634    pub config: SparseVectorIndexConfig,
635}
636
637#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
638#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
639pub struct StringInvertedIndexType {
640    pub enabled: bool,
641    pub config: StringInvertedIndexConfig,
642}
643
644#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
645#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
646pub struct IntInvertedIndexType {
647    pub enabled: bool,
648    pub config: IntInvertedIndexConfig,
649}
650
651#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
652#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
653pub struct FloatInvertedIndexType {
654    pub enabled: bool,
655    pub config: FloatInvertedIndexConfig,
656}
657
658#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
659#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
660pub struct BoolInvertedIndexType {
661    pub enabled: bool,
662    pub config: BoolInvertedIndexConfig,
663}
664
665impl Schema {
666    /// Create a new Schema with strongly-typed default configurations
667    pub fn new_default(default_knn_index: KnnIndex) -> Self {
668        // Vector index disabled on all keys except #embedding.
669        let vector_config = VectorIndexType {
670            enabled: false,
671            config: VectorIndexConfig {
672                space: Some(default_space()),
673                embedding_function: None,
674                source_key: None,
675                hnsw: match default_knn_index {
676                    KnnIndex::Hnsw => Some(HnswIndexConfig {
677                        ef_construction: Some(default_construction_ef()),
678                        max_neighbors: Some(default_m()),
679                        ef_search: Some(default_search_ef()),
680                        num_threads: Some(default_num_threads()),
681                        batch_size: Some(default_batch_size()),
682                        sync_threshold: Some(default_sync_threshold()),
683                        resize_factor: Some(default_resize_factor()),
684                    }),
685                    KnnIndex::Spann => None,
686                },
687                spann: match default_knn_index {
688                    KnnIndex::Hnsw => None,
689                    KnnIndex::Spann => Some(SpannIndexConfig {
690                        search_nprobe: Some(default_search_nprobe()),
691                        search_rng_factor: Some(default_search_rng_factor()),
692                        search_rng_epsilon: Some(default_search_rng_epsilon()),
693                        nreplica_count: Some(default_nreplica_count()),
694                        write_rng_factor: Some(default_write_rng_factor()),
695                        write_rng_epsilon: Some(default_write_rng_epsilon()),
696                        split_threshold: Some(default_split_threshold()),
697                        num_samples_kmeans: Some(default_num_samples_kmeans()),
698                        initial_lambda: Some(default_initial_lambda()),
699                        reassign_neighbor_count: Some(default_reassign_neighbor_count()),
700                        merge_threshold: Some(default_merge_threshold()),
701                        num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
702                        write_nprobe: Some(default_write_nprobe()),
703                        ef_construction: Some(default_construction_ef_spann()),
704                        ef_search: Some(default_search_ef_spann()),
705                        max_neighbors: Some(default_m_spann()),
706                    }),
707                },
708            },
709        };
710
711        // Initialize defaults struct directly instead of using Default::default() + field assignments
712        let defaults = ValueTypes {
713            string: Some(StringValueType {
714                string_inverted_index: Some(StringInvertedIndexType {
715                    enabled: true,
716                    config: StringInvertedIndexConfig {},
717                }),
718                fts_index: Some(FtsIndexType {
719                    enabled: false,
720                    config: FtsIndexConfig {},
721                }),
722            }),
723            float: Some(FloatValueType {
724                float_inverted_index: Some(FloatInvertedIndexType {
725                    enabled: true,
726                    config: FloatInvertedIndexConfig {},
727                }),
728            }),
729            int: Some(IntValueType {
730                int_inverted_index: Some(IntInvertedIndexType {
731                    enabled: true,
732                    config: IntInvertedIndexConfig {},
733                }),
734            }),
735            boolean: Some(BoolValueType {
736                bool_inverted_index: Some(BoolInvertedIndexType {
737                    enabled: true,
738                    config: BoolInvertedIndexConfig {},
739                }),
740            }),
741            float_list: Some(FloatListValueType {
742                vector_index: Some(vector_config),
743            }),
744            sparse_vector: Some(SparseVectorValueType {
745                sparse_vector_index: Some(SparseVectorIndexType {
746                    enabled: false,
747                    config: SparseVectorIndexConfig {
748                        embedding_function: Some(EmbeddingFunctionConfiguration::Unknown),
749                        source_key: None,
750                        bm25: Some(false),
751                    },
752                }),
753            }),
754        };
755
756        // Set up key overrides
757        let mut keys = HashMap::new();
758
759        // Enable vector index for #embedding.
760        let embedding_defaults = ValueTypes {
761            float_list: Some(FloatListValueType {
762                vector_index: Some(VectorIndexType {
763                    enabled: true,
764                    config: VectorIndexConfig {
765                        space: Some(default_space()),
766                        embedding_function: None,
767                        source_key: Some(DOCUMENT_KEY.to_string()),
768                        hnsw: match default_knn_index {
769                            KnnIndex::Hnsw => Some(HnswIndexConfig {
770                                ef_construction: Some(default_construction_ef()),
771                                max_neighbors: Some(default_m()),
772                                ef_search: Some(default_search_ef()),
773                                num_threads: Some(default_num_threads()),
774                                batch_size: Some(default_batch_size()),
775                                sync_threshold: Some(default_sync_threshold()),
776                                resize_factor: Some(default_resize_factor()),
777                            }),
778                            KnnIndex::Spann => None,
779                        },
780                        spann: match default_knn_index {
781                            KnnIndex::Hnsw => None,
782                            KnnIndex::Spann => Some(SpannIndexConfig {
783                                search_nprobe: Some(default_search_nprobe()),
784                                search_rng_factor: Some(default_search_rng_factor()),
785                                search_rng_epsilon: Some(default_search_rng_epsilon()),
786                                nreplica_count: Some(default_nreplica_count()),
787                                write_rng_factor: Some(default_write_rng_factor()),
788                                write_rng_epsilon: Some(default_write_rng_epsilon()),
789                                split_threshold: Some(default_split_threshold()),
790                                num_samples_kmeans: Some(default_num_samples_kmeans()),
791                                initial_lambda: Some(default_initial_lambda()),
792                                reassign_neighbor_count: Some(default_reassign_neighbor_count()),
793                                merge_threshold: Some(default_merge_threshold()),
794                                num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
795                                write_nprobe: Some(default_write_nprobe()),
796                                ef_construction: Some(default_construction_ef_spann()),
797                                ef_search: Some(default_search_ef_spann()),
798                                max_neighbors: Some(default_m_spann()),
799                            }),
800                        },
801                    },
802                }),
803            }),
804            ..Default::default()
805        };
806        keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
807
808        // Document defaults - initialize directly instead of Default::default() + field assignment
809        let document_defaults = ValueTypes {
810            string: Some(StringValueType {
811                fts_index: Some(FtsIndexType {
812                    enabled: true,
813                    config: FtsIndexConfig {},
814                }),
815                string_inverted_index: Some(StringInvertedIndexType {
816                    enabled: false,
817                    config: StringInvertedIndexConfig {},
818                }),
819            }),
820            ..Default::default()
821        };
822        keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
823
824        Schema {
825            defaults,
826            keys,
827            cmek: None,
828        }
829    }
830
831    pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
832        let to_internal = |vector_index: &VectorIndexType| {
833            let space = vector_index.config.space.clone();
834            vector_index
835                .config
836                .spann
837                .clone()
838                .map(|config| (space.as_ref(), &config).into())
839        };
840
841        self.keys
842            .get(EMBEDDING_KEY)
843            .and_then(|value_types| value_types.float_list.as_ref())
844            .and_then(|float_list| float_list.vector_index.as_ref())
845            .and_then(to_internal)
846            .or_else(|| {
847                self.defaults
848                    .float_list
849                    .as_ref()
850                    .and_then(|float_list| float_list.vector_index.as_ref())
851                    .and_then(to_internal)
852            })
853    }
854
855    pub fn get_internal_hnsw_config(&self) -> Option<InternalHnswConfiguration> {
856        let to_internal = |vector_index: &VectorIndexType| {
857            if vector_index.config.spann.is_some() {
858                return None;
859            }
860            let space = vector_index.config.space.as_ref();
861            let hnsw_config = vector_index.config.hnsw.as_ref();
862            Some((space, hnsw_config).into())
863        };
864
865        self.keys
866            .get(EMBEDDING_KEY)
867            .and_then(|value_types| value_types.float_list.as_ref())
868            .and_then(|float_list| float_list.vector_index.as_ref())
869            .and_then(to_internal)
870            .or_else(|| {
871                self.defaults
872                    .float_list
873                    .as_ref()
874                    .and_then(|float_list| float_list.vector_index.as_ref())
875                    .and_then(to_internal)
876            })
877    }
878
879    pub fn get_internal_hnsw_config_with_legacy_fallback(
880        &self,
881        segment: &Segment,
882    ) -> Result<Option<InternalHnswConfiguration>, HnswParametersFromSegmentError> {
883        if let Some(config) = self.get_internal_hnsw_config() {
884            let config_from_metadata =
885                InternalHnswConfiguration::from_legacy_segment_metadata(&segment.metadata)?;
886
887            if config == InternalHnswConfiguration::default() && config != config_from_metadata {
888                return Ok(Some(config_from_metadata));
889            }
890
891            return Ok(Some(config));
892        }
893
894        Ok(None)
895    }
896
897    /// Reconcile user-provided schema with system defaults
898    ///
899    /// This method merges user configurations with system defaults, ensuring that:
900    /// - User overrides take precedence over defaults
901    /// - Missing user configurations fall back to system defaults
902    /// - Field-level merging for complex configurations (Vector, HNSW, SPANN, etc.)
903    pub fn reconcile_with_defaults(
904        user_schema: Option<&Schema>,
905        knn_index: KnnIndex,
906    ) -> Result<Self, SchemaError> {
907        let default_schema = Schema::new_default(knn_index);
908
909        match user_schema {
910            Some(user) => {
911                // Merge defaults with user overrides
912                let merged_defaults =
913                    Self::merge_value_types(&default_schema.defaults, &user.defaults, knn_index)?;
914
915                // Merge key overrides
916                let mut merged_keys = default_schema.keys.clone();
917                for (key, user_value_types) in &user.keys {
918                    if let Some(default_value_types) = merged_keys.get(key) {
919                        // Merge with existing default key override
920                        let merged_value_types = Self::merge_value_types(
921                            default_value_types,
922                            user_value_types,
923                            knn_index,
924                        )?;
925                        merged_keys.insert(key.clone(), merged_value_types);
926                    } else {
927                        // New key override from user
928                        merged_keys.insert(key.clone(), user_value_types.clone());
929                    }
930                }
931
932                Ok(Schema {
933                    defaults: merged_defaults,
934                    keys: merged_keys,
935                    cmek: user.cmek.clone().or(default_schema.cmek.clone()),
936                })
937            }
938            None => Ok(default_schema),
939        }
940    }
941
942    /// Merge two schemas together, combining key overrides when possible.
943    pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
944        if self.defaults != other.defaults {
945            return Err(SchemaError::DefaultsMismatch);
946        }
947
948        let mut keys = self.keys.clone();
949
950        for (key, other_value_types) in &other.keys {
951            if let Some(existing) = keys.get(key).cloned() {
952                let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
953                keys.insert(key.clone(), merged);
954            } else {
955                keys.insert(key.clone(), other_value_types.clone());
956            }
957        }
958
959        Ok(Schema {
960            defaults: self.defaults.clone(),
961            keys,
962            cmek: other.cmek.clone().or(self.cmek.clone()),
963        })
964    }
965
966    fn merge_override_value_types(
967        key: &str,
968        left: &ValueTypes,
969        right: &ValueTypes,
970    ) -> Result<ValueTypes, SchemaError> {
971        Ok(ValueTypes {
972            string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
973            float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
974            int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
975            boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
976            float_list: Self::merge_float_list_override(
977                key,
978                left.float_list.as_ref(),
979                right.float_list.as_ref(),
980            )?,
981            sparse_vector: Self::merge_sparse_vector_override(
982                key,
983                left.sparse_vector.as_ref(),
984                right.sparse_vector.as_ref(),
985            )?,
986        })
987    }
988
989    fn merge_string_override(
990        key: &str,
991        left: Option<&StringValueType>,
992        right: Option<&StringValueType>,
993    ) -> Result<Option<StringValueType>, SchemaError> {
994        match (left, right) {
995            (Some(l), Some(r)) => Ok(Some(StringValueType {
996                string_inverted_index: Self::merge_index_or_error(
997                    l.string_inverted_index.as_ref(),
998                    r.string_inverted_index.as_ref(),
999                    &format!("key '{key}' string.string_inverted_index"),
1000                )?,
1001                fts_index: Self::merge_index_or_error(
1002                    l.fts_index.as_ref(),
1003                    r.fts_index.as_ref(),
1004                    &format!("key '{key}' string.fts_index"),
1005                )?,
1006            })),
1007            (Some(l), None) => Ok(Some(l.clone())),
1008            (None, Some(r)) => Ok(Some(r.clone())),
1009            (None, None) => Ok(None),
1010        }
1011    }
1012
1013    fn merge_float_override(
1014        key: &str,
1015        left: Option<&FloatValueType>,
1016        right: Option<&FloatValueType>,
1017    ) -> Result<Option<FloatValueType>, SchemaError> {
1018        match (left, right) {
1019            (Some(l), Some(r)) => Ok(Some(FloatValueType {
1020                float_inverted_index: Self::merge_index_or_error(
1021                    l.float_inverted_index.as_ref(),
1022                    r.float_inverted_index.as_ref(),
1023                    &format!("key '{key}' float.float_inverted_index"),
1024                )?,
1025            })),
1026            (Some(l), None) => Ok(Some(l.clone())),
1027            (None, Some(r)) => Ok(Some(r.clone())),
1028            (None, None) => Ok(None),
1029        }
1030    }
1031
1032    fn merge_int_override(
1033        key: &str,
1034        left: Option<&IntValueType>,
1035        right: Option<&IntValueType>,
1036    ) -> Result<Option<IntValueType>, SchemaError> {
1037        match (left, right) {
1038            (Some(l), Some(r)) => Ok(Some(IntValueType {
1039                int_inverted_index: Self::merge_index_or_error(
1040                    l.int_inverted_index.as_ref(),
1041                    r.int_inverted_index.as_ref(),
1042                    &format!("key '{key}' int.int_inverted_index"),
1043                )?,
1044            })),
1045            (Some(l), None) => Ok(Some(l.clone())),
1046            (None, Some(r)) => Ok(Some(r.clone())),
1047            (None, None) => Ok(None),
1048        }
1049    }
1050
1051    fn merge_bool_override(
1052        key: &str,
1053        left: Option<&BoolValueType>,
1054        right: Option<&BoolValueType>,
1055    ) -> Result<Option<BoolValueType>, SchemaError> {
1056        match (left, right) {
1057            (Some(l), Some(r)) => Ok(Some(BoolValueType {
1058                bool_inverted_index: Self::merge_index_or_error(
1059                    l.bool_inverted_index.as_ref(),
1060                    r.bool_inverted_index.as_ref(),
1061                    &format!("key '{key}' bool.bool_inverted_index"),
1062                )?,
1063            })),
1064            (Some(l), None) => Ok(Some(l.clone())),
1065            (None, Some(r)) => Ok(Some(r.clone())),
1066            (None, None) => Ok(None),
1067        }
1068    }
1069
1070    fn merge_float_list_override(
1071        key: &str,
1072        left: Option<&FloatListValueType>,
1073        right: Option<&FloatListValueType>,
1074    ) -> Result<Option<FloatListValueType>, SchemaError> {
1075        match (left, right) {
1076            (Some(l), Some(r)) => Ok(Some(FloatListValueType {
1077                vector_index: Self::merge_index_or_error(
1078                    l.vector_index.as_ref(),
1079                    r.vector_index.as_ref(),
1080                    &format!("key '{key}' float_list.vector_index"),
1081                )?,
1082            })),
1083            (Some(l), None) => Ok(Some(l.clone())),
1084            (None, Some(r)) => Ok(Some(r.clone())),
1085            (None, None) => Ok(None),
1086        }
1087    }
1088
1089    fn merge_sparse_vector_override(
1090        key: &str,
1091        left: Option<&SparseVectorValueType>,
1092        right: Option<&SparseVectorValueType>,
1093    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1094        match (left, right) {
1095            (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
1096                sparse_vector_index: Self::merge_index_or_error(
1097                    l.sparse_vector_index.as_ref(),
1098                    r.sparse_vector_index.as_ref(),
1099                    &format!("key '{key}' sparse_vector.sparse_vector_index"),
1100                )?,
1101            })),
1102            (Some(l), None) => Ok(Some(l.clone())),
1103            (None, Some(r)) => Ok(Some(r.clone())),
1104            (None, None) => Ok(None),
1105        }
1106    }
1107
1108    fn merge_index_or_error<T: Clone + PartialEq>(
1109        left: Option<&T>,
1110        right: Option<&T>,
1111        context: &str,
1112    ) -> Result<Option<T>, SchemaError> {
1113        match (left, right) {
1114            (Some(l), Some(r)) => {
1115                if l == r {
1116                    Ok(Some(l.clone()))
1117                } else {
1118                    Err(SchemaError::ConfigurationConflict {
1119                        context: context.to_string(),
1120                    })
1121                }
1122            }
1123            (Some(l), None) => Ok(Some(l.clone())),
1124            (None, Some(r)) => Ok(Some(r.clone())),
1125            (None, None) => Ok(None),
1126        }
1127    }
1128
1129    /// Merge two ValueTypes with field-level merging
1130    /// User values take precedence over default values
1131    fn merge_value_types(
1132        default: &ValueTypes,
1133        user: &ValueTypes,
1134        knn_index: KnnIndex,
1135    ) -> Result<ValueTypes, SchemaError> {
1136        // Merge float_list first
1137        let float_list = Self::merge_float_list_type(
1138            default.float_list.as_ref(),
1139            user.float_list.as_ref(),
1140            knn_index,
1141        );
1142
1143        // Validate the merged float_list (covers all merge cases)
1144        if let Some(ref fl) = float_list {
1145            Self::validate_float_list_value_type(fl)?;
1146        }
1147
1148        Ok(ValueTypes {
1149            string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
1150            float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
1151            int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
1152            boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
1153            float_list,
1154            sparse_vector: Self::merge_sparse_vector_type(
1155                default.sparse_vector.as_ref(),
1156                user.sparse_vector.as_ref(),
1157            )?,
1158        })
1159    }
1160
1161    /// Merge StringValueType configurations
1162    fn merge_string_type(
1163        default: Option<&StringValueType>,
1164        user: Option<&StringValueType>,
1165    ) -> Result<Option<StringValueType>, SchemaError> {
1166        match (default, user) {
1167            (Some(default), Some(user)) => Ok(Some(StringValueType {
1168                string_inverted_index: Self::merge_string_inverted_index_type(
1169                    default.string_inverted_index.as_ref(),
1170                    user.string_inverted_index.as_ref(),
1171                )?,
1172                fts_index: Self::merge_fts_index_type(
1173                    default.fts_index.as_ref(),
1174                    user.fts_index.as_ref(),
1175                )?,
1176            })),
1177            (Some(default), None) => Ok(Some(default.clone())),
1178            (None, Some(user)) => Ok(Some(user.clone())),
1179            (None, None) => Ok(None),
1180        }
1181    }
1182
1183    /// Merge FloatValueType configurations
1184    fn merge_float_type(
1185        default: Option<&FloatValueType>,
1186        user: Option<&FloatValueType>,
1187    ) -> Result<Option<FloatValueType>, SchemaError> {
1188        match (default, user) {
1189            (Some(default), Some(user)) => Ok(Some(FloatValueType {
1190                float_inverted_index: Self::merge_float_inverted_index_type(
1191                    default.float_inverted_index.as_ref(),
1192                    user.float_inverted_index.as_ref(),
1193                )?,
1194            })),
1195            (Some(default), None) => Ok(Some(default.clone())),
1196            (None, Some(user)) => Ok(Some(user.clone())),
1197            (None, None) => Ok(None),
1198        }
1199    }
1200
1201    /// Merge IntValueType configurations
1202    fn merge_int_type(
1203        default: Option<&IntValueType>,
1204        user: Option<&IntValueType>,
1205    ) -> Result<Option<IntValueType>, SchemaError> {
1206        match (default, user) {
1207            (Some(default), Some(user)) => Ok(Some(IntValueType {
1208                int_inverted_index: Self::merge_int_inverted_index_type(
1209                    default.int_inverted_index.as_ref(),
1210                    user.int_inverted_index.as_ref(),
1211                )?,
1212            })),
1213            (Some(default), None) => Ok(Some(default.clone())),
1214            (None, Some(user)) => Ok(Some(user.clone())),
1215            (None, None) => Ok(None),
1216        }
1217    }
1218
1219    /// Merge BoolValueType configurations
1220    fn merge_bool_type(
1221        default: Option<&BoolValueType>,
1222        user: Option<&BoolValueType>,
1223    ) -> Result<Option<BoolValueType>, SchemaError> {
1224        match (default, user) {
1225            (Some(default), Some(user)) => Ok(Some(BoolValueType {
1226                bool_inverted_index: Self::merge_bool_inverted_index_type(
1227                    default.bool_inverted_index.as_ref(),
1228                    user.bool_inverted_index.as_ref(),
1229                )?,
1230            })),
1231            (Some(default), None) => Ok(Some(default.clone())),
1232            (None, Some(user)) => Ok(Some(user.clone())),
1233            (None, None) => Ok(None),
1234        }
1235    }
1236
1237    /// Merge FloatListValueType configurations
1238    fn merge_float_list_type(
1239        default: Option<&FloatListValueType>,
1240        user: Option<&FloatListValueType>,
1241        knn_index: KnnIndex,
1242    ) -> Option<FloatListValueType> {
1243        match (default, user) {
1244            (Some(default), Some(user)) => Some(FloatListValueType {
1245                vector_index: Self::merge_vector_index_type(
1246                    default.vector_index.as_ref(),
1247                    user.vector_index.as_ref(),
1248                    knn_index,
1249                ),
1250            }),
1251            (Some(default), None) => Some(default.clone()),
1252            (None, Some(user)) => Some(user.clone()),
1253            (None, None) => None,
1254        }
1255    }
1256
1257    /// Merge SparseVectorValueType configurations
1258    fn merge_sparse_vector_type(
1259        default: Option<&SparseVectorValueType>,
1260        user: Option<&SparseVectorValueType>,
1261    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1262        match (default, user) {
1263            (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
1264                sparse_vector_index: Self::merge_sparse_vector_index_type(
1265                    default.sparse_vector_index.as_ref(),
1266                    user.sparse_vector_index.as_ref(),
1267                )?,
1268            })),
1269            (Some(default), None) => Ok(Some(default.clone())),
1270            (None, Some(user)) => Ok(Some(user.clone())),
1271            (None, None) => Ok(None),
1272        }
1273    }
1274
1275    /// Merge individual index type configurations
1276    fn merge_string_inverted_index_type(
1277        default: Option<&StringInvertedIndexType>,
1278        user: Option<&StringInvertedIndexType>,
1279    ) -> Result<Option<StringInvertedIndexType>, SchemaError> {
1280        match (default, user) {
1281            (Some(_default), Some(user)) => {
1282                Ok(Some(StringInvertedIndexType {
1283                    enabled: user.enabled,       // User enabled state takes precedence
1284                    config: user.config.clone(), // User config takes precedence
1285                }))
1286            }
1287            (Some(default), None) => Ok(Some(default.clone())),
1288            (None, Some(user)) => Ok(Some(user.clone())),
1289            (None, None) => Ok(None),
1290        }
1291    }
1292
1293    fn merge_fts_index_type(
1294        default: Option<&FtsIndexType>,
1295        user: Option<&FtsIndexType>,
1296    ) -> Result<Option<FtsIndexType>, SchemaError> {
1297        match (default, user) {
1298            (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
1299                enabled: user.enabled,
1300                config: user.config.clone(),
1301            })),
1302            (Some(default), None) => Ok(Some(default.clone())),
1303            (None, Some(user)) => Ok(Some(user.clone())),
1304            (None, None) => Ok(None),
1305        }
1306    }
1307
1308    fn merge_float_inverted_index_type(
1309        default: Option<&FloatInvertedIndexType>,
1310        user: Option<&FloatInvertedIndexType>,
1311    ) -> Result<Option<FloatInvertedIndexType>, SchemaError> {
1312        match (default, user) {
1313            (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
1314                enabled: user.enabled,
1315                config: user.config.clone(),
1316            })),
1317            (Some(default), None) => Ok(Some(default.clone())),
1318            (None, Some(user)) => Ok(Some(user.clone())),
1319            (None, None) => Ok(None),
1320        }
1321    }
1322
1323    fn merge_int_inverted_index_type(
1324        default: Option<&IntInvertedIndexType>,
1325        user: Option<&IntInvertedIndexType>,
1326    ) -> Result<Option<IntInvertedIndexType>, SchemaError> {
1327        match (default, user) {
1328            (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
1329                enabled: user.enabled,
1330                config: user.config.clone(),
1331            })),
1332            (Some(default), None) => Ok(Some(default.clone())),
1333            (None, Some(user)) => Ok(Some(user.clone())),
1334            (None, None) => Ok(None),
1335        }
1336    }
1337
1338    fn merge_bool_inverted_index_type(
1339        default: Option<&BoolInvertedIndexType>,
1340        user: Option<&BoolInvertedIndexType>,
1341    ) -> Result<Option<BoolInvertedIndexType>, SchemaError> {
1342        match (default, user) {
1343            (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
1344                enabled: user.enabled,
1345                config: user.config.clone(),
1346            })),
1347            (Some(default), None) => Ok(Some(default.clone())),
1348            (None, Some(user)) => Ok(Some(user.clone())),
1349            (None, None) => Ok(None),
1350        }
1351    }
1352
1353    fn merge_vector_index_type(
1354        default: Option<&VectorIndexType>,
1355        user: Option<&VectorIndexType>,
1356        knn_index: KnnIndex,
1357    ) -> Option<VectorIndexType> {
1358        match (default, user) {
1359            (Some(default), Some(user)) => Some(VectorIndexType {
1360                enabled: user.enabled,
1361                config: Self::merge_vector_index_config(&default.config, &user.config, knn_index),
1362            }),
1363            (Some(default), None) => Some(default.clone()),
1364            (None, Some(user)) => Some(user.clone()),
1365            (None, None) => None,
1366        }
1367    }
1368
1369    fn merge_sparse_vector_index_type(
1370        default: Option<&SparseVectorIndexType>,
1371        user: Option<&SparseVectorIndexType>,
1372    ) -> Result<Option<SparseVectorIndexType>, SchemaError> {
1373        match (default, user) {
1374            (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
1375                enabled: user.enabled,
1376                config: Self::merge_sparse_vector_index_config(&default.config, &user.config),
1377            })),
1378            (Some(default), None) => Ok(Some(default.clone())),
1379            (None, Some(user)) => Ok(Some(user.clone())),
1380            (None, None) => Ok(None),
1381        }
1382    }
1383
1384    /// Validate FloatListValueType vector index configurations
1385    /// This validates HNSW and SPANN configs within the merged float_list
1386    fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), SchemaError> {
1387        if let Some(vector_index) = &float_list.vector_index {
1388            if let Some(hnsw) = &vector_index.config.hnsw {
1389                hnsw.validate().map_err(SchemaError::InvalidHnswConfig)?;
1390            }
1391            if let Some(spann) = &vector_index.config.spann {
1392                spann.validate().map_err(SchemaError::InvalidSpannConfig)?;
1393            }
1394        }
1395        Ok(())
1396    }
1397
1398    /// Merge VectorIndexConfig with field-level merging
1399    fn merge_vector_index_config(
1400        default: &VectorIndexConfig,
1401        user: &VectorIndexConfig,
1402        knn_index: KnnIndex,
1403    ) -> VectorIndexConfig {
1404        match knn_index {
1405            KnnIndex::Hnsw => VectorIndexConfig {
1406                space: user.space.clone().or(default.space.clone()),
1407                embedding_function: user
1408                    .embedding_function
1409                    .clone()
1410                    .or(default.embedding_function.clone()),
1411                source_key: user.source_key.clone().or(default.source_key.clone()),
1412                hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1413                spann: None,
1414            },
1415            KnnIndex::Spann => VectorIndexConfig {
1416                space: user.space.clone().or(default.space.clone()),
1417                embedding_function: user
1418                    .embedding_function
1419                    .clone()
1420                    .or(default.embedding_function.clone()),
1421                source_key: user.source_key.clone().or(default.source_key.clone()),
1422                hnsw: None,
1423                spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
1424            },
1425        }
1426    }
1427
1428    /// Merge SparseVectorIndexConfig with field-level merging
1429    fn merge_sparse_vector_index_config(
1430        default: &SparseVectorIndexConfig,
1431        user: &SparseVectorIndexConfig,
1432    ) -> SparseVectorIndexConfig {
1433        SparseVectorIndexConfig {
1434            embedding_function: user
1435                .embedding_function
1436                .clone()
1437                .or(default.embedding_function.clone()),
1438            source_key: user.source_key.clone().or(default.source_key.clone()),
1439            bm25: user.bm25.or(default.bm25),
1440        }
1441    }
1442
1443    /// Merge HNSW configurations with field-level merging
1444    fn merge_hnsw_configs(
1445        default_hnsw: Option<&HnswIndexConfig>,
1446        user_hnsw: Option<&HnswIndexConfig>,
1447    ) -> Option<HnswIndexConfig> {
1448        match (default_hnsw, user_hnsw) {
1449            (Some(default), Some(user)) => Some(HnswIndexConfig {
1450                ef_construction: user.ef_construction.or(default.ef_construction),
1451                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1452                ef_search: user.ef_search.or(default.ef_search),
1453                num_threads: user.num_threads.or(default.num_threads),
1454                batch_size: user.batch_size.or(default.batch_size),
1455                sync_threshold: user.sync_threshold.or(default.sync_threshold),
1456                resize_factor: user.resize_factor.or(default.resize_factor),
1457            }),
1458            (Some(default), None) => Some(default.clone()),
1459            (None, Some(user)) => Some(user.clone()),
1460            (None, None) => None,
1461        }
1462    }
1463
1464    /// Merge SPANN configurations with field-level merging
1465    fn merge_spann_configs(
1466        default_spann: Option<&SpannIndexConfig>,
1467        user_spann: Option<&SpannIndexConfig>,
1468    ) -> Option<SpannIndexConfig> {
1469        match (default_spann, user_spann) {
1470            (Some(default), Some(user)) => Some(SpannIndexConfig {
1471                search_nprobe: user.search_nprobe.or(default.search_nprobe),
1472                search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1473                search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1474                nreplica_count: user.nreplica_count.or(default.nreplica_count),
1475                write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1476                write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1477                split_threshold: user.split_threshold.or(default.split_threshold),
1478                num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1479                initial_lambda: user.initial_lambda.or(default.initial_lambda),
1480                reassign_neighbor_count: user
1481                    .reassign_neighbor_count
1482                    .or(default.reassign_neighbor_count),
1483                merge_threshold: user.merge_threshold.or(default.merge_threshold),
1484                num_centers_to_merge_to: user
1485                    .num_centers_to_merge_to
1486                    .or(default.num_centers_to_merge_to),
1487                write_nprobe: user.write_nprobe.or(default.write_nprobe),
1488                ef_construction: user.ef_construction.or(default.ef_construction),
1489                ef_search: user.ef_search.or(default.ef_search),
1490                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1491            }),
1492            (Some(default), None) => Some(default.clone()),
1493            (None, Some(user)) => Some(user.clone()),
1494            (None, None) => None,
1495        }
1496    }
1497
1498    /// Reconcile Schema with InternalCollectionConfiguration
1499    ///
1500    /// Simple reconciliation logic:
1501    /// 1. If collection config is default → return schema (schema is source of truth)
1502    /// 2. If collection config is non-default and schema is default → override schema with collection config
1503    ///
1504    /// Note: The case where both are non-default is validated earlier in reconcile_schema_and_config
1505    pub fn reconcile_with_collection_config(
1506        schema: &Schema,
1507        collection_config: &InternalCollectionConfiguration,
1508        default_knn_index: KnnIndex,
1509    ) -> Result<Schema, SchemaError> {
1510        // 1. Check if collection config is default
1511        if collection_config.is_default() {
1512            if schema.is_default() {
1513                // if both are default, use the schema, and apply the ef from config if available
1514                // for both defaults and #embedding key
1515                let mut new_schema = Schema::new_default(default_knn_index);
1516
1517                if collection_config.embedding_function.is_some() {
1518                    if let Some(float_list) = &mut new_schema.defaults.float_list {
1519                        if let Some(vector_index) = &mut float_list.vector_index {
1520                            vector_index.config.embedding_function =
1521                                collection_config.embedding_function.clone();
1522                        }
1523                    }
1524                    if let Some(embedding_types) = new_schema.keys.get_mut(EMBEDDING_KEY) {
1525                        if let Some(float_list) = &mut embedding_types.float_list {
1526                            if let Some(vector_index) = &mut float_list.vector_index {
1527                                vector_index.config.embedding_function =
1528                                    collection_config.embedding_function.clone();
1529                            }
1530                        }
1531                    }
1532                }
1533                return Ok(new_schema);
1534            } else {
1535                // Collection config is default and schema is non-default → schema is source of truth
1536                return Ok(schema.clone());
1537            }
1538        }
1539
1540        // 2. Collection config is non-default, schema must be default (already validated earlier)
1541        // Convert collection config to schema
1542        Self::try_from(collection_config)
1543    }
1544
1545    pub fn reconcile_schema_and_config(
1546        schema: Option<&Schema>,
1547        configuration: Option<&InternalCollectionConfiguration>,
1548        knn_index: KnnIndex,
1549    ) -> Result<Schema, SchemaError> {
1550        // Early validation: check if both user-provided schema and config are non-default
1551        if let (Some(user_schema), Some(config)) = (schema, configuration) {
1552            if !user_schema.is_default() && !config.is_default() {
1553                return Err(SchemaError::ConfigAndSchemaConflict);
1554            }
1555        }
1556
1557        let reconciled_schema = Self::reconcile_with_defaults(schema, knn_index)?;
1558        if let Some(config) = configuration {
1559            Self::reconcile_with_collection_config(&reconciled_schema, config, knn_index)
1560        } else {
1561            Ok(reconciled_schema)
1562        }
1563    }
1564
1565    pub fn default_with_embedding_function(
1566        embedding_function: EmbeddingFunctionConfiguration,
1567    ) -> Schema {
1568        let mut schema = Schema::new_default(KnnIndex::Spann);
1569        if let Some(float_list) = &mut schema.defaults.float_list {
1570            if let Some(vector_index) = &mut float_list.vector_index {
1571                vector_index.config.embedding_function = Some(embedding_function.clone());
1572            }
1573        }
1574        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1575            if let Some(float_list) = &mut embedding_types.float_list {
1576                if let Some(vector_index) = &mut float_list.vector_index {
1577                    vector_index.config.embedding_function = Some(embedding_function);
1578                }
1579            }
1580        }
1581        schema
1582    }
1583
1584    /// Check if schema is default by checking each field individually
1585    pub fn is_default(&self) -> bool {
1586        // Check if defaults are default (field by field)
1587        if !Self::is_value_types_default(&self.defaults) {
1588            return false;
1589        }
1590
1591        for key in self.keys.keys() {
1592            if key != EMBEDDING_KEY && key != DOCUMENT_KEY {
1593                return false;
1594            }
1595        }
1596
1597        // Check #embedding key
1598        if let Some(embedding_value) = self.keys.get(EMBEDDING_KEY) {
1599            if !Self::is_embedding_value_types_default(embedding_value) {
1600                return false;
1601            }
1602        }
1603
1604        // Check #document key
1605        if let Some(document_value) = self.keys.get(DOCUMENT_KEY) {
1606            if !Self::is_document_value_types_default(document_value) {
1607                return false;
1608            }
1609        }
1610
1611        // Check CMEK is None (default)
1612        if self.cmek.is_some() {
1613            return false;
1614        }
1615
1616        true
1617    }
1618
1619    /// Check if ValueTypes (defaults) are in default state
1620    fn is_value_types_default(value_types: &ValueTypes) -> bool {
1621        // Check string field
1622        if let Some(string) = &value_types.string {
1623            if let Some(string_inverted) = &string.string_inverted_index {
1624                if !string_inverted.enabled {
1625                    return false;
1626                }
1627                // Config is an empty struct, so no need to check it
1628            }
1629            if let Some(fts) = &string.fts_index {
1630                if fts.enabled {
1631                    return false;
1632                }
1633                // Config is an empty struct, so no need to check it
1634            }
1635        }
1636
1637        // Check float field
1638        if let Some(float) = &value_types.float {
1639            if let Some(float_inverted) = &float.float_inverted_index {
1640                if !float_inverted.enabled {
1641                    return false;
1642                }
1643                // Config is an empty struct, so no need to check it
1644            }
1645        }
1646
1647        // Check int field
1648        if let Some(int) = &value_types.int {
1649            if let Some(int_inverted) = &int.int_inverted_index {
1650                if !int_inverted.enabled {
1651                    return false;
1652                }
1653                // Config is an empty struct, so no need to check it
1654            }
1655        }
1656
1657        // Check boolean field
1658        if let Some(boolean) = &value_types.boolean {
1659            if let Some(bool_inverted) = &boolean.bool_inverted_index {
1660                if !bool_inverted.enabled {
1661                    return false;
1662                }
1663                // Config is an empty struct, so no need to check it
1664            }
1665        }
1666
1667        // Check float_list field (vector index should be disabled)
1668        if let Some(float_list) = &value_types.float_list {
1669            if let Some(vector_index) = &float_list.vector_index {
1670                if vector_index.enabled {
1671                    return false;
1672                }
1673                if !is_embedding_function_default(&vector_index.config.embedding_function) {
1674                    return false;
1675                }
1676                if !is_space_default(&vector_index.config.space) {
1677                    return false;
1678                }
1679                // Check that the config has default structure
1680                if vector_index.config.source_key.is_some() {
1681                    return false;
1682                }
1683                // Check that either hnsw or spann config is present (not both, not neither)
1684                // and that the config values are default
1685                match (&vector_index.config.hnsw, &vector_index.config.spann) {
1686                    (Some(hnsw_config), None) => {
1687                        if !hnsw_config.is_default() {
1688                            return false;
1689                        }
1690                    }
1691                    (None, Some(spann_config)) => {
1692                        if !spann_config.is_default() {
1693                            return false;
1694                        }
1695                    }
1696                    (Some(_), Some(_)) => return false, // Both present
1697                    (None, None) => {}
1698                }
1699            }
1700        }
1701
1702        // Check sparse_vector field (should be disabled)
1703        if let Some(sparse_vector) = &value_types.sparse_vector {
1704            if let Some(sparse_index) = &sparse_vector.sparse_vector_index {
1705                if sparse_index.enabled {
1706                    return false;
1707                }
1708                // Check config structure
1709                if !is_embedding_function_default(&sparse_index.config.embedding_function) {
1710                    return false;
1711                }
1712                if sparse_index.config.source_key.is_some() {
1713                    return false;
1714                }
1715                if let Some(bm25) = &sparse_index.config.bm25 {
1716                    if bm25 != &false {
1717                        return false;
1718                    }
1719                }
1720            }
1721        }
1722
1723        true
1724    }
1725
1726    /// Check if ValueTypes for #embedding key are in default state
1727    fn is_embedding_value_types_default(value_types: &ValueTypes) -> bool {
1728        // For #embedding, only float_list should be set
1729        if value_types.string.is_some()
1730            || value_types.float.is_some()
1731            || value_types.int.is_some()
1732            || value_types.boolean.is_some()
1733            || value_types.sparse_vector.is_some()
1734        {
1735            return false;
1736        }
1737
1738        // Check float_list field (vector index should be enabled)
1739        if let Some(float_list) = &value_types.float_list {
1740            if let Some(vector_index) = &float_list.vector_index {
1741                if !vector_index.enabled {
1742                    return false;
1743                }
1744                if !is_space_default(&vector_index.config.space) {
1745                    return false;
1746                }
1747                // Check that embedding_function is default
1748                if !is_embedding_function_default(&vector_index.config.embedding_function) {
1749                    return false;
1750                }
1751                // Check that source_key is #document
1752                if vector_index.config.source_key.as_deref() != Some(DOCUMENT_KEY) {
1753                    return false;
1754                }
1755                // Check that either hnsw or spann config is present (not both, not neither)
1756                // and that the config values are default
1757                match (&vector_index.config.hnsw, &vector_index.config.spann) {
1758                    (Some(hnsw_config), None) => {
1759                        if !hnsw_config.is_default() {
1760                            return false;
1761                        }
1762                    }
1763                    (None, Some(spann_config)) => {
1764                        if !spann_config.is_default() {
1765                            return false;
1766                        }
1767                    }
1768                    (Some(_), Some(_)) => return false, // Both present
1769                    (None, None) => {}
1770                }
1771            }
1772        }
1773
1774        true
1775    }
1776
1777    /// Check if ValueTypes for #document key are in default state
1778    fn is_document_value_types_default(value_types: &ValueTypes) -> bool {
1779        // For #document, only string should be set
1780        if value_types.float_list.is_some()
1781            || value_types.float.is_some()
1782            || value_types.int.is_some()
1783            || value_types.boolean.is_some()
1784            || value_types.sparse_vector.is_some()
1785        {
1786            return false;
1787        }
1788
1789        // Check string field
1790        if let Some(string) = &value_types.string {
1791            if let Some(fts) = &string.fts_index {
1792                if !fts.enabled {
1793                    return false;
1794                }
1795                // Config is an empty struct, so no need to check it
1796            }
1797            if let Some(string_inverted) = &string.string_inverted_index {
1798                if string_inverted.enabled {
1799                    return false;
1800                }
1801                // Config is an empty struct, so no need to check it
1802            }
1803        }
1804
1805        true
1806    }
1807
1808    /// Check if a specific metadata key-value should be indexed based on schema configuration
1809    pub fn is_metadata_type_index_enabled(
1810        &self,
1811        key: &str,
1812        value_type: MetadataValueType,
1813    ) -> Result<bool, SchemaError> {
1814        let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1815
1816        match value_type {
1817            MetadataValueType::Bool => match &v_type.boolean {
1818                Some(bool_type) => match &bool_type.bool_inverted_index {
1819                    Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1820                    None => Err(SchemaError::MissingIndexConfiguration {
1821                        key: key.to_string(),
1822                        value_type: "bool".to_string(),
1823                    }),
1824                },
1825                None => match &self.defaults.boolean {
1826                    Some(bool_type) => match &bool_type.bool_inverted_index {
1827                        Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1828                        None => Err(SchemaError::MissingIndexConfiguration {
1829                            key: key.to_string(),
1830                            value_type: "bool".to_string(),
1831                        }),
1832                    },
1833                    None => Err(SchemaError::MissingIndexConfiguration {
1834                        key: key.to_string(),
1835                        value_type: "bool".to_string(),
1836                    }),
1837                },
1838            },
1839            MetadataValueType::Int => match &v_type.int {
1840                Some(int_type) => match &int_type.int_inverted_index {
1841                    Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1842                    None => Err(SchemaError::MissingIndexConfiguration {
1843                        key: key.to_string(),
1844                        value_type: "int".to_string(),
1845                    }),
1846                },
1847                None => match &self.defaults.int {
1848                    Some(int_type) => match &int_type.int_inverted_index {
1849                        Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1850                        None => Err(SchemaError::MissingIndexConfiguration {
1851                            key: key.to_string(),
1852                            value_type: "int".to_string(),
1853                        }),
1854                    },
1855                    None => Err(SchemaError::MissingIndexConfiguration {
1856                        key: key.to_string(),
1857                        value_type: "int".to_string(),
1858                    }),
1859                },
1860            },
1861            MetadataValueType::Float => match &v_type.float {
1862                Some(float_type) => match &float_type.float_inverted_index {
1863                    Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1864                    None => Err(SchemaError::MissingIndexConfiguration {
1865                        key: key.to_string(),
1866                        value_type: "float".to_string(),
1867                    }),
1868                },
1869                None => match &self.defaults.float {
1870                    Some(float_type) => match &float_type.float_inverted_index {
1871                        Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1872                        None => Err(SchemaError::MissingIndexConfiguration {
1873                            key: key.to_string(),
1874                            value_type: "float".to_string(),
1875                        }),
1876                    },
1877                    None => Err(SchemaError::MissingIndexConfiguration {
1878                        key: key.to_string(),
1879                        value_type: "float".to_string(),
1880                    }),
1881                },
1882            },
1883            MetadataValueType::Str => match &v_type.string {
1884                Some(string_type) => match &string_type.string_inverted_index {
1885                    Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1886                    None => Err(SchemaError::MissingIndexConfiguration {
1887                        key: key.to_string(),
1888                        value_type: "string".to_string(),
1889                    }),
1890                },
1891                None => match &self.defaults.string {
1892                    Some(string_type) => match &string_type.string_inverted_index {
1893                        Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1894                        None => Err(SchemaError::MissingIndexConfiguration {
1895                            key: key.to_string(),
1896                            value_type: "string".to_string(),
1897                        }),
1898                    },
1899                    None => Err(SchemaError::MissingIndexConfiguration {
1900                        key: key.to_string(),
1901                        value_type: "string".to_string(),
1902                    }),
1903                },
1904            },
1905            MetadataValueType::SparseVector => match &v_type.sparse_vector {
1906                Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1907                    Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1908                    None => Err(SchemaError::MissingIndexConfiguration {
1909                        key: key.to_string(),
1910                        value_type: "sparse_vector".to_string(),
1911                    }),
1912                },
1913                None => match &self.defaults.sparse_vector {
1914                    Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1915                        Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1916                        None => Err(SchemaError::MissingIndexConfiguration {
1917                            key: key.to_string(),
1918                            value_type: "sparse_vector".to_string(),
1919                        }),
1920                    },
1921                    None => Err(SchemaError::MissingIndexConfiguration {
1922                        key: key.to_string(),
1923                        value_type: "sparse_vector".to_string(),
1924                    }),
1925                },
1926            },
1927        }
1928    }
1929
1930    pub fn is_metadata_where_indexing_enabled(
1931        &self,
1932        where_clause: &Where,
1933    ) -> Result<(), FilterValidationError> {
1934        match where_clause {
1935            Where::Composite(composite) => {
1936                for child in &composite.children {
1937                    self.is_metadata_where_indexing_enabled(child)?;
1938                }
1939                Ok(())
1940            }
1941            Where::Document(_) => Ok(()),
1942            Where::Metadata(expression) => {
1943                let value_type = match &expression.comparison {
1944                    MetadataComparison::Primitive(_, value) => value.value_type(),
1945                    MetadataComparison::Set(_, set_value) => set_value.value_type(),
1946                };
1947                let is_enabled = self
1948                    .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1949                    .map_err(FilterValidationError::Schema)?;
1950                if !is_enabled {
1951                    return Err(FilterValidationError::IndexingDisabled {
1952                        key: expression.key.clone(),
1953                        value_type,
1954                    });
1955                }
1956                Ok(())
1957            }
1958        }
1959    }
1960
1961    pub fn is_knn_key_indexing_enabled(
1962        &self,
1963        key: &str,
1964        query: &QueryVector,
1965    ) -> Result<(), FilterValidationError> {
1966        match query {
1967            QueryVector::Sparse(_) => {
1968                let is_enabled = self
1969                    .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1970                    .map_err(FilterValidationError::Schema)?;
1971                if !is_enabled {
1972                    return Err(FilterValidationError::IndexingDisabled {
1973                        key: key.to_string(),
1974                        value_type: MetadataValueType::SparseVector,
1975                    });
1976                }
1977                Ok(())
1978            }
1979            QueryVector::Dense(_) => {
1980                // TODO: once we allow turning off dense vector indexing, we need to check if the key is enabled
1981                // Dense vectors are always indexed
1982                Ok(())
1983            }
1984        }
1985    }
1986
1987    pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
1988        if key.starts_with(CHROMA_KEY) {
1989            return false;
1990        }
1991        let value_types = self.keys.entry(key.to_string()).or_default();
1992        match value_type {
1993            MetadataValueType::Bool => {
1994                if value_types.boolean.is_none() {
1995                    value_types.boolean = self.defaults.boolean.clone();
1996                    return true;
1997                }
1998            }
1999            MetadataValueType::Int => {
2000                if value_types.int.is_none() {
2001                    value_types.int = self.defaults.int.clone();
2002                    return true;
2003                }
2004            }
2005            MetadataValueType::Float => {
2006                if value_types.float.is_none() {
2007                    value_types.float = self.defaults.float.clone();
2008                    return true;
2009                }
2010            }
2011            MetadataValueType::Str => {
2012                if value_types.string.is_none() {
2013                    value_types.string = self.defaults.string.clone();
2014                    return true;
2015                }
2016            }
2017            MetadataValueType::SparseVector => {
2018                if value_types.sparse_vector.is_none() {
2019                    value_types.sparse_vector = self.defaults.sparse_vector.clone();
2020                    return true;
2021                }
2022            }
2023        }
2024        false
2025    }
2026
2027    // ========================================================================
2028    // BUILDER PATTERN METHODS
2029    // ========================================================================
2030
2031    /// Create an index configuration (builder pattern)
2032    ///
2033    /// This method allows fluent, chainable configuration of indexes on a schema.
2034    /// It matches the Python API's `.create_index()` method.
2035    ///
2036    /// # Arguments
2037    /// * `key` - Optional key name for per-key index. `None` applies to defaults/special keys
2038    /// * `config` - Index configuration to create
2039    ///
2040    /// # Returns
2041    /// `Self` for method chaining
2042    ///
2043    /// # Errors
2044    /// Returns error if:
2045    /// - Attempting to create index on special keys (`#document`, `#embedding`)
2046    /// - Invalid configuration (e.g., vector index on non-embedding key)
2047    /// - Conflicting with existing indexes (e.g., multiple sparse vector indexes)
2048    ///
2049    /// # Examples
2050    /// ```
2051    /// use chroma_types::{Schema, VectorIndexConfig, StringInvertedIndexConfig, Space, SchemaBuilderError};
2052    ///
2053    /// # fn main() -> Result<(), SchemaBuilderError> {
2054    /// let schema = Schema::default()
2055    ///     .create_index(None, VectorIndexConfig {
2056    ///         space: Some(Space::Cosine),
2057    ///         embedding_function: None,
2058    ///         source_key: None,
2059    ///         hnsw: None,
2060    ///         spann: None,
2061    ///     }.into())?
2062    ///     .create_index(Some("category"), StringInvertedIndexConfig {}.into())?;
2063    /// # Ok(())
2064    /// # }
2065    /// ```
2066    pub fn create_index(
2067        mut self,
2068        key: Option<&str>,
2069        config: IndexConfig,
2070    ) -> Result<Self, SchemaBuilderError> {
2071        // Handle special cases: Vector and FTS (global configs only)
2072        match (&key, &config) {
2073            (None, IndexConfig::Vector(cfg)) => {
2074                self._set_vector_index_config_builder(cfg.clone());
2075                return Ok(self);
2076            }
2077            (None, IndexConfig::Fts(cfg)) => {
2078                self._set_fts_index_config_builder(cfg.clone());
2079                return Ok(self);
2080            }
2081            (Some(k), IndexConfig::Vector(_)) => {
2082                return Err(SchemaBuilderError::VectorIndexMustBeGlobal { key: k.to_string() });
2083            }
2084            (Some(k), IndexConfig::Fts(_)) => {
2085                return Err(SchemaBuilderError::FtsIndexMustBeGlobal { key: k.to_string() });
2086            }
2087            _ => {}
2088        }
2089
2090        // Validate special keys
2091        if let Some(k) = key {
2092            if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2093                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2094                    key: k.to_string(),
2095                });
2096            }
2097        }
2098
2099        // Validate sparse vector requires key
2100        if key.is_none() && matches!(config, IndexConfig::SparseVector(_)) {
2101            return Err(SchemaBuilderError::SparseVectorRequiresKey);
2102        }
2103
2104        // Dispatch to appropriate helper
2105        match key {
2106            Some(k) => self._set_index_for_key_builder(k, config, true)?,
2107            None => self._set_index_in_defaults_builder(config, true)?,
2108        }
2109
2110        Ok(self)
2111    }
2112
2113    /// Delete/disable an index configuration (builder pattern)
2114    ///
2115    /// This method allows disabling indexes on a schema.
2116    /// It matches the Python API's `.delete_index()` method.
2117    ///
2118    /// # Arguments
2119    /// * `key` - Optional key name for per-key index. `None` applies to defaults
2120    /// * `config` - Index configuration to disable
2121    ///
2122    /// # Returns
2123    /// `Self` for method chaining
2124    ///
2125    /// # Errors
2126    /// Returns error if:
2127    /// - Attempting to delete index on special keys (`#document`, `#embedding`)
2128    /// - Attempting to delete vector, FTS, or sparse vector indexes (not currently supported)
2129    ///
2130    /// # Examples
2131    /// ```
2132    /// use chroma_types::{Schema, StringInvertedIndexConfig, SchemaBuilderError};
2133    ///
2134    /// # fn main() -> Result<(), SchemaBuilderError> {
2135    /// let schema = Schema::default()
2136    ///     .delete_index(Some("category"), StringInvertedIndexConfig {}.into())?;
2137    /// # Ok(())
2138    /// # }
2139    /// ```
2140    pub fn delete_index(
2141        mut self,
2142        key: Option<&str>,
2143        config: IndexConfig,
2144    ) -> Result<Self, SchemaBuilderError> {
2145        // Validate special keys
2146        if let Some(k) = key {
2147            if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2148                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2149                    key: k.to_string(),
2150                });
2151            }
2152        }
2153
2154        // Disallow deleting vector, FTS, and sparse vector indexes (match Python restrictions)
2155        match &config {
2156            IndexConfig::Vector(_) => {
2157                return Err(SchemaBuilderError::VectorIndexDeletionNotSupported);
2158            }
2159            IndexConfig::Fts(_) => {
2160                return Err(SchemaBuilderError::FtsIndexDeletionNotSupported);
2161            }
2162            IndexConfig::SparseVector(_) => {
2163                return Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported);
2164            }
2165            _ => {}
2166        }
2167
2168        // Dispatch to appropriate helper (enabled=false)
2169        match key {
2170            Some(k) => self._set_index_for_key_builder(k, config, false)?,
2171            None => self._set_index_in_defaults_builder(config, false)?,
2172        }
2173
2174        Ok(self)
2175    }
2176
2177    /// Set customer-managed encryption key for the collection (builder pattern)
2178    ///
2179    /// This method allows setting CMEK on a schema for fluent, chainable configuration.
2180    ///
2181    /// # Arguments
2182    /// * `cmek` - Customer-managed encryption key configuration
2183    ///
2184    /// # Returns
2185    /// `Self` for method chaining
2186    ///
2187    /// # Examples
2188    /// ```
2189    /// use chroma_types::{Schema, Cmek};
2190    ///
2191    /// let schema = Schema::default()
2192    ///     .with_cmek(Cmek::gcp("projects/my-project/locations/us/keyRings/my-ring/cryptoKeys/my-key".to_string()));
2193    /// ```
2194    pub fn with_cmek(mut self, cmek: Cmek) -> Self {
2195        self.cmek = Some(cmek);
2196        self
2197    }
2198
2199    /// Set vector index config globally (applies to #embedding)
2200    fn _set_vector_index_config_builder(&mut self, config: VectorIndexConfig) {
2201        // Update defaults (disabled, just config update)
2202        if let Some(float_list) = &mut self.defaults.float_list {
2203            if let Some(vector_index) = &mut float_list.vector_index {
2204                vector_index.config = config.clone();
2205            }
2206        }
2207
2208        // Update #embedding key (enabled, config update, preserve source_key=#document)
2209        if let Some(embedding_types) = self.keys.get_mut(EMBEDDING_KEY) {
2210            if let Some(float_list) = &mut embedding_types.float_list {
2211                if let Some(vector_index) = &mut float_list.vector_index {
2212                    let mut updated_config = config;
2213                    // Preserve source_key as #document
2214                    updated_config.source_key = Some(DOCUMENT_KEY.to_string());
2215                    vector_index.config = updated_config;
2216                }
2217            }
2218        }
2219    }
2220
2221    /// Set FTS index config globally (applies to #document)
2222    fn _set_fts_index_config_builder(&mut self, config: FtsIndexConfig) {
2223        // Update defaults (disabled, just config update)
2224        if let Some(string) = &mut self.defaults.string {
2225            if let Some(fts_index) = &mut string.fts_index {
2226                fts_index.config = config.clone();
2227            }
2228        }
2229
2230        // Update #document key (enabled, config update)
2231        if let Some(document_types) = self.keys.get_mut(DOCUMENT_KEY) {
2232            if let Some(string) = &mut document_types.string {
2233                if let Some(fts_index) = &mut string.fts_index {
2234                    fts_index.config = config;
2235                }
2236            }
2237        }
2238    }
2239
2240    /// Set index configuration for a specific key
2241    fn _set_index_for_key_builder(
2242        &mut self,
2243        key: &str,
2244        config: IndexConfig,
2245        enabled: bool,
2246    ) -> Result<(), SchemaBuilderError> {
2247        // Check for multiple sparse vector indexes BEFORE getting mutable reference
2248        if enabled && matches!(config, IndexConfig::SparseVector(_)) {
2249            // Find existing sparse vector index
2250            let existing_key = self
2251                .keys
2252                .iter()
2253                .find(|(k, v)| {
2254                    k.as_str() != key
2255                        && v.sparse_vector
2256                            .as_ref()
2257                            .and_then(|sv| sv.sparse_vector_index.as_ref())
2258                            .map(|idx| idx.enabled)
2259                            .unwrap_or(false)
2260                })
2261                .map(|(k, _)| k.clone());
2262
2263            if let Some(existing_key) = existing_key {
2264                return Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key });
2265            }
2266        }
2267
2268        // Get or create ValueTypes for this key
2269        let value_types = self.keys.entry(key.to_string()).or_default();
2270
2271        // Set the appropriate index based on config type
2272        match config {
2273            IndexConfig::Vector(_) => {
2274                return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2275                    key: key.to_string(),
2276                });
2277            }
2278            IndexConfig::Fts(_) => {
2279                return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2280                    key: key.to_string(),
2281                });
2282            }
2283            IndexConfig::SparseVector(cfg) => {
2284                value_types.sparse_vector = Some(SparseVectorValueType {
2285                    sparse_vector_index: Some(SparseVectorIndexType {
2286                        enabled,
2287                        config: cfg,
2288                    }),
2289                });
2290            }
2291            IndexConfig::StringInverted(cfg) => {
2292                if value_types.string.is_none() {
2293                    value_types.string = Some(StringValueType {
2294                        fts_index: None,
2295                        string_inverted_index: None,
2296                    });
2297                }
2298                if let Some(string) = &mut value_types.string {
2299                    string.string_inverted_index = Some(StringInvertedIndexType {
2300                        enabled,
2301                        config: cfg,
2302                    });
2303                }
2304            }
2305            IndexConfig::IntInverted(cfg) => {
2306                value_types.int = Some(IntValueType {
2307                    int_inverted_index: Some(IntInvertedIndexType {
2308                        enabled,
2309                        config: cfg,
2310                    }),
2311                });
2312            }
2313            IndexConfig::FloatInverted(cfg) => {
2314                value_types.float = Some(FloatValueType {
2315                    float_inverted_index: Some(FloatInvertedIndexType {
2316                        enabled,
2317                        config: cfg,
2318                    }),
2319                });
2320            }
2321            IndexConfig::BoolInverted(cfg) => {
2322                value_types.boolean = Some(BoolValueType {
2323                    bool_inverted_index: Some(BoolInvertedIndexType {
2324                        enabled,
2325                        config: cfg,
2326                    }),
2327                });
2328            }
2329        }
2330
2331        Ok(())
2332    }
2333
2334    /// Set index configuration in defaults
2335    fn _set_index_in_defaults_builder(
2336        &mut self,
2337        config: IndexConfig,
2338        enabled: bool,
2339    ) -> Result<(), SchemaBuilderError> {
2340        match config {
2341            IndexConfig::Vector(_) => {
2342                return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2343                    key: "defaults".to_string(),
2344                });
2345            }
2346            IndexConfig::Fts(_) => {
2347                return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2348                    key: "defaults".to_string(),
2349                });
2350            }
2351            IndexConfig::SparseVector(cfg) => {
2352                self.defaults.sparse_vector = Some(SparseVectorValueType {
2353                    sparse_vector_index: Some(SparseVectorIndexType {
2354                        enabled,
2355                        config: cfg,
2356                    }),
2357                });
2358            }
2359            IndexConfig::StringInverted(cfg) => {
2360                if self.defaults.string.is_none() {
2361                    self.defaults.string = Some(StringValueType {
2362                        fts_index: None,
2363                        string_inverted_index: None,
2364                    });
2365                }
2366                if let Some(string) = &mut self.defaults.string {
2367                    string.string_inverted_index = Some(StringInvertedIndexType {
2368                        enabled,
2369                        config: cfg,
2370                    });
2371                }
2372            }
2373            IndexConfig::IntInverted(cfg) => {
2374                self.defaults.int = Some(IntValueType {
2375                    int_inverted_index: Some(IntInvertedIndexType {
2376                        enabled,
2377                        config: cfg,
2378                    }),
2379                });
2380            }
2381            IndexConfig::FloatInverted(cfg) => {
2382                self.defaults.float = Some(FloatValueType {
2383                    float_inverted_index: Some(FloatInvertedIndexType {
2384                        enabled,
2385                        config: cfg,
2386                    }),
2387                });
2388            }
2389            IndexConfig::BoolInverted(cfg) => {
2390                self.defaults.boolean = Some(BoolValueType {
2391                    bool_inverted_index: Some(BoolInvertedIndexType {
2392                        enabled,
2393                        config: cfg,
2394                    }),
2395                });
2396            }
2397        }
2398
2399        Ok(())
2400    }
2401}
2402
2403// ============================================================================
2404// INDEX CONFIGURATION STRUCTURES
2405// ============================================================================
2406
/// Configuration for a vector index
///
/// Every field is optional. Fields that are `None` are omitted when serializing, and
/// unknown fields are rejected when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct VectorIndexConfig {
    /// Vector space for similarity calculation (cosine, l2, ip)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub space: Option<Space>,
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// HNSW algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw: Option<HnswIndexConfig>,
    /// SPANN algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub spann: Option<SpannIndexConfig>,
}
2427
/// Configuration for HNSW vector index algorithm parameters
///
/// Every parameter is optional; `None` means the value was not set by the user.
/// Fields that are `None` are omitted when serializing, and unknown fields are
/// rejected when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct HnswIndexConfig {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_construction: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_neighbors: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_search: Option<usize>,
    // Not compared by `is_default`, which treats this value as system-dependent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_threads: Option<usize>,
    // Validation requires a value of at least 2 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub batch_size: Option<usize>,
    // Validation requires a value of at least 2 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub sync_threshold: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resize_factor: Option<f64>,
}
2450
2451impl HnswIndexConfig {
2452    /// Check if this config has default values
2453    /// None values are considered default (not set by user)
2454    /// Note: We skip num_threads as it's variable based on available_parallelism
2455    pub fn is_default(&self) -> bool {
2456        if let Some(ef_construction) = self.ef_construction {
2457            if ef_construction != default_construction_ef() {
2458                return false;
2459            }
2460        }
2461        if let Some(max_neighbors) = self.max_neighbors {
2462            if max_neighbors != default_m() {
2463                return false;
2464            }
2465        }
2466        if let Some(ef_search) = self.ef_search {
2467            if ef_search != default_search_ef() {
2468                return false;
2469            }
2470        }
2471        if let Some(batch_size) = self.batch_size {
2472            if batch_size != default_batch_size() {
2473                return false;
2474            }
2475        }
2476        if let Some(sync_threshold) = self.sync_threshold {
2477            if sync_threshold != default_sync_threshold() {
2478                return false;
2479            }
2480        }
2481        if let Some(resize_factor) = self.resize_factor {
2482            if resize_factor != default_resize_factor() {
2483                return false;
2484            }
2485        }
2486        // Skip num_threads check as it's system-dependent
2487        true
2488    }
2489}
2490
/// Configuration for SPANN vector index algorithm parameters
///
/// Every parameter is optional; `None` means the value was not set by the user.
/// Fields that are `None` are omitted when serializing, and unknown fields are
/// rejected when deserializing. The `validate` attributes below bound each value
/// that is set.
// NOTE(review): several ranges use identical `min` and `max` (search_rng_factor,
// write_rng_factor, initial_lambda), so validation accepts exactly one value for
// them. Presumably these parameters are intentionally not tunable; confirm.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SpannIndexConfig {
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 128))]
    pub search_nprobe: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub search_rng_factor: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub search_rng_epsilon: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub nreplica_count: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub write_rng_factor: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub write_rng_epsilon: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 50, max = 200))]
    pub split_threshold: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 1000))]
    pub num_samples_kmeans: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 100.0, max = 100.0))]
    pub initial_lambda: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub reassign_neighbor_count: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 25, max = 100))]
    pub merge_threshold: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub num_centers_to_merge_to: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub write_nprobe: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_construction: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_search: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub max_neighbors: Option<usize>,
}
2545
2546impl SpannIndexConfig {
2547    /// Check if this config has default values
2548    /// None values are considered default (not set by user)
2549    pub fn is_default(&self) -> bool {
2550        if let Some(search_nprobe) = self.search_nprobe {
2551            if search_nprobe != default_search_nprobe() {
2552                return false;
2553            }
2554        }
2555        if let Some(search_rng_factor) = self.search_rng_factor {
2556            if search_rng_factor != default_search_rng_factor() {
2557                return false;
2558            }
2559        }
2560        if let Some(search_rng_epsilon) = self.search_rng_epsilon {
2561            if search_rng_epsilon != default_search_rng_epsilon() {
2562                return false;
2563            }
2564        }
2565        if let Some(nreplica_count) = self.nreplica_count {
2566            if nreplica_count != default_nreplica_count() {
2567                return false;
2568            }
2569        }
2570        if let Some(write_rng_factor) = self.write_rng_factor {
2571            if write_rng_factor != default_write_rng_factor() {
2572                return false;
2573            }
2574        }
2575        if let Some(write_rng_epsilon) = self.write_rng_epsilon {
2576            if write_rng_epsilon != default_write_rng_epsilon() {
2577                return false;
2578            }
2579        }
2580        if let Some(split_threshold) = self.split_threshold {
2581            if split_threshold != default_split_threshold() {
2582                return false;
2583            }
2584        }
2585        if let Some(num_samples_kmeans) = self.num_samples_kmeans {
2586            if num_samples_kmeans != default_num_samples_kmeans() {
2587                return false;
2588            }
2589        }
2590        if let Some(initial_lambda) = self.initial_lambda {
2591            if initial_lambda != default_initial_lambda() {
2592                return false;
2593            }
2594        }
2595        if let Some(reassign_neighbor_count) = self.reassign_neighbor_count {
2596            if reassign_neighbor_count != default_reassign_neighbor_count() {
2597                return false;
2598            }
2599        }
2600        if let Some(merge_threshold) = self.merge_threshold {
2601            if merge_threshold != default_merge_threshold() {
2602                return false;
2603            }
2604        }
2605        if let Some(num_centers_to_merge_to) = self.num_centers_to_merge_to {
2606            if num_centers_to_merge_to != default_num_centers_to_merge_to() {
2607                return false;
2608            }
2609        }
2610        if let Some(write_nprobe) = self.write_nprobe {
2611            if write_nprobe != default_write_nprobe() {
2612                return false;
2613            }
2614        }
2615        if let Some(ef_construction) = self.ef_construction {
2616            if ef_construction != default_construction_ef_spann() {
2617                return false;
2618            }
2619        }
2620        if let Some(ef_search) = self.ef_search {
2621            if ef_search != default_search_ef_spann() {
2622                return false;
2623            }
2624        }
2625        if let Some(max_neighbors) = self.max_neighbors {
2626            if max_neighbors != default_m_spann() {
2627                return false;
2628            }
2629        }
2630        true
2631    }
2632}
2633
/// Configuration for a sparse vector index
///
/// Every field is optional. Fields that are `None` are omitted when serializing, and
/// unknown fields are rejected when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SparseVectorIndexConfig {
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the sparse vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// Whether this embedding is BM25
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bm25: Option<bool>,
}
2648
// Configuration for a full-text search (FTS) index.
// Currently a field-less marker type; deserialization rejects any field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FtsIndexConfig {
    // FTS index typically has no additional parameters
}
2655
// Configuration for an inverted index over string values.
// Currently a field-less marker type; deserialization rejects any field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct StringInvertedIndexConfig {
    // String inverted index typically has no additional parameters
}
2662
// Configuration for an inverted index over integer values.
// Currently a field-less marker type; deserialization rejects any field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct IntInvertedIndexConfig {
    // Integer inverted index typically has no additional parameters
}
2669
// Configuration for an inverted index over float values.
// Currently a field-less marker type; deserialization rejects any field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FloatInvertedIndexConfig {
    // Float inverted index typically has no additional parameters
}
2676
// Configuration for an inverted index over boolean values.
// Currently a field-less marker type; deserialization rejects any field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct BoolInvertedIndexConfig {
    // Boolean inverted index typically has no additional parameters
}
2683
2684// ============================================================================
2685// BUILDER PATTERN SUPPORT
2686// ============================================================================
2687
/// Union type for all index configurations (used by builder pattern)
///
/// Each variant wraps one concrete config struct, and every config struct converts
/// into its variant through `From`, so builder callers can pass `config.into()`.
#[derive(Clone, Debug)]
pub enum IndexConfig {
    // Rejected by the per-key and defaults setters (`VectorIndexMustBeGlobal`).
    Vector(VectorIndexConfig),
    // At most one key may hold an enabled sparse vector index.
    SparseVector(SparseVectorIndexConfig),
    // Rejected by the per-key and defaults setters (`FtsIndexMustBeGlobal`).
    Fts(FtsIndexConfig),
    StringInverted(StringInvertedIndexConfig),
    IntInverted(IntInvertedIndexConfig),
    FloatInverted(FloatInvertedIndexConfig),
    BoolInverted(BoolInvertedIndexConfig),
}
2699
2700// Convenience From implementations for ergonomic usage
2701impl From<VectorIndexConfig> for IndexConfig {
2702    fn from(config: VectorIndexConfig) -> Self {
2703        IndexConfig::Vector(config)
2704    }
2705}
2706
2707impl From<SparseVectorIndexConfig> for IndexConfig {
2708    fn from(config: SparseVectorIndexConfig) -> Self {
2709        IndexConfig::SparseVector(config)
2710    }
2711}
2712
2713impl From<FtsIndexConfig> for IndexConfig {
2714    fn from(config: FtsIndexConfig) -> Self {
2715        IndexConfig::Fts(config)
2716    }
2717}
2718
2719impl From<StringInvertedIndexConfig> for IndexConfig {
2720    fn from(config: StringInvertedIndexConfig) -> Self {
2721        IndexConfig::StringInverted(config)
2722    }
2723}
2724
2725impl From<IntInvertedIndexConfig> for IndexConfig {
2726    fn from(config: IntInvertedIndexConfig) -> Self {
2727        IndexConfig::IntInverted(config)
2728    }
2729}
2730
2731impl From<FloatInvertedIndexConfig> for IndexConfig {
2732    fn from(config: FloatInvertedIndexConfig) -> Self {
2733        IndexConfig::FloatInverted(config)
2734    }
2735}
2736
2737impl From<BoolInvertedIndexConfig> for IndexConfig {
2738    fn from(config: BoolInvertedIndexConfig) -> Self {
2739        IndexConfig::BoolInverted(config)
2740    }
2741}
2742
2743impl TryFrom<&InternalCollectionConfiguration> for Schema {
2744    type Error = SchemaError;
2745
2746    fn try_from(config: &InternalCollectionConfiguration) -> Result<Self, Self::Error> {
2747        // Start with a default schema structure
2748        let mut schema = match &config.vector_index {
2749            VectorIndexConfiguration::Hnsw(_) => Schema::new_default(KnnIndex::Hnsw),
2750            VectorIndexConfiguration::Spann(_) => Schema::new_default(KnnIndex::Spann),
2751        };
2752        // Convert vector index configuration
2753        let vector_config = match &config.vector_index {
2754            VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
2755                space: Some(hnsw_config.space.clone()),
2756                embedding_function: config.embedding_function.clone(),
2757                source_key: None,
2758                hnsw: Some(HnswIndexConfig {
2759                    ef_construction: Some(hnsw_config.ef_construction),
2760                    max_neighbors: Some(hnsw_config.max_neighbors),
2761                    ef_search: Some(hnsw_config.ef_search),
2762                    num_threads: Some(hnsw_config.num_threads),
2763                    batch_size: Some(hnsw_config.batch_size),
2764                    sync_threshold: Some(hnsw_config.sync_threshold),
2765                    resize_factor: Some(hnsw_config.resize_factor),
2766                }),
2767                spann: None,
2768            },
2769            VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
2770                space: Some(spann_config.space.clone()),
2771                embedding_function: config.embedding_function.clone(),
2772                source_key: None,
2773                hnsw: None,
2774                spann: Some(SpannIndexConfig {
2775                    search_nprobe: Some(spann_config.search_nprobe),
2776                    search_rng_factor: Some(spann_config.search_rng_factor),
2777                    search_rng_epsilon: Some(spann_config.search_rng_epsilon),
2778                    nreplica_count: Some(spann_config.nreplica_count),
2779                    write_rng_factor: Some(spann_config.write_rng_factor),
2780                    write_rng_epsilon: Some(spann_config.write_rng_epsilon),
2781                    split_threshold: Some(spann_config.split_threshold),
2782                    num_samples_kmeans: Some(spann_config.num_samples_kmeans),
2783                    initial_lambda: Some(spann_config.initial_lambda),
2784                    reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
2785                    merge_threshold: Some(spann_config.merge_threshold),
2786                    num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
2787                    write_nprobe: Some(spann_config.write_nprobe),
2788                    ef_construction: Some(spann_config.ef_construction),
2789                    ef_search: Some(spann_config.ef_search),
2790                    max_neighbors: Some(spann_config.max_neighbors),
2791                }),
2792            },
2793        };
2794
2795        // Update defaults (keep enabled=false, just update the config)
2796        // This serves as the template for any new float_list fields
2797        if let Some(float_list) = &mut schema.defaults.float_list {
2798            if let Some(vector_index) = &mut float_list.vector_index {
2799                vector_index.config = vector_config.clone();
2800            }
2801        }
2802
2803        // Update the vector_index in the existing #embedding key override
2804        // Keep enabled=true (already set by new_default) and update the config
2805        // Set source_key to DOCUMENT_KEY for the embedding key
2806        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
2807            if let Some(float_list) = &mut embedding_types.float_list {
2808                if let Some(vector_index) = &mut float_list.vector_index {
2809                    let mut vector_config = vector_config;
2810                    vector_config.source_key = Some(DOCUMENT_KEY.to_string());
2811                    vector_index.config = vector_config;
2812                }
2813            }
2814        }
2815
2816        Ok(schema)
2817    }
2818}
2819
2820#[cfg(test)]
2821mod tests {
2822    use super::*;
2823    use crate::hnsw_configuration::Space;
2824    use crate::metadata::SparseVector;
2825    use crate::{
2826        EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
2827    };
2828    use serde_json::json;
2829
2830    #[test]
2831    fn test_reconcile_with_defaults_none_user_schema() {
2832        // Test that when no user schema is provided, we get the default schema
2833        let result = Schema::reconcile_with_defaults(None, KnnIndex::Spann).unwrap();
2834        let expected = Schema::new_default(KnnIndex::Spann);
2835        assert_eq!(result, expected);
2836    }
2837
2838    #[test]
2839    fn test_reconcile_with_defaults_empty_user_schema() {
2840        // Test merging with an empty user schema
2841        let user_schema = Schema {
2842            defaults: ValueTypes::default(),
2843            keys: HashMap::new(),
2844            cmek: None,
2845        };
2846
2847        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
2848        let expected = Schema::new_default(KnnIndex::Spann);
2849        assert_eq!(result, expected);
2850    }
2851
2852    #[test]
2853    fn test_reconcile_with_defaults_user_overrides_string_enabled() {
2854        // Test that user can override string inverted index enabled state
2855        let mut user_schema = Schema {
2856            defaults: ValueTypes::default(),
2857            keys: HashMap::new(),
2858            cmek: None,
2859        };
2860
2861        user_schema.defaults.string = Some(StringValueType {
2862            string_inverted_index: Some(StringInvertedIndexType {
2863                enabled: false, // Override default (true) to false
2864                config: StringInvertedIndexConfig {},
2865            }),
2866            fts_index: None,
2867        });
2868
2869        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
2870
2871        // Check that the user override took precedence
2872        assert!(
2873            !result
2874                .defaults
2875                .string
2876                .as_ref()
2877                .unwrap()
2878                .string_inverted_index
2879                .as_ref()
2880                .unwrap()
2881                .enabled
2882        );
2883        // Check that other defaults are still present
2884        assert!(result.defaults.float.is_some());
2885        assert!(result.defaults.int.is_some());
2886    }
2887
2888    #[test]
2889    fn test_reconcile_with_defaults_user_overrides_vector_config() {
2890        // Test field-level merging for vector configurations
2891        let mut user_schema = Schema {
2892            defaults: ValueTypes::default(),
2893            keys: HashMap::new(),
2894            cmek: None,
2895        };
2896
2897        user_schema.defaults.float_list = Some(FloatListValueType {
2898            vector_index: Some(VectorIndexType {
2899                enabled: true, // Enable vector index (default is false)
2900                config: VectorIndexConfig {
2901                    space: Some(Space::L2),                     // Override default space
2902                    embedding_function: None,                   // Will use default
2903                    source_key: Some("custom_key".to_string()), // Override default
2904                    hnsw: Some(HnswIndexConfig {
2905                        ef_construction: Some(500), // Override default
2906                        max_neighbors: None,        // Will use default
2907                        ef_search: None,            // Will use default
2908                        num_threads: None,
2909                        batch_size: None,
2910                        sync_threshold: None,
2911                        resize_factor: None,
2912                    }),
2913                    spann: None,
2914                },
2915            }),
2916        });
2917
2918        // Use HNSW defaults for this test so we have HNSW config to merge with
2919        let result = {
2920            let default_schema = Schema::new_default(KnnIndex::Hnsw);
2921            let merged_defaults = Schema::merge_value_types(
2922                &default_schema.defaults,
2923                &user_schema.defaults,
2924                KnnIndex::Hnsw,
2925            )
2926            .unwrap();
2927            let mut merged_keys = default_schema.keys.clone();
2928            for (key, user_value_types) in user_schema.keys {
2929                if let Some(default_value_types) = merged_keys.get(&key) {
2930                    let merged_value_types = Schema::merge_value_types(
2931                        default_value_types,
2932                        &user_value_types,
2933                        KnnIndex::Hnsw,
2934                    )
2935                    .unwrap();
2936                    merged_keys.insert(key, merged_value_types);
2937                } else {
2938                    merged_keys.insert(key, user_value_types);
2939                }
2940            }
2941            Schema {
2942                defaults: merged_defaults,
2943                keys: merged_keys,
2944                cmek: None,
2945            }
2946        };
2947
2948        let vector_config = &result
2949            .defaults
2950            .float_list
2951            .as_ref()
2952            .unwrap()
2953            .vector_index
2954            .as_ref()
2955            .unwrap()
2956            .config;
2957
2958        // Check user overrides took precedence
2959        assert_eq!(vector_config.space, Some(Space::L2));
2960        assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
2961        assert_eq!(
2962            vector_config.hnsw.as_ref().unwrap().ef_construction,
2963            Some(500)
2964        );
2965
2966        // Check defaults were preserved for unspecified fields
2967        assert_eq!(vector_config.embedding_function, None);
2968        // Since user provided HNSW config, the default max_neighbors should be merged in
2969        assert_eq!(
2970            vector_config.hnsw.as_ref().unwrap().max_neighbors,
2971            Some(default_m())
2972        );
2973    }
2974
2975    #[test]
2976    fn test_reconcile_with_defaults_keys() {
2977        // Test that key overrides are properly merged
2978        let mut user_schema = Schema {
2979            defaults: ValueTypes::default(),
2980            keys: HashMap::new(),
2981            cmek: None,
2982        };
2983
2984        // Add a custom key override
2985        let custom_key_types = ValueTypes {
2986            string: Some(StringValueType {
2987                fts_index: Some(FtsIndexType {
2988                    enabled: true,
2989                    config: FtsIndexConfig {},
2990                }),
2991                string_inverted_index: Some(StringInvertedIndexType {
2992                    enabled: false,
2993                    config: StringInvertedIndexConfig {},
2994                }),
2995            }),
2996            ..Default::default()
2997        };
2998        user_schema
2999            .keys
3000            .insert("custom_key".to_string(), custom_key_types);
3001
3002        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3003
3004        // Check that default key overrides are preserved
3005        assert!(result.keys.contains_key(EMBEDDING_KEY));
3006        assert!(result.keys.contains_key(DOCUMENT_KEY));
3007
3008        // Check that user key override was added
3009        assert!(result.keys.contains_key("custom_key"));
3010        let custom_override = result.keys.get("custom_key").unwrap();
3011        assert!(
3012            custom_override
3013                .string
3014                .as_ref()
3015                .unwrap()
3016                .fts_index
3017                .as_ref()
3018                .unwrap()
3019                .enabled
3020        );
3021    }
3022
3023    #[test]
3024    fn test_reconcile_with_defaults_override_existing_key() {
3025        // Test overriding an existing key override (like #embedding)
3026        let mut user_schema = Schema {
3027            defaults: ValueTypes::default(),
3028            keys: HashMap::new(),
3029            cmek: None,
3030        };
3031
3032        // Override the #embedding key with custom settings
3033        let embedding_override = ValueTypes {
3034            float_list: Some(FloatListValueType {
3035                vector_index: Some(VectorIndexType {
3036                    enabled: false, // Override default enabled=true to false
3037                    config: VectorIndexConfig {
3038                        space: Some(Space::Ip), // Override default space
3039                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3040                        source_key: Some("custom_embedding_key".to_string()),
3041                        hnsw: None,
3042                        spann: None,
3043                    },
3044                }),
3045            }),
3046            ..Default::default()
3047        };
3048        user_schema
3049            .keys
3050            .insert(EMBEDDING_KEY.to_string(), embedding_override);
3051
3052        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3053
3054        let embedding_config = result.keys.get(EMBEDDING_KEY).unwrap();
3055        let vector_config = &embedding_config
3056            .float_list
3057            .as_ref()
3058            .unwrap()
3059            .vector_index
3060            .as_ref()
3061            .unwrap();
3062
3063        // Check user overrides took precedence
3064        assert!(!vector_config.enabled);
3065        assert_eq!(vector_config.config.space, Some(Space::Ip));
3066        assert_eq!(
3067            vector_config.config.source_key,
3068            Some("custom_embedding_key".to_string())
3069        );
3070    }
3071
3072    #[test]
3073    fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
3074        let collection_config = InternalCollectionConfiguration {
3075            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
3076                space: Space::Cosine,
3077                ef_construction: 128,
3078                ef_search: 96,
3079                max_neighbors: 42,
3080                num_threads: 8,
3081                resize_factor: 1.5,
3082                sync_threshold: 2_000,
3083                batch_size: 256,
3084            }),
3085            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
3086                EmbeddingFunctionNewConfiguration {
3087                    name: "custom".to_string(),
3088                    config: json!({"alpha": 1}),
3089                },
3090            )),
3091        };
3092
3093        let schema = Schema::try_from(&collection_config).unwrap();
3094        let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
3095
3096        assert_eq!(reconstructed, collection_config);
3097    }
3098
3099    #[test]
3100    fn test_convert_schema_to_collection_config_spann_roundtrip() {
3101        let spann_config = InternalSpannConfiguration {
3102            space: Space::Cosine,
3103            search_nprobe: 11,
3104            search_rng_factor: 1.7,
3105            write_nprobe: 5,
3106            nreplica_count: 3,
3107            split_threshold: 150,
3108            merge_threshold: 80,
3109            ef_construction: 120,
3110            ef_search: 90,
3111            max_neighbors: 40,
3112            ..Default::default()
3113        };
3114
3115        let collection_config = InternalCollectionConfiguration {
3116            vector_index: VectorIndexConfiguration::Spann(spann_config.clone()),
3117            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
3118                EmbeddingFunctionNewConfiguration {
3119                    name: "custom".to_string(),
3120                    config: json!({"beta": true}),
3121                },
3122            )),
3123        };
3124
3125        let schema = Schema::try_from(&collection_config).unwrap();
3126        let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
3127
3128        assert_eq!(reconstructed, collection_config);
3129    }
3130
3131    #[test]
3132    fn test_convert_schema_to_collection_config_rejects_mixed_index() {
3133        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3134        if let Some(embedding) = schema.keys.get_mut(EMBEDDING_KEY) {
3135            if let Some(float_list) = &mut embedding.float_list {
3136                if let Some(vector_index) = &mut float_list.vector_index {
3137                    vector_index.config.spann = Some(SpannIndexConfig {
3138                        search_nprobe: Some(1),
3139                        search_rng_factor: Some(1.0),
3140                        search_rng_epsilon: Some(0.1),
3141                        nreplica_count: Some(1),
3142                        write_rng_factor: Some(1.0),
3143                        write_rng_epsilon: Some(0.1),
3144                        split_threshold: Some(100),
3145                        num_samples_kmeans: Some(10),
3146                        initial_lambda: Some(0.5),
3147                        reassign_neighbor_count: Some(10),
3148                        merge_threshold: Some(50),
3149                        num_centers_to_merge_to: Some(3),
3150                        write_nprobe: Some(1),
3151                        ef_construction: Some(50),
3152                        ef_search: Some(40),
3153                        max_neighbors: Some(20),
3154                    });
3155                }
3156            }
3157        }
3158
3159        let result = InternalCollectionConfiguration::try_from(&schema);
3160        assert!(result.is_err());
3161    }
3162
3163    #[test]
3164    fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
3165        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3166        let before = schema.clone();
3167        let modified = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);
3168        assert!(!modified);
3169        assert_eq!(schema, before);
3170    }
3171
3172    #[test]
3173    fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
3174        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3175        assert!(!schema.keys.contains_key("custom_field"));
3176
3177        let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3178
3179        assert!(modified);
3180        let entry = schema
3181            .keys
3182            .get("custom_field")
3183            .expect("expected new key override to be inserted");
3184        assert_eq!(entry.boolean, schema.defaults.boolean);
3185        assert!(entry.string.is_none());
3186        assert!(entry.int.is_none());
3187        assert!(entry.float.is_none());
3188        assert!(entry.float_list.is_none());
3189        assert!(entry.sparse_vector.is_none());
3190    }
3191
3192    #[test]
3193    fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
3194        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3195        let initial_len = schema.keys.len();
3196        schema.keys.insert(
3197            "custom_field".to_string(),
3198            ValueTypes {
3199                string: schema.defaults.string.clone(),
3200                ..Default::default()
3201            },
3202        );
3203
3204        let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3205
3206        assert!(modified);
3207        assert_eq!(schema.keys.len(), initial_len + 1);
3208        let entry = schema
3209            .keys
3210            .get("custom_field")
3211            .expect("expected key override to exist after ensure call");
3212        assert!(entry.string.is_some());
3213        assert_eq!(entry.boolean, schema.defaults.boolean);
3214    }
3215
3216    #[test]
3217    fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
3218        let schema = Schema::new_default(KnnIndex::Spann);
3219        let result = schema.is_knn_key_indexing_enabled(
3220            "custom_sparse",
3221            &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap()),
3222        );
3223
3224        let err = result.expect_err("expected indexing disabled error");
3225        match err {
3226            FilterValidationError::IndexingDisabled { key, value_type } => {
3227                assert_eq!(key, "custom_sparse");
3228                assert_eq!(value_type, crate::metadata::MetadataValueType::SparseVector);
3229            }
3230            other => panic!("unexpected error variant: {other:?}"),
3231        }
3232    }
3233
3234    #[test]
3235    fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
3236        let mut schema = Schema::new_default(KnnIndex::Spann);
3237        schema.keys.insert(
3238            "sparse_enabled".to_string(),
3239            ValueTypes {
3240                sparse_vector: Some(SparseVectorValueType {
3241                    sparse_vector_index: Some(SparseVectorIndexType {
3242                        enabled: true,
3243                        config: SparseVectorIndexConfig {
3244                            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3245                            source_key: None,
3246                            bm25: None,
3247                        },
3248                    }),
3249                }),
3250                ..Default::default()
3251            },
3252        );
3253
3254        let result = schema.is_knn_key_indexing_enabled(
3255            "sparse_enabled",
3256            &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap()),
3257        );
3258
3259        assert!(result.is_ok());
3260    }
3261
3262    #[test]
3263    fn test_is_knn_key_indexing_enabled_dense_succeeds() {
3264        let schema = Schema::new_default(KnnIndex::Spann);
3265        let result = schema.is_knn_key_indexing_enabled(
3266            EMBEDDING_KEY,
3267            &QueryVector::Dense(vec![0.1_f32, 0.2_f32]),
3268        );
3269
3270        assert!(result.is_ok());
3271    }
3272
3273    #[test]
3274    fn test_merge_hnsw_configs_field_level() {
3275        // Test field-level merging for HNSW configurations
3276        let default_hnsw = HnswIndexConfig {
3277            ef_construction: Some(200),
3278            max_neighbors: Some(16),
3279            ef_search: Some(10),
3280            num_threads: Some(4),
3281            batch_size: Some(100),
3282            sync_threshold: Some(1000),
3283            resize_factor: Some(1.2),
3284        };
3285
3286        let user_hnsw = HnswIndexConfig {
3287            ef_construction: Some(300), // Override
3288            max_neighbors: None,        // Will use default
3289            ef_search: Some(20),        // Override
3290            num_threads: None,          // Will use default
3291            batch_size: None,           // Will use default
3292            sync_threshold: Some(2000), // Override
3293            resize_factor: None,        // Will use default
3294        };
3295
3296        let result = Schema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();
3297
3298        // Check user overrides
3299        assert_eq!(result.ef_construction, Some(300));
3300        assert_eq!(result.ef_search, Some(20));
3301        assert_eq!(result.sync_threshold, Some(2000));
3302
3303        // Check defaults preserved
3304        assert_eq!(result.max_neighbors, Some(16));
3305        assert_eq!(result.num_threads, Some(4));
3306        assert_eq!(result.batch_size, Some(100));
3307        assert_eq!(result.resize_factor, Some(1.2));
3308    }
3309
3310    #[test]
3311    fn test_merge_spann_configs_field_level() {
3312        // Test field-level merging for SPANN configurations
3313        let default_spann = SpannIndexConfig {
3314            search_nprobe: Some(10),
3315            search_rng_factor: Some(1.0),  // Must be exactly 1.0
3316            search_rng_epsilon: Some(7.0), // Must be 5.0-10.0
3317            nreplica_count: Some(3),
3318            write_rng_factor: Some(1.0),  // Must be exactly 1.0
3319            write_rng_epsilon: Some(6.0), // Must be 5.0-10.0
3320            split_threshold: Some(100),   // Must be 50-200
3321            num_samples_kmeans: Some(100),
3322            initial_lambda: Some(100.0), // Must be exactly 100.0
3323            reassign_neighbor_count: Some(50),
3324            merge_threshold: Some(50),        // Must be 25-100
3325            num_centers_to_merge_to: Some(4), // Max is 8
3326            write_nprobe: Some(5),
3327            ef_construction: Some(100),
3328            ef_search: Some(10),
3329            max_neighbors: Some(16),
3330        };
3331
3332        let user_spann = SpannIndexConfig {
3333            search_nprobe: Some(20),       // Override
3334            search_rng_factor: None,       // Will use default
3335            search_rng_epsilon: Some(8.0), // Override (valid: 5.0-10.0)
3336            nreplica_count: None,          // Will use default
3337            write_rng_factor: None,
3338            write_rng_epsilon: None,
3339            split_threshold: Some(150), // Override (valid: 50-200)
3340            num_samples_kmeans: None,
3341            initial_lambda: None,
3342            reassign_neighbor_count: None,
3343            merge_threshold: None,
3344            num_centers_to_merge_to: None,
3345            write_nprobe: None,
3346            ef_construction: None,
3347            ef_search: None,
3348            max_neighbors: None,
3349        };
3350
3351        let result = Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann)).unwrap();
3352
3353        // Check user overrides
3354        assert_eq!(result.search_nprobe, Some(20));
3355        assert_eq!(result.search_rng_epsilon, Some(8.0));
3356        assert_eq!(result.split_threshold, Some(150));
3357
3358        // Check defaults preserved
3359        assert_eq!(result.search_rng_factor, Some(1.0));
3360        assert_eq!(result.nreplica_count, Some(3));
3361        assert_eq!(result.initial_lambda, Some(100.0));
3362    }
3363
3364    #[test]
3365    fn test_spann_index_config_into_internal_configuration() {
3366        let config = SpannIndexConfig {
3367            search_nprobe: Some(33),
3368            search_rng_factor: Some(1.2),
3369            search_rng_epsilon: None,
3370            nreplica_count: None,
3371            write_rng_factor: Some(1.5),
3372            write_rng_epsilon: None,
3373            split_threshold: Some(75),
3374            num_samples_kmeans: None,
3375            initial_lambda: Some(0.9),
3376            reassign_neighbor_count: Some(40),
3377            merge_threshold: None,
3378            num_centers_to_merge_to: Some(4),
3379            write_nprobe: Some(60),
3380            ef_construction: Some(180),
3381            ef_search: Some(170),
3382            max_neighbors: Some(32),
3383        };
3384
3385        let with_space: InternalSpannConfiguration = (Some(&Space::Cosine), &config).into();
3386        assert_eq!(with_space.space, Space::Cosine);
3387        assert_eq!(with_space.search_nprobe, 33);
3388        assert_eq!(with_space.search_rng_factor, 1.2);
3389        assert_eq!(with_space.search_rng_epsilon, default_search_rng_epsilon());
3390        assert_eq!(with_space.write_rng_factor, 1.5);
3391        assert_eq!(with_space.write_nprobe, 60);
3392        assert_eq!(with_space.ef_construction, 180);
3393        assert_eq!(with_space.ef_search, 170);
3394        assert_eq!(with_space.max_neighbors, 32);
3395        assert_eq!(with_space.merge_threshold, default_merge_threshold());
3396
3397        let default_space_config: InternalSpannConfiguration = (None, &config).into();
3398        assert_eq!(default_space_config.space, default_space());
3399    }
3400
3401    #[test]
3402    fn test_merge_string_type_combinations() {
3403        // Test all combinations of default and user StringValueType
3404
3405        // Both Some - should merge
3406        let default = StringValueType {
3407            string_inverted_index: Some(StringInvertedIndexType {
3408                enabled: true,
3409                config: StringInvertedIndexConfig {},
3410            }),
3411            fts_index: Some(FtsIndexType {
3412                enabled: false,
3413                config: FtsIndexConfig {},
3414            }),
3415        };
3416
3417        let user = StringValueType {
3418            string_inverted_index: Some(StringInvertedIndexType {
3419                enabled: false, // Override
3420                config: StringInvertedIndexConfig {},
3421            }),
3422            fts_index: None, // Will use default
3423        };
3424
3425        let result = Schema::merge_string_type(Some(&default), Some(&user))
3426            .unwrap()
3427            .unwrap();
3428        assert!(!result.string_inverted_index.as_ref().unwrap().enabled); // User override
3429        assert!(!result.fts_index.as_ref().unwrap().enabled); // Default preserved
3430
3431        // Default Some, User None - should return default
3432        let result = Schema::merge_string_type(Some(&default), None)
3433            .unwrap()
3434            .unwrap();
3435        assert!(result.string_inverted_index.as_ref().unwrap().enabled);
3436
3437        // Default None, User Some - should return user
3438        let result = Schema::merge_string_type(None, Some(&user))
3439            .unwrap()
3440            .unwrap();
3441        assert!(!result.string_inverted_index.as_ref().unwrap().enabled);
3442
3443        // Both None - should return None
3444        let result = Schema::merge_string_type(None, None).unwrap();
3445        assert!(result.is_none());
3446    }
3447
3448    #[test]
3449    fn test_merge_vector_index_config_comprehensive() {
3450        // Test comprehensive vector index config merging
3451        let default_config = VectorIndexConfig {
3452            space: Some(Space::Cosine),
3453            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3454            source_key: Some("default_key".to_string()),
3455            hnsw: Some(HnswIndexConfig {
3456                ef_construction: Some(200),
3457                max_neighbors: Some(16),
3458                ef_search: Some(10),
3459                num_threads: Some(4),
3460                batch_size: Some(100),
3461                sync_threshold: Some(1000),
3462                resize_factor: Some(1.2),
3463            }),
3464            spann: None,
3465        };
3466
3467        let user_config = VectorIndexConfig {
3468            space: Some(Space::L2),                   // Override
3469            embedding_function: None,                 // Will use default
3470            source_key: Some("user_key".to_string()), // Override
3471            hnsw: Some(HnswIndexConfig {
3472                ef_construction: Some(300), // Override
3473                max_neighbors: None,        // Will use default
3474                ef_search: None,            // Will use default
3475                num_threads: None,
3476                batch_size: None,
3477                sync_threshold: None,
3478                resize_factor: None,
3479            }),
3480            spann: Some(SpannIndexConfig {
3481                search_nprobe: Some(15),
3482                search_rng_factor: None,
3483                search_rng_epsilon: None,
3484                nreplica_count: None,
3485                write_rng_factor: None,
3486                write_rng_epsilon: None,
3487                split_threshold: None,
3488                num_samples_kmeans: None,
3489                initial_lambda: None,
3490                reassign_neighbor_count: None,
3491                merge_threshold: None,
3492                num_centers_to_merge_to: None,
3493                write_nprobe: None,
3494                ef_construction: None,
3495                ef_search: None,
3496                max_neighbors: None,
3497            }), // Add SPANN config
3498        };
3499
3500        let result =
3501            Schema::merge_vector_index_config(&default_config, &user_config, KnnIndex::Hnsw);
3502
3503        // Check field-level merging
3504        assert_eq!(result.space, Some(Space::L2)); // User override
3505        assert_eq!(
3506            result.embedding_function,
3507            Some(EmbeddingFunctionConfiguration::Legacy)
3508        ); // Default preserved
3509        assert_eq!(result.source_key, Some("user_key".to_string())); // User override
3510
3511        // Check HNSW merging
3512        assert_eq!(result.hnsw.as_ref().unwrap().ef_construction, Some(300)); // User override
3513        assert_eq!(result.hnsw.as_ref().unwrap().max_neighbors, Some(16)); // Default preserved
3514
3515        // Check SPANN is not present, since merging in the context of HNSW
3516        assert!(result.spann.is_none());
3517    }
3518
3519    #[test]
3520    fn test_merge_sparse_vector_index_config() {
3521        // Test sparse vector index config merging
3522        let default_config = SparseVectorIndexConfig {
3523            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3524            source_key: Some("default_sparse_key".to_string()),
3525            bm25: None,
3526        };
3527
3528        let user_config = SparseVectorIndexConfig {
3529            embedding_function: None,                        // Will use default
3530            source_key: Some("user_sparse_key".to_string()), // Override
3531            bm25: None,
3532        };
3533
3534        let result = Schema::merge_sparse_vector_index_config(&default_config, &user_config);
3535
3536        // Check user override
3537        assert_eq!(result.source_key, Some("user_sparse_key".to_string()));
3538        // Check default preserved
3539        assert_eq!(
3540            result.embedding_function,
3541            Some(EmbeddingFunctionConfiguration::Legacy)
3542        );
3543    }
3544
3545    #[test]
3546    fn test_complex_nested_merging_scenario() {
3547        // Test a complex scenario with multiple levels of merging
3548        let mut user_schema = Schema {
3549            defaults: ValueTypes::default(),
3550            keys: HashMap::new(),
3551            cmek: None,
3552        };
3553
3554        // Set up complex user defaults
3555        user_schema.defaults.string = Some(StringValueType {
3556            string_inverted_index: Some(StringInvertedIndexType {
3557                enabled: false,
3558                config: StringInvertedIndexConfig {},
3559            }),
3560            fts_index: Some(FtsIndexType {
3561                enabled: true,
3562                config: FtsIndexConfig {},
3563            }),
3564        });
3565
3566        user_schema.defaults.float_list = Some(FloatListValueType {
3567            vector_index: Some(VectorIndexType {
3568                enabled: true,
3569                config: VectorIndexConfig {
3570                    space: Some(Space::Ip),
3571                    embedding_function: None, // Will use default
3572                    source_key: Some("custom_vector_key".to_string()),
3573                    hnsw: Some(HnswIndexConfig {
3574                        ef_construction: Some(400),
3575                        max_neighbors: Some(32),
3576                        ef_search: None, // Will use default
3577                        num_threads: None,
3578                        batch_size: None,
3579                        sync_threshold: None,
3580                        resize_factor: None,
3581                    }),
3582                    spann: None,
3583                },
3584            }),
3585        });
3586
3587        // Set up key overrides
3588        let custom_key_override = ValueTypes {
3589            string: Some(StringValueType {
3590                fts_index: Some(FtsIndexType {
3591                    enabled: true,
3592                    config: FtsIndexConfig {},
3593                }),
3594                string_inverted_index: None,
3595            }),
3596            ..Default::default()
3597        };
3598        user_schema
3599            .keys
3600            .insert("custom_field".to_string(), custom_key_override);
3601
3602        // Use HNSW defaults for this test so we have HNSW config to merge with
3603        let result = {
3604            let default_schema = Schema::new_default(KnnIndex::Hnsw);
3605            let merged_defaults = Schema::merge_value_types(
3606                &default_schema.defaults,
3607                &user_schema.defaults,
3608                KnnIndex::Hnsw,
3609            )
3610            .unwrap();
3611            let mut merged_keys = default_schema.keys.clone();
3612            for (key, user_value_types) in user_schema.keys {
3613                if let Some(default_value_types) = merged_keys.get(&key) {
3614                    let merged_value_types = Schema::merge_value_types(
3615                        default_value_types,
3616                        &user_value_types,
3617                        KnnIndex::Hnsw,
3618                    )
3619                    .unwrap();
3620                    merged_keys.insert(key, merged_value_types);
3621                } else {
3622                    merged_keys.insert(key, user_value_types);
3623                }
3624            }
3625            Schema {
3626                defaults: merged_defaults,
3627                keys: merged_keys,
3628                cmek: None,
3629            }
3630        };
3631
3632        // Verify complex merging worked correctly
3633
3634        // Check defaults merging
3635        assert!(
3636            !result
3637                .defaults
3638                .string
3639                .as_ref()
3640                .unwrap()
3641                .string_inverted_index
3642                .as_ref()
3643                .unwrap()
3644                .enabled
3645        );
3646        assert!(
3647            result
3648                .defaults
3649                .string
3650                .as_ref()
3651                .unwrap()
3652                .fts_index
3653                .as_ref()
3654                .unwrap()
3655                .enabled
3656        );
3657
3658        let vector_config = &result
3659            .defaults
3660            .float_list
3661            .as_ref()
3662            .unwrap()
3663            .vector_index
3664            .as_ref()
3665            .unwrap()
3666            .config;
3667        assert_eq!(vector_config.space, Some(Space::Ip));
3668        assert_eq!(vector_config.embedding_function, None); // Default preserved
3669        assert_eq!(
3670            vector_config.source_key,
3671            Some("custom_vector_key".to_string())
3672        );
3673        assert_eq!(
3674            vector_config.hnsw.as_ref().unwrap().ef_construction,
3675            Some(400)
3676        );
3677        assert_eq!(vector_config.hnsw.as_ref().unwrap().max_neighbors, Some(32));
3678        assert_eq!(
3679            vector_config.hnsw.as_ref().unwrap().ef_search,
3680            Some(default_search_ef())
3681        ); // Default preserved
3682
3683        // Check key overrides
3684        assert!(result.keys.contains_key(EMBEDDING_KEY)); // Default preserved
3685        assert!(result.keys.contains_key(DOCUMENT_KEY)); // Default preserved
3686        assert!(result.keys.contains_key("custom_field")); // User added
3687
3688        let custom_override = result.keys.get("custom_field").unwrap();
3689        assert!(
3690            custom_override
3691                .string
3692                .as_ref()
3693                .unwrap()
3694                .fts_index
3695                .as_ref()
3696                .unwrap()
3697                .enabled
3698        );
3699        assert!(custom_override
3700            .string
3701            .as_ref()
3702            .unwrap()
3703            .string_inverted_index
3704            .is_none());
3705    }
3706
3707    #[test]
3708    fn test_reconcile_with_collection_config_default_config() {
3709        // Test that when collection config is default, schema is returned as-is
3710        let collection_config = InternalCollectionConfiguration::default_hnsw();
3711        let schema = Schema::try_from(&collection_config).unwrap();
3712
3713        let result =
3714            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
3715                .unwrap();
3716        assert_eq!(result, schema);
3717    }
3718
3719    // Test all 8 cases of double default scenarios
3720    #[test]
3721    fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_hnsw() {
3722        let collection_config = InternalCollectionConfiguration::default_hnsw();
3723        let schema = Schema::new_default(KnnIndex::Hnsw);
3724        let result =
3725            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
3726                .unwrap();
3727
3728        // Should create new schema with default_knn_index (Hnsw)
3729        assert!(result.defaults.float_list.is_some());
3730        assert!(result
3731            .defaults
3732            .float_list
3733            .as_ref()
3734            .unwrap()
3735            .vector_index
3736            .as_ref()
3737            .unwrap()
3738            .config
3739            .hnsw
3740            .is_some());
3741        assert!(result
3742            .defaults
3743            .float_list
3744            .as_ref()
3745            .unwrap()
3746            .vector_index
3747            .as_ref()
3748            .unwrap()
3749            .config
3750            .spann
3751            .is_none());
3752    }
3753
3754    #[test]
3755    fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_spann() {
3756        let collection_config = InternalCollectionConfiguration::default_hnsw();
3757        let schema = Schema::new_default(KnnIndex::Hnsw);
3758        let result =
3759            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
3760                .unwrap();
3761
3762        // Should create new schema with default_knn_index (Spann)
3763        assert!(result.defaults.float_list.is_some());
3764        assert!(result
3765            .defaults
3766            .float_list
3767            .as_ref()
3768            .unwrap()
3769            .vector_index
3770            .as_ref()
3771            .unwrap()
3772            .config
3773            .spann
3774            .is_some());
3775        assert!(result
3776            .defaults
3777            .float_list
3778            .as_ref()
3779            .unwrap()
3780            .vector_index
3781            .as_ref()
3782            .unwrap()
3783            .config
3784            .hnsw
3785            .is_none());
3786    }
3787
3788    #[test]
3789    fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_hnsw() {
3790        let collection_config = InternalCollectionConfiguration::default_hnsw();
3791        let schema = Schema::new_default(KnnIndex::Spann);
3792        let result =
3793            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
3794                .unwrap();
3795
3796        // Should create new schema with default_knn_index (Hnsw)
3797        assert!(result.defaults.float_list.is_some());
3798        assert!(result
3799            .defaults
3800            .float_list
3801            .as_ref()
3802            .unwrap()
3803            .vector_index
3804            .as_ref()
3805            .unwrap()
3806            .config
3807            .hnsw
3808            .is_some());
3809        assert!(result
3810            .defaults
3811            .float_list
3812            .as_ref()
3813            .unwrap()
3814            .vector_index
3815            .as_ref()
3816            .unwrap()
3817            .config
3818            .spann
3819            .is_none());
3820    }
3821
3822    #[test]
3823    fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_spann() {
3824        let collection_config = InternalCollectionConfiguration::default_hnsw();
3825        let schema = Schema::new_default(KnnIndex::Spann);
3826        let result =
3827            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
3828                .unwrap();
3829
3830        // Should create new schema with default_knn_index (Spann)
3831        assert!(result.defaults.float_list.is_some());
3832        assert!(result
3833            .defaults
3834            .float_list
3835            .as_ref()
3836            .unwrap()
3837            .vector_index
3838            .as_ref()
3839            .unwrap()
3840            .config
3841            .spann
3842            .is_some());
3843        assert!(result
3844            .defaults
3845            .float_list
3846            .as_ref()
3847            .unwrap()
3848            .vector_index
3849            .as_ref()
3850            .unwrap()
3851            .config
3852            .hnsw
3853            .is_none());
3854    }
3855
3856    #[test]
3857    fn test_reconcile_double_default_spann_config_spann_schema_default_knn_hnsw() {
3858        let collection_config = InternalCollectionConfiguration::default_spann();
3859        let schema = Schema::new_default(KnnIndex::Spann);
3860        let result =
3861            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
3862                .unwrap();
3863
3864        // Should create new schema with default_knn_index (Hnsw)
3865        assert!(result.defaults.float_list.is_some());
3866        assert!(result
3867            .defaults
3868            .float_list
3869            .as_ref()
3870            .unwrap()
3871            .vector_index
3872            .as_ref()
3873            .unwrap()
3874            .config
3875            .hnsw
3876            .is_some());
3877        assert!(result
3878            .defaults
3879            .float_list
3880            .as_ref()
3881            .unwrap()
3882            .vector_index
3883            .as_ref()
3884            .unwrap()
3885            .config
3886            .spann
3887            .is_none());
3888    }
3889
3890    #[test]
3891    fn test_reconcile_double_default_spann_config_spann_schema_default_knn_spann() {
3892        let collection_config = InternalCollectionConfiguration::default_spann();
3893        let schema = Schema::new_default(KnnIndex::Spann);
3894        let result =
3895            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
3896                .unwrap();
3897
3898        // Should create new schema with default_knn_index (Spann)
3899        assert!(result.defaults.float_list.is_some());
3900        assert!(result
3901            .defaults
3902            .float_list
3903            .as_ref()
3904            .unwrap()
3905            .vector_index
3906            .as_ref()
3907            .unwrap()
3908            .config
3909            .spann
3910            .is_some());
3911        assert!(result
3912            .defaults
3913            .float_list
3914            .as_ref()
3915            .unwrap()
3916            .vector_index
3917            .as_ref()
3918            .unwrap()
3919            .config
3920            .hnsw
3921            .is_none());
3922        // Defaults should have source_key=None
3923        assert_eq!(
3924            result
3925                .defaults
3926                .float_list
3927                .as_ref()
3928                .unwrap()
3929                .vector_index
3930                .as_ref()
3931                .unwrap()
3932                .config
3933                .source_key,
3934            None
3935        );
3936    }
3937
3938    #[test]
3939    fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_hnsw() {
3940        let collection_config = InternalCollectionConfiguration::default_spann();
3941        let schema = Schema::new_default(KnnIndex::Hnsw);
3942        let result =
3943            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
3944                .unwrap();
3945
3946        // Should create new schema with default_knn_index (Hnsw)
3947        assert!(result.defaults.float_list.is_some());
3948        assert!(result
3949            .defaults
3950            .float_list
3951            .as_ref()
3952            .unwrap()
3953            .vector_index
3954            .as_ref()
3955            .unwrap()
3956            .config
3957            .hnsw
3958            .is_some());
3959        assert!(result
3960            .defaults
3961            .float_list
3962            .as_ref()
3963            .unwrap()
3964            .vector_index
3965            .as_ref()
3966            .unwrap()
3967            .config
3968            .spann
3969            .is_none());
3970    }
3971
3972    #[test]
3973    fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_spann() {
3974        let collection_config = InternalCollectionConfiguration::default_spann();
3975        let schema = Schema::new_default(KnnIndex::Hnsw);
3976        let result =
3977            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
3978                .unwrap();
3979
3980        // Should create new schema with default_knn_index (Spann)
3981        assert!(result.defaults.float_list.is_some());
3982        assert!(result
3983            .defaults
3984            .float_list
3985            .as_ref()
3986            .unwrap()
3987            .vector_index
3988            .as_ref()
3989            .unwrap()
3990            .config
3991            .spann
3992            .is_some());
3993        assert!(result
3994            .defaults
3995            .float_list
3996            .as_ref()
3997            .unwrap()
3998            .vector_index
3999            .as_ref()
4000            .unwrap()
4001            .config
4002            .hnsw
4003            .is_none());
4004    }
4005
4006    #[test]
4007    fn test_defaults_source_key_not_document() {
4008        // Test that defaults.float_list.vector_index.config.source_key is None, not DOCUMENT_KEY
4009        let schema_hnsw = Schema::new_default(KnnIndex::Hnsw);
4010        let schema_spann = Schema::new_default(KnnIndex::Spann);
4011
4012        // Check HNSW default schema
4013        let defaults_hnsw = schema_hnsw
4014            .defaults
4015            .float_list
4016            .as_ref()
4017            .unwrap()
4018            .vector_index
4019            .as_ref()
4020            .unwrap();
4021        assert_eq!(defaults_hnsw.config.source_key, None);
4022
4023        // Check Spann default schema
4024        let defaults_spann = schema_spann
4025            .defaults
4026            .float_list
4027            .as_ref()
4028            .unwrap()
4029            .vector_index
4030            .as_ref()
4031            .unwrap();
4032        assert_eq!(defaults_spann.config.source_key, None);
4033
4034        // Test after reconcile with NON-default collection config
4035        // This path calls try_from where our fix is
4036        let collection_config_hnsw = InternalCollectionConfiguration {
4037            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4038                ef_construction: 300,
4039                max_neighbors: 32,
4040                ef_search: 50,
4041                num_threads: 8,
4042                batch_size: 200,
4043                sync_threshold: 2000,
4044                resize_factor: 1.5,
4045                space: Space::L2,
4046            }),
4047            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4048        };
4049        let result_hnsw = Schema::reconcile_with_collection_config(
4050            &schema_hnsw,
4051            &collection_config_hnsw,
4052            KnnIndex::Hnsw,
4053        )
4054        .unwrap();
4055        let reconciled_defaults_hnsw = result_hnsw
4056            .defaults
4057            .float_list
4058            .as_ref()
4059            .unwrap()
4060            .vector_index
4061            .as_ref()
4062            .unwrap();
4063        assert_eq!(reconciled_defaults_hnsw.config.source_key, None);
4064
4065        let collection_config_spann = InternalCollectionConfiguration {
4066            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4067                search_nprobe: 20,
4068                search_rng_factor: 3.0,
4069                search_rng_epsilon: 0.2,
4070                nreplica_count: 5,
4071                write_rng_factor: 2.0,
4072                write_rng_epsilon: 0.1,
4073                split_threshold: 2000,
4074                num_samples_kmeans: 200,
4075                initial_lambda: 0.8,
4076                reassign_neighbor_count: 100,
4077                merge_threshold: 800,
4078                num_centers_to_merge_to: 20,
4079                write_nprobe: 10,
4080                ef_construction: 400,
4081                ef_search: 60,
4082                max_neighbors: 24,
4083                space: Space::Cosine,
4084            }),
4085            embedding_function: None,
4086        };
4087        let result_spann = Schema::reconcile_with_collection_config(
4088            &schema_spann,
4089            &collection_config_spann,
4090            KnnIndex::Spann,
4091        )
4092        .unwrap();
4093        let reconciled_defaults_spann = result_spann
4094            .defaults
4095            .float_list
4096            .as_ref()
4097            .unwrap()
4098            .vector_index
4099            .as_ref()
4100            .unwrap();
4101        assert_eq!(reconciled_defaults_spann.config.source_key, None);
4102
4103        // Verify that #embedding key DOES have source_key set to DOCUMENT_KEY
4104        let embedding_hnsw = result_hnsw.keys.get(EMBEDDING_KEY).unwrap();
4105        let embedding_vector_index_hnsw = embedding_hnsw
4106            .float_list
4107            .as_ref()
4108            .unwrap()
4109            .vector_index
4110            .as_ref()
4111            .unwrap();
4112        assert_eq!(
4113            embedding_vector_index_hnsw.config.source_key,
4114            Some(DOCUMENT_KEY.to_string())
4115        );
4116
4117        let embedding_spann = result_spann.keys.get(EMBEDDING_KEY).unwrap();
4118        let embedding_vector_index_spann = embedding_spann
4119            .float_list
4120            .as_ref()
4121            .unwrap()
4122            .vector_index
4123            .as_ref()
4124            .unwrap();
4125        assert_eq!(
4126            embedding_vector_index_spann.config.source_key,
4127            Some(DOCUMENT_KEY.to_string())
4128        );
4129    }
4130
4131    #[test]
4132    fn test_try_from_source_key() {
4133        // Direct test of try_from to verify source_key behavior
4134        // Defaults should have source_key=None, #embedding should have source_key=DOCUMENT_KEY
4135
4136        // Test with HNSW config
4137        let collection_config_hnsw = InternalCollectionConfiguration {
4138            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4139                ef_construction: 300,
4140                max_neighbors: 32,
4141                ef_search: 50,
4142                num_threads: 8,
4143                batch_size: 200,
4144                sync_threshold: 2000,
4145                resize_factor: 1.5,
4146                space: Space::L2,
4147            }),
4148            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4149        };
4150        let schema_hnsw = Schema::try_from(&collection_config_hnsw).unwrap();
4151
4152        // Check defaults have source_key=None
4153        let defaults_hnsw = schema_hnsw
4154            .defaults
4155            .float_list
4156            .as_ref()
4157            .unwrap()
4158            .vector_index
4159            .as_ref()
4160            .unwrap();
4161        assert_eq!(defaults_hnsw.config.source_key, None);
4162
4163        // Check #embedding has source_key=DOCUMENT_KEY
4164        let embedding_hnsw = schema_hnsw.keys.get(EMBEDDING_KEY).unwrap();
4165        let embedding_vector_index_hnsw = embedding_hnsw
4166            .float_list
4167            .as_ref()
4168            .unwrap()
4169            .vector_index
4170            .as_ref()
4171            .unwrap();
4172        assert_eq!(
4173            embedding_vector_index_hnsw.config.source_key,
4174            Some(DOCUMENT_KEY.to_string())
4175        );
4176
4177        // Test with Spann config
4178        let collection_config_spann = InternalCollectionConfiguration {
4179            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4180                search_nprobe: 20,
4181                search_rng_factor: 3.0,
4182                search_rng_epsilon: 0.2,
4183                nreplica_count: 5,
4184                write_rng_factor: 2.0,
4185                write_rng_epsilon: 0.1,
4186                split_threshold: 2000,
4187                num_samples_kmeans: 200,
4188                initial_lambda: 0.8,
4189                reassign_neighbor_count: 100,
4190                merge_threshold: 800,
4191                num_centers_to_merge_to: 20,
4192                write_nprobe: 10,
4193                ef_construction: 400,
4194                ef_search: 60,
4195                max_neighbors: 24,
4196                space: Space::Cosine,
4197            }),
4198            embedding_function: None,
4199        };
4200        let schema_spann = Schema::try_from(&collection_config_spann).unwrap();
4201
4202        // Check defaults have source_key=None
4203        let defaults_spann = schema_spann
4204            .defaults
4205            .float_list
4206            .as_ref()
4207            .unwrap()
4208            .vector_index
4209            .as_ref()
4210            .unwrap();
4211        assert_eq!(defaults_spann.config.source_key, None);
4212
4213        // Check #embedding has source_key=DOCUMENT_KEY
4214        let embedding_spann = schema_spann.keys.get(EMBEDDING_KEY).unwrap();
4215        let embedding_vector_index_spann = embedding_spann
4216            .float_list
4217            .as_ref()
4218            .unwrap()
4219            .vector_index
4220            .as_ref()
4221            .unwrap();
4222        assert_eq!(
4223            embedding_vector_index_spann.config.source_key,
4224            Some(DOCUMENT_KEY.to_string())
4225        );
4226    }
4227
4228    #[test]
4229    fn test_default_hnsw_with_default_embedding_function() {
4230        // Test that when InternalCollectionConfiguration is default HNSW but has
4231        // an embedding function with name "default" and config as {}, it still
4232        // goes through the double default path and preserves source_key behavior
4233        use crate::collection_configuration::EmbeddingFunctionNewConfiguration;
4234
4235        let collection_config = InternalCollectionConfiguration {
4236            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default()),
4237            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
4238                EmbeddingFunctionNewConfiguration {
4239                    name: "default".to_string(),
4240                    config: serde_json::json!({}),
4241                },
4242            )),
4243        };
4244
4245        // Verify it's still considered default
4246        assert!(collection_config.is_default());
4247
4248        let schema = Schema::new_default(KnnIndex::Hnsw);
4249        let result =
4250            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4251                .unwrap();
4252
4253        // Check that defaults have source_key=None
4254        let defaults = result
4255            .defaults
4256            .float_list
4257            .as_ref()
4258            .unwrap()
4259            .vector_index
4260            .as_ref()
4261            .unwrap();
4262        assert_eq!(defaults.config.source_key, None);
4263
4264        // Check that #embedding has source_key=DOCUMENT_KEY
4265        let embedding = result.keys.get(EMBEDDING_KEY).unwrap();
4266        let embedding_vector_index = embedding
4267            .float_list
4268            .as_ref()
4269            .unwrap()
4270            .vector_index
4271            .as_ref()
4272            .unwrap();
4273        assert_eq!(
4274            embedding_vector_index.config.source_key,
4275            Some(DOCUMENT_KEY.to_string())
4276        );
4277
4278        // verify vector index config is set to spann
4279        let vector_index_config = defaults.config.clone();
4280        assert!(vector_index_config.spann.is_some());
4281        assert!(vector_index_config.hnsw.is_none());
4282
4283        // Verify embedding function was set correctly
4284        assert_eq!(
4285            embedding_vector_index.config.embedding_function,
4286            Some(EmbeddingFunctionConfiguration::Known(
4287                EmbeddingFunctionNewConfiguration {
4288                    name: "default".to_string(),
4289                    config: serde_json::json!({}),
4290                },
4291            ))
4292        );
4293        assert_eq!(
4294            defaults.config.embedding_function,
4295            Some(EmbeddingFunctionConfiguration::Known(
4296                EmbeddingFunctionNewConfiguration {
4297                    name: "default".to_string(),
4298                    config: serde_json::json!({}),
4299                },
4300            ))
4301        );
4302    }
4303
4304    #[test]
4305    fn test_reconcile_with_collection_config_both_non_default() {
4306        // Test that when both schema and collection config are non-default, it returns an error
4307        let mut schema = Schema::new_default(KnnIndex::Hnsw);
4308        schema.defaults.string = Some(StringValueType {
4309            fts_index: Some(FtsIndexType {
4310                enabled: true,
4311                config: FtsIndexConfig {},
4312            }),
4313            string_inverted_index: None,
4314        });
4315
4316        let mut collection_config = InternalCollectionConfiguration::default_hnsw();
4317        // Make collection config non-default by changing a parameter
4318        if let VectorIndexConfiguration::Hnsw(ref mut hnsw_config) = collection_config.vector_index
4319        {
4320            hnsw_config.ef_construction = 500; // Non-default value
4321        }
4322
4323        // Use reconcile_schema_and_config which has the early validation
4324        let result = Schema::reconcile_schema_and_config(
4325            Some(&schema),
4326            Some(&collection_config),
4327            KnnIndex::Spann,
4328        );
4329        assert!(result.is_err());
4330        assert!(matches!(
4331            result.unwrap_err(),
4332            SchemaError::ConfigAndSchemaConflict
4333        ));
4334    }
4335
4336    #[test]
4337    fn test_reconcile_with_collection_config_hnsw_override() {
4338        // Test that non-default HNSW collection config overrides default schema
4339        let schema = Schema::new_default(KnnIndex::Hnsw); // Use actual default schema
4340
4341        let collection_config = InternalCollectionConfiguration {
4342            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4343                ef_construction: 300,
4344                max_neighbors: 32,
4345                ef_search: 50,
4346                num_threads: 8,
4347                batch_size: 200,
4348                sync_threshold: 2000,
4349                resize_factor: 1.5,
4350                space: Space::L2,
4351            }),
4352            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4353        };
4354
4355        let result =
4356            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4357                .unwrap();
4358
4359        // Check that #embedding key override was created with the collection config settings
4360        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4361        let vector_index = embedding_override
4362            .float_list
4363            .as_ref()
4364            .unwrap()
4365            .vector_index
4366            .as_ref()
4367            .unwrap();
4368
4369        assert!(vector_index.enabled);
4370        assert_eq!(vector_index.config.space, Some(Space::L2));
4371        assert_eq!(
4372            vector_index.config.embedding_function,
4373            Some(EmbeddingFunctionConfiguration::Legacy)
4374        );
4375        assert_eq!(
4376            vector_index.config.source_key,
4377            Some(DOCUMENT_KEY.to_string())
4378        );
4379
4380        let hnsw_config = vector_index.config.hnsw.as_ref().unwrap();
4381        assert_eq!(hnsw_config.ef_construction, Some(300));
4382        assert_eq!(hnsw_config.max_neighbors, Some(32));
4383        assert_eq!(hnsw_config.ef_search, Some(50));
4384        assert_eq!(hnsw_config.num_threads, Some(8));
4385        assert_eq!(hnsw_config.batch_size, Some(200));
4386        assert_eq!(hnsw_config.sync_threshold, Some(2000));
4387        assert_eq!(hnsw_config.resize_factor, Some(1.5));
4388
4389        assert!(vector_index.config.spann.is_none());
4390    }
4391
4392    #[test]
4393    fn test_reconcile_with_collection_config_spann_override() {
4394        // Test that non-default SPANN collection config overrides default schema
4395        let schema = Schema::new_default(KnnIndex::Spann); // Use actual default schema
4396
4397        let collection_config = InternalCollectionConfiguration {
4398            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4399                search_nprobe: 20,
4400                search_rng_factor: 3.0,
4401                search_rng_epsilon: 0.2,
4402                nreplica_count: 5,
4403                write_rng_factor: 2.0,
4404                write_rng_epsilon: 0.1,
4405                split_threshold: 2000,
4406                num_samples_kmeans: 200,
4407                initial_lambda: 0.8,
4408                reassign_neighbor_count: 100,
4409                merge_threshold: 800,
4410                num_centers_to_merge_to: 20,
4411                write_nprobe: 10,
4412                ef_construction: 400,
4413                ef_search: 60,
4414                max_neighbors: 24,
4415                space: Space::Cosine,
4416            }),
4417            embedding_function: None,
4418        };
4419
4420        let result =
4421            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4422                .unwrap();
4423
4424        // Check that #embedding key override was created with the collection config settings
4425        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4426        let vector_index = embedding_override
4427            .float_list
4428            .as_ref()
4429            .unwrap()
4430            .vector_index
4431            .as_ref()
4432            .unwrap();
4433
4434        assert!(vector_index.enabled);
4435        assert_eq!(vector_index.config.space, Some(Space::Cosine));
4436        assert_eq!(vector_index.config.embedding_function, None);
4437        assert_eq!(
4438            vector_index.config.source_key,
4439            Some(DOCUMENT_KEY.to_string())
4440        );
4441
4442        assert!(vector_index.config.hnsw.is_none());
4443
4444        let spann_config = vector_index.config.spann.as_ref().unwrap();
4445        assert_eq!(spann_config.search_nprobe, Some(20));
4446        assert_eq!(spann_config.search_rng_factor, Some(3.0));
4447        assert_eq!(spann_config.search_rng_epsilon, Some(0.2));
4448        assert_eq!(spann_config.nreplica_count, Some(5));
4449        assert_eq!(spann_config.write_rng_factor, Some(2.0));
4450        assert_eq!(spann_config.write_rng_epsilon, Some(0.1));
4451        assert_eq!(spann_config.split_threshold, Some(2000));
4452        assert_eq!(spann_config.num_samples_kmeans, Some(200));
4453        assert_eq!(spann_config.initial_lambda, Some(0.8));
4454        assert_eq!(spann_config.reassign_neighbor_count, Some(100));
4455        assert_eq!(spann_config.merge_threshold, Some(800));
4456        assert_eq!(spann_config.num_centers_to_merge_to, Some(20));
4457        assert_eq!(spann_config.write_nprobe, Some(10));
4458        assert_eq!(spann_config.ef_construction, Some(400));
4459        assert_eq!(spann_config.ef_search, Some(60));
4460        assert_eq!(spann_config.max_neighbors, Some(24));
4461    }
4462
4463    #[test]
4464    fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
4465        // Test that collection config updates BOTH defaults.float_list.vector_index
4466        // AND keys["embedding"].float_list.vector_index
4467        let schema = Schema::new_default(KnnIndex::Hnsw);
4468
4469        let collection_config = InternalCollectionConfiguration {
4470            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4471                ef_construction: 300,
4472                max_neighbors: 32,
4473                ef_search: 50,
4474                num_threads: 8,
4475                batch_size: 200,
4476                sync_threshold: 2000,
4477                resize_factor: 1.5,
4478                space: Space::L2,
4479            }),
4480            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4481        };
4482
4483        let result =
4484            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4485                .unwrap();
4486
4487        // Check that defaults.float_list.vector_index was updated
4488        let defaults_vector_index = result
4489            .defaults
4490            .float_list
4491            .as_ref()
4492            .unwrap()
4493            .vector_index
4494            .as_ref()
4495            .unwrap();
4496
4497        // Should be disabled in defaults (template for new keys)
4498        assert!(!defaults_vector_index.enabled);
4499        // But config should be updated
4500        assert_eq!(defaults_vector_index.config.space, Some(Space::L2));
4501        assert_eq!(
4502            defaults_vector_index.config.embedding_function,
4503            Some(EmbeddingFunctionConfiguration::Legacy)
4504        );
4505        assert_eq!(defaults_vector_index.config.source_key, None);
4506        let defaults_hnsw = defaults_vector_index.config.hnsw.as_ref().unwrap();
4507        assert_eq!(defaults_hnsw.ef_construction, Some(300));
4508        assert_eq!(defaults_hnsw.max_neighbors, Some(32));
4509
4510        // Check that #embedding key override was also updated
4511        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4512        let embedding_vector_index = embedding_override
4513            .float_list
4514            .as_ref()
4515            .unwrap()
4516            .vector_index
4517            .as_ref()
4518            .unwrap();
4519
4520        // Should be enabled on #embedding
4521        assert!(embedding_vector_index.enabled);
4522        // Config should match defaults
4523        assert_eq!(embedding_vector_index.config.space, Some(Space::L2));
4524        assert_eq!(
4525            embedding_vector_index.config.embedding_function,
4526            Some(EmbeddingFunctionConfiguration::Legacy)
4527        );
4528        assert_eq!(
4529            embedding_vector_index.config.source_key,
4530            Some(DOCUMENT_KEY.to_string())
4531        );
4532        let embedding_hnsw = embedding_vector_index.config.hnsw.as_ref().unwrap();
4533        assert_eq!(embedding_hnsw.ef_construction, Some(300));
4534        assert_eq!(embedding_hnsw.max_neighbors, Some(32));
4535    }
4536
4537    #[test]
4538    fn test_is_schema_default() {
4539        // Test that actual default schemas are correctly identified
4540        let default_hnsw_schema = Schema::new_default(KnnIndex::Hnsw);
4541        assert!(default_hnsw_schema.is_default());
4542
4543        let default_spann_schema = Schema::new_default(KnnIndex::Spann);
4544        assert!(default_spann_schema.is_default());
4545
4546        // Test that a modified default schema is not considered default
4547        let mut modified_schema = Schema::new_default(KnnIndex::Hnsw);
4548        // Make a clear modification - change the string inverted index enabled state
4549        if let Some(ref mut string_type) = modified_schema.defaults.string {
4550            if let Some(ref mut string_inverted) = string_type.string_inverted_index {
4551                string_inverted.enabled = false; // Default is true, so this should make it non-default
4552            }
4553        }
4554        assert!(!modified_schema.is_default());
4555
4556        // Test that schema with additional key overrides is not default
4557        let mut schema_with_extra_overrides = Schema::new_default(KnnIndex::Hnsw);
4558        schema_with_extra_overrides
4559            .keys
4560            .insert("custom_key".to_string(), ValueTypes::default());
4561        assert!(!schema_with_extra_overrides.is_default());
4562    }
4563
4564    #[test]
4565    fn test_is_schema_default_with_space() {
4566        let schema = Schema::new_default(KnnIndex::Hnsw);
4567        assert!(schema.is_default());
4568
4569        let mut schema_with_space = Schema::new_default(KnnIndex::Hnsw);
4570        if let Some(ref mut float_list) = schema_with_space.defaults.float_list {
4571            if let Some(ref mut vector_index) = float_list.vector_index {
4572                vector_index.config.space = Some(Space::Cosine);
4573            }
4574        }
4575        assert!(!schema_with_space.is_default());
4576
4577        let mut schema_with_space_in_embedding_key = Schema::new_default(KnnIndex::Spann);
4578        if let Some(ref mut embedding_key) = schema_with_space_in_embedding_key
4579            .keys
4580            .get_mut(EMBEDDING_KEY)
4581        {
4582            if let Some(ref mut float_list) = embedding_key.float_list {
4583                if let Some(ref mut vector_index) = float_list.vector_index {
4584                    vector_index.config.space = Some(Space::Cosine);
4585                }
4586            }
4587        }
4588        assert!(!schema_with_space_in_embedding_key.is_default());
4589    }
4590
4591    #[test]
4592    fn test_is_schema_default_with_embedding_function() {
4593        let schema = Schema::new_default(KnnIndex::Hnsw);
4594        assert!(schema.is_default());
4595
4596        let mut schema_with_embedding_function = Schema::new_default(KnnIndex::Hnsw);
4597        if let Some(ref mut float_list) = schema_with_embedding_function.defaults.float_list {
4598            if let Some(ref mut vector_index) = float_list.vector_index {
4599                vector_index.config.embedding_function =
4600                    Some(EmbeddingFunctionConfiguration::Legacy);
4601            }
4602        }
4603        assert!(!schema_with_embedding_function.is_default());
4604
4605        let mut schema_with_embedding_function_in_embedding_key =
4606            Schema::new_default(KnnIndex::Spann);
4607        if let Some(ref mut embedding_key) = schema_with_embedding_function_in_embedding_key
4608            .keys
4609            .get_mut(EMBEDDING_KEY)
4610        {
4611            if let Some(ref mut float_list) = embedding_key.float_list {
4612                if let Some(ref mut vector_index) = float_list.vector_index {
4613                    vector_index.config.embedding_function =
4614                        Some(EmbeddingFunctionConfiguration::Legacy);
4615                }
4616            }
4617        }
4618        assert!(!schema_with_embedding_function_in_embedding_key.is_default());
4619    }
4620
4621    #[test]
4622    fn test_add_merges_keys_by_value_type() {
4623        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
4624        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
4625
4626        let string_override = ValueTypes {
4627            string: Some(StringValueType {
4628                string_inverted_index: Some(StringInvertedIndexType {
4629                    enabled: true,
4630                    config: StringInvertedIndexConfig {},
4631                }),
4632                fts_index: None,
4633            }),
4634            ..Default::default()
4635        };
4636        schema_a
4637            .keys
4638            .insert("custom_field".to_string(), string_override);
4639
4640        let float_override = ValueTypes {
4641            float: Some(FloatValueType {
4642                float_inverted_index: Some(FloatInvertedIndexType {
4643                    enabled: true,
4644                    config: FloatInvertedIndexConfig {},
4645                }),
4646            }),
4647            ..Default::default()
4648        };
4649        schema_b
4650            .keys
4651            .insert("custom_field".to_string(), float_override);
4652
4653        let merged = schema_a.merge(&schema_b).unwrap();
4654        let merged_override = merged.keys.get("custom_field").unwrap();
4655
4656        assert!(merged_override.string.is_some());
4657        assert!(merged_override.float.is_some());
4658        assert!(
4659            merged_override
4660                .string
4661                .as_ref()
4662                .unwrap()
4663                .string_inverted_index
4664                .as_ref()
4665                .unwrap()
4666                .enabled
4667        );
4668        assert!(
4669            merged_override
4670                .float
4671                .as_ref()
4672                .unwrap()
4673                .float_inverted_index
4674                .as_ref()
4675                .unwrap()
4676                .enabled
4677        );
4678    }
4679
4680    #[test]
4681    fn test_add_rejects_different_defaults() {
4682        let schema_a = Schema::new_default(KnnIndex::Hnsw);
4683        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
4684
4685        if let Some(string_type) = schema_b.defaults.string.as_mut() {
4686            if let Some(string_index) = string_type.string_inverted_index.as_mut() {
4687                string_index.enabled = false;
4688            }
4689        }
4690
4691        let err = schema_a.merge(&schema_b).unwrap_err();
4692        assert!(matches!(err, SchemaError::DefaultsMismatch));
4693    }
4694
4695    #[test]
4696    fn test_add_detects_conflicting_value_type_configuration() {
4697        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
4698        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
4699
4700        let string_override_enabled = ValueTypes {
4701            string: Some(StringValueType {
4702                string_inverted_index: Some(StringInvertedIndexType {
4703                    enabled: true,
4704                    config: StringInvertedIndexConfig {},
4705                }),
4706                fts_index: None,
4707            }),
4708            ..Default::default()
4709        };
4710        schema_a
4711            .keys
4712            .insert("custom_field".to_string(), string_override_enabled);
4713
4714        let string_override_disabled = ValueTypes {
4715            string: Some(StringValueType {
4716                string_inverted_index: Some(StringInvertedIndexType {
4717                    enabled: false,
4718                    config: StringInvertedIndexConfig {},
4719                }),
4720                fts_index: None,
4721            }),
4722            ..Default::default()
4723        };
4724        schema_b
4725            .keys
4726            .insert("custom_field".to_string(), string_override_disabled);
4727
4728        let err = schema_a.merge(&schema_b).unwrap_err();
4729        assert!(matches!(err, SchemaError::ConfigurationConflict { .. }));
4730    }
4731
4732    // TODO(Sanket): Remove this test once deployed
4733    #[test]
4734    fn test_backward_compatibility_aliases() {
4735        // Test that old format with # and $ prefixes and key_overrides can be deserialized
4736        let old_format_json = r###"{
4737            "defaults": {
4738                "#string": {
4739                    "$fts_index": {
4740                        "enabled": true,
4741                        "config": {}
4742                    }
4743                },
4744                "#int": {
4745                    "$int_inverted_index": {
4746                        "enabled": true,
4747                        "config": {}
4748                    }
4749                },
4750                "#float_list": {
4751                    "$vector_index": {
4752                        "enabled": true,
4753                        "config": {
4754                            "spann": {
4755                                "search_nprobe": 10
4756                            }
4757                        }
4758                    }
4759                }
4760            },
4761            "key_overrides": {
4762                "#document": {
4763                    "#string": {
4764                        "$fts_index": {
4765                            "enabled": false,
4766                            "config": {}
4767                        }
4768                    }
4769                }
4770            }
4771        }"###;
4772
4773        let schema_from_old: Schema = serde_json::from_str(old_format_json).unwrap();
4774
4775        // Test that new format without prefixes and keys can be deserialized
4776        let new_format_json = r###"{
4777            "defaults": {
4778                "string": {
4779                    "fts_index": {
4780                        "enabled": true,
4781                        "config": {}
4782                    }
4783                },
4784                "int": {
4785                    "int_inverted_index": {
4786                        "enabled": true,
4787                        "config": {}
4788                    }
4789                },
4790                "float_list": {
4791                    "vector_index": {
4792                        "enabled": true,
4793                        "config": {
4794                            "spann": {
4795                                "search_nprobe": 10
4796                            }
4797                        }
4798                    }
4799                }
4800            },
4801            "keys": {
4802                "#document": {
4803                    "string": {
4804                        "fts_index": {
4805                            "enabled": false,
4806                            "config": {}
4807                        }
4808                    }
4809                }
4810            }
4811        }"###;
4812
4813        let schema_from_new: Schema = serde_json::from_str(new_format_json).unwrap();
4814
4815        // Both should deserialize to the same structure
4816        assert_eq!(schema_from_old, schema_from_new);
4817
4818        // Verify the deserialized content is correct
4819        assert!(schema_from_old.defaults.string.is_some());
4820        assert!(schema_from_old
4821            .defaults
4822            .string
4823            .as_ref()
4824            .unwrap()
4825            .fts_index
4826            .is_some());
4827        assert!(
4828            schema_from_old
4829                .defaults
4830                .string
4831                .as_ref()
4832                .unwrap()
4833                .fts_index
4834                .as_ref()
4835                .unwrap()
4836                .enabled
4837        );
4838
4839        assert!(schema_from_old.defaults.int.is_some());
4840        assert!(schema_from_old
4841            .defaults
4842            .int
4843            .as_ref()
4844            .unwrap()
4845            .int_inverted_index
4846            .is_some());
4847
4848        assert!(schema_from_old.defaults.float_list.is_some());
4849        assert!(schema_from_old
4850            .defaults
4851            .float_list
4852            .as_ref()
4853            .unwrap()
4854            .vector_index
4855            .is_some());
4856
4857        assert!(schema_from_old.keys.contains_key(DOCUMENT_KEY));
4858        let doc_override = schema_from_old.keys.get(DOCUMENT_KEY).unwrap();
4859        assert!(doc_override.string.is_some());
4860        assert!(
4861            !doc_override
4862                .string
4863                .as_ref()
4864                .unwrap()
4865                .fts_index
4866                .as_ref()
4867                .unwrap()
4868                .enabled
4869        );
4870
4871        // Test that serialization always outputs the new format (without prefixes)
4872        let serialized = serde_json::to_string(&schema_from_old).unwrap();
4873
4874        // Should contain new format keys
4875        assert!(serialized.contains(r#""keys":"#));
4876        assert!(serialized.contains(r#""string":"#));
4877        assert!(serialized.contains(r#""fts_index":"#));
4878        assert!(serialized.contains(r#""int_inverted_index":"#));
4879        assert!(serialized.contains(r#""vector_index":"#));
4880
4881        // Should NOT contain old format keys
4882        assert!(!serialized.contains(r#""key_overrides":"#));
4883        assert!(!serialized.contains(r###""#string":"###));
4884        assert!(!serialized.contains(r###""$fts_index":"###));
4885        assert!(!serialized.contains(r###""$int_inverted_index":"###));
4886        assert!(!serialized.contains(r###""$vector_index":"###));
4887    }
4888
4889    #[test]
4890    fn test_hnsw_index_config_validation() {
4891        use validator::Validate;
4892
4893        // Valid configuration - should pass
4894        let valid_config = HnswIndexConfig {
4895            batch_size: Some(10),
4896            sync_threshold: Some(100),
4897            ef_construction: Some(100),
4898            max_neighbors: Some(16),
4899            ..Default::default()
4900        };
4901        assert!(valid_config.validate().is_ok());
4902
4903        // Invalid: batch_size too small (min 2)
4904        let invalid_batch_size = HnswIndexConfig {
4905            batch_size: Some(1),
4906            ..Default::default()
4907        };
4908        assert!(invalid_batch_size.validate().is_err());
4909
4910        // Invalid: sync_threshold too small (min 2)
4911        let invalid_sync_threshold = HnswIndexConfig {
4912            sync_threshold: Some(1),
4913            ..Default::default()
4914        };
4915        assert!(invalid_sync_threshold.validate().is_err());
4916
4917        // Valid: boundary values (exactly 2) should pass
4918        let boundary_config = HnswIndexConfig {
4919            batch_size: Some(2),
4920            sync_threshold: Some(2),
4921            ..Default::default()
4922        };
4923        assert!(boundary_config.validate().is_ok());
4924
4925        // Valid: None values should pass validation
4926        let all_none_config = HnswIndexConfig {
4927            ..Default::default()
4928        };
4929        assert!(all_none_config.validate().is_ok());
4930
4931        // Valid: fields without validation can be any value
4932        let other_fields_config = HnswIndexConfig {
4933            ef_construction: Some(1),
4934            max_neighbors: Some(1),
4935            ef_search: Some(1),
4936            num_threads: Some(1),
4937            resize_factor: Some(0.1),
4938            ..Default::default()
4939        };
4940        assert!(other_fields_config.validate().is_ok());
4941    }
4942
4943    #[test]
4944    fn test_spann_index_config_validation() {
4945        use validator::Validate;
4946
4947        // Valid configuration - should pass
4948        let valid_config = SpannIndexConfig {
4949            write_nprobe: Some(32),
4950            nreplica_count: Some(4),
4951            split_threshold: Some(100),
4952            merge_threshold: Some(50),
4953            reassign_neighbor_count: Some(32),
4954            num_centers_to_merge_to: Some(4),
4955            ef_construction: Some(100),
4956            ef_search: Some(100),
4957            max_neighbors: Some(32),
4958            search_rng_factor: Some(1.0),
4959            write_rng_factor: Some(1.0),
4960            search_rng_epsilon: Some(7.5),
4961            write_rng_epsilon: Some(7.5),
4962            ..Default::default()
4963        };
4964        assert!(valid_config.validate().is_ok());
4965
4966        // Invalid: write_nprobe too large (max 64)
4967        let invalid_write_nprobe = SpannIndexConfig {
4968            write_nprobe: Some(200),
4969            ..Default::default()
4970        };
4971        assert!(invalid_write_nprobe.validate().is_err());
4972
4973        // Invalid: split_threshold too small (min 50)
4974        let invalid_split_threshold = SpannIndexConfig {
4975            split_threshold: Some(10),
4976            ..Default::default()
4977        };
4978        assert!(invalid_split_threshold.validate().is_err());
4979
4980        // Invalid: split_threshold too large (max 200)
4981        let invalid_split_threshold_high = SpannIndexConfig {
4982            split_threshold: Some(250),
4983            ..Default::default()
4984        };
4985        assert!(invalid_split_threshold_high.validate().is_err());
4986
4987        // Invalid: nreplica_count too large (max 8)
4988        let invalid_nreplica = SpannIndexConfig {
4989            nreplica_count: Some(10),
4990            ..Default::default()
4991        };
4992        assert!(invalid_nreplica.validate().is_err());
4993
4994        // Invalid: reassign_neighbor_count too large (max 64)
4995        let invalid_reassign = SpannIndexConfig {
4996            reassign_neighbor_count: Some(100),
4997            ..Default::default()
4998        };
4999        assert!(invalid_reassign.validate().is_err());
5000
5001        // Invalid: merge_threshold out of range (min 25, max 100)
5002        let invalid_merge_threshold_low = SpannIndexConfig {
5003            merge_threshold: Some(5),
5004            ..Default::default()
5005        };
5006        assert!(invalid_merge_threshold_low.validate().is_err());
5007
5008        let invalid_merge_threshold_high = SpannIndexConfig {
5009            merge_threshold: Some(150),
5010            ..Default::default()
5011        };
5012        assert!(invalid_merge_threshold_high.validate().is_err());
5013
5014        // Invalid: num_centers_to_merge_to too large (max 8)
5015        let invalid_num_centers = SpannIndexConfig {
5016            num_centers_to_merge_to: Some(10),
5017            ..Default::default()
5018        };
5019        assert!(invalid_num_centers.validate().is_err());
5020
5021        // Invalid: ef_construction too large (max 200)
5022        let invalid_ef_construction = SpannIndexConfig {
5023            ef_construction: Some(300),
5024            ..Default::default()
5025        };
5026        assert!(invalid_ef_construction.validate().is_err());
5027
5028        // Invalid: ef_search too large (max 200)
5029        let invalid_ef_search = SpannIndexConfig {
5030            ef_search: Some(300),
5031            ..Default::default()
5032        };
5033        assert!(invalid_ef_search.validate().is_err());
5034
5035        // Invalid: max_neighbors too large (max 64)
5036        let invalid_max_neighbors = SpannIndexConfig {
5037            max_neighbors: Some(100),
5038            ..Default::default()
5039        };
5040        assert!(invalid_max_neighbors.validate().is_err());
5041
5042        // Invalid: search_nprobe too large (max 128)
5043        let invalid_search_nprobe = SpannIndexConfig {
5044            search_nprobe: Some(200),
5045            ..Default::default()
5046        };
5047        assert!(invalid_search_nprobe.validate().is_err());
5048
5049        // Invalid: search_rng_factor not exactly 1.0 (min 1.0, max 1.0)
5050        let invalid_search_rng_factor_low = SpannIndexConfig {
5051            search_rng_factor: Some(0.9),
5052            ..Default::default()
5053        };
5054        assert!(invalid_search_rng_factor_low.validate().is_err());
5055
5056        let invalid_search_rng_factor_high = SpannIndexConfig {
5057            search_rng_factor: Some(1.1),
5058            ..Default::default()
5059        };
5060        assert!(invalid_search_rng_factor_high.validate().is_err());
5061
5062        // Valid: search_rng_factor exactly 1.0
5063        let valid_search_rng_factor = SpannIndexConfig {
5064            search_rng_factor: Some(1.0),
5065            ..Default::default()
5066        };
5067        assert!(valid_search_rng_factor.validate().is_ok());
5068
5069        // Invalid: search_rng_epsilon out of range (min 5.0, max 10.0)
5070        let invalid_search_rng_epsilon_low = SpannIndexConfig {
5071            search_rng_epsilon: Some(4.0),
5072            ..Default::default()
5073        };
5074        assert!(invalid_search_rng_epsilon_low.validate().is_err());
5075
5076        let invalid_search_rng_epsilon_high = SpannIndexConfig {
5077            search_rng_epsilon: Some(11.0),
5078            ..Default::default()
5079        };
5080        assert!(invalid_search_rng_epsilon_high.validate().is_err());
5081
5082        // Valid: search_rng_epsilon within range
5083        let valid_search_rng_epsilon = SpannIndexConfig {
5084            search_rng_epsilon: Some(7.5),
5085            ..Default::default()
5086        };
5087        assert!(valid_search_rng_epsilon.validate().is_ok());
5088
5089        // Invalid: write_rng_factor not exactly 1.0 (min 1.0, max 1.0)
5090        let invalid_write_rng_factor_low = SpannIndexConfig {
5091            write_rng_factor: Some(0.9),
5092            ..Default::default()
5093        };
5094        assert!(invalid_write_rng_factor_low.validate().is_err());
5095
5096        let invalid_write_rng_factor_high = SpannIndexConfig {
5097            write_rng_factor: Some(1.1),
5098            ..Default::default()
5099        };
5100        assert!(invalid_write_rng_factor_high.validate().is_err());
5101
5102        // Valid: write_rng_factor exactly 1.0
5103        let valid_write_rng_factor = SpannIndexConfig {
5104            write_rng_factor: Some(1.0),
5105            ..Default::default()
5106        };
5107        assert!(valid_write_rng_factor.validate().is_ok());
5108
5109        // Invalid: write_rng_epsilon out of range (min 5.0, max 10.0)
5110        let invalid_write_rng_epsilon_low = SpannIndexConfig {
5111            write_rng_epsilon: Some(4.0),
5112            ..Default::default()
5113        };
5114        assert!(invalid_write_rng_epsilon_low.validate().is_err());
5115
5116        let invalid_write_rng_epsilon_high = SpannIndexConfig {
5117            write_rng_epsilon: Some(11.0),
5118            ..Default::default()
5119        };
5120        assert!(invalid_write_rng_epsilon_high.validate().is_err());
5121
5122        // Valid: write_rng_epsilon within range
5123        let valid_write_rng_epsilon = SpannIndexConfig {
5124            write_rng_epsilon: Some(7.5),
5125            ..Default::default()
5126        };
5127        assert!(valid_write_rng_epsilon.validate().is_ok());
5128
5129        // Invalid: num_samples_kmeans too large (max 1000)
5130        let invalid_num_samples_kmeans = SpannIndexConfig {
5131            num_samples_kmeans: Some(1500),
5132            ..Default::default()
5133        };
5134        assert!(invalid_num_samples_kmeans.validate().is_err());
5135
5136        // Valid: num_samples_kmeans within range
5137        let valid_num_samples_kmeans = SpannIndexConfig {
5138            num_samples_kmeans: Some(500),
5139            ..Default::default()
5140        };
5141        assert!(valid_num_samples_kmeans.validate().is_ok());
5142
5143        // Invalid: initial_lambda not exactly 100.0 (min 100.0, max 100.0)
5144        let invalid_initial_lambda_high = SpannIndexConfig {
5145            initial_lambda: Some(150.0),
5146            ..Default::default()
5147        };
5148        assert!(invalid_initial_lambda_high.validate().is_err());
5149
5150        let invalid_initial_lambda_low = SpannIndexConfig {
5151            initial_lambda: Some(50.0),
5152            ..Default::default()
5153        };
5154        assert!(invalid_initial_lambda_low.validate().is_err());
5155
5156        // Valid: initial_lambda exactly 100.0
5157        let valid_initial_lambda = SpannIndexConfig {
5158            initial_lambda: Some(100.0),
5159            ..Default::default()
5160        };
5161        assert!(valid_initial_lambda.validate().is_ok());
5162
5163        // Valid: None values should pass validation
5164        let all_none_config = SpannIndexConfig {
5165            ..Default::default()
5166        };
5167        assert!(all_none_config.validate().is_ok());
5168    }
5169
5170    #[test]
5171    fn test_builder_pattern_crud_workflow() {
5172        // Test comprehensive CRUD workflow using the builder pattern
5173
5174        // CREATE: Build a schema with multiple indexes
5175        let schema = Schema::new_default(KnnIndex::Hnsw)
5176            .create_index(
5177                None,
5178                IndexConfig::Vector(VectorIndexConfig {
5179                    space: Some(Space::Cosine),
5180                    embedding_function: None,
5181                    source_key: None,
5182                    hnsw: Some(HnswIndexConfig {
5183                        ef_construction: Some(200),
5184                        max_neighbors: Some(32),
5185                        ef_search: Some(50),
5186                        num_threads: None,
5187                        batch_size: None,
5188                        sync_threshold: None,
5189                        resize_factor: None,
5190                    }),
5191                    spann: None,
5192                }),
5193            )
5194            .expect("vector config should succeed")
5195            .create_index(
5196                Some("category"),
5197                IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5198            )
5199            .expect("string inverted on key should succeed")
5200            .create_index(
5201                Some("year"),
5202                IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5203            )
5204            .expect("int inverted on key should succeed")
5205            .create_index(
5206                Some("rating"),
5207                IndexConfig::FloatInverted(FloatInvertedIndexConfig {}),
5208            )
5209            .expect("float inverted on key should succeed")
5210            .create_index(
5211                Some("is_active"),
5212                IndexConfig::BoolInverted(BoolInvertedIndexConfig {}),
5213            )
5214            .expect("bool inverted on key should succeed");
5215
5216        // READ: Verify the schema was built correctly
5217        // Check vector config
5218        assert!(schema.keys.contains_key(EMBEDDING_KEY));
5219        let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
5220        assert!(embedding.float_list.is_some());
5221        let vector_index = embedding
5222            .float_list
5223            .as_ref()
5224            .unwrap()
5225            .vector_index
5226            .as_ref()
5227            .unwrap();
5228        assert!(vector_index.enabled);
5229        assert_eq!(vector_index.config.space, Some(Space::Cosine));
5230        assert_eq!(
5231            vector_index.config.hnsw.as_ref().unwrap().ef_construction,
5232            Some(200)
5233        );
5234
5235        // Check per-key indexes
5236        assert!(schema.keys.contains_key("category"));
5237        assert!(schema.keys.contains_key("year"));
5238        assert!(schema.keys.contains_key("rating"));
5239        assert!(schema.keys.contains_key("is_active"));
5240
5241        // Verify category string inverted index
5242        let category = schema.keys.get("category").unwrap();
5243        assert!(category.string.is_some());
5244        let string_idx = category
5245            .string
5246            .as_ref()
5247            .unwrap()
5248            .string_inverted_index
5249            .as_ref()
5250            .unwrap();
5251        assert!(string_idx.enabled);
5252
5253        // Verify year int inverted index
5254        let year = schema.keys.get("year").unwrap();
5255        assert!(year.int.is_some());
5256        let int_idx = year
5257            .int
5258            .as_ref()
5259            .unwrap()
5260            .int_inverted_index
5261            .as_ref()
5262            .unwrap();
5263        assert!(int_idx.enabled);
5264
5265        // UPDATE/DELETE: Disable some indexes
5266        let schema = schema
5267            .delete_index(
5268                Some("category"),
5269                IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5270            )
5271            .expect("delete string inverted should succeed")
5272            .delete_index(
5273                Some("year"),
5274                IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5275            )
5276            .expect("delete int inverted should succeed");
5277
5278        // VERIFY DELETE: Check that indexes were disabled
5279        let category = schema.keys.get("category").unwrap();
5280        let string_idx = category
5281            .string
5282            .as_ref()
5283            .unwrap()
5284            .string_inverted_index
5285            .as_ref()
5286            .unwrap();
5287        assert!(!string_idx.enabled); // Should be disabled now
5288
5289        let year = schema.keys.get("year").unwrap();
5290        let int_idx = year
5291            .int
5292            .as_ref()
5293            .unwrap()
5294            .int_inverted_index
5295            .as_ref()
5296            .unwrap();
5297        assert!(!int_idx.enabled); // Should be disabled now
5298
5299        // Verify other indexes still enabled
5300        let rating = schema.keys.get("rating").unwrap();
5301        let float_idx = rating
5302            .float
5303            .as_ref()
5304            .unwrap()
5305            .float_inverted_index
5306            .as_ref()
5307            .unwrap();
5308        assert!(float_idx.enabled); // Should still be enabled
5309
5310        let is_active = schema.keys.get("is_active").unwrap();
5311        let bool_idx = is_active
5312            .boolean
5313            .as_ref()
5314            .unwrap()
5315            .bool_inverted_index
5316            .as_ref()
5317            .unwrap();
5318        assert!(bool_idx.enabled); // Should still be enabled
5319    }
5320
5321    #[test]
5322    fn test_builder_create_index_validation_errors() {
5323        // Test all validation errors for create_index() as documented in the docstring:
5324        // - Attempting to create index on special keys (#document, #embedding)
5325        // - Invalid configuration (e.g., vector index on non-embedding key)
5326        // - Conflicting with existing indexes (e.g., multiple sparse vector indexes)
5327
5328        // Error: Vector index on specific key (must be global)
5329        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5330            Some("my_vectors"),
5331            IndexConfig::Vector(VectorIndexConfig {
5332                space: Some(Space::L2),
5333                embedding_function: None,
5334                source_key: None,
5335                hnsw: None,
5336                spann: None,
5337            }),
5338        );
5339        assert!(result.is_err());
5340        assert!(matches!(
5341            result.unwrap_err(),
5342            SchemaBuilderError::VectorIndexMustBeGlobal { key } if key == "my_vectors"
5343        ));
5344
5345        // Error: FTS index on specific key (must be global)
5346        let result = Schema::new_default(KnnIndex::Hnsw)
5347            .create_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
5348        assert!(result.is_err());
5349        assert!(matches!(
5350            result.unwrap_err(),
5351            SchemaBuilderError::FtsIndexMustBeGlobal { key } if key == "my_text"
5352        ));
5353
5354        // Error: Cannot create index on special key #document
5355        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5356            Some(DOCUMENT_KEY),
5357            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5358        );
5359        assert!(result.is_err());
5360        assert!(matches!(
5361            result.unwrap_err(),
5362            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5363        ));
5364
5365        // Error: Cannot create index on special key #embedding
5366        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5367            Some(EMBEDDING_KEY),
5368            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5369        );
5370        assert!(result.is_err());
5371        assert!(matches!(
5372            result.unwrap_err(),
5373            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5374        ));
5375
5376        // Error: Sparse vector without key (must specify key)
5377        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5378            None,
5379            IndexConfig::SparseVector(SparseVectorIndexConfig {
5380                embedding_function: None,
5381                source_key: None,
5382                bm25: None,
5383            }),
5384        );
5385        assert!(result.is_err());
5386        assert!(matches!(
5387            result.unwrap_err(),
5388            SchemaBuilderError::SparseVectorRequiresKey
5389        ));
5390
5391        // Error: Multiple sparse vector indexes (only one allowed per collection)
5392        let result = Schema::new_default(KnnIndex::Hnsw)
5393            .create_index(
5394                Some("sparse1"),
5395                IndexConfig::SparseVector(SparseVectorIndexConfig {
5396                    embedding_function: None,
5397                    source_key: None,
5398                    bm25: None,
5399                }),
5400            )
5401            .expect("first sparse should succeed")
5402            .create_index(
5403                Some("sparse2"),
5404                IndexConfig::SparseVector(SparseVectorIndexConfig {
5405                    embedding_function: None,
5406                    source_key: None,
5407                    bm25: None,
5408                }),
5409            );
5410        assert!(result.is_err());
5411        assert!(matches!(
5412            result.unwrap_err(),
5413            SchemaBuilderError::MultipleSparseVectorIndexes { existing_key } if existing_key == "sparse1"
5414        ));
5415    }
5416
5417    #[test]
5418    fn test_builder_delete_index_validation_errors() {
5419        // Test all validation errors for delete_index() as documented in the docstring:
5420        // - Attempting to delete index on special keys (#document, #embedding)
5421        // - Attempting to delete vector, FTS, or sparse vector indexes (not currently supported)
5422
5423        // Error: Delete on special key #embedding
5424        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5425            Some(EMBEDDING_KEY),
5426            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5427        );
5428        assert!(result.is_err());
5429        assert!(matches!(
5430            result.unwrap_err(),
5431            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5432        ));
5433
5434        // Error: Delete on special key #document
5435        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5436            Some(DOCUMENT_KEY),
5437            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5438        );
5439        assert!(result.is_err());
5440        assert!(matches!(
5441            result.unwrap_err(),
5442            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5443        ));
5444
5445        // Error: Delete vector index (not currently supported)
5446        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5447            None,
5448            IndexConfig::Vector(VectorIndexConfig {
5449                space: None,
5450                embedding_function: None,
5451                source_key: None,
5452                hnsw: None,
5453                spann: None,
5454            }),
5455        );
5456        assert!(result.is_err());
5457        assert!(matches!(
5458            result.unwrap_err(),
5459            SchemaBuilderError::VectorIndexDeletionNotSupported
5460        ));
5461
5462        // Error: Delete FTS index (not currently supported)
5463        let result = Schema::new_default(KnnIndex::Hnsw)
5464            .delete_index(None, IndexConfig::Fts(FtsIndexConfig {}));
5465        assert!(result.is_err());
5466        assert!(matches!(
5467            result.unwrap_err(),
5468            SchemaBuilderError::FtsIndexDeletionNotSupported
5469        ));
5470
5471        // Error: Delete sparse vector index (not currently supported)
5472        let result = Schema::new_default(KnnIndex::Hnsw)
5473            .create_index(
5474                Some("sparse"),
5475                IndexConfig::SparseVector(SparseVectorIndexConfig {
5476                    embedding_function: None,
5477                    source_key: None,
5478                    bm25: None,
5479                }),
5480            )
5481            .expect("create should succeed")
5482            .delete_index(
5483                Some("sparse"),
5484                IndexConfig::SparseVector(SparseVectorIndexConfig {
5485                    embedding_function: None,
5486                    source_key: None,
5487                    bm25: None,
5488                }),
5489            );
5490        assert!(result.is_err());
5491        assert!(matches!(
5492            result.unwrap_err(),
5493            SchemaBuilderError::SparseVectorIndexDeletionNotSupported
5494        ));
5495    }
5496
5497    #[test]
5498    fn test_builder_pattern_chaining() {
5499        // Test complex chaining scenario
5500        let schema = Schema::new_default(KnnIndex::Hnsw)
5501            .create_index(Some("tag1"), StringInvertedIndexConfig {}.into())
5502            .unwrap()
5503            .create_index(Some("tag2"), StringInvertedIndexConfig {}.into())
5504            .unwrap()
5505            .create_index(Some("tag3"), StringInvertedIndexConfig {}.into())
5506            .unwrap()
5507            .create_index(Some("count"), IntInvertedIndexConfig {}.into())
5508            .unwrap()
5509            .delete_index(Some("tag2"), StringInvertedIndexConfig {}.into())
5510            .unwrap()
5511            .create_index(Some("score"), FloatInvertedIndexConfig {}.into())
5512            .unwrap();
5513
5514        // Verify tag1 is enabled
5515        assert!(
5516            schema
5517                .keys
5518                .get("tag1")
5519                .unwrap()
5520                .string
5521                .as_ref()
5522                .unwrap()
5523                .string_inverted_index
5524                .as_ref()
5525                .unwrap()
5526                .enabled
5527        );
5528
5529        // Verify tag2 is disabled
5530        assert!(
5531            !schema
5532                .keys
5533                .get("tag2")
5534                .unwrap()
5535                .string
5536                .as_ref()
5537                .unwrap()
5538                .string_inverted_index
5539                .as_ref()
5540                .unwrap()
5541                .enabled
5542        );
5543
5544        // Verify tag3 is enabled
5545        assert!(
5546            schema
5547                .keys
5548                .get("tag3")
5549                .unwrap()
5550                .string
5551                .as_ref()
5552                .unwrap()
5553                .string_inverted_index
5554                .as_ref()
5555                .unwrap()
5556                .enabled
5557        );
5558
5559        // Verify count is enabled
5560        assert!(
5561            schema
5562                .keys
5563                .get("count")
5564                .unwrap()
5565                .int
5566                .as_ref()
5567                .unwrap()
5568                .int_inverted_index
5569                .as_ref()
5570                .unwrap()
5571                .enabled
5572        );
5573
5574        // Verify score is enabled
5575        assert!(
5576            schema
5577                .keys
5578                .get("score")
5579                .unwrap()
5580                .float
5581                .as_ref()
5582                .unwrap()
5583                .float_inverted_index
5584                .as_ref()
5585                .unwrap()
5586                .enabled
5587        );
5588    }
5589
5590    #[test]
5591    fn test_schema_default_matches_python() {
5592        // Test that Schema::default() matches Python's Schema() behavior exactly
5593        let schema = Schema::default();
5594
5595        // ============================================================================
5596        // VERIFY DEFAULTS (match Python's _initialize_defaults)
5597        // ============================================================================
5598
5599        // String defaults: FTS disabled, string inverted enabled
5600        assert!(schema.defaults.string.is_some());
5601        let string = schema.defaults.string.as_ref().unwrap();
5602        assert!(!string.fts_index.as_ref().unwrap().enabled);
5603        assert!(string.string_inverted_index.as_ref().unwrap().enabled);
5604
5605        // Float list defaults: vector index disabled
5606        assert!(schema.defaults.float_list.is_some());
5607        let float_list = schema.defaults.float_list.as_ref().unwrap();
5608        assert!(!float_list.vector_index.as_ref().unwrap().enabled);
5609        let vector_config = &float_list.vector_index.as_ref().unwrap().config;
5610        assert_eq!(vector_config.space, None); // Python leaves as None
5611        assert_eq!(vector_config.hnsw, None); // Python doesn't specify
5612        assert_eq!(vector_config.spann, None); // Python doesn't specify
5613        assert_eq!(vector_config.source_key, None);
5614
5615        // Sparse vector defaults: disabled
5616        assert!(schema.defaults.sparse_vector.is_some());
5617        let sparse = schema.defaults.sparse_vector.as_ref().unwrap();
5618        assert!(!sparse.sparse_vector_index.as_ref().unwrap().enabled);
5619
5620        // Int defaults: inverted index enabled
5621        assert!(schema.defaults.int.is_some());
5622        assert!(
5623            schema
5624                .defaults
5625                .int
5626                .as_ref()
5627                .unwrap()
5628                .int_inverted_index
5629                .as_ref()
5630                .unwrap()
5631                .enabled
5632        );
5633
5634        // Float defaults: inverted index enabled
5635        assert!(schema.defaults.float.is_some());
5636        assert!(
5637            schema
5638                .defaults
5639                .float
5640                .as_ref()
5641                .unwrap()
5642                .float_inverted_index
5643                .as_ref()
5644                .unwrap()
5645                .enabled
5646        );
5647
5648        // Bool defaults: inverted index enabled
5649        assert!(schema.defaults.boolean.is_some());
5650        assert!(
5651            schema
5652                .defaults
5653                .boolean
5654                .as_ref()
5655                .unwrap()
5656                .bool_inverted_index
5657                .as_ref()
5658                .unwrap()
5659                .enabled
5660        );
5661
5662        // ============================================================================
5663        // VERIFY SPECIAL KEYS (match Python's _initialize_keys)
5664        // ============================================================================
5665
5666        // #document: FTS enabled, string inverted disabled
5667        assert!(schema.keys.contains_key(DOCUMENT_KEY));
5668        let doc = schema.keys.get(DOCUMENT_KEY).unwrap();
5669        assert!(doc.string.is_some());
5670        assert!(
5671            doc.string
5672                .as_ref()
5673                .unwrap()
5674                .fts_index
5675                .as_ref()
5676                .unwrap()
5677                .enabled
5678        );
5679        assert!(
5680            !doc.string
5681                .as_ref()
5682                .unwrap()
5683                .string_inverted_index
5684                .as_ref()
5685                .unwrap()
5686                .enabled
5687        );
5688
5689        // #embedding: vector index enabled with source_key=#document
5690        assert!(schema.keys.contains_key(EMBEDDING_KEY));
5691        let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
5692        assert!(embedding.float_list.is_some());
5693        let vec_idx = embedding
5694            .float_list
5695            .as_ref()
5696            .unwrap()
5697            .vector_index
5698            .as_ref()
5699            .unwrap();
5700        assert!(vec_idx.enabled);
5701        assert_eq!(vec_idx.config.source_key, Some(DOCUMENT_KEY.to_string()));
5702        assert_eq!(vec_idx.config.space, None); // Python leaves as None
5703        assert_eq!(vec_idx.config.hnsw, None); // Python doesn't specify
5704        assert_eq!(vec_idx.config.spann, None); // Python doesn't specify
5705
5706        // Verify only these two special keys exist
5707        assert_eq!(schema.keys.len(), 2);
5708    }
5709
5710    #[test]
5711    fn test_schema_default_works_with_builder() {
5712        // Test that Schema::default() can be used with builder pattern
5713        let schema = Schema::default()
5714            .create_index(Some("category"), StringInvertedIndexConfig {}.into())
5715            .expect("should succeed");
5716
5717        // Verify the new index was added
5718        assert!(schema.keys.contains_key("category"));
5719        assert!(schema.keys.contains_key(DOCUMENT_KEY));
5720        assert!(schema.keys.contains_key(EMBEDDING_KEY));
5721        assert_eq!(schema.keys.len(), 3);
5722    }
5723
5724    #[cfg(feature = "testing")]
5725    mod proptests {
5726        use super::*;
5727        use crate::strategies::{
5728            embedding_function_strategy, internal_collection_configuration_strategy,
5729            internal_hnsw_configuration_strategy, internal_spann_configuration_strategy,
5730            knn_index_strategy, space_strategy, TEST_NAME_PATTERN,
5731        };
5732        use crate::{
5733            HnswIndexConfig, SpannIndexConfig, VectorIndexConfig, DOCUMENT_KEY, EMBEDDING_KEY,
5734        };
5735        use proptest::prelude::*;
5736        use proptest::strategy::BoxedStrategy;
5737        use proptest::string::string_regex;
5738        use serde_json::json;
5739
5740        fn default_embedding_function_strategy(
5741        ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
5742            proptest::option::of(prop_oneof![
5743                Just(EmbeddingFunctionConfiguration::Unknown),
5744                Just(EmbeddingFunctionConfiguration::Known(
5745                    EmbeddingFunctionNewConfiguration {
5746                        name: "default".to_string(),
5747                        config: json!({ "alpha": 1 }),
5748                    }
5749                )),
5750            ])
5751        }
5752
5753        fn sparse_embedding_function_strategy(
5754        ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
5755            let known_strategy = string_regex(TEST_NAME_PATTERN).unwrap().prop_map(|name| {
5756                EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
5757                    name,
5758                    config: json!({ "alpha": 1 }),
5759                })
5760            });
5761
5762            proptest::option::of(prop_oneof![
5763                Just(EmbeddingFunctionConfiguration::Unknown),
5764                known_strategy,
5765            ])
5766        }
5767
5768        fn non_default_internal_collection_configuration_strategy(
5769        ) -> impl Strategy<Value = InternalCollectionConfiguration> {
5770            internal_collection_configuration_strategy()
5771                .prop_filter("non-default configuration", |config| !config.is_default())
5772        }
5773
5774        fn partial_hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
5775            (
5776                proptest::option::of(1usize..=512),
5777                proptest::option::of(1usize..=128),
5778                proptest::option::of(1usize..=512),
5779                proptest::option::of(1usize..=64),
5780                proptest::option::of(2usize..=4096),
5781                proptest::option::of(2usize..=4096),
5782                proptest::option::of(prop_oneof![
5783                    Just(0.5f64),
5784                    Just(1.0f64),
5785                    Just(1.5f64),
5786                    Just(2.0f64)
5787                ]),
5788            )
5789                .prop_map(
5790                    |(
5791                        ef_construction,
5792                        max_neighbors,
5793                        ef_search,
5794                        num_threads,
5795                        batch_size,
5796                        sync_threshold,
5797                        resize_factor,
5798                    )| HnswIndexConfig {
5799                        ef_construction,
5800                        max_neighbors,
5801                        ef_search,
5802                        num_threads,
5803                        batch_size,
5804                        sync_threshold,
5805                        resize_factor,
5806                    },
5807                )
5808        }
5809
5810        fn partial_spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
5811            let epsilon_strategy = prop_oneof![Just(5.0f32), Just(7.5f32), Just(10.0f32)];
5812            (
5813                (
5814                    proptest::option::of(1u32..=128),               // search_nprobe
5815                    proptest::option::of(Just(1.0f32)), // search_rng_factor (must be 1.0)
5816                    proptest::option::of(epsilon_strategy.clone()), // search_rng_epsilon
5817                    proptest::option::of(1u32..=8),     // nreplica_count
5818                    proptest::option::of(Just(1.0f32)), // write_rng_factor (must be 1.0)
5819                    proptest::option::of(epsilon_strategy), // write_rng_epsilon
5820                    proptest::option::of(50u32..=200),  // split_threshold
5821                    proptest::option::of(1usize..=1000), // num_samples_kmeans
5822                ),
5823                (
5824                    proptest::option::of(Just(100.0f32)), // initial_lambda (must be 100.0)
5825                    proptest::option::of(1u32..=64),      // reassign_neighbor_count
5826                    proptest::option::of(25u32..=100),    // merge_threshold
5827                    proptest::option::of(1u32..=8),       // num_centers_to_merge_to
5828                    proptest::option::of(1u32..=64),      // write_nprobe
5829                    proptest::option::of(1usize..=200),   // ef_construction
5830                    proptest::option::of(1usize..=200),   // ef_search
5831                    proptest::option::of(1usize..=64),    // max_neighbors
5832                ),
5833            )
5834                .prop_map(
5835                    |(
5836                        (
5837                            search_nprobe,
5838                            search_rng_factor,
5839                            search_rng_epsilon,
5840                            nreplica_count,
5841                            write_rng_factor,
5842                            write_rng_epsilon,
5843                            split_threshold,
5844                            num_samples_kmeans,
5845                        ),
5846                        (
5847                            initial_lambda,
5848                            reassign_neighbor_count,
5849                            merge_threshold,
5850                            num_centers_to_merge_to,
5851                            write_nprobe,
5852                            ef_construction,
5853                            ef_search,
5854                            max_neighbors,
5855                        ),
5856                    )| SpannIndexConfig {
5857                        search_nprobe,
5858                        search_rng_factor,
5859                        search_rng_epsilon,
5860                        nreplica_count,
5861                        write_rng_factor,
5862                        write_rng_epsilon,
5863                        split_threshold,
5864                        num_samples_kmeans,
5865                        initial_lambda,
5866                        reassign_neighbor_count,
5867                        merge_threshold,
5868                        num_centers_to_merge_to,
5869                        write_nprobe,
5870                        ef_construction,
5871                        ef_search,
5872                        max_neighbors,
5873                    },
5874                )
5875        }
5876
5877        proptest! {
5878            #[test]
5879            fn merge_hnsw_configs_preserves_user_overrides(
5880                base in partial_hnsw_index_config_strategy(),
5881                user in partial_hnsw_index_config_strategy(),
5882            ) {
5883                let merged = Schema::merge_hnsw_configs(Some(&base), Some(&user))
5884                    .expect("merge should return Some when both are Some");
5885
5886                // Property: user values always take precedence when Some
5887                if user.ef_construction.is_some() {
5888                    prop_assert_eq!(merged.ef_construction, user.ef_construction);
5889                }
5890                if user.max_neighbors.is_some() {
5891                    prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
5892                }
5893                if user.ef_search.is_some() {
5894                    prop_assert_eq!(merged.ef_search, user.ef_search);
5895                }
5896                if user.num_threads.is_some() {
5897                    prop_assert_eq!(merged.num_threads, user.num_threads);
5898                }
5899                if user.batch_size.is_some() {
5900                    prop_assert_eq!(merged.batch_size, user.batch_size);
5901                }
5902                if user.sync_threshold.is_some() {
5903                    prop_assert_eq!(merged.sync_threshold, user.sync_threshold);
5904                }
5905                if user.resize_factor.is_some() {
5906                    prop_assert_eq!(merged.resize_factor, user.resize_factor);
5907                }
5908            }
5909
5910            #[test]
5911            fn merge_hnsw_configs_falls_back_to_base_when_user_is_none(
5912                base in partial_hnsw_index_config_strategy(),
5913            ) {
5914                let merged = Schema::merge_hnsw_configs(Some(&base), None)
5915                    .expect("merge should return Some when base is Some");
5916
5917                // Property: when user is None, base values are preserved
5918                prop_assert_eq!(merged, base);
5919            }
5920
5921            #[test]
5922            fn merge_hnsw_configs_returns_user_when_base_is_none(
5923                user in partial_hnsw_index_config_strategy(),
5924            ) {
5925                let merged = Schema::merge_hnsw_configs(None, Some(&user))
5926                    .expect("merge should return Some when user is Some");
5927
5928                // Property: when base is None, user values are preserved
5929                prop_assert_eq!(merged, user);
5930            }
5931
5932            #[test]
5933            fn merge_spann_configs_preserves_user_overrides(
5934                base in partial_spann_index_config_strategy(),
5935                user in partial_spann_index_config_strategy(),
5936            ) {
5937                let merged = Schema::merge_spann_configs(Some(&base), Some(&user))
5938                    .expect("merge should return Some when both are Some");
5939
5940                // Property: user values always take precedence when Some
5941                if user.search_nprobe.is_some() {
5942                    prop_assert_eq!(merged.search_nprobe, user.search_nprobe);
5943                }
5944                if user.search_rng_epsilon.is_some() {
5945                    prop_assert_eq!(merged.search_rng_epsilon, user.search_rng_epsilon);
5946                }
5947                if user.split_threshold.is_some() {
5948                    prop_assert_eq!(merged.split_threshold, user.split_threshold);
5949                }
5950                if user.ef_construction.is_some() {
5951                    prop_assert_eq!(merged.ef_construction, user.ef_construction);
5952                }
5953                if user.ef_search.is_some() {
5954                    prop_assert_eq!(merged.ef_search, user.ef_search);
5955                }
5956                if user.max_neighbors.is_some() {
5957                    prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
5958                }
5959            }
5960
5961            #[test]
5962            fn merge_spann_configs_falls_back_to_base_when_user_is_none(
5963                base in partial_spann_index_config_strategy(),
5964            ) {
5965                let merged = Schema::merge_spann_configs(Some(&base), None)
5966                    .expect("merge should return Some when base is Some");
5967
5968                // Property: when user is None, base values are preserved
5969                prop_assert_eq!(merged, base);
5970            }
5971
5972            #[test]
5973            fn merge_vector_index_config_preserves_user_overrides(
5974                base in vector_index_config_strategy(),
5975                user in vector_index_config_strategy(),
5976                knn in knn_index_strategy(),
5977            ) {
5978                let merged = Schema::merge_vector_index_config(&base, &user, knn);
5979
5980                // Property: user values take precedence for top-level fields
5981                if user.space.is_some() {
5982                    prop_assert_eq!(merged.space, user.space);
5983                }
5984                if user.embedding_function.is_some() {
5985                    prop_assert_eq!(merged.embedding_function, user.embedding_function);
5986                }
5987                if user.source_key.is_some() {
5988                    prop_assert_eq!(merged.source_key, user.source_key);
5989                }
5990
5991                // Property: nested configs are merged according to merge rules
5992                match knn {
5993                    KnnIndex::Hnsw => {
5994                        if let (Some(_base_hnsw), Some(user_hnsw)) = (&base.hnsw, &user.hnsw) {
5995                            let merged_hnsw = merged.hnsw.as_ref().expect("hnsw should be Some");
5996                            if user_hnsw.ef_construction.is_some() {
5997                                prop_assert_eq!(merged_hnsw.ef_construction, user_hnsw.ef_construction);
5998                            }
5999                        }
6000                    }
6001                    KnnIndex::Spann => {
6002                        if let (Some(_base_spann), Some(user_spann)) = (&base.spann, &user.spann) {
6003                            let merged_spann = merged.spann.as_ref().expect("spann should be Some");
6004                            if user_spann.search_nprobe.is_some() {
6005                                prop_assert_eq!(merged_spann.search_nprobe, user_spann.search_nprobe);
6006                            }
6007                        }
6008                    }
6009                }
6010            }
6011        }
6012
6013        fn expected_vector_index_config(
6014            config: &InternalCollectionConfiguration,
6015        ) -> VectorIndexConfig {
6016            match &config.vector_index {
6017                VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
6018                    space: Some(hnsw_config.space.clone()),
6019                    embedding_function: config.embedding_function.clone(),
6020                    source_key: None,
6021                    hnsw: Some(HnswIndexConfig {
6022                        ef_construction: Some(hnsw_config.ef_construction),
6023                        max_neighbors: Some(hnsw_config.max_neighbors),
6024                        ef_search: Some(hnsw_config.ef_search),
6025                        num_threads: Some(hnsw_config.num_threads),
6026                        batch_size: Some(hnsw_config.batch_size),
6027                        sync_threshold: Some(hnsw_config.sync_threshold),
6028                        resize_factor: Some(hnsw_config.resize_factor),
6029                    }),
6030                    spann: None,
6031                },
6032                VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
6033                    space: Some(spann_config.space.clone()),
6034                    embedding_function: config.embedding_function.clone(),
6035                    source_key: None,
6036                    hnsw: None,
6037                    spann: Some(SpannIndexConfig {
6038                        search_nprobe: Some(spann_config.search_nprobe),
6039                        search_rng_factor: Some(spann_config.search_rng_factor),
6040                        search_rng_epsilon: Some(spann_config.search_rng_epsilon),
6041                        nreplica_count: Some(spann_config.nreplica_count),
6042                        write_rng_factor: Some(spann_config.write_rng_factor),
6043                        write_rng_epsilon: Some(spann_config.write_rng_epsilon),
6044                        split_threshold: Some(spann_config.split_threshold),
6045                        num_samples_kmeans: Some(spann_config.num_samples_kmeans),
6046                        initial_lambda: Some(spann_config.initial_lambda),
6047                        reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
6048                        merge_threshold: Some(spann_config.merge_threshold),
6049                        num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
6050                        write_nprobe: Some(spann_config.write_nprobe),
6051                        ef_construction: Some(spann_config.ef_construction),
6052                        ef_search: Some(spann_config.ef_search),
6053                        max_neighbors: Some(spann_config.max_neighbors),
6054                    }),
6055                },
6056            }
6057        }
6058
6059        fn non_special_key_strategy() -> BoxedStrategy<String> {
6060            string_regex(TEST_NAME_PATTERN)
6061                .unwrap()
6062                .prop_filter("exclude special keys", |key| {
6063                    key != DOCUMENT_KEY && key != EMBEDDING_KEY
6064                })
6065                .boxed()
6066        }
6067
6068        fn source_key_strategy() -> BoxedStrategy<Option<String>> {
6069            proptest::option::of(prop_oneof![
6070                Just(DOCUMENT_KEY.to_string()),
6071                string_regex(TEST_NAME_PATTERN).unwrap(),
6072            ])
6073            .boxed()
6074        }
6075
6076        fn fts_index_type_strategy() -> impl Strategy<Value = FtsIndexType> {
6077            any::<bool>().prop_map(|enabled| FtsIndexType {
6078                enabled,
6079                config: FtsIndexConfig {},
6080            })
6081        }
6082
6083        fn string_inverted_index_type_strategy() -> impl Strategy<Value = StringInvertedIndexType> {
6084            any::<bool>().prop_map(|enabled| StringInvertedIndexType {
6085                enabled,
6086                config: StringInvertedIndexConfig {},
6087            })
6088        }
6089
6090        fn string_value_type_strategy() -> BoxedStrategy<Option<StringValueType>> {
6091            proptest::option::of(
6092                (
6093                    proptest::option::of(string_inverted_index_type_strategy()),
6094                    proptest::option::of(fts_index_type_strategy()),
6095                )
6096                    .prop_map(|(string_inverted_index, fts_index)| {
6097                        StringValueType {
6098                            string_inverted_index,
6099                            fts_index,
6100                        }
6101                    }),
6102            )
6103            .boxed()
6104        }
6105
6106        fn float_inverted_index_type_strategy() -> impl Strategy<Value = FloatInvertedIndexType> {
6107            any::<bool>().prop_map(|enabled| FloatInvertedIndexType {
6108                enabled,
6109                config: FloatInvertedIndexConfig {},
6110            })
6111        }
6112
6113        fn float_value_type_strategy() -> BoxedStrategy<Option<FloatValueType>> {
6114            proptest::option::of(
6115                proptest::option::of(float_inverted_index_type_strategy()).prop_map(
6116                    |float_inverted_index| FloatValueType {
6117                        float_inverted_index,
6118                    },
6119                ),
6120            )
6121            .boxed()
6122        }
6123
6124        fn int_inverted_index_type_strategy() -> impl Strategy<Value = IntInvertedIndexType> {
6125            any::<bool>().prop_map(|enabled| IntInvertedIndexType {
6126                enabled,
6127                config: IntInvertedIndexConfig {},
6128            })
6129        }
6130
6131        fn int_value_type_strategy() -> BoxedStrategy<Option<IntValueType>> {
6132            proptest::option::of(
6133                proptest::option::of(int_inverted_index_type_strategy())
6134                    .prop_map(|int_inverted_index| IntValueType { int_inverted_index }),
6135            )
6136            .boxed()
6137        }
6138
6139        fn bool_inverted_index_type_strategy() -> impl Strategy<Value = BoolInvertedIndexType> {
6140            any::<bool>().prop_map(|enabled| BoolInvertedIndexType {
6141                enabled,
6142                config: BoolInvertedIndexConfig {},
6143            })
6144        }
6145
6146        fn bool_value_type_strategy() -> BoxedStrategy<Option<BoolValueType>> {
6147            proptest::option::of(
6148                proptest::option::of(bool_inverted_index_type_strategy()).prop_map(
6149                    |bool_inverted_index| BoolValueType {
6150                        bool_inverted_index,
6151                    },
6152                ),
6153            )
6154            .boxed()
6155        }
6156
6157        fn sparse_vector_index_config_strategy() -> impl Strategy<Value = SparseVectorIndexConfig> {
6158            (
6159                sparse_embedding_function_strategy(),
6160                source_key_strategy(),
6161                proptest::option::of(any::<bool>()),
6162            )
6163                .prop_map(|(embedding_function, source_key, bm25)| {
6164                    SparseVectorIndexConfig {
6165                        embedding_function,
6166                        source_key,
6167                        bm25,
6168                    }
6169                })
6170        }
6171
6172        fn sparse_vector_value_type_strategy() -> BoxedStrategy<Option<SparseVectorValueType>> {
6173            proptest::option::of(
6174                (
6175                    any::<bool>(),
6176                    proptest::option::of(sparse_vector_index_config_strategy()),
6177                )
6178                    .prop_map(|(enabled, config)| SparseVectorValueType {
6179                        sparse_vector_index: config.map(|cfg| SparseVectorIndexType {
6180                            enabled,
6181                            config: cfg,
6182                        }),
6183                    }),
6184            )
6185            .boxed()
6186        }
6187
6188        fn hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
6189            internal_hnsw_configuration_strategy().prop_map(|config| HnswIndexConfig {
6190                ef_construction: Some(config.ef_construction),
6191                max_neighbors: Some(config.max_neighbors),
6192                ef_search: Some(config.ef_search),
6193                num_threads: Some(config.num_threads),
6194                batch_size: Some(config.batch_size),
6195                sync_threshold: Some(config.sync_threshold),
6196                resize_factor: Some(config.resize_factor),
6197            })
6198        }
6199
6200        fn spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
6201            internal_spann_configuration_strategy().prop_map(|config| SpannIndexConfig {
6202                search_nprobe: Some(config.search_nprobe),
6203                search_rng_factor: Some(config.search_rng_factor),
6204                search_rng_epsilon: Some(config.search_rng_epsilon),
6205                nreplica_count: Some(config.nreplica_count),
6206                write_rng_factor: Some(config.write_rng_factor),
6207                write_rng_epsilon: Some(config.write_rng_epsilon),
6208                split_threshold: Some(config.split_threshold),
6209                num_samples_kmeans: Some(config.num_samples_kmeans),
6210                initial_lambda: Some(config.initial_lambda),
6211                reassign_neighbor_count: Some(config.reassign_neighbor_count),
6212                merge_threshold: Some(config.merge_threshold),
6213                num_centers_to_merge_to: Some(config.num_centers_to_merge_to),
6214                write_nprobe: Some(config.write_nprobe),
6215                ef_construction: Some(config.ef_construction),
6216                ef_search: Some(config.ef_search),
6217                max_neighbors: Some(config.max_neighbors),
6218            })
6219        }
6220
6221        fn vector_index_config_strategy() -> impl Strategy<Value = VectorIndexConfig> {
6222            (
6223                proptest::option::of(space_strategy()),
6224                embedding_function_strategy(),
6225                source_key_strategy(),
6226                proptest::option::of(hnsw_index_config_strategy()),
6227                proptest::option::of(spann_index_config_strategy()),
6228            )
6229                .prop_map(|(space, embedding_function, source_key, hnsw, spann)| {
6230                    VectorIndexConfig {
6231                        space,
6232                        embedding_function,
6233                        source_key,
6234                        hnsw,
6235                        spann,
6236                    }
6237                })
6238        }
6239
6240        fn vector_index_type_strategy() -> impl Strategy<Value = VectorIndexType> {
6241            (any::<bool>(), vector_index_config_strategy())
6242                .prop_map(|(enabled, config)| VectorIndexType { enabled, config })
6243        }
6244
6245        fn float_list_value_type_strategy() -> BoxedStrategy<Option<FloatListValueType>> {
6246            proptest::option::of(
6247                proptest::option::of(vector_index_type_strategy())
6248                    .prop_map(|vector_index| FloatListValueType { vector_index }),
6249            )
6250            .boxed()
6251        }
6252
6253        fn value_types_strategy() -> BoxedStrategy<ValueTypes> {
6254            (
6255                string_value_type_strategy(),
6256                float_list_value_type_strategy(),
6257                sparse_vector_value_type_strategy(),
6258                int_value_type_strategy(),
6259                float_value_type_strategy(),
6260                bool_value_type_strategy(),
6261            )
6262                .prop_map(
6263                    |(string, float_list, sparse_vector, int, float, boolean)| ValueTypes {
6264                        string,
6265                        float_list,
6266                        sparse_vector,
6267                        int,
6268                        float,
6269                        boolean,
6270                    },
6271                )
6272                .boxed()
6273        }
6274
6275        fn schema_strategy() -> BoxedStrategy<Schema> {
6276            (
6277                value_types_strategy(),
6278                proptest::collection::hash_map(
6279                    non_special_key_strategy(),
6280                    value_types_strategy(),
6281                    0..=3,
6282                ),
6283                proptest::option::of(value_types_strategy()),
6284                proptest::option::of(value_types_strategy()),
6285            )
6286                .prop_map(
6287                    |(defaults, mut extra_keys, document_override, embedding_override)| {
6288                        if let Some(doc) = document_override {
6289                            extra_keys.insert(DOCUMENT_KEY.to_string(), doc);
6290                        }
6291                        if let Some(embed) = embedding_override {
6292                            extra_keys.insert(EMBEDDING_KEY.to_string(), embed);
6293                        }
6294                        Schema {
6295                            defaults,
6296                            keys: extra_keys,
6297                            cmek: None,
6298                        }
6299                    },
6300                )
6301                .boxed()
6302        }
6303
6304        fn force_non_default_schema(mut schema: Schema) -> Schema {
6305            if schema.is_default() {
6306                if let Some(string_value) = schema
6307                    .defaults
6308                    .string
6309                    .as_mut()
6310                    .and_then(|string_value| string_value.string_inverted_index.as_mut())
6311                {
6312                    string_value.enabled = !string_value.enabled;
6313                } else {
6314                    schema.defaults.string = Some(StringValueType {
6315                        string_inverted_index: Some(StringInvertedIndexType {
6316                            enabled: false,
6317                            config: StringInvertedIndexConfig {},
6318                        }),
6319                        fts_index: None,
6320                    });
6321                }
6322            }
6323            schema
6324        }
6325
6326        fn non_default_schema_strategy() -> BoxedStrategy<Schema> {
6327            schema_strategy().prop_map(force_non_default_schema).boxed()
6328        }
6329
6330        fn extract_vector_configs(schema: &Schema) -> (VectorIndexConfig, VectorIndexConfig) {
6331            let defaults = schema
6332                .defaults
6333                .float_list
6334                .as_ref()
6335                .and_then(|fl| fl.vector_index.as_ref())
6336                .map(|vi| vi.config.clone())
6337                .expect("defaults vector index missing");
6338
6339            let embedding = schema
6340                .keys
6341                .get(EMBEDDING_KEY)
6342                .and_then(|value_types| value_types.float_list.as_ref())
6343                .and_then(|fl| fl.vector_index.as_ref())
6344                .map(|vi| vi.config.clone())
6345                .expect("#embedding vector index missing");
6346
6347            (defaults, embedding)
6348        }
6349
6350        proptest! {
6351            #[test]
6352            fn reconcile_schema_and_config_matches_convert_for_config_only(
6353                config in internal_collection_configuration_strategy(),
6354                knn in knn_index_strategy(),
6355            ) {
6356                let result = Schema::reconcile_schema_and_config(None, Some(&config), knn)
6357                    .expect("reconciliation should succeed");
6358
6359                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6360                let expected_config = expected_vector_index_config(&config);
6361
6362                prop_assert_eq!(defaults_vi, expected_config.clone());
6363
6364                let mut expected_embedding_config = expected_config;
6365                expected_embedding_config.source_key = Some(DOCUMENT_KEY.to_string());
6366                prop_assert_eq!(embedding_vi, expected_embedding_config);
6367
6368                prop_assert_eq!(result.keys.len(), 2);
6369            }
6370        }
6371
6372        proptest! {
6373            #[test]
6374            fn reconcile_schema_and_config_errors_when_both_non_default(
6375                config in non_default_internal_collection_configuration_strategy(),
6376                knn in knn_index_strategy(),
6377            ) {
6378                let schema = Schema::try_from(&config)
6379                    .expect("conversion should succeed");
6380                prop_assume!(!schema.is_default());
6381
6382                let result = Schema::reconcile_schema_and_config(Some(&schema), Some(&config), knn);
6383
6384                prop_assert!(matches!(result, Err(SchemaError::ConfigAndSchemaConflict)));
6385            }
6386        }
6387
6388        proptest! {
6389            #[test]
6390            fn reconcile_schema_and_config_matches_schema_only_path(
6391                schema in schema_strategy(),
6392                knn in knn_index_strategy(),
6393            ) {
6394                let result = Schema::reconcile_schema_and_config(Some(&schema), None, knn)
6395                    .expect("reconciliation should succeed");
6396
6397                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6398
6399                // Property: schema defaults.float_list vector_index config should be merged into defaults
6400                if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
6401                    if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
6402                        // Property: schema values take precedence over defaults
6403                        if let Some(schema_space) = &schema_vi.config.space {
6404                            prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
6405                        }
6406                        if let Some(schema_ef) = &schema_vi.config.embedding_function {
6407                            prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
6408                        }
6409                        // Test nested config merging properties
6410                        match knn {
6411                            KnnIndex::Hnsw => {
6412                                if let Some(schema_hnsw) = &schema_vi.config.hnsw {
6413                                    if let Some(merged_hnsw) = &defaults_vi.hnsw {
6414                                        if let Some(schema_ef_construction) = schema_hnsw.ef_construction {
6415                                            prop_assert_eq!(merged_hnsw.ef_construction, Some(schema_ef_construction));
6416                                        }
6417                                    }
6418                                }
6419                            }
6420                            KnnIndex::Spann => {
6421                                if let Some(schema_spann) = &schema_vi.config.spann {
6422                                    if let Some(merged_spann) = &defaults_vi.spann {
6423                                        if let Some(schema_search_nprobe) = schema_spann.search_nprobe {
6424                                            prop_assert_eq!(merged_spann.search_nprobe, Some(schema_search_nprobe));
6425                                        }
6426                                    }
6427                                }
6428                            }
6429                        }
6430                    }
6431                }
6432
6433                // Property: schema #embedding float_list vector_index config should be merged into embedding
6434                if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
6435                    if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
6436                        if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
6437                            if let Some(schema_space) = &embedding_vi_type.config.space {
6438                                prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
6439                            }
6440                        }
6441                    }
6442                }
6443            }
6444        }
6445
6446        proptest! {
6447            #[test]
6448            fn reconcile_schema_and_config_with_default_schema_and_default_config_applies_embedding_function(
6449                embedding_function in default_embedding_function_strategy(),
6450                knn in knn_index_strategy(),
6451            ) {
6452                let schema = Schema::new_default(knn);
6453                let mut config = match knn {
6454                    KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
6455                    KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
6456                };
6457                config.embedding_function = embedding_function.clone();
6458
6459                let result = Schema::reconcile_schema_and_config(
6460                    Some(&schema),
6461                    Some(&config),
6462                    knn,
6463                )
6464                .expect("reconciliation should succeed");
6465
6466                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6467
6468                // Property: embedding function from config should be applied to both defaults and embedding
6469                if let Some(ef) = embedding_function {
6470                    prop_assert_eq!(defaults_vi.embedding_function, Some(ef.clone()));
6471                    prop_assert_eq!(embedding_vi.embedding_function, Some(ef));
6472                } else {
6473                    // Property: when embedding function is None, it should remain None
6474                    prop_assert_eq!(defaults_vi.embedding_function, None);
6475                    prop_assert_eq!(embedding_vi.embedding_function, None);
6476                }
6477            }
6478        }
6479
6480        proptest! {
6481            #[test]
6482            fn reconcile_schema_and_config_with_default_config_keeps_non_default_schema(
6483                schema in non_default_schema_strategy(),
6484                knn in knn_index_strategy(),
6485            ) {
6486                let default_config = match knn {
6487                    KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
6488                    KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
6489                };
6490
6491                let result = Schema::reconcile_schema_and_config(
6492                    Some(&schema),
6493                    Some(&default_config),
6494                    knn,
6495                )
6496                .expect("reconciliation should succeed");
6497
6498                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6499
6500                // Property: when config is default, schema values should be preserved
6501                // Test that schema defaults.float_list vector_index config is applied
6502                if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
6503                    if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
6504                        if let Some(schema_space) = &schema_vi.config.space {
6505                            prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
6506                        }
6507                        if let Some(schema_ef) = &schema_vi.config.embedding_function {
6508                            prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
6509                        }
6510                    }
6511                }
6512
6513                // Property: schema #embedding float_list vector_index config should be applied
6514                if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
6515                    if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
6516                        if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
6517                            if let Some(schema_space) = &embedding_vi_type.config.space {
6518                                prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
6519                            }
6520                        }
6521                    }
6522                }
6523            }
6524        }
6525    }
6526}