Skip to main content

chroma_types/
collection_schema.rs

1use chroma_error::{ChromaError, ErrorCodes};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::sync::{Arc, LazyLock};
6use thiserror::Error;
7use validator::Validate;
8
9use crate::chroma_proto;
10use crate::collection_configuration::{
11    EmbeddingFunctionConfiguration, InternalCollectionConfiguration,
12    UpdateVectorIndexConfiguration, VectorIndexConfiguration,
13};
14use crate::hnsw_configuration::Space;
15use crate::metadata::{MetadataComparison, MetadataValueType, Where};
16use crate::operator::QueryVector;
17use crate::{
18    default_batch_size, default_center_drift_threshold, default_construction_ef,
19    default_construction_ef_spann, default_initial_lambda, default_m, default_m_spann,
20    default_merge_threshold, default_nreplica_count, default_num_centers_to_merge_to,
21    default_num_samples_kmeans, default_num_threads, default_reassign_neighbor_count,
22    default_resize_factor, default_search_ef, default_search_ef_spann, default_search_nprobe,
23    default_search_rng_epsilon, default_search_rng_factor, default_space, default_split_threshold,
24    default_sync_threshold, default_write_nprobe, default_write_rng_epsilon,
25    default_write_rng_factor, ConversionError, HnswParametersFromSegmentError,
26    InternalHnswConfiguration, InternalSpannConfiguration, InternalUpdateCollectionConfiguration,
27    KnnIndex, Segment, UpdateCollectionConfiguration, CHROMA_KEY,
28};
29
30impl ChromaError for SchemaError {
31    fn code(&self) -> ErrorCodes {
32        match self {
33            // Internal errors (500)
34            // These indicate system/internal issues during schema operations
35            SchemaError::MissingIndexConfiguration { .. } => ErrorCodes::Internal,
36            SchemaError::InvalidSchema { .. } => ErrorCodes::Internal,
37            // DefaultsMismatch and ConfigurationConflict only occur during schema merge()
38            // which happens internally during compaction, not from user input
39            SchemaError::DefaultsMismatch => ErrorCodes::Internal,
40            SchemaError::ConfigurationConflict { .. } => ErrorCodes::Internal,
41            SchemaError::InvalidConfigurationUpdate { .. } => ErrorCodes::Internal,
42
43            // User/External errors (400)
44            // These indicate user-provided invalid input
45            SchemaError::InvalidUserInput { .. } => ErrorCodes::InvalidArgument,
46            SchemaError::ConfigAndSchemaConflict => ErrorCodes::InvalidArgument,
47            SchemaError::InvalidHnswConfig(_) => ErrorCodes::InvalidArgument,
48            SchemaError::InvalidSpannConfig(_) => ErrorCodes::InvalidArgument,
49            SchemaError::Builder(e) => e.code(),
50        }
51    }
52}
53
/// Errors produced while validating, reconciling, merging or updating a schema.
///
/// The `ChromaError` implementation for this type maps every variant to an
/// `ErrorCodes` value.
#[derive(Debug, Error)]
pub enum SchemaError {
    /// No index configuration exists for the given metadata key / value type pair.
    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
    MissingIndexConfiguration { key: String, value_type: String },
    /// Schema reconciliation failed; `reason` carries the detail.
    #[error("Schema reconciliation failed: {reason}")]
    InvalidSchema { reason: String },
    /// Both a collection configuration and a schema were supplied at once.
    #[error("Cannot set both collection config and schema simultaneously")]
    ConfigAndSchemaConflict,
    /// Two schemas being merged disagree on their defaults.
    #[error("Cannot merge schemas with differing defaults")]
    DefaultsMismatch,
    /// Two configurations conflict; `context` names where the conflict was found.
    #[error("Conflicting configuration for {context}")]
    ConfigurationConflict { context: String },
    /// HNSW parameters failed `validator` validation.
    #[error("Invalid HNSW configuration: {0}")]
    InvalidHnswConfig(validator::ValidationErrors),
    /// SPANN parameters failed `validator` validation.
    #[error("Invalid SPANN configuration: {0}")]
    InvalidSpannConfig(validator::ValidationErrors),
    /// The caller supplied invalid schema input; `reason` carries the detail.
    #[error("Invalid schema input: {reason}")]
    InvalidUserInput { reason: String },
    /// A configuration update could not be applied to the schema.
    #[error("Invalid configuration update: {message}")]
    InvalidConfigurationUpdate { message: String },
    /// A schema-builder error, displayed transparently and convertible via `From`.
    #[error(transparent)]
    Builder(#[from] SchemaBuilderError),
}
77
/// Errors describing index create/delete requests that the schema builder rejects.
///
/// Every variant is reported as `ErrorCodes::InvalidArgument` by the
/// `ChromaError` implementation for this type.
#[derive(Debug, Error)]
pub enum SchemaBuilderError {
    /// A vector index was requested on a specific key instead of globally.
    #[error("Vector index must be configured globally using create_index(None, config), not on specific key '{key}'")]
    VectorIndexMustBeGlobal { key: String },
    /// The request targeted a special, system-managed key.
    #[error("Cannot modify special key '{key}' - it is managed automatically by the system.")]
    SpecialKeyModificationNotAllowed { key: String },
    /// A sparse vector index was requested without naming a key.
    #[error("Sparse vector index requires a specific key. Use create_index(Some(\"key_name\"), config) instead of create_index(None, config)")]
    SparseVectorRequiresKey,
    /// A second sparse vector index was requested; `existing_key` already holds one.
    #[error("Only one sparse vector index allowed per collection. Key '{existing_key}' already has a sparse vector index. Remove it first or use that key.")]
    MultipleSparseVectorIndexes { existing_key: String },
    /// Deleting the vector index was requested.
    #[error("Vector index deletion not supported. The vector index is always enabled on #embedding. To disable vector search, disable the collection instead.")]
    VectorIndexDeletionNotSupported,
    /// Deleting a sparse vector index was requested.
    #[error("Sparse vector index deletion not supported yet. Sparse vector indexes cannot be removed once created.")]
    SparseVectorIndexDeletionNotSupported,
    /// A user-supplied key starts with the reserved `#` prefix.
    #[error(
        "Key '{key}' cannot begin with '#'. Keys starting with '#' are reserved for system use."
    )]
    ReservedKeyPrefix { key: String },
    /// FTS index deletion was requested on a key other than `#document`.
    #[error("FTS index deletion is only supported on #document key.")]
    FtsIndexDeletionOnlyOnDocument,
    /// FTS index creation was requested on a key other than `#document`.
    #[error("FTS index can only be enabled on #document key. Use create_index(Some(\"#document\"), FtsIndexConfig) to enable FTS.")]
    FtsIndexOnlyOnDocument,
}
101
/// Errors raised when a filter cannot be served by the indexes the schema enables.
#[derive(Debug, Error)]
pub enum FilterValidationError {
    /// The filter references a metadata key whose index for `value_type` is disabled.
    #[error(
        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
    )]
    IndexingDisabled {
        key: String,
        value_type: MetadataValueType,
    },
    /// The filter uses full-text search while the FTS index is disabled.
    #[error("Cannot filter using full-text search because FTS indexing is disabled")]
    FtsDisabled,
    /// An underlying schema error, displayed transparently and convertible via `From`.
    #[error(transparent)]
    Schema(#[from] SchemaError),
}
116
impl ChromaError for SchemaBuilderError {
    /// Every builder error is reported as `ErrorCodes::InvalidArgument`,
    /// regardless of variant.
    fn code(&self) -> ErrorCodes {
        ErrorCodes::InvalidArgument
    }
}
122
123impl ChromaError for FilterValidationError {
124    fn code(&self) -> ErrorCodes {
125        match self {
126            FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
127            FilterValidationError::FtsDisabled => ErrorCodes::InvalidArgument,
128            FilterValidationError::Schema(_) => ErrorCodes::Internal,
129        }
130    }
131}
132
133// ============================================================================
134// SCHEMA CONSTANTS
135// ============================================================================
136// These constants must match the Python constants in chromadb/api/types.py
137
// Value type name constants
// Each value mirrors the serde `rename` on the corresponding `ValueTypes` field.
/// Name of the string value type.
pub const STRING_VALUE_NAME: &str = "string";
/// Name of the integer value type.
pub const INT_VALUE_NAME: &str = "int";
/// Name of the boolean value type.
pub const BOOL_VALUE_NAME: &str = "bool";
/// Name of the float value type.
pub const FLOAT_VALUE_NAME: &str = "float";
/// Name of the float-list value type.
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
/// Name of the sparse-vector value type.
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";
145
// Index type name constants
// Each value mirrors the serde `rename` on the corresponding index field of the
// per-value-type structs (`StringValueType`, `FloatListValueType`, ...).
/// Name of the full-text search index.
pub const FTS_INDEX_NAME: &str = "fts_index";
/// Name of the dense vector index.
pub const VECTOR_INDEX_NAME: &str = "vector_index";
/// Name of the sparse vector index.
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
/// Name of the inverted index over string values.
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
/// Name of the inverted index over integer values.
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
/// Name of the inverted index over float values.
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
/// Name of the inverted index over boolean values.
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";
154
// Special metadata keys - must match Python constants in chromadb/api/types.py
/// Key under which `Schema::keys` stores the document overrides
/// (consulted by `Schema::is_fts_enabled`).
pub const DOCUMENT_KEY: &str = "#document";
/// Key under which `Schema::keys` stores the embedding (vector index) overrides.
pub const EMBEDDING_KEY: &str = "#embedding";
158
// Static regex pattern to validate CMEK for GCP.
// Compiled once on first use. The pattern is anchored at both ends and requires
// the `projects/`, `/locations/`, `/keyRings/` and `/cryptoKeys/` markers in that
// order; each `.+` segment only has to be non-empty, so a segment may itself
// contain `/` characters.
static CMEK_GCP_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^projects/.+/locations/.+/keyRings/.+/cryptoKeys/.+$")
        .expect("The CMEK pattern for GCP should be valid")
});
164
/// Customer-managed encryption key for storage encryption.
///
/// CMEK allows you to use your own encryption keys managed by cloud providers'
/// key management services (KMS) instead of default provider-managed keys.
///
/// Variant names are serialized in snake_case, so `Gcp` appears as `gcp`.
#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case")]
pub enum Cmek {
    /// Google Cloud Platform KMS key resource name.
    ///
    /// Format: `projects/{project}/locations/{location}/keyRings/{keyRing}/cryptoKeys/{cryptoKey}`
    ///
    /// The name is held behind an `Arc`, so cloning a `Cmek` does not copy the string.
    Gcp(Arc<String>),
}
177
178impl Cmek {
179    /// Create a GCP CMEK from a KMS resource name
180    ///
181    /// # Example
182    /// ```
183    /// use chroma_types::Cmek;
184    /// let cmek = Cmek::gcp(
185    ///     "projects/my-project/locations/us-central1/keyRings/my-ring/cryptoKeys/my-key".to_string()
186    /// );
187    /// ```
188    pub fn gcp(resource: String) -> Self {
189        Cmek::Gcp(Arc::new(resource))
190    }
191
192    /// Validates that the CMEK resource name matches the expected pattern.
193    ///
194    /// Returns `true` if the resource name is well-formed according to the
195    /// provider's format requirements. Does not verify that the key exists
196    /// or is accessible.
197    pub fn validate_pattern(&self) -> bool {
198        match self {
199            Cmek::Gcp(resource) => CMEK_GCP_RE.is_match(resource),
200        }
201    }
202}
203
204impl TryFrom<chroma_proto::Cmek> for Cmek {
205    type Error = ConversionError;
206
207    fn try_from(proto: chroma_proto::Cmek) -> Result<Self, Self::Error> {
208        match proto.provider {
209            Some(chroma_proto::cmek::Provider::Gcp(resource)) => Ok(Cmek::gcp(resource)),
210            None => Err(ConversionError::DecodeError),
211        }
212    }
213}
214
215impl From<Cmek> for chroma_proto::Cmek {
216    fn from(cmek: Cmek) -> Self {
217        match cmek {
218            Cmek::Gcp(resource) => chroma_proto::Cmek {
219                provider: Some(chroma_proto::cmek::Provider::Gcp((*resource).clone())),
220            },
221        }
222    }
223}
224
225// ============================================================================
226// SCHEMA STRUCTURES
227// ============================================================================
228
/// Schema representation for collection index configurations
///
/// This represents the server-side schema structure used for index management.
/// Index settings are resolved from two layers: `defaults`, keyed by value
/// type, and `keys`, which holds per-metadata-key overrides (including the
/// special `#document` and `#embedding` keys).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct Schema {
    /// Default index configurations for each value type
    pub defaults: ValueTypes,
    /// Key-specific index overrides
    ///
    /// Serialized as `keys`; the `key_overrides` alias is still accepted when
    /// deserializing.
    /// TODO(Sanket): Needed for backwards compatibility. Should remove after deploy.
    #[serde(rename = "keys", alias = "key_overrides")]
    pub keys: HashMap<String, ValueTypes>,
    /// Customer-managed encryption key for collection data
    ///
    /// Omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(feature = "utoipa", schema(value_type = Option<Object>))]
    pub cmek: Option<Cmek>,
    /// ID of the attached function that created this output collection (if applicable)
    ///
    /// Omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_attached_function_id: Option<String>,
}
250
251impl Schema {
252    pub fn update(&mut self, configuration: &InternalUpdateCollectionConfiguration) {
253        if let Some(vector_update) = &configuration.vector_index {
254            if let Some(default_vector_index) = self.defaults_vector_index_mut() {
255                Self::apply_vector_index_update(default_vector_index, vector_update);
256            }
257            if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
258                Self::apply_vector_index_update(embedding_vector_index, vector_update);
259            }
260        }
261
262        if let Some(embedding_function) = configuration.embedding_function.as_ref() {
263            if let Some(default_vector_index) = self.defaults_vector_index_mut() {
264                default_vector_index.config.embedding_function = Some(embedding_function.clone());
265            }
266            if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
267                embedding_vector_index.config.embedding_function = Some(embedding_function.clone());
268            }
269        }
270    }
271
272    /// Apply updates from UpdateCollectionConfiguration.
273    ///
274    /// Only supports updating:
275    /// - `spann`: SPANN configuration parameters (search_nprobe, ef_search)
276    /// - `embedding_function`: Embedding function configuration
277    ///
278    /// Returns an error if:
279    /// - `hnsw` is provided (HNSW updates are not supported)
280    /// - Schema is missing expected structure (defaults/embedding vector index or spann config)
281    pub fn apply_update_configuration(
282        &mut self,
283        config: &UpdateCollectionConfiguration,
284    ) -> Result<(), SchemaError> {
285        // HNSW updates are not allowed
286        if config.hnsw.is_some() {
287            return Err(SchemaError::InvalidConfigurationUpdate {
288                message: "HNSW configuration updates are not supported".to_string(),
289            });
290        }
291
292        // Apply spann updates
293        if let Some(ref spann_update) = config.spann {
294            let defaults_spann = self
295                .defaults_vector_index_mut()
296                .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
297                    message: "schema missing defaults.float_list.vector_index".to_string(),
298                })?
299                .config
300                .spann
301                .as_mut()
302                .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
303                    message: "schema missing defaults spann config".to_string(),
304                })?;
305
306            if let Some(search_nprobe) = spann_update.search_nprobe {
307                defaults_spann.search_nprobe = Some(search_nprobe);
308            }
309            if let Some(ef_search) = spann_update.ef_search {
310                defaults_spann.ef_search = Some(ef_search);
311            }
312
313            let embedding_spann = self
314                .embedding_vector_index_mut()
315                .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
316                    message: "schema missing keys[#embedding].float_list.vector_index".to_string(),
317                })?
318                .config
319                .spann
320                .as_mut()
321                .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
322                    message: "schema missing #embedding spann config".to_string(),
323                })?;
324
325            if let Some(search_nprobe) = spann_update.search_nprobe {
326                embedding_spann.search_nprobe = Some(search_nprobe);
327            }
328            if let Some(ef_search) = spann_update.ef_search {
329                embedding_spann.ef_search = Some(ef_search);
330            }
331        }
332
333        // Apply embedding function updates
334        if let Some(ref ef) = config.embedding_function {
335            self.defaults_vector_index_mut()
336                .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
337                    message: "schema missing defaults.float_list.vector_index".to_string(),
338                })?
339                .config
340                .embedding_function = Some(ef.clone());
341
342            self.embedding_vector_index_mut()
343                .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
344                    message: "schema missing keys[#embedding].float_list.vector_index".to_string(),
345                })?
346                .config
347                .embedding_function = Some(ef.clone());
348        }
349
350        Ok(())
351    }
352
353    fn defaults_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
354        self.defaults
355            .float_list
356            .as_mut()
357            .and_then(|float_list| float_list.vector_index.as_mut())
358    }
359
360    fn embedding_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
361        self.keys
362            .get_mut(EMBEDDING_KEY)
363            .and_then(|value_types| value_types.float_list.as_mut())
364            .and_then(|float_list| float_list.vector_index.as_mut())
365    }
366
367    fn apply_vector_index_update(
368        vector_index: &mut VectorIndexType,
369        update: &UpdateVectorIndexConfiguration,
370    ) {
371        match update {
372            UpdateVectorIndexConfiguration::Hnsw(Some(hnsw_update)) => {
373                if let Some(hnsw_config) = vector_index.config.hnsw.as_mut() {
374                    if let Some(ef_search) = hnsw_update.ef_search {
375                        hnsw_config.ef_search = Some(ef_search);
376                    }
377                    if let Some(max_neighbors) = hnsw_update.max_neighbors {
378                        hnsw_config.max_neighbors = Some(max_neighbors);
379                    }
380                    if let Some(num_threads) = hnsw_update.num_threads {
381                        hnsw_config.num_threads = Some(num_threads);
382                    }
383                    if let Some(resize_factor) = hnsw_update.resize_factor {
384                        hnsw_config.resize_factor = Some(resize_factor);
385                    }
386                    if let Some(sync_threshold) = hnsw_update.sync_threshold {
387                        hnsw_config.sync_threshold = Some(sync_threshold);
388                    }
389                    if let Some(batch_size) = hnsw_update.batch_size {
390                        hnsw_config.batch_size = Some(batch_size);
391                    }
392                }
393            }
394            UpdateVectorIndexConfiguration::Hnsw(None) => {}
395            UpdateVectorIndexConfiguration::Spann(Some(spann_update)) => {
396                if let Some(spann_config) = vector_index.config.spann.as_mut() {
397                    if let Some(search_nprobe) = spann_update.search_nprobe {
398                        spann_config.search_nprobe = Some(search_nprobe);
399                    }
400                    if let Some(ef_search) = spann_update.ef_search {
401                        spann_config.ef_search = Some(ef_search);
402                    }
403                }
404            }
405            UpdateVectorIndexConfiguration::Spann(None) => {}
406        }
407    }
408
409    pub fn is_sparse_index_enabled(&self) -> bool {
410        let defaults_enabled = self
411            .defaults
412            .sparse_vector
413            .as_ref()
414            .and_then(|sv| sv.sparse_vector_index.as_ref())
415            .is_some_and(|idx| idx.enabled);
416        let key_enabled = self.keys.values().any(|value_types| {
417            value_types
418                .sparse_vector
419                .as_ref()
420                .and_then(|sv| sv.sparse_vector_index.as_ref())
421                .is_some_and(|idx| idx.enabled)
422        });
423        defaults_enabled || key_enabled
424    }
425
426    pub fn is_fts_enabled(&self) -> bool {
427        // Check key-specific override first, then fall back to global defaults
428        self.keys
429            .get(DOCUMENT_KEY)
430            .and_then(|vt| vt.string.as_ref())
431            .and_then(|s| s.fts_index.as_ref())
432            .or_else(|| {
433                self.defaults
434                    .string
435                    .as_ref()
436                    .and_then(|s| s.fts_index.as_ref())
437            })
438            .is_none_or(|idx| idx.enabled)
439    }
440}
441
442impl Default for Schema {
443    /// Create a default Schema that matches Python's behavior exactly.
444    ///
445    /// Python creates a Schema with:
446    /// - All inverted indexes enabled by default (string, int, float, bool)
447    /// - Vector and FTS indexes disabled in defaults
448    /// - Special keys configured: #document (FTS enabled) and #embedding (vector enabled)
449    /// - Vector config has space=None, hnsw=None, spann=None (deferred to backend)
450    ///
451    /// # Examples
452    /// ```
453    /// use chroma_types::Schema;
454    ///
455    /// let schema = Schema::default();
456    /// assert!(schema.keys.contains_key("#document"));
457    /// assert!(schema.keys.contains_key("#embedding"));
458    /// ```
459    fn default() -> Self {
460        // Initialize defaults - match Python's _initialize_defaults()
461        let defaults = ValueTypes {
462            string: Some(StringValueType {
463                fts_index: Some(FtsIndexType {
464                    enabled: false,
465                    config: FtsIndexConfig {},
466                }),
467                string_inverted_index: Some(StringInvertedIndexType {
468                    enabled: true,
469                    config: StringInvertedIndexConfig {},
470                }),
471            }),
472            float_list: Some(FloatListValueType {
473                vector_index: Some(VectorIndexType {
474                    enabled: false,
475                    config: VectorIndexConfig {
476                        space: None, // Python leaves as None (resolved on serialization)
477                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
478                        source_key: None,
479                        hnsw: None,  // Python doesn't specify
480                        spann: None, // Python doesn't specify
481                    },
482                }),
483            }),
484            sparse_vector: Some(SparseVectorValueType {
485                sparse_vector_index: Some(SparseVectorIndexType {
486                    enabled: false,
487                    config: SparseVectorIndexConfig {
488                        embedding_function: None,
489                        source_key: None,
490                        bm25: None,
491                    },
492                }),
493            }),
494            int: Some(IntValueType {
495                int_inverted_index: Some(IntInvertedIndexType {
496                    enabled: true,
497                    config: IntInvertedIndexConfig {},
498                }),
499            }),
500            float: Some(FloatValueType {
501                float_inverted_index: Some(FloatInvertedIndexType {
502                    enabled: true,
503                    config: FloatInvertedIndexConfig {},
504                }),
505            }),
506            boolean: Some(BoolValueType {
507                bool_inverted_index: Some(BoolInvertedIndexType {
508                    enabled: true,
509                    config: BoolInvertedIndexConfig {},
510                }),
511            }),
512        };
513
514        // Initialize key-specific overrides - match Python's _initialize_keys()
515        let mut keys = HashMap::new();
516
517        // #document: FTS enabled, string inverted disabled
518        keys.insert(
519            DOCUMENT_KEY.to_string(),
520            ValueTypes {
521                string: Some(StringValueType {
522                    fts_index: Some(FtsIndexType {
523                        enabled: true,
524                        config: FtsIndexConfig {},
525                    }),
526                    string_inverted_index: Some(StringInvertedIndexType {
527                        enabled: false,
528                        config: StringInvertedIndexConfig {},
529                    }),
530                }),
531                ..Default::default()
532            },
533        );
534
535        // #embedding: Vector index enabled with source_key=#document
536        keys.insert(
537            EMBEDDING_KEY.to_string(),
538            ValueTypes {
539                float_list: Some(FloatListValueType {
540                    vector_index: Some(VectorIndexType {
541                        enabled: true,
542                        config: VectorIndexConfig {
543                            space: None, // Python leaves as None (resolved on serialization)
544                            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
545                            source_key: Some(DOCUMENT_KEY.to_string()),
546                            hnsw: None,  // Python doesn't specify
547                            spann: None, // Python doesn't specify
548                        },
549                    }),
550                }),
551                ..Default::default()
552            },
553        );
554
555        Schema {
556            defaults,
557            keys,
558            cmek: None,
559            source_attached_function_id: None,
560        }
561    }
562}
563
564pub fn is_embedding_function_default(
565    embedding_function: &Option<EmbeddingFunctionConfiguration>,
566) -> bool {
567    match embedding_function {
568        None => true,
569        Some(embedding_function) => embedding_function.is_default(),
570    }
571}
572
573/// Check if space is default (None means default, or if present, should be default space)
574pub fn is_space_default(space: &Option<Space>) -> bool {
575    match space {
576        None => true,                     // None means default
577        Some(s) => *s == default_space(), // If present, check if it's the default space
578    }
579}
580
581/// Check if HNSW config is default
582pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
583    hnsw_config.ef_construction == Some(default_construction_ef())
584        && hnsw_config.ef_search == Some(default_search_ef())
585        && hnsw_config.max_neighbors == Some(default_m())
586        && hnsw_config.num_threads == Some(default_num_threads())
587        && hnsw_config.batch_size == Some(default_batch_size())
588        && hnsw_config.sync_threshold == Some(default_sync_threshold())
589        && hnsw_config.resize_factor == Some(default_resize_factor())
590}
591
592// ============================================================================
593// NEW STRONGLY-TYPED SCHEMA STRUCTURES
594// ============================================================================
595
/// Strongly-typed value type configurations
/// Contains optional configurations for each supported value type
///
/// Every field is optional and omitted from the serialized form when `None`.
/// Each field also deserializes from a `#`-prefixed alias of its name.
/// `Default` yields a value with every field set to `None`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct ValueTypes {
    /// Index configuration for string values (FTS and string inverted index).
    #[serde(
        rename = "string",
        alias = "#string",
        skip_serializing_if = "Option::is_none"
    )] // STRING_VALUE_NAME
    pub string: Option<StringValueType>,

    /// Index configuration for float-list values (the dense vector index).
    #[serde(
        rename = "float_list",
        alias = "#float_list",
        skip_serializing_if = "Option::is_none"
    )]
    // FLOAT_LIST_VALUE_NAME
    pub float_list: Option<FloatListValueType>,

    /// Index configuration for sparse-vector values.
    #[serde(
        rename = "sparse_vector",
        alias = "#sparse_vector",
        skip_serializing_if = "Option::is_none"
    )]
    // SPARSE_VECTOR_VALUE_NAME
    pub sparse_vector: Option<SparseVectorValueType>,

    /// Index configuration for integer values.
    #[serde(
        rename = "int",
        alias = "#int",
        skip_serializing_if = "Option::is_none"
    )] // INT_VALUE_NAME
    pub int: Option<IntValueType>,

    /// Index configuration for float values.
    #[serde(
        rename = "float",
        alias = "#float",
        skip_serializing_if = "Option::is_none"
    )] // FLOAT_VALUE_NAME
    pub float: Option<FloatValueType>,

    /// Index configuration for boolean values; note the serialized name is `bool`.
    #[serde(
        rename = "bool",
        alias = "#bool",
        skip_serializing_if = "Option::is_none"
    )] // BOOL_VALUE_NAME
    pub boolean: Option<BoolValueType>,
}
645
/// String value type index configurations
///
/// Each index slot is omitted from the serialized form when `None` and also
/// deserializes from a `$`-prefixed alias of its name.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringValueType {
    /// Full-text search index settings.
    #[serde(
        rename = "fts_index",
        alias = "$fts_index",
        skip_serializing_if = "Option::is_none"
    )] // FTS_INDEX_NAME
    pub fts_index: Option<FtsIndexType>,

    /// String inverted index settings.
    #[serde(
        rename = "string_inverted_index", // STRING_INVERTED_INDEX_NAME
        alias = "$string_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub string_inverted_index: Option<StringInvertedIndexType>,
}
664
/// Float list value type index configurations (for vectors)
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatListValueType {
    /// Vector index settings; omitted when `None`, and also accepted under
    /// the `$vector_index` alias when deserializing.
    #[serde(
        rename = "vector_index",
        alias = "$vector_index",
        skip_serializing_if = "Option::is_none"
    )] // VECTOR_INDEX_NAME
    pub vector_index: Option<VectorIndexType>,
}
676
/// Sparse vector value type index configurations
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorValueType {
    /// Sparse vector index settings; omitted when `None`, and also accepted
    /// under the `$sparse_vector_index` alias when deserializing.
    #[serde(
        rename = "sparse_vector_index", // SPARSE_VECTOR_INDEX_NAME
        alias = "$sparse_vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector_index: Option<SparseVectorIndexType>,
}
688
/// Integer value type index configurations
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntValueType {
    /// Integer inverted index settings; omitted when `None`, and also accepted
    /// under the `$int_inverted_index` alias when deserializing.
    #[serde(
        rename = "int_inverted_index",
        alias = "$int_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    // INT_INVERTED_INDEX_NAME
    pub int_inverted_index: Option<IntInvertedIndexType>,
}
701
/// Float value type index configurations
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatValueType {
    /// Float inverted index settings; omitted when `None`, and also accepted
    /// under the `$float_inverted_index` alias when deserializing.
    #[serde(
        rename = "float_inverted_index", // FLOAT_INVERTED_INDEX_NAME
        alias = "$float_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_inverted_index: Option<FloatInvertedIndexType>,
}
713
/// Boolean value type index configurations
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolValueType {
    /// Boolean inverted index settings; omitted when `None`, and also accepted
    /// under the `$bool_inverted_index` alias when deserializing.
    #[serde(
        rename = "bool_inverted_index", // BOOL_INVERTED_INDEX_NAME
        alias = "$bool_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub bool_inverted_index: Option<BoolInvertedIndexType>,
}
725
// Individual index type structs with enabled status and config
/// Full-text search index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FtsIndexType {
    /// Whether the FTS index is enabled (read by `Schema::is_fts_enabled`).
    pub enabled: bool,
    /// FTS index configuration.
    pub config: FtsIndexConfig,
}
733
/// Dense vector index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct VectorIndexType {
    /// Whether the vector index is enabled.
    pub enabled: bool,
    /// Vector index configuration (space, embedding function, source key,
    /// HNSW and SPANN sections).
    pub config: VectorIndexConfig,
}
740
/// Sparse vector index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorIndexType {
    /// Whether the sparse vector index is enabled
    /// (read by `Schema::is_sparse_index_enabled`).
    pub enabled: bool,
    /// Sparse vector index configuration.
    pub config: SparseVectorIndexConfig,
}
747
/// String inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringInvertedIndexType {
    /// Whether the string inverted index is enabled.
    pub enabled: bool,
    /// String inverted index configuration.
    pub config: StringInvertedIndexConfig,
}
754
/// Integer inverted index entry: an on/off switch paired with its
/// [`IntInvertedIndexConfig`].
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntInvertedIndexType {
    /// Whether the index is turned on.
    pub enabled: bool,
    /// Index-specific settings; kept even while the index is disabled.
    pub config: IntInvertedIndexConfig,
}
761
/// Float inverted index entry: an on/off switch paired with its
/// [`FloatInvertedIndexConfig`].
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatInvertedIndexType {
    /// Whether the index is turned on.
    pub enabled: bool,
    /// Index-specific settings; kept even while the index is disabled.
    pub config: FloatInvertedIndexConfig,
}
768
/// Boolean inverted index entry: an on/off switch paired with its
/// [`BoolInvertedIndexConfig`].
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolInvertedIndexType {
    /// Whether the index is turned on.
    pub enabled: bool,
    /// Index-specific settings; kept even while the index is disabled.
    pub config: BoolInvertedIndexConfig,
}
775
776impl Schema {
777    /// Create a new Schema with strongly-typed default configurations
778    pub fn new_default(default_knn_index: KnnIndex) -> Self {
779        // Vector index disabled on all keys except #embedding.
780        let vector_config = VectorIndexType {
781            enabled: false,
782            config: VectorIndexConfig {
783                space: Some(default_space()),
784                embedding_function: None,
785                source_key: None,
786                hnsw: match default_knn_index {
787                    KnnIndex::Hnsw => Some(HnswIndexConfig {
788                        ef_construction: Some(default_construction_ef()),
789                        max_neighbors: Some(default_m()),
790                        ef_search: Some(default_search_ef()),
791                        num_threads: Some(default_num_threads()),
792                        batch_size: Some(default_batch_size()),
793                        sync_threshold: Some(default_sync_threshold()),
794                        resize_factor: Some(default_resize_factor()),
795                    }),
796                    KnnIndex::Spann => None,
797                },
798                spann: match default_knn_index {
799                    KnnIndex::Hnsw => None,
800                    KnnIndex::Spann => Some(SpannIndexConfig {
801                        search_nprobe: Some(default_search_nprobe()),
802                        search_rng_factor: Some(default_search_rng_factor()),
803                        search_rng_epsilon: Some(default_search_rng_epsilon()),
804                        nreplica_count: Some(default_nreplica_count()),
805                        write_rng_factor: Some(default_write_rng_factor()),
806                        write_rng_epsilon: Some(default_write_rng_epsilon()),
807                        split_threshold: Some(default_split_threshold()),
808                        num_samples_kmeans: Some(default_num_samples_kmeans()),
809                        initial_lambda: Some(default_initial_lambda()),
810                        reassign_neighbor_count: Some(default_reassign_neighbor_count()),
811                        merge_threshold: Some(default_merge_threshold()),
812                        num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
813                        write_nprobe: Some(default_write_nprobe()),
814                        ef_construction: Some(default_construction_ef_spann()),
815                        ef_search: Some(default_search_ef_spann()),
816                        max_neighbors: Some(default_m_spann()),
817                        center_drift_threshold: None,
818                        quantize: Quantization::None,
819                    }),
820                },
821            },
822        };
823
824        // Initialize defaults struct directly instead of using Default::default() + field assignments
825        let defaults = ValueTypes {
826            string: Some(StringValueType {
827                string_inverted_index: Some(StringInvertedIndexType {
828                    enabled: true,
829                    config: StringInvertedIndexConfig {},
830                }),
831                fts_index: Some(FtsIndexType {
832                    enabled: false,
833                    config: FtsIndexConfig {},
834                }),
835            }),
836            float: Some(FloatValueType {
837                float_inverted_index: Some(FloatInvertedIndexType {
838                    enabled: true,
839                    config: FloatInvertedIndexConfig {},
840                }),
841            }),
842            int: Some(IntValueType {
843                int_inverted_index: Some(IntInvertedIndexType {
844                    enabled: true,
845                    config: IntInvertedIndexConfig {},
846                }),
847            }),
848            boolean: Some(BoolValueType {
849                bool_inverted_index: Some(BoolInvertedIndexType {
850                    enabled: true,
851                    config: BoolInvertedIndexConfig {},
852                }),
853            }),
854            float_list: Some(FloatListValueType {
855                vector_index: Some(vector_config),
856            }),
857            sparse_vector: Some(SparseVectorValueType {
858                sparse_vector_index: Some(SparseVectorIndexType {
859                    enabled: false,
860                    config: SparseVectorIndexConfig {
861                        embedding_function: Some(EmbeddingFunctionConfiguration::Unknown),
862                        source_key: None,
863                        bm25: Some(false),
864                    },
865                }),
866            }),
867        };
868
869        // Set up key overrides
870        let mut keys = HashMap::new();
871
872        // Enable vector index for #embedding.
873        let embedding_defaults = ValueTypes {
874            float_list: Some(FloatListValueType {
875                vector_index: Some(VectorIndexType {
876                    enabled: true,
877                    config: VectorIndexConfig {
878                        space: Some(default_space()),
879                        embedding_function: None,
880                        source_key: Some(DOCUMENT_KEY.to_string()),
881                        hnsw: match default_knn_index {
882                            KnnIndex::Hnsw => Some(HnswIndexConfig {
883                                ef_construction: Some(default_construction_ef()),
884                                max_neighbors: Some(default_m()),
885                                ef_search: Some(default_search_ef()),
886                                num_threads: Some(default_num_threads()),
887                                batch_size: Some(default_batch_size()),
888                                sync_threshold: Some(default_sync_threshold()),
889                                resize_factor: Some(default_resize_factor()),
890                            }),
891                            KnnIndex::Spann => None,
892                        },
893                        spann: match default_knn_index {
894                            KnnIndex::Hnsw => None,
895                            KnnIndex::Spann => Some(SpannIndexConfig {
896                                search_nprobe: Some(default_search_nprobe()),
897                                search_rng_factor: Some(default_search_rng_factor()),
898                                search_rng_epsilon: Some(default_search_rng_epsilon()),
899                                nreplica_count: Some(default_nreplica_count()),
900                                write_rng_factor: Some(default_write_rng_factor()),
901                                write_rng_epsilon: Some(default_write_rng_epsilon()),
902                                split_threshold: Some(default_split_threshold()),
903                                num_samples_kmeans: Some(default_num_samples_kmeans()),
904                                initial_lambda: Some(default_initial_lambda()),
905                                reassign_neighbor_count: Some(default_reassign_neighbor_count()),
906                                merge_threshold: Some(default_merge_threshold()),
907                                num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
908                                write_nprobe: Some(default_write_nprobe()),
909                                ef_construction: Some(default_construction_ef_spann()),
910                                ef_search: Some(default_search_ef_spann()),
911                                max_neighbors: Some(default_m_spann()),
912                                center_drift_threshold: None,
913                                quantize: Quantization::None,
914                            }),
915                        },
916                    },
917                }),
918            }),
919            ..Default::default()
920        };
921        keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
922
923        // Document defaults - initialize directly instead of Default::default() + field assignment
924        let document_defaults = ValueTypes {
925            string: Some(StringValueType {
926                fts_index: Some(FtsIndexType {
927                    enabled: true,
928                    config: FtsIndexConfig {},
929                }),
930                string_inverted_index: Some(StringInvertedIndexType {
931                    enabled: false,
932                    config: StringInvertedIndexConfig {},
933                }),
934            }),
935            ..Default::default()
936        };
937        keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
938
939        Schema {
940            defaults,
941            keys,
942            cmek: None,
943            source_attached_function_id: None,
944        }
945    }
946
947    pub fn get_spann_config(&self) -> Option<(SpannIndexConfig, Space)> {
948        let extract = |vector_index: &VectorIndexType| {
949            let space = vector_index.config.space.clone().unwrap_or_default();
950            vector_index
951                .config
952                .spann
953                .clone()
954                .map(|config| (config, space))
955        };
956
957        self.keys
958            .get(EMBEDDING_KEY)
959            .and_then(|value_types| value_types.float_list.as_ref())
960            .and_then(|float_list| float_list.vector_index.as_ref())
961            .and_then(extract)
962            .or_else(|| {
963                self.defaults
964                    .float_list
965                    .as_ref()
966                    .and_then(|float_list| float_list.vector_index.as_ref())
967                    .and_then(extract)
968            })
969    }
970
971    pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
972        let to_internal = |vector_index: &VectorIndexType| {
973            let space = vector_index.config.space.clone();
974            vector_index
975                .config
976                .spann
977                .clone()
978                .map(|config| (space.as_ref(), &config).into())
979        };
980
981        self.keys
982            .get(EMBEDDING_KEY)
983            .and_then(|value_types| value_types.float_list.as_ref())
984            .and_then(|float_list| float_list.vector_index.as_ref())
985            .and_then(to_internal)
986            .or_else(|| {
987                self.defaults
988                    .float_list
989                    .as_ref()
990                    .and_then(|float_list| float_list.vector_index.as_ref())
991                    .and_then(to_internal)
992            })
993    }
994
995    /// Check if quantization is enabled in the SPANN index configuration
996    pub fn is_quantization_enabled(&self) -> bool {
997        let check_spann = |vector_index: &VectorIndexType| {
998            vector_index
999                .config
1000                .spann
1001                .as_ref()
1002                .is_some_and(|config| !matches!(config.quantize, Quantization::None))
1003        };
1004
1005        self.keys
1006            .get(EMBEDDING_KEY)
1007            .and_then(|value_types| value_types.float_list.as_ref())
1008            .and_then(|float_list| float_list.vector_index.as_ref())
1009            .map(check_spann)
1010            .unwrap_or_else(|| {
1011                self.defaults
1012                    .float_list
1013                    .as_ref()
1014                    .and_then(|float_list| float_list.vector_index.as_ref())
1015                    .map(check_spann)
1016                    .unwrap_or(false)
1017            })
1018    }
1019
1020    /// Get a mutable reference to the SPANN index configuration
1021    /// Checks the #embedding key first, then falls back to defaults
1022    pub fn get_spann_config_mut(&mut self) -> Option<&mut SpannIndexConfig> {
1023        // Try #embedding key first
1024        if let Some(value_types) = self.keys.get_mut(EMBEDDING_KEY) {
1025            if let Some(float_list) = &mut value_types.float_list {
1026                if let Some(vector_index) = &mut float_list.vector_index {
1027                    if let Some(spann_config) = &mut vector_index.config.spann {
1028                        return Some(spann_config);
1029                    }
1030                }
1031            }
1032        }
1033
1034        // Fall back to defaults
1035        if let Some(float_list) = &mut self.defaults.float_list {
1036            if let Some(vector_index) = &mut float_list.vector_index {
1037                if let Some(spann_config) = &mut vector_index.config.spann {
1038                    return Some(spann_config);
1039                }
1040            }
1041        }
1042
1043        None
1044    }
1045
1046    /// Set the quantization variant and apply impl-specific SPANN config defaults.
1047    /// Note: this intentionally skips `SpannIndexConfig::validate()` because the
1048    /// hardcoded quantization defaults (e.g. split_threshold=512) exceed the
1049    /// user-facing validation ranges. Those ranges gate user input only;
1050    /// programmatic defaults set here are known-good constants.
1051    pub fn quantize(&mut self, variant: Quantization) {
1052        if let Some(spann_config) = self.get_spann_config_mut() {
1053            *spann_config = match variant {
1054                Quantization::None => SpannIndexConfig {
1055                    quantize: variant,
1056                    ..*spann_config
1057                },
1058                Quantization::FourBitRabitQWithUSearch => SpannIndexConfig {
1059                    search_nprobe: Some(64),
1060                    nreplica_count: Some(2),
1061                    write_rng_factor: Some(4.0),
1062                    write_rng_epsilon: Some(8.0),
1063                    split_threshold: Some(512),
1064                    reassign_neighbor_count: Some(32),
1065                    merge_threshold: Some(128),
1066                    write_nprobe: Some(64),
1067                    ef_construction: Some(256),
1068                    ef_search: Some(128),
1069                    max_neighbors: Some(24),
1070                    center_drift_threshold: Some(0.125),
1071                    quantize: variant,
1072                    ..*spann_config
1073                },
1074            };
1075        }
1076    }
1077
1078    pub fn get_internal_hnsw_config(&self) -> Option<InternalHnswConfiguration> {
1079        let to_internal = |vector_index: &VectorIndexType| {
1080            if vector_index.config.spann.is_some() {
1081                return None;
1082            }
1083            let space = vector_index.config.space.as_ref();
1084            let hnsw_config = vector_index.config.hnsw.as_ref();
1085            Some((space, hnsw_config).into())
1086        };
1087
1088        self.keys
1089            .get(EMBEDDING_KEY)
1090            .and_then(|value_types| value_types.float_list.as_ref())
1091            .and_then(|float_list| float_list.vector_index.as_ref())
1092            .and_then(to_internal)
1093            .or_else(|| {
1094                self.defaults
1095                    .float_list
1096                    .as_ref()
1097                    .and_then(|float_list| float_list.vector_index.as_ref())
1098                    .and_then(to_internal)
1099            })
1100    }
1101
1102    pub fn get_internal_hnsw_config_with_legacy_fallback(
1103        &self,
1104        segment: &Segment,
1105    ) -> Result<Option<InternalHnswConfiguration>, HnswParametersFromSegmentError> {
1106        if let Some(config) = self.get_internal_hnsw_config() {
1107            let config_from_metadata =
1108                InternalHnswConfiguration::from_legacy_segment_metadata(&segment.metadata)?;
1109
1110            if config == InternalHnswConfiguration::default() && config != config_from_metadata {
1111                return Ok(Some(config_from_metadata));
1112            }
1113
1114            return Ok(Some(config));
1115        }
1116
1117        Ok(None)
1118    }
1119
1120    /// Reconcile user-provided schema with system defaults
1121    ///
1122    /// This method merges user configurations with system defaults, ensuring that:
1123    /// - User overrides take precedence over defaults
1124    /// - Missing user configurations fall back to system defaults
1125    /// - Field-level merging for complex configurations (Vector, HNSW, SPANN, etc.)
1126    pub fn reconcile_with_defaults(
1127        user_schema: Option<&Schema>,
1128        knn_index: KnnIndex,
1129    ) -> Result<Self, SchemaError> {
1130        let default_schema = Schema::new_default(knn_index);
1131
1132        match user_schema {
1133            Some(user) => {
1134                // Merge defaults with user overrides
1135                let merged_defaults =
1136                    Self::merge_value_types(&default_schema.defaults, &user.defaults, knn_index)?;
1137
1138                // Merge key overrides
1139                let mut merged_keys = default_schema.keys.clone();
1140                for (key, user_value_types) in &user.keys {
1141                    if let Some(default_value_types) = merged_keys.get(key) {
1142                        // Merge with existing default key override
1143                        let merged_value_types = Self::merge_value_types(
1144                            default_value_types,
1145                            user_value_types,
1146                            knn_index,
1147                        )?;
1148                        merged_keys.insert(key.clone(), merged_value_types);
1149                    } else {
1150                        // New key override from user
1151                        merged_keys.insert(key.clone(), user_value_types.clone());
1152                    }
1153                }
1154
1155                Ok(Schema {
1156                    defaults: merged_defaults,
1157                    keys: merged_keys,
1158                    cmek: user.cmek.clone().or(default_schema.cmek.clone()),
1159                    source_attached_function_id: user
1160                        .source_attached_function_id
1161                        .clone()
1162                        .or(default_schema.source_attached_function_id.clone()),
1163                })
1164            }
1165            None => Ok(default_schema),
1166        }
1167    }
1168
1169    /// Merge two schemas together, combining key overrides when possible.
1170    pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
1171        if self.defaults != other.defaults {
1172            return Err(SchemaError::DefaultsMismatch);
1173        }
1174
1175        let mut keys = self.keys.clone();
1176
1177        for (key, other_value_types) in &other.keys {
1178            if let Some(existing) = keys.get(key).cloned() {
1179                let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
1180                keys.insert(key.clone(), merged);
1181            } else {
1182                keys.insert(key.clone(), other_value_types.clone());
1183            }
1184        }
1185
1186        Ok(Schema {
1187            defaults: self.defaults.clone(),
1188            keys,
1189            cmek: other.cmek.clone().or(self.cmek.clone()),
1190            source_attached_function_id: other
1191                .source_attached_function_id
1192                .clone()
1193                .or(self.source_attached_function_id.clone()),
1194        })
1195    }
1196
1197    fn merge_override_value_types(
1198        key: &str,
1199        left: &ValueTypes,
1200        right: &ValueTypes,
1201    ) -> Result<ValueTypes, SchemaError> {
1202        Ok(ValueTypes {
1203            string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
1204            float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
1205            int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
1206            boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
1207            float_list: Self::merge_float_list_override(
1208                key,
1209                left.float_list.as_ref(),
1210                right.float_list.as_ref(),
1211            )?,
1212            sparse_vector: Self::merge_sparse_vector_override(
1213                key,
1214                left.sparse_vector.as_ref(),
1215                right.sparse_vector.as_ref(),
1216            )?,
1217        })
1218    }
1219
1220    fn merge_string_override(
1221        key: &str,
1222        left: Option<&StringValueType>,
1223        right: Option<&StringValueType>,
1224    ) -> Result<Option<StringValueType>, SchemaError> {
1225        match (left, right) {
1226            (Some(l), Some(r)) => Ok(Some(StringValueType {
1227                string_inverted_index: Self::merge_index_or_error(
1228                    l.string_inverted_index.as_ref(),
1229                    r.string_inverted_index.as_ref(),
1230                    &format!("key '{key}' string.string_inverted_index"),
1231                )?,
1232                fts_index: Self::merge_index_or_error(
1233                    l.fts_index.as_ref(),
1234                    r.fts_index.as_ref(),
1235                    &format!("key '{key}' string.fts_index"),
1236                )?,
1237            })),
1238            (Some(l), None) => Ok(Some(l.clone())),
1239            (None, Some(r)) => Ok(Some(r.clone())),
1240            (None, None) => Ok(None),
1241        }
1242    }
1243
1244    fn merge_float_override(
1245        key: &str,
1246        left: Option<&FloatValueType>,
1247        right: Option<&FloatValueType>,
1248    ) -> Result<Option<FloatValueType>, SchemaError> {
1249        match (left, right) {
1250            (Some(l), Some(r)) => Ok(Some(FloatValueType {
1251                float_inverted_index: Self::merge_index_or_error(
1252                    l.float_inverted_index.as_ref(),
1253                    r.float_inverted_index.as_ref(),
1254                    &format!("key '{key}' float.float_inverted_index"),
1255                )?,
1256            })),
1257            (Some(l), None) => Ok(Some(l.clone())),
1258            (None, Some(r)) => Ok(Some(r.clone())),
1259            (None, None) => Ok(None),
1260        }
1261    }
1262
1263    fn merge_int_override(
1264        key: &str,
1265        left: Option<&IntValueType>,
1266        right: Option<&IntValueType>,
1267    ) -> Result<Option<IntValueType>, SchemaError> {
1268        match (left, right) {
1269            (Some(l), Some(r)) => Ok(Some(IntValueType {
1270                int_inverted_index: Self::merge_index_or_error(
1271                    l.int_inverted_index.as_ref(),
1272                    r.int_inverted_index.as_ref(),
1273                    &format!("key '{key}' int.int_inverted_index"),
1274                )?,
1275            })),
1276            (Some(l), None) => Ok(Some(l.clone())),
1277            (None, Some(r)) => Ok(Some(r.clone())),
1278            (None, None) => Ok(None),
1279        }
1280    }
1281
1282    fn merge_bool_override(
1283        key: &str,
1284        left: Option<&BoolValueType>,
1285        right: Option<&BoolValueType>,
1286    ) -> Result<Option<BoolValueType>, SchemaError> {
1287        match (left, right) {
1288            (Some(l), Some(r)) => Ok(Some(BoolValueType {
1289                bool_inverted_index: Self::merge_index_or_error(
1290                    l.bool_inverted_index.as_ref(),
1291                    r.bool_inverted_index.as_ref(),
1292                    &format!("key '{key}' bool.bool_inverted_index"),
1293                )?,
1294            })),
1295            (Some(l), None) => Ok(Some(l.clone())),
1296            (None, Some(r)) => Ok(Some(r.clone())),
1297            (None, None) => Ok(None),
1298        }
1299    }
1300
1301    fn merge_float_list_override(
1302        key: &str,
1303        left: Option<&FloatListValueType>,
1304        right: Option<&FloatListValueType>,
1305    ) -> Result<Option<FloatListValueType>, SchemaError> {
1306        match (left, right) {
1307            (Some(l), Some(r)) => Ok(Some(FloatListValueType {
1308                vector_index: Self::merge_index_or_error(
1309                    l.vector_index.as_ref(),
1310                    r.vector_index.as_ref(),
1311                    &format!("key '{key}' float_list.vector_index"),
1312                )?,
1313            })),
1314            (Some(l), None) => Ok(Some(l.clone())),
1315            (None, Some(r)) => Ok(Some(r.clone())),
1316            (None, None) => Ok(None),
1317        }
1318    }
1319
1320    fn merge_sparse_vector_override(
1321        key: &str,
1322        left: Option<&SparseVectorValueType>,
1323        right: Option<&SparseVectorValueType>,
1324    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1325        match (left, right) {
1326            (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
1327                sparse_vector_index: Self::merge_index_or_error(
1328                    l.sparse_vector_index.as_ref(),
1329                    r.sparse_vector_index.as_ref(),
1330                    &format!("key '{key}' sparse_vector.sparse_vector_index"),
1331                )?,
1332            })),
1333            (Some(l), None) => Ok(Some(l.clone())),
1334            (None, Some(r)) => Ok(Some(r.clone())),
1335            (None, None) => Ok(None),
1336        }
1337    }
1338
1339    fn merge_index_or_error<T: Clone + PartialEq>(
1340        left: Option<&T>,
1341        right: Option<&T>,
1342        context: &str,
1343    ) -> Result<Option<T>, SchemaError> {
1344        match (left, right) {
1345            (Some(l), Some(r)) => {
1346                if l == r {
1347                    Ok(Some(l.clone()))
1348                } else {
1349                    Err(SchemaError::ConfigurationConflict {
1350                        context: context.to_string(),
1351                    })
1352                }
1353            }
1354            (Some(l), None) => Ok(Some(l.clone())),
1355            (None, Some(r)) => Ok(Some(r.clone())),
1356            (None, None) => Ok(None),
1357        }
1358    }
1359
1360    /// Merge two ValueTypes with field-level merging
1361    /// User values take precedence over default values
1362    fn merge_value_types(
1363        default: &ValueTypes,
1364        user: &ValueTypes,
1365        knn_index: KnnIndex,
1366    ) -> Result<ValueTypes, SchemaError> {
1367        // Merge float_list first
1368        let float_list = Self::merge_float_list_type(
1369            default.float_list.as_ref(),
1370            user.float_list.as_ref(),
1371            knn_index,
1372        )?;
1373
1374        // Validate the merged float_list (covers all merge cases)
1375        if let Some(ref fl) = float_list {
1376            Self::validate_float_list_value_type(fl)?;
1377        }
1378
1379        Ok(ValueTypes {
1380            string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
1381            float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
1382            int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
1383            boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
1384            float_list,
1385            sparse_vector: Self::merge_sparse_vector_type(
1386                default.sparse_vector.as_ref(),
1387                user.sparse_vector.as_ref(),
1388            )?,
1389        })
1390    }
1391
1392    /// Merge StringValueType configurations
1393    fn merge_string_type(
1394        default: Option<&StringValueType>,
1395        user: Option<&StringValueType>,
1396    ) -> Result<Option<StringValueType>, SchemaError> {
1397        match (default, user) {
1398            (Some(default), Some(user)) => Ok(Some(StringValueType {
1399                string_inverted_index: Self::merge_string_inverted_index_type(
1400                    default.string_inverted_index.as_ref(),
1401                    user.string_inverted_index.as_ref(),
1402                )?,
1403                fts_index: Self::merge_fts_index_type(
1404                    default.fts_index.as_ref(),
1405                    user.fts_index.as_ref(),
1406                )?,
1407            })),
1408            (Some(default), None) => Ok(Some(default.clone())),
1409            (None, Some(user)) => Ok(Some(user.clone())),
1410            (None, None) => Ok(None),
1411        }
1412    }
1413
1414    /// Merge FloatValueType configurations
1415    fn merge_float_type(
1416        default: Option<&FloatValueType>,
1417        user: Option<&FloatValueType>,
1418    ) -> Result<Option<FloatValueType>, SchemaError> {
1419        match (default, user) {
1420            (Some(default), Some(user)) => Ok(Some(FloatValueType {
1421                float_inverted_index: Self::merge_float_inverted_index_type(
1422                    default.float_inverted_index.as_ref(),
1423                    user.float_inverted_index.as_ref(),
1424                )?,
1425            })),
1426            (Some(default), None) => Ok(Some(default.clone())),
1427            (None, Some(user)) => Ok(Some(user.clone())),
1428            (None, None) => Ok(None),
1429        }
1430    }
1431
1432    /// Merge IntValueType configurations
1433    fn merge_int_type(
1434        default: Option<&IntValueType>,
1435        user: Option<&IntValueType>,
1436    ) -> Result<Option<IntValueType>, SchemaError> {
1437        match (default, user) {
1438            (Some(default), Some(user)) => Ok(Some(IntValueType {
1439                int_inverted_index: Self::merge_int_inverted_index_type(
1440                    default.int_inverted_index.as_ref(),
1441                    user.int_inverted_index.as_ref(),
1442                )?,
1443            })),
1444            (Some(default), None) => Ok(Some(default.clone())),
1445            (None, Some(user)) => Ok(Some(user.clone())),
1446            (None, None) => Ok(None),
1447        }
1448    }
1449
1450    /// Merge BoolValueType configurations
1451    fn merge_bool_type(
1452        default: Option<&BoolValueType>,
1453        user: Option<&BoolValueType>,
1454    ) -> Result<Option<BoolValueType>, SchemaError> {
1455        match (default, user) {
1456            (Some(default), Some(user)) => Ok(Some(BoolValueType {
1457                bool_inverted_index: Self::merge_bool_inverted_index_type(
1458                    default.bool_inverted_index.as_ref(),
1459                    user.bool_inverted_index.as_ref(),
1460                )?,
1461            })),
1462            (Some(default), None) => Ok(Some(default.clone())),
1463            (None, Some(user)) => Ok(Some(user.clone())),
1464            (None, None) => Ok(None),
1465        }
1466    }
1467
1468    /// Merge FloatListValueType configurations
1469    fn merge_float_list_type(
1470        default: Option<&FloatListValueType>,
1471        user: Option<&FloatListValueType>,
1472        knn_index: KnnIndex,
1473    ) -> Result<Option<FloatListValueType>, SchemaError> {
1474        match (default, user) {
1475            (Some(default), Some(user)) => Ok(Some(FloatListValueType {
1476                vector_index: Self::merge_vector_index_type(
1477                    default.vector_index.as_ref(),
1478                    user.vector_index.as_ref(),
1479                    knn_index,
1480                )?,
1481            })),
1482            (Some(default), None) => Ok(Some(default.clone())),
1483            (None, Some(user)) => Ok(Some(user.clone())),
1484            (None, None) => Ok(None),
1485        }
1486    }
1487
1488    /// Merge SparseVectorValueType configurations
1489    fn merge_sparse_vector_type(
1490        default: Option<&SparseVectorValueType>,
1491        user: Option<&SparseVectorValueType>,
1492    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1493        match (default, user) {
1494            (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
1495                sparse_vector_index: Self::merge_sparse_vector_index_type(
1496                    default.sparse_vector_index.as_ref(),
1497                    user.sparse_vector_index.as_ref(),
1498                )?,
1499            })),
1500            (Some(default), None) => Ok(Some(default.clone())),
1501            (None, Some(user)) => Ok(Some(user.clone())),
1502            (None, None) => Ok(None),
1503        }
1504    }
1505
1506    /// Merge individual index type configurations
1507    fn merge_string_inverted_index_type(
1508        default: Option<&StringInvertedIndexType>,
1509        user: Option<&StringInvertedIndexType>,
1510    ) -> Result<Option<StringInvertedIndexType>, SchemaError> {
1511        match (default, user) {
1512            (Some(_default), Some(user)) => {
1513                Ok(Some(StringInvertedIndexType {
1514                    enabled: user.enabled,       // User enabled state takes precedence
1515                    config: user.config.clone(), // User config takes precedence
1516                }))
1517            }
1518            (Some(default), None) => Ok(Some(default.clone())),
1519            (None, Some(user)) => Ok(Some(user.clone())),
1520            (None, None) => Ok(None),
1521        }
1522    }
1523
1524    fn merge_fts_index_type(
1525        default: Option<&FtsIndexType>,
1526        user: Option<&FtsIndexType>,
1527    ) -> Result<Option<FtsIndexType>, SchemaError> {
1528        match (default, user) {
1529            (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
1530                enabled: user.enabled,
1531                config: user.config.clone(),
1532            })),
1533            (Some(default), None) => Ok(Some(default.clone())),
1534            (None, Some(user)) => Ok(Some(user.clone())),
1535            (None, None) => Ok(None),
1536        }
1537    }
1538
1539    fn merge_float_inverted_index_type(
1540        default: Option<&FloatInvertedIndexType>,
1541        user: Option<&FloatInvertedIndexType>,
1542    ) -> Result<Option<FloatInvertedIndexType>, SchemaError> {
1543        match (default, user) {
1544            (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
1545                enabled: user.enabled,
1546                config: user.config.clone(),
1547            })),
1548            (Some(default), None) => Ok(Some(default.clone())),
1549            (None, Some(user)) => Ok(Some(user.clone())),
1550            (None, None) => Ok(None),
1551        }
1552    }
1553
1554    fn merge_int_inverted_index_type(
1555        default: Option<&IntInvertedIndexType>,
1556        user: Option<&IntInvertedIndexType>,
1557    ) -> Result<Option<IntInvertedIndexType>, SchemaError> {
1558        match (default, user) {
1559            (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
1560                enabled: user.enabled,
1561                config: user.config.clone(),
1562            })),
1563            (Some(default), None) => Ok(Some(default.clone())),
1564            (None, Some(user)) => Ok(Some(user.clone())),
1565            (None, None) => Ok(None),
1566        }
1567    }
1568
1569    fn merge_bool_inverted_index_type(
1570        default: Option<&BoolInvertedIndexType>,
1571        user: Option<&BoolInvertedIndexType>,
1572    ) -> Result<Option<BoolInvertedIndexType>, SchemaError> {
1573        match (default, user) {
1574            (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
1575                enabled: user.enabled,
1576                config: user.config.clone(),
1577            })),
1578            (Some(default), None) => Ok(Some(default.clone())),
1579            (None, Some(user)) => Ok(Some(user.clone())),
1580            (None, None) => Ok(None),
1581        }
1582    }
1583
1584    fn merge_vector_index_type(
1585        default: Option<&VectorIndexType>,
1586        user: Option<&VectorIndexType>,
1587        knn_index: KnnIndex,
1588    ) -> Result<Option<VectorIndexType>, SchemaError> {
1589        match (default, user) {
1590            (Some(default), Some(user)) => Ok(Some(VectorIndexType {
1591                enabled: user.enabled,
1592                config: Self::merge_vector_index_config(&default.config, &user.config, knn_index)?,
1593            })),
1594            (Some(default), None) => Ok(Some(default.clone())),
1595            (None, Some(user)) => Ok(Some(user.clone())),
1596            (None, None) => Ok(None),
1597        }
1598    }
1599
1600    fn merge_sparse_vector_index_type(
1601        default: Option<&SparseVectorIndexType>,
1602        user: Option<&SparseVectorIndexType>,
1603    ) -> Result<Option<SparseVectorIndexType>, SchemaError> {
1604        match (default, user) {
1605            (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
1606                enabled: user.enabled,
1607                config: Self::merge_sparse_vector_index_config(&default.config, &user.config),
1608            })),
1609            (Some(default), None) => Ok(Some(default.clone())),
1610            (None, Some(user)) => Ok(Some(user.clone())),
1611            (None, None) => Ok(None),
1612        }
1613    }
1614
1615    /// Validate FloatListValueType vector index configurations
1616    /// This validates HNSW and SPANN configs within the merged float_list
1617    fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), SchemaError> {
1618        if let Some(vector_index) = &float_list.vector_index {
1619            if let Some(hnsw) = &vector_index.config.hnsw {
1620                hnsw.validate().map_err(SchemaError::InvalidHnswConfig)?;
1621            }
1622            if let Some(spann) = &vector_index.config.spann {
1623                spann.validate().map_err(SchemaError::InvalidSpannConfig)?;
1624            }
1625        }
1626        Ok(())
1627    }
1628
1629    /// Merge VectorIndexConfig with field-level merging
1630    fn merge_vector_index_config(
1631        default: &VectorIndexConfig,
1632        user: &VectorIndexConfig,
1633        knn_index: KnnIndex,
1634    ) -> Result<VectorIndexConfig, SchemaError> {
1635        match knn_index {
1636            KnnIndex::Hnsw => Ok(VectorIndexConfig {
1637                space: user.space.clone().or(default.space.clone()),
1638                embedding_function: user
1639                    .embedding_function
1640                    .clone()
1641                    .or(default.embedding_function.clone()),
1642                source_key: user.source_key.clone().or(default.source_key.clone()),
1643                hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1644                spann: None,
1645            }),
1646            KnnIndex::Spann => Ok(VectorIndexConfig {
1647                space: user.space.clone().or(default.space.clone()),
1648                embedding_function: user
1649                    .embedding_function
1650                    .clone()
1651                    .or(default.embedding_function.clone()),
1652                source_key: user.source_key.clone().or(default.source_key.clone()),
1653                hnsw: None,
1654                spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref())?,
1655            }),
1656        }
1657    }
1658
1659    /// Merge SparseVectorIndexConfig with field-level merging
1660    fn merge_sparse_vector_index_config(
1661        default: &SparseVectorIndexConfig,
1662        user: &SparseVectorIndexConfig,
1663    ) -> SparseVectorIndexConfig {
1664        SparseVectorIndexConfig {
1665            embedding_function: user
1666                .embedding_function
1667                .clone()
1668                .or(default.embedding_function.clone()),
1669            source_key: user.source_key.clone().or(default.source_key.clone()),
1670            bm25: user.bm25.or(default.bm25),
1671        }
1672    }
1673
1674    /// Merge HNSW configurations with field-level merging
1675    fn merge_hnsw_configs(
1676        default_hnsw: Option<&HnswIndexConfig>,
1677        user_hnsw: Option<&HnswIndexConfig>,
1678    ) -> Option<HnswIndexConfig> {
1679        match (default_hnsw, user_hnsw) {
1680            (Some(default), Some(user)) => Some(HnswIndexConfig {
1681                ef_construction: user.ef_construction.or(default.ef_construction),
1682                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1683                ef_search: user.ef_search.or(default.ef_search),
1684                num_threads: user.num_threads.or(default.num_threads),
1685                batch_size: user.batch_size.or(default.batch_size),
1686                sync_threshold: user.sync_threshold.or(default.sync_threshold),
1687                resize_factor: user.resize_factor.or(default.resize_factor),
1688            }),
1689            (Some(default), None) => Some(default.clone()),
1690            (None, Some(user)) => Some(user.clone()),
1691            (None, None) => None,
1692        }
1693    }
1694
1695    /// Merge SPANN configurations with field-level merging
1696    fn merge_spann_configs(
1697        default_spann: Option<&SpannIndexConfig>,
1698        user_spann: Option<&SpannIndexConfig>,
1699    ) -> Result<Option<SpannIndexConfig>, SchemaError> {
1700        match (default_spann, user_spann) {
1701            (Some(default), Some(user)) => {
1702                // Validate that quantize is always None (should only be set programmatically by frontend)
1703                if !matches!(user.quantize, Quantization::None)
1704                    || !matches!(default.quantize, Quantization::None)
1705                {
1706                    return Err(SchemaError::InvalidUserInput {
1707                        reason: "quantize field cannot be set in user schema. Quantization can only be enabled via frontend configuration.".to_string(),
1708                    });
1709                }
1710                Ok(Some(SpannIndexConfig {
1711                    search_nprobe: user.search_nprobe.or(default.search_nprobe),
1712                    search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1713                    search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1714                    nreplica_count: user.nreplica_count.or(default.nreplica_count),
1715                    write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1716                    write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1717                    split_threshold: user.split_threshold.or(default.split_threshold),
1718                    num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1719                    initial_lambda: user.initial_lambda.or(default.initial_lambda),
1720                    reassign_neighbor_count: user
1721                        .reassign_neighbor_count
1722                        .or(default.reassign_neighbor_count),
1723                    merge_threshold: user.merge_threshold.or(default.merge_threshold),
1724                    num_centers_to_merge_to: user
1725                        .num_centers_to_merge_to
1726                        .or(default.num_centers_to_merge_to),
1727                    write_nprobe: user.write_nprobe.or(default.write_nprobe),
1728                    ef_construction: user.ef_construction.or(default.ef_construction),
1729                    ef_search: user.ef_search.or(default.ef_search),
1730                    max_neighbors: user.max_neighbors.or(default.max_neighbors),
1731                    center_drift_threshold: user
1732                        .center_drift_threshold
1733                        .or(default.center_drift_threshold),
1734                    quantize: Quantization::None, // Always None - quantization is set programmatically
1735                }))
1736            }
1737            (Some(default), None) => {
1738                // Validate default is also None
1739                if !matches!(default.quantize, Quantization::None) {
1740                    return Err(SchemaError::InvalidUserInput {
1741                        reason: "quantize field cannot be set in default schema. Quantization can only be enabled via frontend configuration.".to_string(),
1742                    });
1743                }
1744                Ok(Some(default.clone()))
1745            }
1746            (None, Some(user)) => {
1747                // Validate user is None
1748                if !matches!(user.quantize, Quantization::None) {
1749                    return Err(SchemaError::InvalidUserInput {
1750                        reason: "quantize field cannot be set in user schema. Quantization can only be enabled via frontend configuration.".to_string(),
1751                    });
1752                }
1753                Ok(Some(user.clone()))
1754            }
1755            (None, None) => Ok(None),
1756        }
1757    }
1758
1759    /// Reconcile Schema with InternalCollectionConfiguration
1760    ///
1761    /// Simple reconciliation logic:
1762    /// 1. If collection config is default → return schema (schema is source of truth)
1763    /// 2. If collection config is non-default and schema is default → override schema with collection config
1764    ///
1765    /// Note: The case where both are non-default is validated earlier in reconcile_schema_and_config
1766    pub fn reconcile_with_collection_config(
1767        schema: &Schema,
1768        collection_config: &InternalCollectionConfiguration,
1769        default_knn_index: KnnIndex,
1770    ) -> Result<Schema, SchemaError> {
1771        // 1. Check if collection config is default
1772        if collection_config.is_default() {
1773            if schema.is_default() {
1774                // if both are default, use the schema, and apply the ef from config if available
1775                // for both defaults and #embedding key
1776                let mut new_schema = Schema::new_default(default_knn_index);
1777
1778                if collection_config.embedding_function.is_some() {
1779                    if let Some(float_list) = &mut new_schema.defaults.float_list {
1780                        if let Some(vector_index) = &mut float_list.vector_index {
1781                            vector_index.config.embedding_function =
1782                                collection_config.embedding_function.clone();
1783                        }
1784                    }
1785                    if let Some(embedding_types) = new_schema.keys.get_mut(EMBEDDING_KEY) {
1786                        if let Some(float_list) = &mut embedding_types.float_list {
1787                            if let Some(vector_index) = &mut float_list.vector_index {
1788                                vector_index.config.embedding_function =
1789                                    collection_config.embedding_function.clone();
1790                            }
1791                        }
1792                    }
1793                }
1794                return Ok(new_schema);
1795            } else {
1796                // Collection config is default and schema is non-default → schema is source of truth
1797                return Ok(schema.clone());
1798            }
1799        }
1800
1801        // 2. Collection config is non-default, schema must be default (already validated earlier)
1802        // Convert collection config to schema
1803        Self::try_from(collection_config)
1804    }
1805
1806    pub fn reconcile_schema_and_config(
1807        schema: Option<&Schema>,
1808        configuration: Option<&InternalCollectionConfiguration>,
1809        knn_index: KnnIndex,
1810    ) -> Result<Schema, SchemaError> {
1811        // Early validation: check if both user-provided schema and config are non-default
1812        if let (Some(user_schema), Some(config)) = (schema, configuration) {
1813            if !user_schema.is_default() && !config.is_default() {
1814                return Err(SchemaError::ConfigAndSchemaConflict);
1815            }
1816        }
1817
1818        let reconciled_schema = Self::reconcile_with_defaults(schema, knn_index)?;
1819        if let Some(config) = configuration {
1820            Self::reconcile_with_collection_config(&reconciled_schema, config, knn_index)
1821        } else {
1822            Ok(reconciled_schema)
1823        }
1824    }
1825
1826    pub fn default_with_embedding_function(
1827        embedding_function: EmbeddingFunctionConfiguration,
1828    ) -> Schema {
1829        let mut schema = Schema::new_default(KnnIndex::Spann);
1830        if let Some(float_list) = &mut schema.defaults.float_list {
1831            if let Some(vector_index) = &mut float_list.vector_index {
1832                vector_index.config.embedding_function = Some(embedding_function.clone());
1833            }
1834        }
1835        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1836            if let Some(float_list) = &mut embedding_types.float_list {
1837                if let Some(vector_index) = &mut float_list.vector_index {
1838                    vector_index.config.embedding_function = Some(embedding_function);
1839                }
1840            }
1841        }
1842        schema
1843    }
1844
1845    /// Check if schema is default by checking each field individually
1846    pub fn is_default(&self) -> bool {
1847        // Check if defaults are default (field by field)
1848        if !Self::is_value_types_default(&self.defaults) {
1849            return false;
1850        }
1851
1852        for key in self.keys.keys() {
1853            if key != EMBEDDING_KEY && key != DOCUMENT_KEY {
1854                return false;
1855            }
1856        }
1857
1858        // Check #embedding key
1859        if let Some(embedding_value) = self.keys.get(EMBEDDING_KEY) {
1860            if !Self::is_embedding_value_types_default(embedding_value) {
1861                return false;
1862            }
1863        }
1864
1865        // Check #document key
1866        if let Some(document_value) = self.keys.get(DOCUMENT_KEY) {
1867            if !Self::is_document_value_types_default(document_value) {
1868                return false;
1869            }
1870        }
1871
1872        // Check CMEK is None (default)
1873        if self.cmek.is_some() {
1874            return false;
1875        }
1876
1877        true
1878    }
1879
1880    /// Check if ValueTypes (defaults) are in default state
1881    fn is_value_types_default(value_types: &ValueTypes) -> bool {
1882        // Check string field
1883        if let Some(string) = &value_types.string {
1884            if let Some(string_inverted) = &string.string_inverted_index {
1885                if !string_inverted.enabled {
1886                    return false;
1887                }
1888                // Config is an empty struct, so no need to check it
1889            }
1890            if let Some(fts) = &string.fts_index {
1891                if fts.enabled {
1892                    return false;
1893                }
1894                // Config is an empty struct, so no need to check it
1895            }
1896        }
1897
1898        // Check float field
1899        if let Some(float) = &value_types.float {
1900            if let Some(float_inverted) = &float.float_inverted_index {
1901                if !float_inverted.enabled {
1902                    return false;
1903                }
1904                // Config is an empty struct, so no need to check it
1905            }
1906        }
1907
1908        // Check int field
1909        if let Some(int) = &value_types.int {
1910            if let Some(int_inverted) = &int.int_inverted_index {
1911                if !int_inverted.enabled {
1912                    return false;
1913                }
1914                // Config is an empty struct, so no need to check it
1915            }
1916        }
1917
1918        // Check boolean field
1919        if let Some(boolean) = &value_types.boolean {
1920            if let Some(bool_inverted) = &boolean.bool_inverted_index {
1921                if !bool_inverted.enabled {
1922                    return false;
1923                }
1924                // Config is an empty struct, so no need to check it
1925            }
1926        }
1927
1928        // Check float_list field (vector index should be disabled)
1929        if let Some(float_list) = &value_types.float_list {
1930            if let Some(vector_index) = &float_list.vector_index {
1931                if vector_index.enabled {
1932                    return false;
1933                }
1934                if !is_embedding_function_default(&vector_index.config.embedding_function) {
1935                    return false;
1936                }
1937                if !is_space_default(&vector_index.config.space) {
1938                    return false;
1939                }
1940                // Check that the config has default structure
1941                if vector_index.config.source_key.is_some() {
1942                    return false;
1943                }
1944                // Check that either hnsw or spann config is present (not both, not neither)
1945                // and that the config values are default
1946                match (&vector_index.config.hnsw, &vector_index.config.spann) {
1947                    (Some(hnsw_config), None) => {
1948                        if !hnsw_config.is_default() {
1949                            return false;
1950                        }
1951                    }
1952                    (None, Some(spann_config)) => {
1953                        if !spann_config.is_default() {
1954                            return false;
1955                        }
1956                    }
1957                    (Some(_), Some(_)) => return false, // Both present
1958                    (None, None) => {}
1959                }
1960            }
1961        }
1962
1963        // Check sparse_vector field (should be disabled)
1964        if let Some(sparse_vector) = &value_types.sparse_vector {
1965            if let Some(sparse_index) = &sparse_vector.sparse_vector_index {
1966                if sparse_index.enabled {
1967                    return false;
1968                }
1969                // Check config structure
1970                if !is_embedding_function_default(&sparse_index.config.embedding_function) {
1971                    return false;
1972                }
1973                if sparse_index.config.source_key.is_some() {
1974                    return false;
1975                }
1976                if let Some(bm25) = &sparse_index.config.bm25 {
1977                    if bm25 != &false {
1978                        return false;
1979                    }
1980                }
1981            }
1982        }
1983
1984        true
1985    }
1986
1987    /// Check if ValueTypes for #embedding key are in default state
1988    fn is_embedding_value_types_default(value_types: &ValueTypes) -> bool {
1989        // For #embedding, only float_list should be set
1990        if value_types.string.is_some()
1991            || value_types.float.is_some()
1992            || value_types.int.is_some()
1993            || value_types.boolean.is_some()
1994            || value_types.sparse_vector.is_some()
1995        {
1996            return false;
1997        }
1998
1999        // Check float_list field (vector index should be enabled)
2000        if let Some(float_list) = &value_types.float_list {
2001            if let Some(vector_index) = &float_list.vector_index {
2002                if !vector_index.enabled {
2003                    return false;
2004                }
2005                if !is_space_default(&vector_index.config.space) {
2006                    return false;
2007                }
2008                // Check that embedding_function is default
2009                if !is_embedding_function_default(&vector_index.config.embedding_function) {
2010                    return false;
2011                }
2012                // Check that source_key is #document
2013                if vector_index.config.source_key.as_deref() != Some(DOCUMENT_KEY) {
2014                    return false;
2015                }
2016                // Check that either hnsw or spann config is present (not both, not neither)
2017                // and that the config values are default
2018                match (&vector_index.config.hnsw, &vector_index.config.spann) {
2019                    (Some(hnsw_config), None) => {
2020                        if !hnsw_config.is_default() {
2021                            return false;
2022                        }
2023                    }
2024                    (None, Some(spann_config)) => {
2025                        if !spann_config.is_default() {
2026                            return false;
2027                        }
2028                    }
2029                    (Some(_), Some(_)) => return false, // Both present
2030                    (None, None) => {}
2031                }
2032            }
2033        }
2034
2035        true
2036    }
2037
2038    /// Check if ValueTypes for #document key are in default state
2039    fn is_document_value_types_default(value_types: &ValueTypes) -> bool {
2040        // For #document, only string should be set
2041        if value_types.float_list.is_some()
2042            || value_types.float.is_some()
2043            || value_types.int.is_some()
2044            || value_types.boolean.is_some()
2045            || value_types.sparse_vector.is_some()
2046        {
2047            return false;
2048        }
2049
2050        // Check string field
2051        if let Some(string) = &value_types.string {
2052            if let Some(fts) = &string.fts_index {
2053                if !fts.enabled {
2054                    return false;
2055                }
2056                // Config is an empty struct, so no need to check it
2057            }
2058            if let Some(string_inverted) = &string.string_inverted_index {
2059                if string_inverted.enabled {
2060                    return false;
2061                }
2062                // Config is an empty struct, so no need to check it
2063            }
2064        }
2065
2066        true
2067    }
2068
2069    /// Check if a specific metadata key-value should be indexed based on schema configuration
2070    pub fn is_metadata_type_index_enabled(
2071        &self,
2072        key: &str,
2073        value_type: MetadataValueType,
2074    ) -> Result<bool, SchemaError> {
2075        let v_type = self.keys.get(key).unwrap_or(&self.defaults);
2076
2077        match value_type {
2078            MetadataValueType::Bool => match &v_type.boolean {
2079                Some(bool_type) => match &bool_type.bool_inverted_index {
2080                    Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
2081                    None => Err(SchemaError::MissingIndexConfiguration {
2082                        key: key.to_string(),
2083                        value_type: "bool".to_string(),
2084                    }),
2085                },
2086                None => match &self.defaults.boolean {
2087                    Some(bool_type) => match &bool_type.bool_inverted_index {
2088                        Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
2089                        None => Err(SchemaError::MissingIndexConfiguration {
2090                            key: key.to_string(),
2091                            value_type: "bool".to_string(),
2092                        }),
2093                    },
2094                    None => Err(SchemaError::MissingIndexConfiguration {
2095                        key: key.to_string(),
2096                        value_type: "bool".to_string(),
2097                    }),
2098                },
2099            },
2100            MetadataValueType::Int => match &v_type.int {
2101                Some(int_type) => match &int_type.int_inverted_index {
2102                    Some(int_inverted_index) => Ok(int_inverted_index.enabled),
2103                    None => Err(SchemaError::MissingIndexConfiguration {
2104                        key: key.to_string(),
2105                        value_type: "int".to_string(),
2106                    }),
2107                },
2108                None => match &self.defaults.int {
2109                    Some(int_type) => match &int_type.int_inverted_index {
2110                        Some(int_inverted_index) => Ok(int_inverted_index.enabled),
2111                        None => Err(SchemaError::MissingIndexConfiguration {
2112                            key: key.to_string(),
2113                            value_type: "int".to_string(),
2114                        }),
2115                    },
2116                    None => Err(SchemaError::MissingIndexConfiguration {
2117                        key: key.to_string(),
2118                        value_type: "int".to_string(),
2119                    }),
2120                },
2121            },
2122            MetadataValueType::Float => match &v_type.float {
2123                Some(float_type) => match &float_type.float_inverted_index {
2124                    Some(float_inverted_index) => Ok(float_inverted_index.enabled),
2125                    None => Err(SchemaError::MissingIndexConfiguration {
2126                        key: key.to_string(),
2127                        value_type: "float".to_string(),
2128                    }),
2129                },
2130                None => match &self.defaults.float {
2131                    Some(float_type) => match &float_type.float_inverted_index {
2132                        Some(float_inverted_index) => Ok(float_inverted_index.enabled),
2133                        None => Err(SchemaError::MissingIndexConfiguration {
2134                            key: key.to_string(),
2135                            value_type: "float".to_string(),
2136                        }),
2137                    },
2138                    None => Err(SchemaError::MissingIndexConfiguration {
2139                        key: key.to_string(),
2140                        value_type: "float".to_string(),
2141                    }),
2142                },
2143            },
2144            MetadataValueType::Str => match &v_type.string {
2145                Some(string_type) => match &string_type.string_inverted_index {
2146                    Some(string_inverted_index) => Ok(string_inverted_index.enabled),
2147                    None => Err(SchemaError::MissingIndexConfiguration {
2148                        key: key.to_string(),
2149                        value_type: "string".to_string(),
2150                    }),
2151                },
2152                None => match &self.defaults.string {
2153                    Some(string_type) => match &string_type.string_inverted_index {
2154                        Some(string_inverted_index) => Ok(string_inverted_index.enabled),
2155                        None => Err(SchemaError::MissingIndexConfiguration {
2156                            key: key.to_string(),
2157                            value_type: "string".to_string(),
2158                        }),
2159                    },
2160                    None => Err(SchemaError::MissingIndexConfiguration {
2161                        key: key.to_string(),
2162                        value_type: "string".to_string(),
2163                    }),
2164                },
2165            },
2166            MetadataValueType::SparseVector => match &v_type.sparse_vector {
2167                Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
2168                    Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
2169                    None => Err(SchemaError::MissingIndexConfiguration {
2170                        key: key.to_string(),
2171                        value_type: "sparse_vector".to_string(),
2172                    }),
2173                },
2174                None => match &self.defaults.sparse_vector {
2175                    Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
2176                        Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
2177                        None => Err(SchemaError::MissingIndexConfiguration {
2178                            key: key.to_string(),
2179                            value_type: "sparse_vector".to_string(),
2180                        }),
2181                    },
2182                    None => Err(SchemaError::MissingIndexConfiguration {
2183                        key: key.to_string(),
2184                        value_type: "sparse_vector".to_string(),
2185                    }),
2186                },
2187            },
2188            // Array types use the same indexes as their scalar counterparts
2189            MetadataValueType::BoolArray => {
2190                self.is_metadata_type_index_enabled(key, MetadataValueType::Bool)
2191            }
2192            MetadataValueType::IntArray => {
2193                self.is_metadata_type_index_enabled(key, MetadataValueType::Int)
2194            }
2195            MetadataValueType::FloatArray => {
2196                self.is_metadata_type_index_enabled(key, MetadataValueType::Float)
2197            }
2198            MetadataValueType::StringArray => {
2199                self.is_metadata_type_index_enabled(key, MetadataValueType::Str)
2200            }
2201        }
2202    }
2203
2204    pub fn is_metadata_where_indexing_enabled(
2205        &self,
2206        where_clause: &Where,
2207    ) -> Result<(), FilterValidationError> {
2208        match where_clause {
2209            Where::Composite(composite) => {
2210                for child in &composite.children {
2211                    self.is_metadata_where_indexing_enabled(child)?;
2212                }
2213                Ok(())
2214            }
2215            Where::Document(_) => {
2216                if !self.is_fts_enabled() {
2217                    return Err(FilterValidationError::FtsDisabled);
2218                }
2219                Ok(())
2220            }
2221            Where::Metadata(expression) => {
2222                let value_type = match &expression.comparison {
2223                    MetadataComparison::Primitive(_, value) => value.value_type(),
2224                    MetadataComparison::Set(_, set_value) => set_value.value_type(),
2225                    MetadataComparison::ArrayContains(_, value) => value.value_type(),
2226                };
2227                let is_enabled = self
2228                    .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
2229                    .map_err(FilterValidationError::Schema)?;
2230                if !is_enabled {
2231                    return Err(FilterValidationError::IndexingDisabled {
2232                        key: expression.key.clone(),
2233                        value_type,
2234                    });
2235                }
2236                Ok(())
2237            }
2238        }
2239    }
2240
2241    pub fn is_knn_key_indexing_enabled(
2242        &self,
2243        key: &str,
2244        query: &QueryVector,
2245    ) -> Result<(), FilterValidationError> {
2246        match query {
2247            QueryVector::Sparse(_) => {
2248                let is_enabled = self
2249                    .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
2250                    .map_err(FilterValidationError::Schema)?;
2251                if !is_enabled {
2252                    return Err(FilterValidationError::IndexingDisabled {
2253                        key: key.to_string(),
2254                        value_type: MetadataValueType::SparseVector,
2255                    });
2256                }
2257                Ok(())
2258            }
2259            QueryVector::Dense(_) => {
2260                // TODO: once we allow turning off dense vector indexing, we need to check if the key is enabled
2261                // Dense vectors are always indexed
2262                Ok(())
2263            }
2264        }
2265    }
2266
2267    pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
2268        if key.starts_with(CHROMA_KEY) {
2269            return false;
2270        }
2271        let value_types = self.keys.entry(key.to_string()).or_default();
2272        match value_type {
2273            MetadataValueType::Bool => {
2274                if value_types.boolean.is_none() {
2275                    value_types.boolean = self.defaults.boolean.clone();
2276                    return true;
2277                }
2278            }
2279            MetadataValueType::Int => {
2280                if value_types.int.is_none() {
2281                    value_types.int = self.defaults.int.clone();
2282                    return true;
2283                }
2284            }
2285            MetadataValueType::Float => {
2286                if value_types.float.is_none() {
2287                    value_types.float = self.defaults.float.clone();
2288                    return true;
2289                }
2290            }
2291            MetadataValueType::Str => {
2292                if value_types.string.is_none() {
2293                    value_types.string = self.defaults.string.clone();
2294                    return true;
2295                }
2296            }
2297            MetadataValueType::SparseVector => {
2298                if value_types.sparse_vector.is_none() {
2299                    value_types.sparse_vector = self.defaults.sparse_vector.clone();
2300                    return true;
2301                }
2302            }
2303            // Array types use the same indexes as their scalar counterparts
2304            MetadataValueType::BoolArray => {
2305                if value_types.boolean.is_none() {
2306                    value_types.boolean = self.defaults.boolean.clone();
2307                    return true;
2308                }
2309            }
2310            MetadataValueType::IntArray => {
2311                if value_types.int.is_none() {
2312                    value_types.int = self.defaults.int.clone();
2313                    return true;
2314                }
2315            }
2316            MetadataValueType::FloatArray => {
2317                if value_types.float.is_none() {
2318                    value_types.float = self.defaults.float.clone();
2319                    return true;
2320                }
2321            }
2322            MetadataValueType::StringArray => {
2323                if value_types.string.is_none() {
2324                    value_types.string = self.defaults.string.clone();
2325                    return true;
2326                }
2327            }
2328        }
2329        false
2330    }
2331
2332    // ========================================================================
2333    // BUILDER PATTERN METHODS
2334    // ========================================================================
2335
2336    /// Create an index configuration (builder pattern)
2337    ///
2338    /// This method allows fluent, chainable configuration of indexes on a schema.
2339    /// It matches the Python API's `.create_index()` method.
2340    ///
2341    /// # Arguments
2342    /// * `key` - Optional key name for per-key index. `None` applies to defaults/special keys
2343    /// * `config` - Index configuration to create
2344    ///
2345    /// # Returns
2346    /// `Self` for method chaining
2347    ///
2348    /// # Errors
2349    /// Returns error if:
2350    /// - Attempting to create index on special keys (`#document`, `#embedding`)
2351    /// - Invalid configuration (e.g., vector index on non-embedding key)
2352    /// - Conflicting with existing indexes (e.g., multiple sparse vector indexes)
2353    ///
2354    /// # Examples
2355    /// ```
2356    /// use chroma_types::{Schema, VectorIndexConfig, StringInvertedIndexConfig, Space, SchemaBuilderError};
2357    ///
2358    /// # fn main() -> Result<(), SchemaBuilderError> {
2359    /// let schema = Schema::default()
2360    ///     .create_index(None, VectorIndexConfig {
2361    ///         space: Some(Space::Cosine),
2362    ///         embedding_function: None,
2363    ///         source_key: None,
2364    ///         hnsw: None,
2365    ///         spann: None,
2366    ///     }.into())?
2367    ///     .create_index(Some("category"), StringInvertedIndexConfig {}.into())?;
2368    /// # Ok(())
2369    /// # }
2370    /// ```
2371    pub fn create_index(
2372        mut self,
2373        key: Option<&str>,
2374        config: IndexConfig,
2375    ) -> Result<Self, SchemaBuilderError> {
2376        // 1. Handle special index types: Vector, FTS, SparseVector
2377        match &config {
2378            IndexConfig::Vector(cfg) => {
2379                // Vector is global only (no key allowed)
2380                if let Some(k) = key {
2381                    return Err(SchemaBuilderError::VectorIndexMustBeGlobal { key: k.to_string() });
2382                }
2383                self._set_vector_index_config_builder(cfg.clone());
2384                return Ok(self);
2385            }
2386            IndexConfig::Fts(_) => {
2387                // FTS is only allowed on #document key
2388                if key != Some(DOCUMENT_KEY) {
2389                    return Err(SchemaBuilderError::FtsIndexOnlyOnDocument);
2390                }
2391                // Falls through to dispatch
2392            }
2393            IndexConfig::SparseVector(_) => {
2394                // SparseVector requires a specific key
2395                if key.is_none() {
2396                    return Err(SchemaBuilderError::SparseVectorRequiresKey);
2397                }
2398                // Falls through to dispatch
2399            }
2400            _ => {}
2401        }
2402
2403        // 2. Validate special keys
2404        if let Some(k) = key {
2405            if k == EMBEDDING_KEY {
2406                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2407                    key: k.to_string(),
2408                });
2409            }
2410            if k == DOCUMENT_KEY && !matches!(config, IndexConfig::Fts(_)) {
2411                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2412                    key: k.to_string(),
2413                });
2414            }
2415            if k.starts_with('#') && k != DOCUMENT_KEY {
2416                return Err(SchemaBuilderError::ReservedKeyPrefix { key: k.to_string() });
2417            }
2418        }
2419
2420        // 3. Dispatch to appropriate helper
2421        match key {
2422            Some(k) => self._set_index_for_key_builder(k, config, true)?,
2423            None => self._set_index_in_defaults_builder(config, true)?,
2424        }
2425
2426        Ok(self)
2427    }
2428
2429    /// Delete/disable an index configuration (builder pattern)
2430    ///
2431    /// This method allows disabling indexes on a schema.
2432    /// It matches the Python API's `.delete_index()` method.
2433    ///
2434    /// # Arguments
2435    /// * `key` - Optional key name for per-key index. `None` applies to defaults
2436    /// * `config` - Index configuration to disable
2437    ///
2438    /// # Returns
2439    /// `Self` for method chaining
2440    ///
2441    /// # Errors
2442    /// Returns error if:
2443    /// - Attempting to delete index on special keys (`#document`, `#embedding`)
2444    /// - Attempting to delete vector, FTS, or sparse vector indexes (not currently supported)
2445    ///
2446    /// # Examples
2447    /// ```
2448    /// use chroma_types::{Schema, StringInvertedIndexConfig, SchemaBuilderError};
2449    ///
2450    /// # fn main() -> Result<(), SchemaBuilderError> {
2451    /// let schema = Schema::default()
2452    ///     .delete_index(Some("category"), StringInvertedIndexConfig {}.into())?;
2453    /// # Ok(())
2454    /// # }
2455    /// ```
2456    pub fn delete_index(
2457        mut self,
2458        key: Option<&str>,
2459        config: IndexConfig,
2460    ) -> Result<Self, SchemaBuilderError> {
2461        // 1. Handle special index types: Vector, FTS, SparseVector
2462        match &config {
2463            IndexConfig::Vector(_) => {
2464                // Vector deletion not supported
2465                return Err(SchemaBuilderError::VectorIndexDeletionNotSupported);
2466            }
2467            IndexConfig::Fts(_) => {
2468                // FTS deletion is only allowed on #document key
2469                if key != Some(DOCUMENT_KEY) {
2470                    return Err(SchemaBuilderError::FtsIndexDeletionOnlyOnDocument);
2471                }
2472                // Falls through to dispatch
2473            }
2474            IndexConfig::SparseVector(_) => {
2475                // SparseVector deletion not supported
2476                return Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported);
2477            }
2478            _ => {}
2479        }
2480
2481        // 2. Validate special keys
2482        if let Some(k) = key {
2483            if k == EMBEDDING_KEY {
2484                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2485                    key: k.to_string(),
2486                });
2487            }
2488            if k == DOCUMENT_KEY && !matches!(config, IndexConfig::Fts(_)) {
2489                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2490                    key: k.to_string(),
2491                });
2492            }
2493            if k.starts_with('#') && k != DOCUMENT_KEY {
2494                return Err(SchemaBuilderError::ReservedKeyPrefix { key: k.to_string() });
2495            }
2496        }
2497
2498        // 3. Dispatch to appropriate helper
2499        match key {
2500            Some(k) => self._set_index_for_key_builder(k, config, false)?,
2501            None => self._set_index_in_defaults_builder(config, false)?,
2502        }
2503
2504        Ok(self)
2505    }
2506
2507    /// Set customer-managed encryption key for the collection (builder pattern)
2508    ///
2509    /// This method allows setting CMEK on a schema for fluent, chainable configuration.
2510    ///
2511    /// # Arguments
2512    /// * `cmek` - Customer-managed encryption key configuration
2513    ///
2514    /// # Returns
2515    /// `Self` for method chaining
2516    ///
2517    /// # Examples
2518    /// ```
2519    /// use chroma_types::{Schema, Cmek};
2520    ///
2521    /// let schema = Schema::default()
2522    ///     .with_cmek(Cmek::gcp("projects/my-project/locations/us/keyRings/my-ring/cryptoKeys/my-key".to_string()));
2523    /// ```
2524    pub fn with_cmek(mut self, cmek: Cmek) -> Self {
2525        self.cmek = Some(cmek);
2526        self
2527    }
2528
2529    /// Set vector index config globally (applies to #embedding)
2530    fn _set_vector_index_config_builder(&mut self, config: VectorIndexConfig) {
2531        // Update defaults (disabled, just config update)
2532        if let Some(float_list) = &mut self.defaults.float_list {
2533            if let Some(vector_index) = &mut float_list.vector_index {
2534                vector_index.config = config.clone();
2535            }
2536        }
2537
2538        // Update #embedding key (enabled, config update, preserve source_key=#document)
2539        if let Some(embedding_types) = self.keys.get_mut(EMBEDDING_KEY) {
2540            if let Some(float_list) = &mut embedding_types.float_list {
2541                if let Some(vector_index) = &mut float_list.vector_index {
2542                    let mut updated_config = config;
2543                    // Preserve source_key as #document
2544                    updated_config.source_key = Some(DOCUMENT_KEY.to_string());
2545                    vector_index.config = updated_config;
2546                }
2547            }
2548        }
2549    }
2550
2551    /// Set FTS index config globally (applies to #document)
2552    fn _set_fts_index_config_builder(&mut self, config: FtsIndexConfig) {
2553        // Update defaults (disabled, just config update)
2554        if let Some(string) = &mut self.defaults.string {
2555            if let Some(fts_index) = &mut string.fts_index {
2556                fts_index.config = config.clone();
2557            }
2558        }
2559
2560        // Update #document key (enabled, config update)
2561        if let Some(document_types) = self.keys.get_mut(DOCUMENT_KEY) {
2562            if let Some(string) = &mut document_types.string {
2563                if let Some(fts_index) = &mut string.fts_index {
2564                    fts_index.config = config;
2565                }
2566            }
2567        }
2568    }
2569
2570    /// Set index configuration for a specific key
2571    fn _set_index_for_key_builder(
2572        &mut self,
2573        key: &str,
2574        config: IndexConfig,
2575        enabled: bool,
2576    ) -> Result<(), SchemaBuilderError> {
2577        // Check for multiple sparse vector indexes BEFORE getting mutable reference
2578        if enabled && matches!(config, IndexConfig::SparseVector(_)) {
2579            // Find existing sparse vector index
2580            let existing_key = self
2581                .keys
2582                .iter()
2583                .find(|(k, v)| {
2584                    k.as_str() != key
2585                        && v.sparse_vector
2586                            .as_ref()
2587                            .and_then(|sv| sv.sparse_vector_index.as_ref())
2588                            .map(|idx| idx.enabled)
2589                            .unwrap_or(false)
2590                })
2591                .map(|(k, _)| k.clone());
2592
2593            if let Some(existing_key) = existing_key {
2594                return Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key });
2595            }
2596        }
2597
2598        // Get or create ValueTypes for this key
2599        let value_types = self.keys.entry(key.to_string()).or_default();
2600
2601        // Set the appropriate index based on config type
2602        match config {
2603            IndexConfig::Vector(_) => {
2604                return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2605                    key: key.to_string(),
2606                });
2607            }
2608            IndexConfig::Fts(cfg) => {
2609                // FTS is validated in create_index/delete_index to only allow #document
2610                if let Some(string) = value_types.string.as_mut() {
2611                    if let Some(fts_index) = string.fts_index.as_mut() {
2612                        fts_index.enabled = enabled;
2613                        fts_index.config = cfg;
2614                    }
2615                }
2616            }
2617            IndexConfig::SparseVector(cfg) => {
2618                value_types.sparse_vector = Some(SparseVectorValueType {
2619                    sparse_vector_index: Some(SparseVectorIndexType {
2620                        enabled,
2621                        config: cfg,
2622                    }),
2623                });
2624            }
2625            IndexConfig::StringInverted(cfg) => {
2626                if value_types.string.is_none() {
2627                    value_types.string = Some(StringValueType {
2628                        fts_index: None,
2629                        string_inverted_index: None,
2630                    });
2631                }
2632                if let Some(string) = &mut value_types.string {
2633                    string.string_inverted_index = Some(StringInvertedIndexType {
2634                        enabled,
2635                        config: cfg,
2636                    });
2637                }
2638            }
2639            IndexConfig::IntInverted(cfg) => {
2640                value_types.int = Some(IntValueType {
2641                    int_inverted_index: Some(IntInvertedIndexType {
2642                        enabled,
2643                        config: cfg,
2644                    }),
2645                });
2646            }
2647            IndexConfig::FloatInverted(cfg) => {
2648                value_types.float = Some(FloatValueType {
2649                    float_inverted_index: Some(FloatInvertedIndexType {
2650                        enabled,
2651                        config: cfg,
2652                    }),
2653                });
2654            }
2655            IndexConfig::BoolInverted(cfg) => {
2656                value_types.boolean = Some(BoolValueType {
2657                    bool_inverted_index: Some(BoolInvertedIndexType {
2658                        enabled,
2659                        config: cfg,
2660                    }),
2661                });
2662            }
2663        }
2664
2665        Ok(())
2666    }
2667
2668    /// Set index configuration in defaults
2669    fn _set_index_in_defaults_builder(
2670        &mut self,
2671        config: IndexConfig,
2672        enabled: bool,
2673    ) -> Result<(), SchemaBuilderError> {
2674        match config {
2675            IndexConfig::Vector(_) => {
2676                return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2677                    key: "defaults".to_string(),
2678                });
2679            }
2680            IndexConfig::Fts(_) => {
2681                // FTS is only allowed on #document, not globally
2682                return Err(SchemaBuilderError::FtsIndexOnlyOnDocument);
2683            }
2684            IndexConfig::SparseVector(cfg) => {
2685                self.defaults.sparse_vector = Some(SparseVectorValueType {
2686                    sparse_vector_index: Some(SparseVectorIndexType {
2687                        enabled,
2688                        config: cfg,
2689                    }),
2690                });
2691            }
2692            IndexConfig::StringInverted(cfg) => {
2693                if self.defaults.string.is_none() {
2694                    self.defaults.string = Some(StringValueType {
2695                        fts_index: None,
2696                        string_inverted_index: None,
2697                    });
2698                }
2699                if let Some(string) = &mut self.defaults.string {
2700                    string.string_inverted_index = Some(StringInvertedIndexType {
2701                        enabled,
2702                        config: cfg,
2703                    });
2704                }
2705            }
2706            IndexConfig::IntInverted(cfg) => {
2707                self.defaults.int = Some(IntValueType {
2708                    int_inverted_index: Some(IntInvertedIndexType {
2709                        enabled,
2710                        config: cfg,
2711                    }),
2712                });
2713            }
2714            IndexConfig::FloatInverted(cfg) => {
2715                self.defaults.float = Some(FloatValueType {
2716                    float_inverted_index: Some(FloatInvertedIndexType {
2717                        enabled,
2718                        config: cfg,
2719                    }),
2720                });
2721            }
2722            IndexConfig::BoolInverted(cfg) => {
2723                self.defaults.boolean = Some(BoolValueType {
2724                    bool_inverted_index: Some(BoolInvertedIndexType {
2725                        enabled,
2726                        config: cfg,
2727                    }),
2728                });
2729            }
2730        }
2731
2732        Ok(())
2733    }
2734}
2735
2736// ============================================================================
2737// INDEX CONFIGURATION STRUCTURES
2738// ============================================================================
2739
/// Configuration for a vector index.
///
/// Every field is optional and is left out of the serialized form when `None`.
/// Unknown fields are rejected during deserialization.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct VectorIndexConfig {
    /// Vector space for similarity calculation (cosine, l2, ip)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub space: Option<Space>,
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// HNSW algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw: Option<HnswIndexConfig>,
    /// SPANN algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub spann: Option<SpannIndexConfig>,
}
2760
/// Configuration for HNSW vector index algorithm parameters
///
/// Every field is optional and is left out of the serialized form when `None`.
/// Unknown fields are rejected during deserialization. The `Default` value has
/// every field unset.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct HnswIndexConfig {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_construction: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_neighbors: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_search: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_threads: Option<usize>,
    /// Must be at least 2 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub batch_size: Option<usize>,
    /// Must be at least 2 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub sync_threshold: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resize_factor: Option<f64>,
}
2783
2784impl HnswIndexConfig {
2785    /// Check if this config has default values
2786    /// None values are considered default (not set by user)
2787    /// Note: We skip num_threads as it's variable based on available_parallelism
2788    pub fn is_default(&self) -> bool {
2789        if let Some(ef_construction) = self.ef_construction {
2790            if ef_construction != default_construction_ef() {
2791                return false;
2792            }
2793        }
2794        if let Some(max_neighbors) = self.max_neighbors {
2795            if max_neighbors != default_m() {
2796                return false;
2797            }
2798        }
2799        if let Some(ef_search) = self.ef_search {
2800            if ef_search != default_search_ef() {
2801                return false;
2802            }
2803        }
2804        if let Some(batch_size) = self.batch_size {
2805            if batch_size != default_batch_size() {
2806                return false;
2807            }
2808        }
2809        if let Some(sync_threshold) = self.sync_threshold {
2810            if sync_threshold != default_sync_threshold() {
2811                return false;
2812            }
2813        }
2814        if let Some(resize_factor) = self.resize_factor {
2815            if resize_factor != default_resize_factor() {
2816                return false;
2817            }
2818        }
2819        // Skip num_threads check as it's system-dependent
2820        true
2821    }
2822}
2823
/// Quantization implementation for SPANN vector index.
///
/// Variant names are serialized in snake_case (`none`,
/// `four_bit_rabit_q_with_u_search`).
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(rename_all = "snake_case")]
pub enum Quantization {
    /// No quantization; this is the `Default` variant.
    #[default]
    None,
    FourBitRabitQWithUSearch,
}
2833
2834fn is_default_quantization(v: &Quantization) -> bool {
2835    matches!(v, Quantization::None)
2836}
2837
/// Configuration for SPANN vector index algorithm parameters
///
/// Every `Option` field is left out of the serialized form when `None`, and
/// unknown fields are rejected during deserialization. The ranges below are
/// enforced by `Validate` and are inclusive on both ends.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SpannIndexConfig {
    /// Must be at most 128 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 128))]
    pub search_nprobe: Option<u32>,
    /// Must be exactly 1.0 when set (min and max coincide).
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub search_rng_factor: Option<f32>,
    /// Must be between 5.0 and 10.0 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub search_rng_epsilon: Option<f32>,
    /// Must be at most 8 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub nreplica_count: Option<u32>,
    /// Must be exactly 1.0 when set (min and max coincide).
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub write_rng_factor: Option<f32>,
    /// Must be between 5.0 and 10.0 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub write_rng_epsilon: Option<f32>,
    /// Must be between 50 and 200 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 50, max = 200))]
    pub split_threshold: Option<u32>,
    /// Must be at most 1000 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 1000))]
    pub num_samples_kmeans: Option<usize>,
    /// Must be exactly 100.0 when set (min and max coincide).
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 100.0, max = 100.0))]
    pub initial_lambda: Option<f32>,
    /// Must be at most 64 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub reassign_neighbor_count: Option<u32>,
    /// Must be between 25 and 100 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 25, max = 100))]
    pub merge_threshold: Option<u32>,
    /// Must be at most 8 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub num_centers_to_merge_to: Option<u32>,
    /// Must be at most 64 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub write_nprobe: Option<u32>,
    /// Must be at most 200 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_construction: Option<usize>,
    /// Must be at most 200 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_search: Option<usize>,
    /// Must be at most 64 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub max_neighbors: Option<usize>,
    /// Must be between 0.1 and 1.0 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 0.1, max = 1.0))]
    pub center_drift_threshold: Option<f32>,
    /// Quantization implementation for vector search (cloud-only feature)
    ///
    /// Defaults to `Quantization::None` when absent from the input and is left
    /// out of the serialized form while it holds that value.
    #[serde(default, skip_serializing_if = "is_default_quantization")]
    pub quantize: Quantization,
}
2898
2899impl SpannIndexConfig {
2900    /// Check if this config has default values
2901    /// None values are considered default (not set by user)
2902    pub fn is_default(&self) -> bool {
2903        if let Some(search_nprobe) = self.search_nprobe {
2904            if search_nprobe != default_search_nprobe() {
2905                return false;
2906            }
2907        }
2908        if let Some(search_rng_factor) = self.search_rng_factor {
2909            if search_rng_factor != default_search_rng_factor() {
2910                return false;
2911            }
2912        }
2913        if let Some(search_rng_epsilon) = self.search_rng_epsilon {
2914            if search_rng_epsilon != default_search_rng_epsilon() {
2915                return false;
2916            }
2917        }
2918        if let Some(nreplica_count) = self.nreplica_count {
2919            if nreplica_count != default_nreplica_count() {
2920                return false;
2921            }
2922        }
2923        if let Some(write_rng_factor) = self.write_rng_factor {
2924            if write_rng_factor != default_write_rng_factor() {
2925                return false;
2926            }
2927        }
2928        if let Some(write_rng_epsilon) = self.write_rng_epsilon {
2929            if write_rng_epsilon != default_write_rng_epsilon() {
2930                return false;
2931            }
2932        }
2933        if let Some(split_threshold) = self.split_threshold {
2934            if split_threshold != default_split_threshold() {
2935                return false;
2936            }
2937        }
2938        if let Some(num_samples_kmeans) = self.num_samples_kmeans {
2939            if num_samples_kmeans != default_num_samples_kmeans() {
2940                return false;
2941            }
2942        }
2943        if let Some(initial_lambda) = self.initial_lambda {
2944            if initial_lambda != default_initial_lambda() {
2945                return false;
2946            }
2947        }
2948        if let Some(reassign_neighbor_count) = self.reassign_neighbor_count {
2949            if reassign_neighbor_count != default_reassign_neighbor_count() {
2950                return false;
2951            }
2952        }
2953        if let Some(merge_threshold) = self.merge_threshold {
2954            if merge_threshold != default_merge_threshold() {
2955                return false;
2956            }
2957        }
2958        if let Some(num_centers_to_merge_to) = self.num_centers_to_merge_to {
2959            if num_centers_to_merge_to != default_num_centers_to_merge_to() {
2960                return false;
2961            }
2962        }
2963        if let Some(write_nprobe) = self.write_nprobe {
2964            if write_nprobe != default_write_nprobe() {
2965                return false;
2966            }
2967        }
2968        if let Some(ef_construction) = self.ef_construction {
2969            if ef_construction != default_construction_ef_spann() {
2970                return false;
2971            }
2972        }
2973        if let Some(ef_search) = self.ef_search {
2974            if ef_search != default_search_ef_spann() {
2975                return false;
2976            }
2977        }
2978        if let Some(max_neighbors) = self.max_neighbors {
2979            if max_neighbors != default_m_spann() {
2980                return false;
2981            }
2982        }
2983        if let Some(center_drift_threshold) = self.center_drift_threshold {
2984            if center_drift_threshold != default_center_drift_threshold() {
2985                return false;
2986            }
2987        }
2988        if !matches!(self.quantize, Quantization::None) {
2989            return false;
2990        }
2991        true
2992    }
2993}
2994
2995#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2996#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2997#[serde(deny_unknown_fields)]
2998pub struct SparseVectorIndexConfig {
2999    /// Embedding function configuration
3000    #[serde(skip_serializing_if = "Option::is_none")]
3001    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
3002    /// Key to source the sparse vector from
3003    #[serde(skip_serializing_if = "Option::is_none")]
3004    pub source_key: Option<String>,
3005    /// Whether this embedding is BM25
3006    #[serde(skip_serializing_if = "Option::is_none")]
3007    pub bm25: Option<bool>,
3008}
3009
3010#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
3011#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
3012#[serde(deny_unknown_fields)]
3013pub struct FtsIndexConfig {
3014    // FTS index typically has no additional parameters
3015}
3016
3017#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
3018#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
3019#[serde(deny_unknown_fields)]
3020pub struct StringInvertedIndexConfig {
3021    // String inverted index typically has no additional parameters
3022}
3023
3024#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
3025#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
3026#[serde(deny_unknown_fields)]
3027pub struct IntInvertedIndexConfig {
3028    // Integer inverted index typically has no additional parameters
3029}
3030
3031#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
3032#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
3033#[serde(deny_unknown_fields)]
3034pub struct FloatInvertedIndexConfig {
3035    // Float inverted index typically has no additional parameters
3036}
3037
3038#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
3039#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
3040#[serde(deny_unknown_fields)]
3041pub struct BoolInvertedIndexConfig {
3042    // Boolean inverted index typically has no additional parameters
3043}
3044
3045// ============================================================================
3046// BUILDER PATTERN SUPPORT
3047// ============================================================================
3048
3049/// Union type for all index configurations (used by builder pattern)
3050#[derive(Clone, Debug)]
3051#[allow(clippy::large_enum_variant)]
3052pub enum IndexConfig {
3053    Vector(VectorIndexConfig),
3054    SparseVector(SparseVectorIndexConfig),
3055    Fts(FtsIndexConfig),
3056    StringInverted(StringInvertedIndexConfig),
3057    IntInverted(IntInvertedIndexConfig),
3058    FloatInverted(FloatInvertedIndexConfig),
3059    BoolInverted(BoolInvertedIndexConfig),
3060}
3061
3062// Convenience From implementations for ergonomic usage
3063impl From<VectorIndexConfig> for IndexConfig {
3064    fn from(config: VectorIndexConfig) -> Self {
3065        IndexConfig::Vector(config)
3066    }
3067}
3068
3069impl From<SparseVectorIndexConfig> for IndexConfig {
3070    fn from(config: SparseVectorIndexConfig) -> Self {
3071        IndexConfig::SparseVector(config)
3072    }
3073}
3074
3075impl From<FtsIndexConfig> for IndexConfig {
3076    fn from(config: FtsIndexConfig) -> Self {
3077        IndexConfig::Fts(config)
3078    }
3079}
3080
3081impl From<StringInvertedIndexConfig> for IndexConfig {
3082    fn from(config: StringInvertedIndexConfig) -> Self {
3083        IndexConfig::StringInverted(config)
3084    }
3085}
3086
3087impl From<IntInvertedIndexConfig> for IndexConfig {
3088    fn from(config: IntInvertedIndexConfig) -> Self {
3089        IndexConfig::IntInverted(config)
3090    }
3091}
3092
3093impl From<FloatInvertedIndexConfig> for IndexConfig {
3094    fn from(config: FloatInvertedIndexConfig) -> Self {
3095        IndexConfig::FloatInverted(config)
3096    }
3097}
3098
3099impl From<BoolInvertedIndexConfig> for IndexConfig {
3100    fn from(config: BoolInvertedIndexConfig) -> Self {
3101        IndexConfig::BoolInverted(config)
3102    }
3103}
3104
3105impl TryFrom<&InternalCollectionConfiguration> for Schema {
3106    type Error = SchemaError;
3107
3108    fn try_from(config: &InternalCollectionConfiguration) -> Result<Self, Self::Error> {
3109        // Start with a default schema structure
3110        let mut schema = match &config.vector_index {
3111            VectorIndexConfiguration::Hnsw(_) => Schema::new_default(KnnIndex::Hnsw),
3112            VectorIndexConfiguration::Spann(_) => Schema::new_default(KnnIndex::Spann),
3113        };
3114        // Convert vector index configuration
3115        let vector_config = match &config.vector_index {
3116            VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
3117                space: Some(hnsw_config.space.clone()),
3118                embedding_function: config.embedding_function.clone(),
3119                source_key: None,
3120                hnsw: Some(HnswIndexConfig {
3121                    ef_construction: Some(hnsw_config.ef_construction),
3122                    max_neighbors: Some(hnsw_config.max_neighbors),
3123                    ef_search: Some(hnsw_config.ef_search),
3124                    num_threads: Some(hnsw_config.num_threads),
3125                    batch_size: Some(hnsw_config.batch_size),
3126                    sync_threshold: Some(hnsw_config.sync_threshold),
3127                    resize_factor: Some(hnsw_config.resize_factor),
3128                }),
3129                spann: None,
3130            },
3131            VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
3132                space: Some(spann_config.space.clone()),
3133                embedding_function: config.embedding_function.clone(),
3134                source_key: None,
3135                hnsw: None,
3136                spann: Some(SpannIndexConfig {
3137                    search_nprobe: Some(spann_config.search_nprobe),
3138                    search_rng_factor: Some(spann_config.search_rng_factor),
3139                    search_rng_epsilon: Some(spann_config.search_rng_epsilon),
3140                    nreplica_count: Some(spann_config.nreplica_count),
3141                    write_rng_factor: Some(spann_config.write_rng_factor),
3142                    write_rng_epsilon: Some(spann_config.write_rng_epsilon),
3143                    split_threshold: Some(spann_config.split_threshold),
3144                    num_samples_kmeans: Some(spann_config.num_samples_kmeans),
3145                    initial_lambda: Some(spann_config.initial_lambda),
3146                    reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
3147                    merge_threshold: Some(spann_config.merge_threshold),
3148                    num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
3149                    write_nprobe: Some(spann_config.write_nprobe),
3150                    ef_construction: Some(spann_config.ef_construction),
3151                    ef_search: Some(spann_config.ef_search),
3152                    max_neighbors: Some(spann_config.max_neighbors),
3153                    center_drift_threshold: None,
3154                    quantize: Quantization::None,
3155                }),
3156            },
3157        };
3158
3159        // Update defaults (keep enabled=false, just update the config)
3160        // This serves as the template for any new float_list fields
3161        if let Some(float_list) = &mut schema.defaults.float_list {
3162            if let Some(vector_index) = &mut float_list.vector_index {
3163                vector_index.config = vector_config.clone();
3164            }
3165        }
3166
3167        // Update the vector_index in the existing #embedding key override
3168        // Keep enabled=true (already set by new_default) and update the config
3169        // Set source_key to DOCUMENT_KEY for the embedding key
3170        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
3171            if let Some(float_list) = &mut embedding_types.float_list {
3172                if let Some(vector_index) = &mut float_list.vector_index {
3173                    let mut vector_config = vector_config;
3174                    vector_config.source_key = Some(DOCUMENT_KEY.to_string());
3175                    vector_index.config = vector_config;
3176                }
3177            }
3178        }
3179
3180        Ok(schema)
3181    }
3182}
3183
3184#[cfg(test)]
3185mod tests {
3186    use super::*;
3187    use crate::hnsw_configuration::Space;
3188    use crate::metadata::SparseVector;
3189    use crate::{
3190        EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
3191    };
3192    use serde_json::json;
3193
3194    #[test]
3195    fn test_reconcile_with_defaults_none_user_schema() {
3196        // Test that when no user schema is provided, we get the default schema
3197        let result = Schema::reconcile_with_defaults(None, KnnIndex::Spann).unwrap();
3198        let expected = Schema::new_default(KnnIndex::Spann);
3199        assert_eq!(result, expected);
3200    }
3201
3202    #[test]
3203    fn test_reconcile_with_defaults_empty_user_schema() {
3204        // Test merging with an empty user schema
3205        let user_schema = Schema {
3206            defaults: ValueTypes::default(),
3207            keys: HashMap::new(),
3208            cmek: None,
3209            source_attached_function_id: None,
3210        };
3211
3212        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3213        let expected = Schema::new_default(KnnIndex::Spann);
3214        assert_eq!(result, expected);
3215    }
3216
3217    #[test]
3218    fn test_reconcile_with_defaults_user_overrides_string_enabled() {
3219        // Test that user can override string inverted index enabled state
3220        let mut user_schema = Schema {
3221            defaults: ValueTypes::default(),
3222            keys: HashMap::new(),
3223            cmek: None,
3224            source_attached_function_id: None,
3225        };
3226
3227        user_schema.defaults.string = Some(StringValueType {
3228            string_inverted_index: Some(StringInvertedIndexType {
3229                enabled: false, // Override default (true) to false
3230                config: StringInvertedIndexConfig {},
3231            }),
3232            fts_index: None,
3233        });
3234
3235        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3236
3237        // Check that the user override took precedence
3238        assert!(
3239            !result
3240                .defaults
3241                .string
3242                .as_ref()
3243                .unwrap()
3244                .string_inverted_index
3245                .as_ref()
3246                .unwrap()
3247                .enabled
3248        );
3249        // Check that other defaults are still present
3250        assert!(result.defaults.float.is_some());
3251        assert!(result.defaults.int.is_some());
3252    }
3253
3254    #[test]
3255    fn test_reconcile_with_defaults_user_overrides_vector_config() {
3256        // Test field-level merging for vector configurations
3257        let mut user_schema = Schema {
3258            defaults: ValueTypes::default(),
3259            keys: HashMap::new(),
3260            cmek: None,
3261            source_attached_function_id: None,
3262        };
3263
3264        user_schema.defaults.float_list = Some(FloatListValueType {
3265            vector_index: Some(VectorIndexType {
3266                enabled: true, // Enable vector index (default is false)
3267                config: VectorIndexConfig {
3268                    space: Some(Space::L2),                     // Override default space
3269                    embedding_function: None,                   // Will use default
3270                    source_key: Some("custom_key".to_string()), // Override default
3271                    hnsw: Some(HnswIndexConfig {
3272                        ef_construction: Some(500), // Override default
3273                        max_neighbors: None,        // Will use default
3274                        ef_search: None,            // Will use default
3275                        num_threads: None,
3276                        batch_size: None,
3277                        sync_threshold: None,
3278                        resize_factor: None,
3279                    }),
3280                    spann: None,
3281                },
3282            }),
3283        });
3284
3285        // Use HNSW defaults for this test so we have HNSW config to merge with
3286        let result = {
3287            let default_schema = Schema::new_default(KnnIndex::Hnsw);
3288            let merged_defaults = Schema::merge_value_types(
3289                &default_schema.defaults,
3290                &user_schema.defaults,
3291                KnnIndex::Hnsw,
3292            )
3293            .unwrap();
3294            let mut merged_keys = default_schema.keys.clone();
3295            for (key, user_value_types) in user_schema.keys {
3296                if let Some(default_value_types) = merged_keys.get(&key) {
3297                    let merged_value_types = Schema::merge_value_types(
3298                        default_value_types,
3299                        &user_value_types,
3300                        KnnIndex::Hnsw,
3301                    )
3302                    .unwrap();
3303                    merged_keys.insert(key, merged_value_types);
3304                } else {
3305                    merged_keys.insert(key, user_value_types);
3306                }
3307            }
3308            Schema {
3309                defaults: merged_defaults,
3310                keys: merged_keys,
3311                cmek: None,
3312                source_attached_function_id: None,
3313            }
3314        };
3315
3316        let vector_config = &result
3317            .defaults
3318            .float_list
3319            .as_ref()
3320            .unwrap()
3321            .vector_index
3322            .as_ref()
3323            .unwrap()
3324            .config;
3325
3326        // Check user overrides took precedence
3327        assert_eq!(vector_config.space, Some(Space::L2));
3328        assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
3329        assert_eq!(
3330            vector_config.hnsw.as_ref().unwrap().ef_construction,
3331            Some(500)
3332        );
3333
3334        // Check defaults were preserved for unspecified fields
3335        assert_eq!(vector_config.embedding_function, None);
3336        // Since user provided HNSW config, the default max_neighbors should be merged in
3337        assert_eq!(
3338            vector_config.hnsw.as_ref().unwrap().max_neighbors,
3339            Some(default_m())
3340        );
3341    }
3342
3343    #[test]
3344    fn test_reconcile_with_defaults_keys() {
3345        // Test that key overrides are properly merged
3346        let mut user_schema = Schema {
3347            defaults: ValueTypes::default(),
3348            keys: HashMap::new(),
3349            cmek: None,
3350            source_attached_function_id: None,
3351        };
3352
3353        // Add a custom key override
3354        let custom_key_types = ValueTypes {
3355            string: Some(StringValueType {
3356                fts_index: Some(FtsIndexType {
3357                    enabled: true,
3358                    config: FtsIndexConfig {},
3359                }),
3360                string_inverted_index: Some(StringInvertedIndexType {
3361                    enabled: false,
3362                    config: StringInvertedIndexConfig {},
3363                }),
3364            }),
3365            ..Default::default()
3366        };
3367        user_schema
3368            .keys
3369            .insert("custom_key".to_string(), custom_key_types);
3370
3371        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3372
3373        // Check that default key overrides are preserved
3374        assert!(result.keys.contains_key(EMBEDDING_KEY));
3375        assert!(result.keys.contains_key(DOCUMENT_KEY));
3376
3377        // Check that user key override was added
3378        assert!(result.keys.contains_key("custom_key"));
3379        let custom_override = result.keys.get("custom_key").unwrap();
3380        assert!(
3381            custom_override
3382                .string
3383                .as_ref()
3384                .unwrap()
3385                .fts_index
3386                .as_ref()
3387                .unwrap()
3388                .enabled
3389        );
3390    }
3391
3392    #[test]
3393    fn test_reconcile_with_defaults_override_existing_key() {
3394        // Test overriding an existing key override (like #embedding)
3395        let mut user_schema = Schema {
3396            defaults: ValueTypes::default(),
3397            keys: HashMap::new(),
3398            cmek: None,
3399            source_attached_function_id: None,
3400        };
3401
3402        // Override the #embedding key with custom settings
3403        let embedding_override = ValueTypes {
3404            float_list: Some(FloatListValueType {
3405                vector_index: Some(VectorIndexType {
3406                    enabled: false, // Override default enabled=true to false
3407                    config: VectorIndexConfig {
3408                        space: Some(Space::Ip), // Override default space
3409                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3410                        source_key: Some("custom_embedding_key".to_string()),
3411                        hnsw: None,
3412                        spann: None,
3413                    },
3414                }),
3415            }),
3416            ..Default::default()
3417        };
3418        user_schema
3419            .keys
3420            .insert(EMBEDDING_KEY.to_string(), embedding_override);
3421
3422        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3423
3424        let embedding_config = result.keys.get(EMBEDDING_KEY).unwrap();
3425        let vector_config = &embedding_config
3426            .float_list
3427            .as_ref()
3428            .unwrap()
3429            .vector_index
3430            .as_ref()
3431            .unwrap();
3432
3433        // Check user overrides took precedence
3434        assert!(!vector_config.enabled);
3435        assert_eq!(vector_config.config.space, Some(Space::Ip));
3436        assert_eq!(
3437            vector_config.config.source_key,
3438            Some("custom_embedding_key".to_string())
3439        );
3440    }
3441
3442    #[test]
3443    fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
3444        let collection_config = InternalCollectionConfiguration {
3445            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
3446                space: Space::Cosine,
3447                ef_construction: 128,
3448                ef_search: 96,
3449                max_neighbors: 42,
3450                num_threads: 8,
3451                resize_factor: 1.5,
3452                sync_threshold: 2_000,
3453                batch_size: 256,
3454            }),
3455            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
3456                EmbeddingFunctionNewConfiguration {
3457                    name: "custom".to_string(),
3458                    config: json!({"alpha": 1}),
3459                },
3460            )),
3461        };
3462
3463        let schema = Schema::try_from(&collection_config).unwrap();
3464        let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
3465
3466        assert_eq!(reconstructed, collection_config);
3467    }
3468
3469    #[test]
3470    fn test_convert_schema_to_collection_config_spann_roundtrip() {
3471        let spann_config = InternalSpannConfiguration {
3472            space: Space::Cosine,
3473            search_nprobe: 11,
3474            search_rng_factor: 1.7,
3475            write_nprobe: 5,
3476            nreplica_count: 3,
3477            split_threshold: 150,
3478            merge_threshold: 80,
3479            ef_construction: 120,
3480            ef_search: 90,
3481            max_neighbors: 40,
3482            ..Default::default()
3483        };
3484
3485        let collection_config = InternalCollectionConfiguration {
3486            vector_index: VectorIndexConfiguration::Spann(spann_config.clone()),
3487            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
3488                EmbeddingFunctionNewConfiguration {
3489                    name: "custom".to_string(),
3490                    config: json!({"beta": true}),
3491                },
3492            )),
3493        };
3494
3495        let schema = Schema::try_from(&collection_config).unwrap();
3496        let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
3497
3498        assert_eq!(reconstructed, collection_config);
3499    }
3500
3501    #[test]
3502    fn test_convert_schema_to_collection_config_rejects_mixed_index() {
3503        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3504        if let Some(embedding) = schema.keys.get_mut(EMBEDDING_KEY) {
3505            if let Some(float_list) = &mut embedding.float_list {
3506                if let Some(vector_index) = &mut float_list.vector_index {
3507                    vector_index.config.spann = Some(SpannIndexConfig {
3508                        search_nprobe: Some(1),
3509                        search_rng_factor: Some(1.0),
3510                        search_rng_epsilon: Some(0.1),
3511                        nreplica_count: Some(1),
3512                        write_rng_factor: Some(1.0),
3513                        write_rng_epsilon: Some(0.1),
3514                        split_threshold: Some(100),
3515                        num_samples_kmeans: Some(10),
3516                        initial_lambda: Some(0.5),
3517                        reassign_neighbor_count: Some(10),
3518                        merge_threshold: Some(50),
3519                        num_centers_to_merge_to: Some(3),
3520                        write_nprobe: Some(1),
3521                        ef_construction: Some(50),
3522                        ef_search: Some(40),
3523                        max_neighbors: Some(20),
3524                        center_drift_threshold: None,
3525                        quantize: Quantization::None,
3526                    });
3527                }
3528            }
3529        }
3530
3531        let result = InternalCollectionConfiguration::try_from(&schema);
3532        assert!(result.is_err());
3533    }
3534
3535    #[test]
3536    fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
3537        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3538        let before = schema.clone();
3539        let modified = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);
3540        assert!(!modified);
3541        assert_eq!(schema, before);
3542    }
3543
3544    #[test]
3545    fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
3546        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3547        assert!(!schema.keys.contains_key("custom_field"));
3548
3549        let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3550
3551        assert!(modified);
3552        let entry = schema
3553            .keys
3554            .get("custom_field")
3555            .expect("expected new key override to be inserted");
3556        assert_eq!(entry.boolean, schema.defaults.boolean);
3557        assert!(entry.string.is_none());
3558        assert!(entry.int.is_none());
3559        assert!(entry.float.is_none());
3560        assert!(entry.float_list.is_none());
3561        assert!(entry.sparse_vector.is_none());
3562    }
3563
3564    #[test]
3565    fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
3566        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3567        let initial_len = schema.keys.len();
3568        schema.keys.insert(
3569            "custom_field".to_string(),
3570            ValueTypes {
3571                string: schema.defaults.string.clone(),
3572                ..Default::default()
3573            },
3574        );
3575
3576        let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3577
3578        assert!(modified);
3579        assert_eq!(schema.keys.len(), initial_len + 1);
3580        let entry = schema
3581            .keys
3582            .get("custom_field")
3583            .expect("expected key override to exist after ensure call");
3584        assert!(entry.string.is_some());
3585        assert_eq!(entry.boolean, schema.defaults.boolean);
3586    }
3587
3588    #[test]
3589    fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
3590        let schema = Schema::new_default(KnnIndex::Spann);
3591        let result = schema.is_knn_key_indexing_enabled(
3592            "custom_sparse",
3593            &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap()),
3594        );
3595
3596        let err = result.expect_err("expected indexing disabled error");
3597        match err {
3598            FilterValidationError::IndexingDisabled { key, value_type } => {
3599                assert_eq!(key, "custom_sparse");
3600                assert_eq!(value_type, crate::metadata::MetadataValueType::SparseVector);
3601            }
3602            other => panic!("unexpected error variant: {other:?}"),
3603        }
3604    }
3605
3606    #[test]
3607    fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
3608        let mut schema = Schema::new_default(KnnIndex::Spann);
3609        schema.keys.insert(
3610            "sparse_enabled".to_string(),
3611            ValueTypes {
3612                sparse_vector: Some(SparseVectorValueType {
3613                    sparse_vector_index: Some(SparseVectorIndexType {
3614                        enabled: true,
3615                        config: SparseVectorIndexConfig {
3616                            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3617                            source_key: None,
3618                            bm25: None,
3619                        },
3620                    }),
3621                }),
3622                ..Default::default()
3623            },
3624        );
3625
3626        let result = schema.is_knn_key_indexing_enabled(
3627            "sparse_enabled",
3628            &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap()),
3629        );
3630
3631        assert!(result.is_ok());
3632    }
3633
3634    #[test]
3635    fn test_is_knn_key_indexing_enabled_dense_succeeds() {
3636        let schema = Schema::new_default(KnnIndex::Spann);
3637        let result = schema.is_knn_key_indexing_enabled(
3638            EMBEDDING_KEY,
3639            &QueryVector::Dense(vec![0.1_f32, 0.2_f32]),
3640        );
3641
3642        assert!(result.is_ok());
3643    }
3644
3645    #[test]
3646    fn test_merge_hnsw_configs_field_level() {
3647        // Test field-level merging for HNSW configurations
3648        let default_hnsw = HnswIndexConfig {
3649            ef_construction: Some(200),
3650            max_neighbors: Some(16),
3651            ef_search: Some(10),
3652            num_threads: Some(4),
3653            batch_size: Some(100),
3654            sync_threshold: Some(1000),
3655            resize_factor: Some(1.2),
3656        };
3657
3658        let user_hnsw = HnswIndexConfig {
3659            ef_construction: Some(300), // Override
3660            max_neighbors: None,        // Will use default
3661            ef_search: Some(20),        // Override
3662            num_threads: None,          // Will use default
3663            batch_size: None,           // Will use default
3664            sync_threshold: Some(2000), // Override
3665            resize_factor: None,        // Will use default
3666        };
3667
3668        let result = Schema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();
3669
3670        // Check user overrides
3671        assert_eq!(result.ef_construction, Some(300));
3672        assert_eq!(result.ef_search, Some(20));
3673        assert_eq!(result.sync_threshold, Some(2000));
3674
3675        // Check defaults preserved
3676        assert_eq!(result.max_neighbors, Some(16));
3677        assert_eq!(result.num_threads, Some(4));
3678        assert_eq!(result.batch_size, Some(100));
3679        assert_eq!(result.resize_factor, Some(1.2));
3680    }
3681
3682    #[test]
3683    fn test_merge_spann_configs_field_level() {
3684        // Test field-level merging for SPANN configurations
3685        let default_spann = SpannIndexConfig {
3686            search_nprobe: Some(10),
3687            search_rng_factor: Some(1.0),  // Must be exactly 1.0
3688            search_rng_epsilon: Some(7.0), // Must be 5.0-10.0
3689            nreplica_count: Some(3),
3690            write_rng_factor: Some(1.0),  // Must be exactly 1.0
3691            write_rng_epsilon: Some(6.0), // Must be 5.0-10.0
3692            split_threshold: Some(100),   // Must be 50-200
3693            num_samples_kmeans: Some(100),
3694            initial_lambda: Some(100.0), // Must be exactly 100.0
3695            reassign_neighbor_count: Some(50),
3696            merge_threshold: Some(50),        // Must be 25-100
3697            num_centers_to_merge_to: Some(4), // Max is 8
3698            write_nprobe: Some(5),
3699            ef_construction: Some(100),
3700            ef_search: Some(10),
3701            max_neighbors: Some(16),
3702            center_drift_threshold: None,
3703            quantize: Quantization::None,
3704        };
3705
3706        let user_spann = SpannIndexConfig {
3707            search_nprobe: Some(20),       // Override
3708            search_rng_factor: None,       // Will use default
3709            search_rng_epsilon: Some(8.0), // Override (valid: 5.0-10.0)
3710            nreplica_count: None,          // Will use default
3711            write_rng_factor: None,
3712            write_rng_epsilon: None,
3713            split_threshold: Some(150), // Override (valid: 50-200)
3714            num_samples_kmeans: None,
3715            initial_lambda: None,
3716            reassign_neighbor_count: None,
3717            merge_threshold: None,
3718            num_centers_to_merge_to: None,
3719            write_nprobe: None,
3720            ef_construction: None,
3721            ef_search: None,
3722            max_neighbors: None,
3723            center_drift_threshold: None,
3724            quantize: Quantization::None,
3725        };
3726
3727        let result = Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann))
3728            .unwrap()
3729            .unwrap();
3730
3731        // Check user overrides
3732        assert_eq!(result.search_nprobe, Some(20));
3733        assert_eq!(result.search_rng_epsilon, Some(8.0));
3734        assert_eq!(result.split_threshold, Some(150));
3735
3736        // Check defaults preserved
3737        assert_eq!(result.search_rng_factor, Some(1.0));
3738        assert_eq!(result.nreplica_count, Some(3));
3739        assert_eq!(result.initial_lambda, Some(100.0));
3740    }
3741
3742    #[test]
3743    fn test_merge_spann_configs_rejects_quantize_true() {
3744        // Test that merge_spann_configs rejects quantize: true in user schema
3745        let default_spann = SpannIndexConfig {
3746            search_nprobe: Some(10),
3747            search_rng_factor: Some(1.0),
3748            search_rng_epsilon: Some(7.0),
3749            nreplica_count: Some(3),
3750            write_rng_factor: Some(1.0),
3751            write_rng_epsilon: Some(6.0),
3752            split_threshold: Some(100),
3753            num_samples_kmeans: Some(100),
3754            initial_lambda: Some(100.0),
3755            reassign_neighbor_count: Some(50),
3756            merge_threshold: Some(50),
3757            num_centers_to_merge_to: Some(4),
3758            write_nprobe: Some(5),
3759            ef_construction: Some(100),
3760            ef_search: Some(10),
3761            max_neighbors: Some(16),
3762            center_drift_threshold: None,
3763            quantize: Quantization::None,
3764        };
3765
3766        let user_spann_with_quantize = SpannIndexConfig {
3767            search_nprobe: Some(20),
3768            search_rng_factor: None,
3769            search_rng_epsilon: Some(8.0),
3770            nreplica_count: None,
3771            write_rng_factor: None,
3772            write_rng_epsilon: None,
3773            split_threshold: Some(150),
3774            num_samples_kmeans: None,
3775            initial_lambda: None,
3776            reassign_neighbor_count: None,
3777            merge_threshold: None,
3778            num_centers_to_merge_to: None,
3779            write_nprobe: None,
3780            ef_construction: None,
3781            ef_search: None,
3782            max_neighbors: None,
3783            center_drift_threshold: None,
3784            quantize: Quantization::FourBitRabitQWithUSearch, // This should be rejected
3785        };
3786
3787        // Should reject user schema with quantize: true
3788        let result =
3789            Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann_with_quantize));
3790        assert!(result.is_err());
3791        match result {
3792            Err(SchemaError::InvalidUserInput { reason }) => {
3793                assert!(reason.contains("quantize field cannot be set"));
3794            }
3795            _ => panic!("Expected InvalidUserInput error"),
3796        }
3797
3798        // Should reject default schema with quantize: true
3799        let default_spann_with_quantize = SpannIndexConfig {
3800            search_nprobe: Some(10),
3801            search_rng_factor: Some(1.0),
3802            search_rng_epsilon: Some(7.0),
3803            nreplica_count: Some(3),
3804            write_rng_factor: Some(1.0),
3805            write_rng_epsilon: Some(6.0),
3806            split_threshold: Some(100),
3807            num_samples_kmeans: Some(100),
3808            initial_lambda: Some(100.0),
3809            reassign_neighbor_count: Some(50),
3810            merge_threshold: Some(50),
3811            num_centers_to_merge_to: Some(4),
3812            write_nprobe: Some(5),
3813            ef_construction: Some(100),
3814            ef_search: Some(10),
3815            max_neighbors: Some(16),
3816            center_drift_threshold: None,
3817            quantize: Quantization::FourBitRabitQWithUSearch, // This should be rejected
3818        };
3819
3820        let result = Schema::merge_spann_configs(Some(&default_spann_with_quantize), None);
3821        assert!(result.is_err());
3822        match result {
3823            Err(SchemaError::InvalidUserInput { reason }) => {
3824                assert!(reason.contains("quantize field cannot be set"));
3825            }
3826            _ => panic!("Expected InvalidUserInput error"),
3827        }
3828
3829        // Should reject user-only schema with quantize: true
3830        let result = Schema::merge_spann_configs(None, Some(&user_spann_with_quantize));
3831        assert!(result.is_err());
3832        match result {
3833            Err(SchemaError::InvalidUserInput { reason }) => {
3834                assert!(reason.contains("quantize field cannot be set"));
3835            }
3836            _ => panic!("Expected InvalidUserInput error"),
3837        }
3838    }
3839
3840    #[test]
3841    fn test_spann_index_config_into_internal_configuration() {
3842        let config = SpannIndexConfig {
3843            search_nprobe: Some(33),
3844            search_rng_factor: Some(1.2),
3845            search_rng_epsilon: None,
3846            nreplica_count: None,
3847            write_rng_factor: Some(1.5),
3848            write_rng_epsilon: None,
3849            split_threshold: Some(75),
3850            num_samples_kmeans: None,
3851            initial_lambda: Some(0.9),
3852            reassign_neighbor_count: Some(40),
3853            merge_threshold: None,
3854            num_centers_to_merge_to: Some(4),
3855            write_nprobe: Some(60),
3856            ef_construction: Some(180),
3857            ef_search: Some(170),
3858            max_neighbors: Some(32),
3859            center_drift_threshold: None,
3860            quantize: Quantization::None,
3861        };
3862
3863        let with_space: InternalSpannConfiguration = (Some(&Space::Cosine), &config).into();
3864        assert_eq!(with_space.space, Space::Cosine);
3865        assert_eq!(with_space.search_nprobe, 33);
3866        assert_eq!(with_space.search_rng_factor, 1.2);
3867        assert_eq!(with_space.search_rng_epsilon, default_search_rng_epsilon());
3868        assert_eq!(with_space.write_rng_factor, 1.5);
3869        assert_eq!(with_space.write_nprobe, 60);
3870        assert_eq!(with_space.ef_construction, 180);
3871        assert_eq!(with_space.ef_search, 170);
3872        assert_eq!(with_space.max_neighbors, 32);
3873        assert_eq!(with_space.merge_threshold, default_merge_threshold());
3874
3875        let default_space_config: InternalSpannConfiguration = (None, &config).into();
3876        assert_eq!(default_space_config.space, default_space());
3877    }
3878
3879    #[test]
3880    fn test_merge_string_type_combinations() {
3881        // Test all combinations of default and user StringValueType
3882
3883        // Both Some - should merge
3884        let default = StringValueType {
3885            string_inverted_index: Some(StringInvertedIndexType {
3886                enabled: true,
3887                config: StringInvertedIndexConfig {},
3888            }),
3889            fts_index: Some(FtsIndexType {
3890                enabled: false,
3891                config: FtsIndexConfig {},
3892            }),
3893        };
3894
3895        let user = StringValueType {
3896            string_inverted_index: Some(StringInvertedIndexType {
3897                enabled: false, // Override
3898                config: StringInvertedIndexConfig {},
3899            }),
3900            fts_index: None, // Will use default
3901        };
3902
3903        let result = Schema::merge_string_type(Some(&default), Some(&user))
3904            .unwrap()
3905            .unwrap();
3906        assert!(!result.string_inverted_index.as_ref().unwrap().enabled); // User override
3907        assert!(!result.fts_index.as_ref().unwrap().enabled); // Default preserved
3908
3909        // Default Some, User None - should return default
3910        let result = Schema::merge_string_type(Some(&default), None)
3911            .unwrap()
3912            .unwrap();
3913        assert!(result.string_inverted_index.as_ref().unwrap().enabled);
3914
3915        // Default None, User Some - should return user
3916        let result = Schema::merge_string_type(None, Some(&user))
3917            .unwrap()
3918            .unwrap();
3919        assert!(!result.string_inverted_index.as_ref().unwrap().enabled);
3920
3921        // Both None - should return None
3922        let result = Schema::merge_string_type(None, None).unwrap();
3923        assert!(result.is_none());
3924    }
3925
3926    #[test]
3927    fn test_merge_vector_index_config_comprehensive() {
3928        // Test comprehensive vector index config merging
3929        let default_config = VectorIndexConfig {
3930            space: Some(Space::Cosine),
3931            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3932            source_key: Some("default_key".to_string()),
3933            hnsw: Some(HnswIndexConfig {
3934                ef_construction: Some(200),
3935                max_neighbors: Some(16),
3936                ef_search: Some(10),
3937                num_threads: Some(4),
3938                batch_size: Some(100),
3939                sync_threshold: Some(1000),
3940                resize_factor: Some(1.2),
3941            }),
3942            spann: None,
3943        };
3944
3945        let user_config = VectorIndexConfig {
3946            space: Some(Space::L2),                   // Override
3947            embedding_function: None,                 // Will use default
3948            source_key: Some("user_key".to_string()), // Override
3949            hnsw: Some(HnswIndexConfig {
3950                ef_construction: Some(300), // Override
3951                max_neighbors: None,        // Will use default
3952                ef_search: None,            // Will use default
3953                num_threads: None,
3954                batch_size: None,
3955                sync_threshold: None,
3956                resize_factor: None,
3957            }),
3958            spann: Some(SpannIndexConfig {
3959                search_nprobe: Some(15),
3960                search_rng_factor: None,
3961                search_rng_epsilon: None,
3962                nreplica_count: None,
3963                write_rng_factor: None,
3964                write_rng_epsilon: None,
3965                split_threshold: None,
3966                num_samples_kmeans: None,
3967                initial_lambda: None,
3968                reassign_neighbor_count: None,
3969                merge_threshold: None,
3970                num_centers_to_merge_to: None,
3971                write_nprobe: None,
3972                ef_construction: None,
3973                ef_search: None,
3974                max_neighbors: None,
3975                center_drift_threshold: None,
3976                quantize: Quantization::None,
3977            }), // Add SPANN config
3978        };
3979
3980        let result =
3981            Schema::merge_vector_index_config(&default_config, &user_config, KnnIndex::Hnsw)
3982                .expect("merge should succeed");
3983
3984        // Check field-level merging
3985        assert_eq!(result.space, Some(Space::L2)); // User override
3986        assert_eq!(
3987            result.embedding_function,
3988            Some(EmbeddingFunctionConfiguration::Legacy)
3989        ); // Default preserved
3990        assert_eq!(result.source_key, Some("user_key".to_string())); // User override
3991
3992        // Check HNSW merging
3993        assert_eq!(result.hnsw.as_ref().unwrap().ef_construction, Some(300)); // User override
3994        assert_eq!(result.hnsw.as_ref().unwrap().max_neighbors, Some(16)); // Default preserved
3995
3996        // Check SPANN is not present, since merging in the context of HNSW
3997        assert!(result.spann.is_none());
3998    }
3999
4000    #[test]
4001    fn test_merge_sparse_vector_index_config() {
4002        // Test sparse vector index config merging
4003        let default_config = SparseVectorIndexConfig {
4004            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4005            source_key: Some("default_sparse_key".to_string()),
4006            bm25: None,
4007        };
4008
4009        let user_config = SparseVectorIndexConfig {
4010            embedding_function: None,                        // Will use default
4011            source_key: Some("user_sparse_key".to_string()), // Override
4012            bm25: None,
4013        };
4014
4015        let result = Schema::merge_sparse_vector_index_config(&default_config, &user_config);
4016
4017        // Check user override
4018        assert_eq!(result.source_key, Some("user_sparse_key".to_string()));
4019        // Check default preserved
4020        assert_eq!(
4021            result.embedding_function,
4022            Some(EmbeddingFunctionConfiguration::Legacy)
4023        );
4024    }
4025
4026    #[test]
4027    fn test_complex_nested_merging_scenario() {
4028        // Test a complex scenario with multiple levels of merging
4029        let mut user_schema = Schema {
4030            defaults: ValueTypes::default(),
4031            keys: HashMap::new(),
4032            cmek: None,
4033            source_attached_function_id: None,
4034        };
4035
4036        // Set up complex user defaults
4037        user_schema.defaults.string = Some(StringValueType {
4038            string_inverted_index: Some(StringInvertedIndexType {
4039                enabled: false,
4040                config: StringInvertedIndexConfig {},
4041            }),
4042            fts_index: Some(FtsIndexType {
4043                enabled: true,
4044                config: FtsIndexConfig {},
4045            }),
4046        });
4047
4048        user_schema.defaults.float_list = Some(FloatListValueType {
4049            vector_index: Some(VectorIndexType {
4050                enabled: true,
4051                config: VectorIndexConfig {
4052                    space: Some(Space::Ip),
4053                    embedding_function: None, // Will use default
4054                    source_key: Some("custom_vector_key".to_string()),
4055                    hnsw: Some(HnswIndexConfig {
4056                        ef_construction: Some(400),
4057                        max_neighbors: Some(32),
4058                        ef_search: None, // Will use default
4059                        num_threads: None,
4060                        batch_size: None,
4061                        sync_threshold: None,
4062                        resize_factor: None,
4063                    }),
4064                    spann: None,
4065                },
4066            }),
4067        });
4068
4069        // Set up key overrides
4070        let custom_key_override = ValueTypes {
4071            string: Some(StringValueType {
4072                fts_index: Some(FtsIndexType {
4073                    enabled: true,
4074                    config: FtsIndexConfig {},
4075                }),
4076                string_inverted_index: None,
4077            }),
4078            ..Default::default()
4079        };
4080        user_schema
4081            .keys
4082            .insert("custom_field".to_string(), custom_key_override);
4083
4084        // Use HNSW defaults for this test so we have HNSW config to merge with
4085        let result = {
4086            let default_schema = Schema::new_default(KnnIndex::Hnsw);
4087            let merged_defaults = Schema::merge_value_types(
4088                &default_schema.defaults,
4089                &user_schema.defaults,
4090                KnnIndex::Hnsw,
4091            )
4092            .unwrap();
4093            let mut merged_keys = default_schema.keys.clone();
4094            for (key, user_value_types) in user_schema.keys {
4095                if let Some(default_value_types) = merged_keys.get(&key) {
4096                    let merged_value_types = Schema::merge_value_types(
4097                        default_value_types,
4098                        &user_value_types,
4099                        KnnIndex::Hnsw,
4100                    )
4101                    .unwrap();
4102                    merged_keys.insert(key, merged_value_types);
4103                } else {
4104                    merged_keys.insert(key, user_value_types);
4105                }
4106            }
4107            Schema {
4108                defaults: merged_defaults,
4109                keys: merged_keys,
4110                cmek: None,
4111                source_attached_function_id: None,
4112            }
4113        };
4114
4115        // Verify complex merging worked correctly
4116
4117        // Check defaults merging
4118        assert!(
4119            !result
4120                .defaults
4121                .string
4122                .as_ref()
4123                .unwrap()
4124                .string_inverted_index
4125                .as_ref()
4126                .unwrap()
4127                .enabled
4128        );
4129        assert!(
4130            result
4131                .defaults
4132                .string
4133                .as_ref()
4134                .unwrap()
4135                .fts_index
4136                .as_ref()
4137                .unwrap()
4138                .enabled
4139        );
4140
4141        let vector_config = &result
4142            .defaults
4143            .float_list
4144            .as_ref()
4145            .unwrap()
4146            .vector_index
4147            .as_ref()
4148            .unwrap()
4149            .config;
4150        assert_eq!(vector_config.space, Some(Space::Ip));
4151        assert_eq!(vector_config.embedding_function, None); // Default preserved
4152        assert_eq!(
4153            vector_config.source_key,
4154            Some("custom_vector_key".to_string())
4155        );
4156        assert_eq!(
4157            vector_config.hnsw.as_ref().unwrap().ef_construction,
4158            Some(400)
4159        );
4160        assert_eq!(vector_config.hnsw.as_ref().unwrap().max_neighbors, Some(32));
4161        assert_eq!(
4162            vector_config.hnsw.as_ref().unwrap().ef_search,
4163            Some(default_search_ef())
4164        ); // Default preserved
4165
4166        // Check key overrides
4167        assert!(result.keys.contains_key(EMBEDDING_KEY)); // Default preserved
4168        assert!(result.keys.contains_key(DOCUMENT_KEY)); // Default preserved
4169        assert!(result.keys.contains_key("custom_field")); // User added
4170
4171        let custom_override = result.keys.get("custom_field").unwrap();
4172        assert!(
4173            custom_override
4174                .string
4175                .as_ref()
4176                .unwrap()
4177                .fts_index
4178                .as_ref()
4179                .unwrap()
4180                .enabled
4181        );
4182        assert!(custom_override
4183            .string
4184            .as_ref()
4185            .unwrap()
4186            .string_inverted_index
4187            .is_none());
4188    }
4189
4190    #[test]
4191    fn test_reconcile_with_collection_config_default_config() {
4192        // Test that when collection config is default, schema is returned as-is
4193        let collection_config = InternalCollectionConfiguration::default_hnsw();
4194        let schema = Schema::try_from(&collection_config).unwrap();
4195
4196        let result =
4197            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4198                .unwrap();
4199        assert_eq!(result, schema);
4200    }
4201
4202    // Test all 8 cases of double default scenarios
4203    #[test]
4204    fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_hnsw() {
4205        let collection_config = InternalCollectionConfiguration::default_hnsw();
4206        let schema = Schema::new_default(KnnIndex::Hnsw);
4207        let result =
4208            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4209                .unwrap();
4210
4211        // Should create new schema with default_knn_index (Hnsw)
4212        assert!(result.defaults.float_list.is_some());
4213        assert!(result
4214            .defaults
4215            .float_list
4216            .as_ref()
4217            .unwrap()
4218            .vector_index
4219            .as_ref()
4220            .unwrap()
4221            .config
4222            .hnsw
4223            .is_some());
4224        assert!(result
4225            .defaults
4226            .float_list
4227            .as_ref()
4228            .unwrap()
4229            .vector_index
4230            .as_ref()
4231            .unwrap()
4232            .config
4233            .spann
4234            .is_none());
4235    }
4236
4237    #[test]
4238    fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_spann() {
4239        let collection_config = InternalCollectionConfiguration::default_hnsw();
4240        let schema = Schema::new_default(KnnIndex::Hnsw);
4241        let result =
4242            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4243                .unwrap();
4244
4245        // Should create new schema with default_knn_index (Spann)
4246        assert!(result.defaults.float_list.is_some());
4247        assert!(result
4248            .defaults
4249            .float_list
4250            .as_ref()
4251            .unwrap()
4252            .vector_index
4253            .as_ref()
4254            .unwrap()
4255            .config
4256            .spann
4257            .is_some());
4258        assert!(result
4259            .defaults
4260            .float_list
4261            .as_ref()
4262            .unwrap()
4263            .vector_index
4264            .as_ref()
4265            .unwrap()
4266            .config
4267            .hnsw
4268            .is_none());
4269    }
4270
4271    #[test]
4272    fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_hnsw() {
4273        let collection_config = InternalCollectionConfiguration::default_hnsw();
4274        let schema = Schema::new_default(KnnIndex::Spann);
4275        let result =
4276            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4277                .unwrap();
4278
4279        // Should create new schema with default_knn_index (Hnsw)
4280        assert!(result.defaults.float_list.is_some());
4281        assert!(result
4282            .defaults
4283            .float_list
4284            .as_ref()
4285            .unwrap()
4286            .vector_index
4287            .as_ref()
4288            .unwrap()
4289            .config
4290            .hnsw
4291            .is_some());
4292        assert!(result
4293            .defaults
4294            .float_list
4295            .as_ref()
4296            .unwrap()
4297            .vector_index
4298            .as_ref()
4299            .unwrap()
4300            .config
4301            .spann
4302            .is_none());
4303    }
4304
4305    #[test]
4306    fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_spann() {
4307        let collection_config = InternalCollectionConfiguration::default_hnsw();
4308        let schema = Schema::new_default(KnnIndex::Spann);
4309        let result =
4310            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4311                .unwrap();
4312
4313        // Should create new schema with default_knn_index (Spann)
4314        assert!(result.defaults.float_list.is_some());
4315        assert!(result
4316            .defaults
4317            .float_list
4318            .as_ref()
4319            .unwrap()
4320            .vector_index
4321            .as_ref()
4322            .unwrap()
4323            .config
4324            .spann
4325            .is_some());
4326        assert!(result
4327            .defaults
4328            .float_list
4329            .as_ref()
4330            .unwrap()
4331            .vector_index
4332            .as_ref()
4333            .unwrap()
4334            .config
4335            .hnsw
4336            .is_none());
4337    }
4338
4339    #[test]
4340    fn test_reconcile_double_default_spann_config_spann_schema_default_knn_hnsw() {
4341        let collection_config = InternalCollectionConfiguration::default_spann();
4342        let schema = Schema::new_default(KnnIndex::Spann);
4343        let result =
4344            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4345                .unwrap();
4346
4347        // Should create new schema with default_knn_index (Hnsw)
4348        assert!(result.defaults.float_list.is_some());
4349        assert!(result
4350            .defaults
4351            .float_list
4352            .as_ref()
4353            .unwrap()
4354            .vector_index
4355            .as_ref()
4356            .unwrap()
4357            .config
4358            .hnsw
4359            .is_some());
4360        assert!(result
4361            .defaults
4362            .float_list
4363            .as_ref()
4364            .unwrap()
4365            .vector_index
4366            .as_ref()
4367            .unwrap()
4368            .config
4369            .spann
4370            .is_none());
4371    }
4372
4373    #[test]
4374    fn test_reconcile_double_default_spann_config_spann_schema_default_knn_spann() {
4375        let collection_config = InternalCollectionConfiguration::default_spann();
4376        let schema = Schema::new_default(KnnIndex::Spann);
4377        let result =
4378            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4379                .unwrap();
4380
4381        // Should create new schema with default_knn_index (Spann)
4382        assert!(result.defaults.float_list.is_some());
4383        assert!(result
4384            .defaults
4385            .float_list
4386            .as_ref()
4387            .unwrap()
4388            .vector_index
4389            .as_ref()
4390            .unwrap()
4391            .config
4392            .spann
4393            .is_some());
4394        assert!(result
4395            .defaults
4396            .float_list
4397            .as_ref()
4398            .unwrap()
4399            .vector_index
4400            .as_ref()
4401            .unwrap()
4402            .config
4403            .hnsw
4404            .is_none());
4405        // Defaults should have source_key=None
4406        assert_eq!(
4407            result
4408                .defaults
4409                .float_list
4410                .as_ref()
4411                .unwrap()
4412                .vector_index
4413                .as_ref()
4414                .unwrap()
4415                .config
4416                .source_key,
4417            None
4418        );
4419    }
4420
4421    #[test]
4422    fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_hnsw() {
4423        let collection_config = InternalCollectionConfiguration::default_spann();
4424        let schema = Schema::new_default(KnnIndex::Hnsw);
4425        let result =
4426            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4427                .unwrap();
4428
4429        // Should create new schema with default_knn_index (Hnsw)
4430        assert!(result.defaults.float_list.is_some());
4431        assert!(result
4432            .defaults
4433            .float_list
4434            .as_ref()
4435            .unwrap()
4436            .vector_index
4437            .as_ref()
4438            .unwrap()
4439            .config
4440            .hnsw
4441            .is_some());
4442        assert!(result
4443            .defaults
4444            .float_list
4445            .as_ref()
4446            .unwrap()
4447            .vector_index
4448            .as_ref()
4449            .unwrap()
4450            .config
4451            .spann
4452            .is_none());
4453    }
4454
4455    #[test]
4456    fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_spann() {
4457        let collection_config = InternalCollectionConfiguration::default_spann();
4458        let schema = Schema::new_default(KnnIndex::Hnsw);
4459        let result =
4460            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4461                .unwrap();
4462
4463        // Should create new schema with default_knn_index (Spann)
4464        assert!(result.defaults.float_list.is_some());
4465        assert!(result
4466            .defaults
4467            .float_list
4468            .as_ref()
4469            .unwrap()
4470            .vector_index
4471            .as_ref()
4472            .unwrap()
4473            .config
4474            .spann
4475            .is_some());
4476        assert!(result
4477            .defaults
4478            .float_list
4479            .as_ref()
4480            .unwrap()
4481            .vector_index
4482            .as_ref()
4483            .unwrap()
4484            .config
4485            .hnsw
4486            .is_none());
4487    }
4488
4489    #[test]
4490    fn test_defaults_source_key_not_document() {
4491        // Test that defaults.float_list.vector_index.config.source_key is None, not DOCUMENT_KEY
4492        let schema_hnsw = Schema::new_default(KnnIndex::Hnsw);
4493        let schema_spann = Schema::new_default(KnnIndex::Spann);
4494
4495        // Check HNSW default schema
4496        let defaults_hnsw = schema_hnsw
4497            .defaults
4498            .float_list
4499            .as_ref()
4500            .unwrap()
4501            .vector_index
4502            .as_ref()
4503            .unwrap();
4504        assert_eq!(defaults_hnsw.config.source_key, None);
4505
4506        // Check Spann default schema
4507        let defaults_spann = schema_spann
4508            .defaults
4509            .float_list
4510            .as_ref()
4511            .unwrap()
4512            .vector_index
4513            .as_ref()
4514            .unwrap();
4515        assert_eq!(defaults_spann.config.source_key, None);
4516
4517        // Test after reconcile with NON-default collection config
4518        // This path calls try_from where our fix is
4519        let collection_config_hnsw = InternalCollectionConfiguration {
4520            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4521                ef_construction: 300,
4522                max_neighbors: 32,
4523                ef_search: 50,
4524                num_threads: 8,
4525                batch_size: 200,
4526                sync_threshold: 2000,
4527                resize_factor: 1.5,
4528                space: Space::L2,
4529            }),
4530            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4531        };
4532        let result_hnsw = Schema::reconcile_with_collection_config(
4533            &schema_hnsw,
4534            &collection_config_hnsw,
4535            KnnIndex::Hnsw,
4536        )
4537        .unwrap();
4538        let reconciled_defaults_hnsw = result_hnsw
4539            .defaults
4540            .float_list
4541            .as_ref()
4542            .unwrap()
4543            .vector_index
4544            .as_ref()
4545            .unwrap();
4546        assert_eq!(reconciled_defaults_hnsw.config.source_key, None);
4547
4548        let collection_config_spann = InternalCollectionConfiguration {
4549            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4550                search_nprobe: 20,
4551                search_rng_factor: 3.0,
4552                search_rng_epsilon: 0.2,
4553                nreplica_count: 5,
4554                write_rng_factor: 2.0,
4555                write_rng_epsilon: 0.1,
4556                split_threshold: 2000,
4557                num_samples_kmeans: 200,
4558                initial_lambda: 0.8,
4559                reassign_neighbor_count: 100,
4560                merge_threshold: 800,
4561                num_centers_to_merge_to: 20,
4562                write_nprobe: 10,
4563                ef_construction: 400,
4564                ef_search: 60,
4565                max_neighbors: 24,
4566                space: Space::Cosine,
4567            }),
4568            embedding_function: None,
4569        };
4570        let result_spann = Schema::reconcile_with_collection_config(
4571            &schema_spann,
4572            &collection_config_spann,
4573            KnnIndex::Spann,
4574        )
4575        .unwrap();
4576        let reconciled_defaults_spann = result_spann
4577            .defaults
4578            .float_list
4579            .as_ref()
4580            .unwrap()
4581            .vector_index
4582            .as_ref()
4583            .unwrap();
4584        assert_eq!(reconciled_defaults_spann.config.source_key, None);
4585
4586        // Verify that #embedding key DOES have source_key set to DOCUMENT_KEY
4587        let embedding_hnsw = result_hnsw.keys.get(EMBEDDING_KEY).unwrap();
4588        let embedding_vector_index_hnsw = embedding_hnsw
4589            .float_list
4590            .as_ref()
4591            .unwrap()
4592            .vector_index
4593            .as_ref()
4594            .unwrap();
4595        assert_eq!(
4596            embedding_vector_index_hnsw.config.source_key,
4597            Some(DOCUMENT_KEY.to_string())
4598        );
4599
4600        let embedding_spann = result_spann.keys.get(EMBEDDING_KEY).unwrap();
4601        let embedding_vector_index_spann = embedding_spann
4602            .float_list
4603            .as_ref()
4604            .unwrap()
4605            .vector_index
4606            .as_ref()
4607            .unwrap();
4608        assert_eq!(
4609            embedding_vector_index_spann.config.source_key,
4610            Some(DOCUMENT_KEY.to_string())
4611        );
4612    }
4613
4614    #[test]
4615    fn test_try_from_source_key() {
4616        // Direct test of try_from to verify source_key behavior
4617        // Defaults should have source_key=None, #embedding should have source_key=DOCUMENT_KEY
4618
4619        // Test with HNSW config
4620        let collection_config_hnsw = InternalCollectionConfiguration {
4621            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4622                ef_construction: 300,
4623                max_neighbors: 32,
4624                ef_search: 50,
4625                num_threads: 8,
4626                batch_size: 200,
4627                sync_threshold: 2000,
4628                resize_factor: 1.5,
4629                space: Space::L2,
4630            }),
4631            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4632        };
4633        let schema_hnsw = Schema::try_from(&collection_config_hnsw).unwrap();
4634
4635        // Check defaults have source_key=None
4636        let defaults_hnsw = schema_hnsw
4637            .defaults
4638            .float_list
4639            .as_ref()
4640            .unwrap()
4641            .vector_index
4642            .as_ref()
4643            .unwrap();
4644        assert_eq!(defaults_hnsw.config.source_key, None);
4645
4646        // Check #embedding has source_key=DOCUMENT_KEY
4647        let embedding_hnsw = schema_hnsw.keys.get(EMBEDDING_KEY).unwrap();
4648        let embedding_vector_index_hnsw = embedding_hnsw
4649            .float_list
4650            .as_ref()
4651            .unwrap()
4652            .vector_index
4653            .as_ref()
4654            .unwrap();
4655        assert_eq!(
4656            embedding_vector_index_hnsw.config.source_key,
4657            Some(DOCUMENT_KEY.to_string())
4658        );
4659
4660        // Test with Spann config
4661        let collection_config_spann = InternalCollectionConfiguration {
4662            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4663                search_nprobe: 20,
4664                search_rng_factor: 3.0,
4665                search_rng_epsilon: 0.2,
4666                nreplica_count: 5,
4667                write_rng_factor: 2.0,
4668                write_rng_epsilon: 0.1,
4669                split_threshold: 2000,
4670                num_samples_kmeans: 200,
4671                initial_lambda: 0.8,
4672                reassign_neighbor_count: 100,
4673                merge_threshold: 800,
4674                num_centers_to_merge_to: 20,
4675                write_nprobe: 10,
4676                ef_construction: 400,
4677                ef_search: 60,
4678                max_neighbors: 24,
4679                space: Space::Cosine,
4680            }),
4681            embedding_function: None,
4682        };
4683        let schema_spann = Schema::try_from(&collection_config_spann).unwrap();
4684
4685        // Check defaults have source_key=None
4686        let defaults_spann = schema_spann
4687            .defaults
4688            .float_list
4689            .as_ref()
4690            .unwrap()
4691            .vector_index
4692            .as_ref()
4693            .unwrap();
4694        assert_eq!(defaults_spann.config.source_key, None);
4695
4696        // Check #embedding has source_key=DOCUMENT_KEY
4697        let embedding_spann = schema_spann.keys.get(EMBEDDING_KEY).unwrap();
4698        let embedding_vector_index_spann = embedding_spann
4699            .float_list
4700            .as_ref()
4701            .unwrap()
4702            .vector_index
4703            .as_ref()
4704            .unwrap();
4705        assert_eq!(
4706            embedding_vector_index_spann.config.source_key,
4707            Some(DOCUMENT_KEY.to_string())
4708        );
4709    }
4710
4711    #[test]
4712    fn test_default_hnsw_with_default_embedding_function() {
4713        // Test that when InternalCollectionConfiguration is default HNSW but has
4714        // an embedding function with name "default" and config as {}, it still
4715        // goes through the double default path and preserves source_key behavior
4716        use crate::collection_configuration::EmbeddingFunctionNewConfiguration;
4717
4718        let collection_config = InternalCollectionConfiguration {
4719            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default()),
4720            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
4721                EmbeddingFunctionNewConfiguration {
4722                    name: "default".to_string(),
4723                    config: serde_json::json!({}),
4724                },
4725            )),
4726        };
4727
4728        // Verify it's still considered default
4729        assert!(collection_config.is_default());
4730
4731        let schema = Schema::new_default(KnnIndex::Hnsw);
4732        let result =
4733            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4734                .unwrap();
4735
4736        // Check that defaults have source_key=None
4737        let defaults = result
4738            .defaults
4739            .float_list
4740            .as_ref()
4741            .unwrap()
4742            .vector_index
4743            .as_ref()
4744            .unwrap();
4745        assert_eq!(defaults.config.source_key, None);
4746
4747        // Check that #embedding has source_key=DOCUMENT_KEY
4748        let embedding = result.keys.get(EMBEDDING_KEY).unwrap();
4749        let embedding_vector_index = embedding
4750            .float_list
4751            .as_ref()
4752            .unwrap()
4753            .vector_index
4754            .as_ref()
4755            .unwrap();
4756        assert_eq!(
4757            embedding_vector_index.config.source_key,
4758            Some(DOCUMENT_KEY.to_string())
4759        );
4760
4761        // verify vector index config is set to spann
4762        let vector_index_config = defaults.config.clone();
4763        assert!(vector_index_config.spann.is_some());
4764        assert!(vector_index_config.hnsw.is_none());
4765
4766        // Verify embedding function was set correctly
4767        assert_eq!(
4768            embedding_vector_index.config.embedding_function,
4769            Some(EmbeddingFunctionConfiguration::Known(
4770                EmbeddingFunctionNewConfiguration {
4771                    name: "default".to_string(),
4772                    config: serde_json::json!({}),
4773                },
4774            ))
4775        );
4776        assert_eq!(
4777            defaults.config.embedding_function,
4778            Some(EmbeddingFunctionConfiguration::Known(
4779                EmbeddingFunctionNewConfiguration {
4780                    name: "default".to_string(),
4781                    config: serde_json::json!({}),
4782                },
4783            ))
4784        );
4785    }
4786
4787    #[test]
4788    fn test_reconcile_with_collection_config_both_non_default() {
4789        // Test that when both schema and collection config are non-default, it returns an error
4790        let mut schema = Schema::new_default(KnnIndex::Hnsw);
4791        schema.defaults.string = Some(StringValueType {
4792            fts_index: Some(FtsIndexType {
4793                enabled: true,
4794                config: FtsIndexConfig {},
4795            }),
4796            string_inverted_index: None,
4797        });
4798
4799        let mut collection_config = InternalCollectionConfiguration::default_hnsw();
4800        // Make collection config non-default by changing a parameter
4801        if let VectorIndexConfiguration::Hnsw(ref mut hnsw_config) = collection_config.vector_index
4802        {
4803            hnsw_config.ef_construction = 500; // Non-default value
4804        }
4805
4806        // Use reconcile_schema_and_config which has the early validation
4807        let result = Schema::reconcile_schema_and_config(
4808            Some(&schema),
4809            Some(&collection_config),
4810            KnnIndex::Spann,
4811        );
4812        assert!(result.is_err());
4813        assert!(matches!(
4814            result.unwrap_err(),
4815            SchemaError::ConfigAndSchemaConflict
4816        ));
4817    }
4818
4819    #[test]
4820    fn test_reconcile_with_collection_config_hnsw_override() {
4821        // Test that non-default HNSW collection config overrides default schema
4822        let schema = Schema::new_default(KnnIndex::Hnsw); // Use actual default schema
4823
4824        let collection_config = InternalCollectionConfiguration {
4825            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4826                ef_construction: 300,
4827                max_neighbors: 32,
4828                ef_search: 50,
4829                num_threads: 8,
4830                batch_size: 200,
4831                sync_threshold: 2000,
4832                resize_factor: 1.5,
4833                space: Space::L2,
4834            }),
4835            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4836        };
4837
4838        let result =
4839            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4840                .unwrap();
4841
4842        // Check that #embedding key override was created with the collection config settings
4843        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4844        let vector_index = embedding_override
4845            .float_list
4846            .as_ref()
4847            .unwrap()
4848            .vector_index
4849            .as_ref()
4850            .unwrap();
4851
4852        assert!(vector_index.enabled);
4853        assert_eq!(vector_index.config.space, Some(Space::L2));
4854        assert_eq!(
4855            vector_index.config.embedding_function,
4856            Some(EmbeddingFunctionConfiguration::Legacy)
4857        );
4858        assert_eq!(
4859            vector_index.config.source_key,
4860            Some(DOCUMENT_KEY.to_string())
4861        );
4862
4863        let hnsw_config = vector_index.config.hnsw.as_ref().unwrap();
4864        assert_eq!(hnsw_config.ef_construction, Some(300));
4865        assert_eq!(hnsw_config.max_neighbors, Some(32));
4866        assert_eq!(hnsw_config.ef_search, Some(50));
4867        assert_eq!(hnsw_config.num_threads, Some(8));
4868        assert_eq!(hnsw_config.batch_size, Some(200));
4869        assert_eq!(hnsw_config.sync_threshold, Some(2000));
4870        assert_eq!(hnsw_config.resize_factor, Some(1.5));
4871
4872        assert!(vector_index.config.spann.is_none());
4873    }
4874
4875    #[test]
4876    fn test_reconcile_with_collection_config_spann_override() {
4877        // Test that non-default SPANN collection config overrides default schema
4878        let schema = Schema::new_default(KnnIndex::Spann); // Use actual default schema
4879
4880        let collection_config = InternalCollectionConfiguration {
4881            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4882                search_nprobe: 20,
4883                search_rng_factor: 3.0,
4884                search_rng_epsilon: 0.2,
4885                nreplica_count: 5,
4886                write_rng_factor: 2.0,
4887                write_rng_epsilon: 0.1,
4888                split_threshold: 2000,
4889                num_samples_kmeans: 200,
4890                initial_lambda: 0.8,
4891                reassign_neighbor_count: 100,
4892                merge_threshold: 800,
4893                num_centers_to_merge_to: 20,
4894                write_nprobe: 10,
4895                ef_construction: 400,
4896                ef_search: 60,
4897                max_neighbors: 24,
4898                space: Space::Cosine,
4899            }),
4900            embedding_function: None,
4901        };
4902
4903        let result =
4904            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4905                .unwrap();
4906
4907        // Check that #embedding key override was created with the collection config settings
4908        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4909        let vector_index = embedding_override
4910            .float_list
4911            .as_ref()
4912            .unwrap()
4913            .vector_index
4914            .as_ref()
4915            .unwrap();
4916
4917        assert!(vector_index.enabled);
4918        assert_eq!(vector_index.config.space, Some(Space::Cosine));
4919        assert_eq!(vector_index.config.embedding_function, None);
4920        assert_eq!(
4921            vector_index.config.source_key,
4922            Some(DOCUMENT_KEY.to_string())
4923        );
4924
4925        assert!(vector_index.config.hnsw.is_none());
4926
4927        let spann_config = vector_index.config.spann.as_ref().unwrap();
4928        assert_eq!(spann_config.search_nprobe, Some(20));
4929        assert_eq!(spann_config.search_rng_factor, Some(3.0));
4930        assert_eq!(spann_config.search_rng_epsilon, Some(0.2));
4931        assert_eq!(spann_config.nreplica_count, Some(5));
4932        assert_eq!(spann_config.write_rng_factor, Some(2.0));
4933        assert_eq!(spann_config.write_rng_epsilon, Some(0.1));
4934        assert_eq!(spann_config.split_threshold, Some(2000));
4935        assert_eq!(spann_config.num_samples_kmeans, Some(200));
4936        assert_eq!(spann_config.initial_lambda, Some(0.8));
4937        assert_eq!(spann_config.reassign_neighbor_count, Some(100));
4938        assert_eq!(spann_config.merge_threshold, Some(800));
4939        assert_eq!(spann_config.num_centers_to_merge_to, Some(20));
4940        assert_eq!(spann_config.write_nprobe, Some(10));
4941        assert_eq!(spann_config.ef_construction, Some(400));
4942        assert_eq!(spann_config.ef_search, Some(60));
4943        assert_eq!(spann_config.max_neighbors, Some(24));
4944    }
4945
4946    #[test]
4947    fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
4948        // Test that collection config updates BOTH defaults.float_list.vector_index
4949        // AND keys["embedding"].float_list.vector_index
4950        let schema = Schema::new_default(KnnIndex::Hnsw);
4951
4952        let collection_config = InternalCollectionConfiguration {
4953            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4954                ef_construction: 300,
4955                max_neighbors: 32,
4956                ef_search: 50,
4957                num_threads: 8,
4958                batch_size: 200,
4959                sync_threshold: 2000,
4960                resize_factor: 1.5,
4961                space: Space::L2,
4962            }),
4963            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4964        };
4965
4966        let result =
4967            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4968                .unwrap();
4969
4970        // Check that defaults.float_list.vector_index was updated
4971        let defaults_vector_index = result
4972            .defaults
4973            .float_list
4974            .as_ref()
4975            .unwrap()
4976            .vector_index
4977            .as_ref()
4978            .unwrap();
4979
4980        // Should be disabled in defaults (template for new keys)
4981        assert!(!defaults_vector_index.enabled);
4982        // But config should be updated
4983        assert_eq!(defaults_vector_index.config.space, Some(Space::L2));
4984        assert_eq!(
4985            defaults_vector_index.config.embedding_function,
4986            Some(EmbeddingFunctionConfiguration::Legacy)
4987        );
4988        assert_eq!(defaults_vector_index.config.source_key, None);
4989        let defaults_hnsw = defaults_vector_index.config.hnsw.as_ref().unwrap();
4990        assert_eq!(defaults_hnsw.ef_construction, Some(300));
4991        assert_eq!(defaults_hnsw.max_neighbors, Some(32));
4992
4993        // Check that #embedding key override was also updated
4994        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4995        let embedding_vector_index = embedding_override
4996            .float_list
4997            .as_ref()
4998            .unwrap()
4999            .vector_index
5000            .as_ref()
5001            .unwrap();
5002
5003        // Should be enabled on #embedding
5004        assert!(embedding_vector_index.enabled);
5005        // Config should match defaults
5006        assert_eq!(embedding_vector_index.config.space, Some(Space::L2));
5007        assert_eq!(
5008            embedding_vector_index.config.embedding_function,
5009            Some(EmbeddingFunctionConfiguration::Legacy)
5010        );
5011        assert_eq!(
5012            embedding_vector_index.config.source_key,
5013            Some(DOCUMENT_KEY.to_string())
5014        );
5015        let embedding_hnsw = embedding_vector_index.config.hnsw.as_ref().unwrap();
5016        assert_eq!(embedding_hnsw.ef_construction, Some(300));
5017        assert_eq!(embedding_hnsw.max_neighbors, Some(32));
5018    }
5019
5020    #[test]
5021    fn test_is_schema_default() {
5022        // Test that actual default schemas are correctly identified
5023        let default_hnsw_schema = Schema::new_default(KnnIndex::Hnsw);
5024        assert!(default_hnsw_schema.is_default());
5025
5026        let default_spann_schema = Schema::new_default(KnnIndex::Spann);
5027        assert!(default_spann_schema.is_default());
5028
5029        // Test that a modified default schema is not considered default
5030        let mut modified_schema = Schema::new_default(KnnIndex::Hnsw);
5031        // Make a clear modification - change the string inverted index enabled state
5032        if let Some(ref mut string_type) = modified_schema.defaults.string {
5033            if let Some(ref mut string_inverted) = string_type.string_inverted_index {
5034                string_inverted.enabled = false; // Default is true, so this should make it non-default
5035            }
5036        }
5037        assert!(!modified_schema.is_default());
5038
5039        // Test that schema with additional key overrides is not default
5040        let mut schema_with_extra_overrides = Schema::new_default(KnnIndex::Hnsw);
5041        schema_with_extra_overrides
5042            .keys
5043            .insert("custom_key".to_string(), ValueTypes::default());
5044        assert!(!schema_with_extra_overrides.is_default());
5045    }
5046
5047    #[test]
5048    fn test_is_schema_default_with_space() {
5049        let schema = Schema::new_default(KnnIndex::Hnsw);
5050        assert!(schema.is_default());
5051
5052        let mut schema_with_space = Schema::new_default(KnnIndex::Hnsw);
5053        if let Some(ref mut float_list) = schema_with_space.defaults.float_list {
5054            if let Some(ref mut vector_index) = float_list.vector_index {
5055                vector_index.config.space = Some(Space::Cosine);
5056            }
5057        }
5058        assert!(!schema_with_space.is_default());
5059
5060        let mut schema_with_space_in_embedding_key = Schema::new_default(KnnIndex::Spann);
5061        if let Some(ref mut embedding_key) = schema_with_space_in_embedding_key
5062            .keys
5063            .get_mut(EMBEDDING_KEY)
5064        {
5065            if let Some(ref mut float_list) = embedding_key.float_list {
5066                if let Some(ref mut vector_index) = float_list.vector_index {
5067                    vector_index.config.space = Some(Space::Cosine);
5068                }
5069            }
5070        }
5071        assert!(!schema_with_space_in_embedding_key.is_default());
5072    }
5073
5074    #[test]
5075    fn test_is_schema_default_with_embedding_function() {
5076        let schema = Schema::new_default(KnnIndex::Hnsw);
5077        assert!(schema.is_default());
5078
5079        let mut schema_with_embedding_function = Schema::new_default(KnnIndex::Hnsw);
5080        if let Some(ref mut float_list) = schema_with_embedding_function.defaults.float_list {
5081            if let Some(ref mut vector_index) = float_list.vector_index {
5082                vector_index.config.embedding_function =
5083                    Some(EmbeddingFunctionConfiguration::Legacy);
5084            }
5085        }
5086        assert!(!schema_with_embedding_function.is_default());
5087
5088        let mut schema_with_embedding_function_in_embedding_key =
5089            Schema::new_default(KnnIndex::Spann);
5090        if let Some(ref mut embedding_key) = schema_with_embedding_function_in_embedding_key
5091            .keys
5092            .get_mut(EMBEDDING_KEY)
5093        {
5094            if let Some(ref mut float_list) = embedding_key.float_list {
5095                if let Some(ref mut vector_index) = float_list.vector_index {
5096                    vector_index.config.embedding_function =
5097                        Some(EmbeddingFunctionConfiguration::Legacy);
5098                }
5099            }
5100        }
5101        assert!(!schema_with_embedding_function_in_embedding_key.is_default());
5102    }
5103
5104    #[test]
5105    fn test_add_merges_keys_by_value_type() {
5106        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
5107        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
5108
5109        let string_override = ValueTypes {
5110            string: Some(StringValueType {
5111                string_inverted_index: Some(StringInvertedIndexType {
5112                    enabled: true,
5113                    config: StringInvertedIndexConfig {},
5114                }),
5115                fts_index: None,
5116            }),
5117            ..Default::default()
5118        };
5119        schema_a
5120            .keys
5121            .insert("custom_field".to_string(), string_override);
5122
5123        let float_override = ValueTypes {
5124            float: Some(FloatValueType {
5125                float_inverted_index: Some(FloatInvertedIndexType {
5126                    enabled: true,
5127                    config: FloatInvertedIndexConfig {},
5128                }),
5129            }),
5130            ..Default::default()
5131        };
5132        schema_b
5133            .keys
5134            .insert("custom_field".to_string(), float_override);
5135
5136        let merged = schema_a.merge(&schema_b).unwrap();
5137        let merged_override = merged.keys.get("custom_field").unwrap();
5138
5139        assert!(merged_override.string.is_some());
5140        assert!(merged_override.float.is_some());
5141        assert!(
5142            merged_override
5143                .string
5144                .as_ref()
5145                .unwrap()
5146                .string_inverted_index
5147                .as_ref()
5148                .unwrap()
5149                .enabled
5150        );
5151        assert!(
5152            merged_override
5153                .float
5154                .as_ref()
5155                .unwrap()
5156                .float_inverted_index
5157                .as_ref()
5158                .unwrap()
5159                .enabled
5160        );
5161    }
5162
5163    #[test]
5164    fn test_add_rejects_different_defaults() {
5165        let schema_a = Schema::new_default(KnnIndex::Hnsw);
5166        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
5167
5168        if let Some(string_type) = schema_b.defaults.string.as_mut() {
5169            if let Some(string_index) = string_type.string_inverted_index.as_mut() {
5170                string_index.enabled = false;
5171            }
5172        }
5173
5174        let err = schema_a.merge(&schema_b).unwrap_err();
5175        assert!(matches!(err, SchemaError::DefaultsMismatch));
5176    }
5177
5178    #[test]
5179    fn test_add_detects_conflicting_value_type_configuration() {
5180        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
5181        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
5182
5183        let string_override_enabled = ValueTypes {
5184            string: Some(StringValueType {
5185                string_inverted_index: Some(StringInvertedIndexType {
5186                    enabled: true,
5187                    config: StringInvertedIndexConfig {},
5188                }),
5189                fts_index: None,
5190            }),
5191            ..Default::default()
5192        };
5193        schema_a
5194            .keys
5195            .insert("custom_field".to_string(), string_override_enabled);
5196
5197        let string_override_disabled = ValueTypes {
5198            string: Some(StringValueType {
5199                string_inverted_index: Some(StringInvertedIndexType {
5200                    enabled: false,
5201                    config: StringInvertedIndexConfig {},
5202                }),
5203                fts_index: None,
5204            }),
5205            ..Default::default()
5206        };
5207        schema_b
5208            .keys
5209            .insert("custom_field".to_string(), string_override_disabled);
5210
5211        let err = schema_a.merge(&schema_b).unwrap_err();
5212        assert!(matches!(err, SchemaError::ConfigurationConflict { .. }));
5213    }
5214
5215    // TODO(Sanket): Remove this test once deployed
5216    #[test]
5217    fn test_backward_compatibility_aliases() {
5218        // Test that old format with # and $ prefixes and key_overrides can be deserialized
5219        let old_format_json = r###"{
5220            "defaults": {
5221                "#string": {
5222                    "$fts_index": {
5223                        "enabled": true,
5224                        "config": {}
5225                    }
5226                },
5227                "#int": {
5228                    "$int_inverted_index": {
5229                        "enabled": true,
5230                        "config": {}
5231                    }
5232                },
5233                "#float_list": {
5234                    "$vector_index": {
5235                        "enabled": true,
5236                        "config": {
5237                            "spann": {
5238                                "search_nprobe": 10
5239                            }
5240                        }
5241                    }
5242                }
5243            },
5244            "key_overrides": {
5245                "#document": {
5246                    "#string": {
5247                        "$fts_index": {
5248                            "enabled": false,
5249                            "config": {}
5250                        }
5251                    }
5252                }
5253            }
5254        }"###;
5255
5256        let schema_from_old: Schema = serde_json::from_str(old_format_json).unwrap();
5257
5258        // Test that new format without prefixes and keys can be deserialized
5259        let new_format_json = r###"{
5260            "defaults": {
5261                "string": {
5262                    "fts_index": {
5263                        "enabled": true,
5264                        "config": {}
5265                    }
5266                },
5267                "int": {
5268                    "int_inverted_index": {
5269                        "enabled": true,
5270                        "config": {}
5271                    }
5272                },
5273                "float_list": {
5274                    "vector_index": {
5275                        "enabled": true,
5276                        "config": {
5277                            "spann": {
5278                                "search_nprobe": 10
5279                            }
5280                        }
5281                    }
5282                }
5283            },
5284            "keys": {
5285                "#document": {
5286                    "string": {
5287                        "fts_index": {
5288                            "enabled": false,
5289                            "config": {}
5290                        }
5291                    }
5292                }
5293            }
5294        }"###;
5295
5296        let schema_from_new: Schema = serde_json::from_str(new_format_json).unwrap();
5297
5298        // Both should deserialize to the same structure
5299        assert_eq!(schema_from_old, schema_from_new);
5300
5301        // Verify the deserialized content is correct
5302        assert!(schema_from_old.defaults.string.is_some());
5303        assert!(schema_from_old
5304            .defaults
5305            .string
5306            .as_ref()
5307            .unwrap()
5308            .fts_index
5309            .is_some());
5310        assert!(
5311            schema_from_old
5312                .defaults
5313                .string
5314                .as_ref()
5315                .unwrap()
5316                .fts_index
5317                .as_ref()
5318                .unwrap()
5319                .enabled
5320        );
5321
5322        assert!(schema_from_old.defaults.int.is_some());
5323        assert!(schema_from_old
5324            .defaults
5325            .int
5326            .as_ref()
5327            .unwrap()
5328            .int_inverted_index
5329            .is_some());
5330
5331        assert!(schema_from_old.defaults.float_list.is_some());
5332        assert!(schema_from_old
5333            .defaults
5334            .float_list
5335            .as_ref()
5336            .unwrap()
5337            .vector_index
5338            .is_some());
5339
5340        assert!(schema_from_old.keys.contains_key(DOCUMENT_KEY));
5341        let doc_override = schema_from_old.keys.get(DOCUMENT_KEY).unwrap();
5342        assert!(doc_override.string.is_some());
5343        assert!(
5344            !doc_override
5345                .string
5346                .as_ref()
5347                .unwrap()
5348                .fts_index
5349                .as_ref()
5350                .unwrap()
5351                .enabled
5352        );
5353
5354        // Test that serialization always outputs the new format (without prefixes)
5355        let serialized = serde_json::to_string(&schema_from_old).unwrap();
5356
5357        // Should contain new format keys
5358        assert!(serialized.contains(r#""keys":"#));
5359        assert!(serialized.contains(r#""string":"#));
5360        assert!(serialized.contains(r#""fts_index":"#));
5361        assert!(serialized.contains(r#""int_inverted_index":"#));
5362        assert!(serialized.contains(r#""vector_index":"#));
5363
5364        // Should NOT contain old format keys
5365        assert!(!serialized.contains(r#""key_overrides":"#));
5366        assert!(!serialized.contains(r###""#string":"###));
5367        assert!(!serialized.contains(r###""$fts_index":"###));
5368        assert!(!serialized.contains(r###""$int_inverted_index":"###));
5369        assert!(!serialized.contains(r###""$vector_index":"###));
5370    }
5371
5372    #[test]
5373    fn test_hnsw_index_config_validation() {
5374        use validator::Validate;
5375
5376        // Valid configuration - should pass
5377        let valid_config = HnswIndexConfig {
5378            batch_size: Some(10),
5379            sync_threshold: Some(100),
5380            ef_construction: Some(100),
5381            max_neighbors: Some(16),
5382            ..Default::default()
5383        };
5384        assert!(valid_config.validate().is_ok());
5385
5386        // Invalid: batch_size too small (min 2)
5387        let invalid_batch_size = HnswIndexConfig {
5388            batch_size: Some(1),
5389            ..Default::default()
5390        };
5391        assert!(invalid_batch_size.validate().is_err());
5392
5393        // Invalid: sync_threshold too small (min 2)
5394        let invalid_sync_threshold = HnswIndexConfig {
5395            sync_threshold: Some(1),
5396            ..Default::default()
5397        };
5398        assert!(invalid_sync_threshold.validate().is_err());
5399
5400        // Valid: boundary values (exactly 2) should pass
5401        let boundary_config = HnswIndexConfig {
5402            batch_size: Some(2),
5403            sync_threshold: Some(2),
5404            ..Default::default()
5405        };
5406        assert!(boundary_config.validate().is_ok());
5407
5408        // Valid: None values should pass validation
5409        let all_none_config = HnswIndexConfig {
5410            ..Default::default()
5411        };
5412        assert!(all_none_config.validate().is_ok());
5413
5414        // Valid: fields without validation can be any value
5415        let other_fields_config = HnswIndexConfig {
5416            ef_construction: Some(1),
5417            max_neighbors: Some(1),
5418            ef_search: Some(1),
5419            num_threads: Some(1),
5420            resize_factor: Some(0.1),
5421            ..Default::default()
5422        };
5423        assert!(other_fields_config.validate().is_ok());
5424    }
5425
5426    #[test]
5427    fn test_spann_index_config_validation() {
5428        use validator::Validate;
5429
5430        // Valid configuration - should pass
5431        let valid_config = SpannIndexConfig {
5432            write_nprobe: Some(32),
5433            nreplica_count: Some(4),
5434            split_threshold: Some(100),
5435            merge_threshold: Some(50),
5436            reassign_neighbor_count: Some(32),
5437            num_centers_to_merge_to: Some(4),
5438            ef_construction: Some(100),
5439            ef_search: Some(100),
5440            max_neighbors: Some(32),
5441            search_rng_factor: Some(1.0),
5442            write_rng_factor: Some(1.0),
5443            search_rng_epsilon: Some(7.5),
5444            write_rng_epsilon: Some(7.5),
5445            ..Default::default()
5446        };
5447        assert!(valid_config.validate().is_ok());
5448
5449        // Invalid: write_nprobe too large (max 64)
5450        let invalid_write_nprobe = SpannIndexConfig {
5451            write_nprobe: Some(200),
5452            ..Default::default()
5453        };
5454        assert!(invalid_write_nprobe.validate().is_err());
5455
5456        // Invalid: split_threshold too small (min 50)
5457        let invalid_split_threshold = SpannIndexConfig {
5458            split_threshold: Some(10),
5459            ..Default::default()
5460        };
5461        assert!(invalid_split_threshold.validate().is_err());
5462
5463        // Invalid: split_threshold too large (max 200)
5464        let invalid_split_threshold_high = SpannIndexConfig {
5465            split_threshold: Some(250),
5466            ..Default::default()
5467        };
5468        assert!(invalid_split_threshold_high.validate().is_err());
5469
5470        // Invalid: nreplica_count too large (max 8)
5471        let invalid_nreplica = SpannIndexConfig {
5472            nreplica_count: Some(10),
5473            ..Default::default()
5474        };
5475        assert!(invalid_nreplica.validate().is_err());
5476
5477        // Invalid: reassign_neighbor_count too large (max 64)
5478        let invalid_reassign = SpannIndexConfig {
5479            reassign_neighbor_count: Some(100),
5480            ..Default::default()
5481        };
5482        assert!(invalid_reassign.validate().is_err());
5483
5484        // Invalid: merge_threshold out of range (min 25, max 100)
5485        let invalid_merge_threshold_low = SpannIndexConfig {
5486            merge_threshold: Some(5),
5487            ..Default::default()
5488        };
5489        assert!(invalid_merge_threshold_low.validate().is_err());
5490
5491        let invalid_merge_threshold_high = SpannIndexConfig {
5492            merge_threshold: Some(150),
5493            ..Default::default()
5494        };
5495        assert!(invalid_merge_threshold_high.validate().is_err());
5496
5497        // Invalid: num_centers_to_merge_to too large (max 8)
5498        let invalid_num_centers = SpannIndexConfig {
5499            num_centers_to_merge_to: Some(10),
5500            ..Default::default()
5501        };
5502        assert!(invalid_num_centers.validate().is_err());
5503
5504        // Invalid: ef_construction too large (max 200)
5505        let invalid_ef_construction = SpannIndexConfig {
5506            ef_construction: Some(300),
5507            ..Default::default()
5508        };
5509        assert!(invalid_ef_construction.validate().is_err());
5510
5511        // Invalid: ef_search too large (max 200)
5512        let invalid_ef_search = SpannIndexConfig {
5513            ef_search: Some(300),
5514            ..Default::default()
5515        };
5516        assert!(invalid_ef_search.validate().is_err());
5517
5518        // Invalid: max_neighbors too large (max 64)
5519        let invalid_max_neighbors = SpannIndexConfig {
5520            max_neighbors: Some(100),
5521            ..Default::default()
5522        };
5523        assert!(invalid_max_neighbors.validate().is_err());
5524
5525        // Invalid: search_nprobe too large (max 128)
5526        let invalid_search_nprobe = SpannIndexConfig {
5527            search_nprobe: Some(200),
5528            ..Default::default()
5529        };
5530        assert!(invalid_search_nprobe.validate().is_err());
5531
5532        // Invalid: search_rng_factor not exactly 1.0 (min 1.0, max 1.0)
5533        let invalid_search_rng_factor_low = SpannIndexConfig {
5534            search_rng_factor: Some(0.9),
5535            ..Default::default()
5536        };
5537        assert!(invalid_search_rng_factor_low.validate().is_err());
5538
5539        let invalid_search_rng_factor_high = SpannIndexConfig {
5540            search_rng_factor: Some(1.1),
5541            ..Default::default()
5542        };
5543        assert!(invalid_search_rng_factor_high.validate().is_err());
5544
5545        // Valid: search_rng_factor exactly 1.0
5546        let valid_search_rng_factor = SpannIndexConfig {
5547            search_rng_factor: Some(1.0),
5548            ..Default::default()
5549        };
5550        assert!(valid_search_rng_factor.validate().is_ok());
5551
5552        // Invalid: search_rng_epsilon out of range (min 5.0, max 10.0)
5553        let invalid_search_rng_epsilon_low = SpannIndexConfig {
5554            search_rng_epsilon: Some(4.0),
5555            ..Default::default()
5556        };
5557        assert!(invalid_search_rng_epsilon_low.validate().is_err());
5558
5559        let invalid_search_rng_epsilon_high = SpannIndexConfig {
5560            search_rng_epsilon: Some(11.0),
5561            ..Default::default()
5562        };
5563        assert!(invalid_search_rng_epsilon_high.validate().is_err());
5564
5565        // Valid: search_rng_epsilon within range
5566        let valid_search_rng_epsilon = SpannIndexConfig {
5567            search_rng_epsilon: Some(7.5),
5568            ..Default::default()
5569        };
5570        assert!(valid_search_rng_epsilon.validate().is_ok());
5571
5572        // Invalid: write_rng_factor not exactly 1.0 (min 1.0, max 1.0)
5573        let invalid_write_rng_factor_low = SpannIndexConfig {
5574            write_rng_factor: Some(0.9),
5575            ..Default::default()
5576        };
5577        assert!(invalid_write_rng_factor_low.validate().is_err());
5578
5579        let invalid_write_rng_factor_high = SpannIndexConfig {
5580            write_rng_factor: Some(1.1),
5581            ..Default::default()
5582        };
5583        assert!(invalid_write_rng_factor_high.validate().is_err());
5584
5585        // Valid: write_rng_factor exactly 1.0
5586        let valid_write_rng_factor = SpannIndexConfig {
5587            write_rng_factor: Some(1.0),
5588            ..Default::default()
5589        };
5590        assert!(valid_write_rng_factor.validate().is_ok());
5591
5592        // Invalid: write_rng_epsilon out of range (min 5.0, max 10.0)
5593        let invalid_write_rng_epsilon_low = SpannIndexConfig {
5594            write_rng_epsilon: Some(4.0),
5595            ..Default::default()
5596        };
5597        assert!(invalid_write_rng_epsilon_low.validate().is_err());
5598
5599        let invalid_write_rng_epsilon_high = SpannIndexConfig {
5600            write_rng_epsilon: Some(11.0),
5601            ..Default::default()
5602        };
5603        assert!(invalid_write_rng_epsilon_high.validate().is_err());
5604
5605        // Valid: write_rng_epsilon within range
5606        let valid_write_rng_epsilon = SpannIndexConfig {
5607            write_rng_epsilon: Some(7.5),
5608            ..Default::default()
5609        };
5610        assert!(valid_write_rng_epsilon.validate().is_ok());
5611
5612        // Invalid: num_samples_kmeans too large (max 1000)
5613        let invalid_num_samples_kmeans = SpannIndexConfig {
5614            num_samples_kmeans: Some(1500),
5615            ..Default::default()
5616        };
5617        assert!(invalid_num_samples_kmeans.validate().is_err());
5618
5619        // Valid: num_samples_kmeans within range
5620        let valid_num_samples_kmeans = SpannIndexConfig {
5621            num_samples_kmeans: Some(500),
5622            ..Default::default()
5623        };
5624        assert!(valid_num_samples_kmeans.validate().is_ok());
5625
5626        // Invalid: initial_lambda not exactly 100.0 (min 100.0, max 100.0)
5627        let invalid_initial_lambda_high = SpannIndexConfig {
5628            initial_lambda: Some(150.0),
5629            ..Default::default()
5630        };
5631        assert!(invalid_initial_lambda_high.validate().is_err());
5632
5633        let invalid_initial_lambda_low = SpannIndexConfig {
5634            initial_lambda: Some(50.0),
5635            ..Default::default()
5636        };
5637        assert!(invalid_initial_lambda_low.validate().is_err());
5638
5639        // Valid: initial_lambda exactly 100.0
5640        let valid_initial_lambda = SpannIndexConfig {
5641            initial_lambda: Some(100.0),
5642            ..Default::default()
5643        };
5644        assert!(valid_initial_lambda.validate().is_ok());
5645
5646        // Valid: None values should pass validation
5647        let all_none_config = SpannIndexConfig {
5648            ..Default::default()
5649        };
5650        assert!(all_none_config.validate().is_ok());
5651    }
5652
5653    #[test]
5654    fn test_builder_pattern_crud_workflow() {
5655        // Test comprehensive CRUD workflow using the builder pattern
5656
5657        // CREATE: Build a schema with multiple indexes
5658        let schema = Schema::new_default(KnnIndex::Hnsw)
5659            .create_index(
5660                None,
5661                IndexConfig::Vector(VectorIndexConfig {
5662                    space: Some(Space::Cosine),
5663                    embedding_function: None,
5664                    source_key: None,
5665                    hnsw: Some(HnswIndexConfig {
5666                        ef_construction: Some(200),
5667                        max_neighbors: Some(32),
5668                        ef_search: Some(50),
5669                        num_threads: None,
5670                        batch_size: None,
5671                        sync_threshold: None,
5672                        resize_factor: None,
5673                    }),
5674                    spann: None,
5675                }),
5676            )
5677            .expect("vector config should succeed")
5678            .create_index(
5679                Some("category"),
5680                IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5681            )
5682            .expect("string inverted on key should succeed")
5683            .create_index(
5684                Some("year"),
5685                IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5686            )
5687            .expect("int inverted on key should succeed")
5688            .create_index(
5689                Some("rating"),
5690                IndexConfig::FloatInverted(FloatInvertedIndexConfig {}),
5691            )
5692            .expect("float inverted on key should succeed")
5693            .create_index(
5694                Some("is_active"),
5695                IndexConfig::BoolInverted(BoolInvertedIndexConfig {}),
5696            )
5697            .expect("bool inverted on key should succeed");
5698
5699        // READ: Verify the schema was built correctly
5700        // Check vector config
5701        assert!(schema.keys.contains_key(EMBEDDING_KEY));
5702        let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
5703        assert!(embedding.float_list.is_some());
5704        let vector_index = embedding
5705            .float_list
5706            .as_ref()
5707            .unwrap()
5708            .vector_index
5709            .as_ref()
5710            .unwrap();
5711        assert!(vector_index.enabled);
5712        assert_eq!(vector_index.config.space, Some(Space::Cosine));
5713        assert_eq!(
5714            vector_index.config.hnsw.as_ref().unwrap().ef_construction,
5715            Some(200)
5716        );
5717
5718        // Check per-key indexes
5719        assert!(schema.keys.contains_key("category"));
5720        assert!(schema.keys.contains_key("year"));
5721        assert!(schema.keys.contains_key("rating"));
5722        assert!(schema.keys.contains_key("is_active"));
5723
5724        // Verify category string inverted index
5725        let category = schema.keys.get("category").unwrap();
5726        assert!(category.string.is_some());
5727        let string_idx = category
5728            .string
5729            .as_ref()
5730            .unwrap()
5731            .string_inverted_index
5732            .as_ref()
5733            .unwrap();
5734        assert!(string_idx.enabled);
5735
5736        // Verify year int inverted index
5737        let year = schema.keys.get("year").unwrap();
5738        assert!(year.int.is_some());
5739        let int_idx = year
5740            .int
5741            .as_ref()
5742            .unwrap()
5743            .int_inverted_index
5744            .as_ref()
5745            .unwrap();
5746        assert!(int_idx.enabled);
5747
5748        // UPDATE/DELETE: Disable some indexes
5749        let schema = schema
5750            .delete_index(
5751                Some("category"),
5752                IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5753            )
5754            .expect("delete string inverted should succeed")
5755            .delete_index(
5756                Some("year"),
5757                IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5758            )
5759            .expect("delete int inverted should succeed");
5760
5761        // VERIFY DELETE: Check that indexes were disabled
5762        let category = schema.keys.get("category").unwrap();
5763        let string_idx = category
5764            .string
5765            .as_ref()
5766            .unwrap()
5767            .string_inverted_index
5768            .as_ref()
5769            .unwrap();
5770        assert!(!string_idx.enabled); // Should be disabled now
5771
5772        let year = schema.keys.get("year").unwrap();
5773        let int_idx = year
5774            .int
5775            .as_ref()
5776            .unwrap()
5777            .int_inverted_index
5778            .as_ref()
5779            .unwrap();
5780        assert!(!int_idx.enabled); // Should be disabled now
5781
5782        // Verify other indexes still enabled
5783        let rating = schema.keys.get("rating").unwrap();
5784        let float_idx = rating
5785            .float
5786            .as_ref()
5787            .unwrap()
5788            .float_inverted_index
5789            .as_ref()
5790            .unwrap();
5791        assert!(float_idx.enabled); // Should still be enabled
5792
5793        let is_active = schema.keys.get("is_active").unwrap();
5794        let bool_idx = is_active
5795            .boolean
5796            .as_ref()
5797            .unwrap()
5798            .bool_inverted_index
5799            .as_ref()
5800            .unwrap();
5801        assert!(bool_idx.enabled); // Should still be enabled
5802    }
5803
5804    #[test]
5805    fn test_builder_create_index_validation_errors() {
5806        // Test all validation errors for create_index() as documented in the docstring:
5807        // - Attempting to create index on special keys (#document, #embedding)
5808        // - Invalid configuration (e.g., vector index on non-embedding key)
5809        // - Conflicting with existing indexes (e.g., multiple sparse vector indexes)
5810
5811        // Error: Vector index on specific key (must be global)
5812        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5813            Some("my_vectors"),
5814            IndexConfig::Vector(VectorIndexConfig {
5815                space: Some(Space::L2),
5816                embedding_function: None,
5817                source_key: None,
5818                hnsw: None,
5819                spann: None,
5820            }),
5821        );
5822        assert!(result.is_err());
5823        assert!(matches!(
5824            result.unwrap_err(),
5825            SchemaBuilderError::VectorIndexMustBeGlobal { key } if key == "my_vectors"
5826        ));
5827
5828        // Error: FTS index on non-#document key
5829        let result = Schema::new_default(KnnIndex::Hnsw)
5830            .create_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
5831        assert!(result.is_err());
5832        assert!(matches!(
5833            result.unwrap_err(),
5834            SchemaBuilderError::FtsIndexOnlyOnDocument
5835        ));
5836
5837        // Success: FTS index on #document key
5838        let schema = Schema::new_default(KnnIndex::Hnsw)
5839            .create_index(Some(DOCUMENT_KEY), IndexConfig::Fts(FtsIndexConfig {}))
5840            .expect("FTS on #document should succeed");
5841        assert!(schema.is_fts_enabled());
5842
5843        // Error: Cannot create index on special key #document
5844        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5845            Some(DOCUMENT_KEY),
5846            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5847        );
5848        assert!(result.is_err());
5849        assert!(matches!(
5850            result.unwrap_err(),
5851            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5852        ));
5853
5854        // Error: Cannot create index on special key #embedding
5855        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5856            Some(EMBEDDING_KEY),
5857            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5858        );
5859        assert!(result.is_err());
5860        assert!(matches!(
5861            result.unwrap_err(),
5862            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5863        ));
5864
5865        // Error: Sparse vector without key (must specify key)
5866        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5867            None,
5868            IndexConfig::SparseVector(SparseVectorIndexConfig {
5869                embedding_function: None,
5870                source_key: None,
5871                bm25: None,
5872            }),
5873        );
5874        assert!(result.is_err());
5875        assert!(matches!(
5876            result.unwrap_err(),
5877            SchemaBuilderError::SparseVectorRequiresKey
5878        ));
5879
5880        // Error: Multiple sparse vector indexes (only one allowed per collection)
5881        let result = Schema::new_default(KnnIndex::Hnsw)
5882            .create_index(
5883                Some("sparse1"),
5884                IndexConfig::SparseVector(SparseVectorIndexConfig {
5885                    embedding_function: None,
5886                    source_key: None,
5887                    bm25: None,
5888                }),
5889            )
5890            .expect("first sparse should succeed")
5891            .create_index(
5892                Some("sparse2"),
5893                IndexConfig::SparseVector(SparseVectorIndexConfig {
5894                    embedding_function: None,
5895                    source_key: None,
5896                    bm25: None,
5897                }),
5898            );
5899        assert!(result.is_err());
5900        assert!(matches!(
5901            result.unwrap_err(),
5902            SchemaBuilderError::MultipleSparseVectorIndexes { existing_key } if existing_key == "sparse1"
5903        ));
5904    }
5905
5906    #[test]
5907    fn test_builder_delete_index_validation_errors() {
5908        // Test all validation errors for delete_index() as documented in the docstring:
5909        // - Attempting to delete index on special keys (#document, #embedding)
5910        // - Attempting to delete vector, FTS, or sparse vector indexes (not currently supported)
5911
5912        // Error: Delete on special key #embedding
5913        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5914            Some(EMBEDDING_KEY),
5915            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5916        );
5917        assert!(result.is_err());
5918        assert!(matches!(
5919            result.unwrap_err(),
5920            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5921        ));
5922
5923        // Error: Delete on special key #document
5924        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5925            Some(DOCUMENT_KEY),
5926            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5927        );
5928        assert!(result.is_err());
5929        assert!(matches!(
5930            result.unwrap_err(),
5931            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5932        ));
5933
5934        // Error: Delete vector index (not currently supported)
5935        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5936            None,
5937            IndexConfig::Vector(VectorIndexConfig {
5938                space: None,
5939                embedding_function: None,
5940                source_key: None,
5941                hnsw: None,
5942                spann: None,
5943            }),
5944        );
5945        assert!(result.is_err());
5946        assert!(matches!(
5947            result.unwrap_err(),
5948            SchemaBuilderError::VectorIndexDeletionNotSupported
5949        ));
5950
5951        // FTS index deletion is now supported (disables FTS)
5952        let schema = Schema::new_default(KnnIndex::Hnsw)
5953            .delete_index(Some(DOCUMENT_KEY), IndexConfig::Fts(FtsIndexConfig {}))
5954            .expect("FTS deletion should succeed");
5955        assert!(!schema.is_fts_enabled());
5956
5957        // Error: Delete sparse vector index (not currently supported)
5958        let result = Schema::new_default(KnnIndex::Hnsw)
5959            .create_index(
5960                Some("sparse"),
5961                IndexConfig::SparseVector(SparseVectorIndexConfig {
5962                    embedding_function: None,
5963                    source_key: None,
5964                    bm25: None,
5965                }),
5966            )
5967            .expect("create should succeed")
5968            .delete_index(
5969                Some("sparse"),
5970                IndexConfig::SparseVector(SparseVectorIndexConfig {
5971                    embedding_function: None,
5972                    source_key: None,
5973                    bm25: None,
5974                }),
5975            );
5976        assert!(result.is_err());
5977        assert!(matches!(
5978            result.unwrap_err(),
5979            SchemaBuilderError::SparseVectorIndexDeletionNotSupported
5980        ));
5981    }
5982
5983    #[test]
5984    fn test_fts_create_global_without_key_rejected() {
5985        // FTS create_index without key (global) should fail with FtsIndexOnlyOnDocument
5986        let result = Schema::new_default(KnnIndex::Hnsw)
5987            .create_index(None, IndexConfig::Fts(FtsIndexConfig {}));
5988        assert!(result.is_err());
5989        assert!(matches!(
5990            result.unwrap_err(),
5991            SchemaBuilderError::FtsIndexOnlyOnDocument
5992        ));
5993    }
5994
5995    #[test]
5996    fn test_fts_delete_global_without_key_rejected() {
5997        // FTS delete_index without key (global) should fail with FtsIndexDeletionOnlyOnDocument
5998        let result = Schema::new_default(KnnIndex::Hnsw)
5999            .delete_index(None, IndexConfig::Fts(FtsIndexConfig {}));
6000        assert!(result.is_err());
6001        assert!(matches!(
6002            result.unwrap_err(),
6003            SchemaBuilderError::FtsIndexDeletionOnlyOnDocument
6004        ));
6005    }
6006
6007    #[test]
6008    fn test_fts_delete_on_custom_key_rejected() {
6009        // FTS delete_index on a custom key (not #document) should fail
6010        let result = Schema::new_default(KnnIndex::Hnsw)
6011            .delete_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
6012        assert!(result.is_err());
6013        assert!(matches!(
6014            result.unwrap_err(),
6015            SchemaBuilderError::FtsIndexDeletionOnlyOnDocument
6016        ));
6017    }
6018
6019    #[test]
6020    fn test_reserved_key_prefix_create_index() {
6021        // create_index with a key starting with # (not #document or #embedding) should fail
6022        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
6023            Some("#custom_field"),
6024            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
6025        );
6026        assert!(result.is_err());
6027        assert!(matches!(
6028            result.unwrap_err(),
6029            SchemaBuilderError::ReservedKeyPrefix { key } if key == "#custom_field"
6030        ));
6031    }
6032
6033    #[test]
6034    fn test_reserved_key_prefix_delete_index() {
6035        // delete_index with a key starting with # (not #document or #embedding) should fail
6036        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
6037            Some("#custom_field"),
6038            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
6039        );
6040        assert!(result.is_err());
6041        assert!(matches!(
6042            result.unwrap_err(),
6043            SchemaBuilderError::ReservedKeyPrefix { key } if key == "#custom_field"
6044        ));
6045    }
6046
6047    #[test]
6048    fn test_is_fts_enabled_backward_compatibility() {
6049        // Default schema has FTS enabled (backward compatibility)
6050        let schema = Schema::new_default(KnnIndex::Hnsw);
6051        assert!(schema.is_fts_enabled());
6052
6053        // Schema with no FTS config at all should default to enabled (is_none_or)
6054        let empty_schema = Schema {
6055            defaults: ValueTypes::default(),
6056            keys: HashMap::new(),
6057            cmek: None,
6058            source_attached_function_id: None,
6059        };
6060        assert!(empty_schema.is_fts_enabled());
6061    }
6062
6063    #[test]
6064    fn test_is_fts_enabled_after_disable() {
6065        // After disabling FTS on #document, is_fts_enabled should return false
6066        let schema = Schema::new_default(KnnIndex::Hnsw)
6067            .delete_index(Some(DOCUMENT_KEY), IndexConfig::Fts(FtsIndexConfig {}))
6068            .expect("FTS deletion should succeed");
6069        assert!(!schema.is_fts_enabled());
6070    }
6071
6072    #[test]
6073    fn test_is_fts_enabled_after_reenable() {
6074        // After disabling then re-enabling FTS on #document, is_fts_enabled should return true
6075        let schema = Schema::new_default(KnnIndex::Hnsw)
6076            .delete_index(Some(DOCUMENT_KEY), IndexConfig::Fts(FtsIndexConfig {}))
6077            .expect("FTS deletion should succeed")
6078            .create_index(Some(DOCUMENT_KEY), IndexConfig::Fts(FtsIndexConfig {}))
6079            .expect("FTS creation should succeed");
6080        assert!(schema.is_fts_enabled());
6081    }
6082
6083    #[test]
6084    fn test_fts_disabled_blocks_where_document_validation() {
6085        use crate::{DocumentExpression, DocumentOperator};
6086
6087        // Create schema with FTS disabled
6088        let schema = Schema::new_default(KnnIndex::Hnsw)
6089            .delete_index(Some(DOCUMENT_KEY), IndexConfig::Fts(FtsIndexConfig {}))
6090            .expect("FTS deletion should succeed");
6091
6092        // Where::Document query should be rejected
6093        let where_clause = Where::Document(DocumentExpression {
6094            operator: DocumentOperator::Contains,
6095            pattern: "test query".to_string(),
6096        });
6097        let result = schema.is_metadata_where_indexing_enabled(&where_clause);
6098        assert!(result.is_err());
6099        assert!(matches!(
6100            result.unwrap_err(),
6101            FilterValidationError::FtsDisabled
6102        ));
6103    }
6104
6105    #[test]
6106    fn test_fts_enabled_allows_where_document_validation() {
6107        use crate::{DocumentExpression, DocumentOperator};
6108
6109        // Default schema has FTS enabled
6110        let schema = Schema::new_default(KnnIndex::Hnsw);
6111
6112        // Where::Document query should be allowed
6113        let where_clause = Where::Document(DocumentExpression {
6114            operator: DocumentOperator::Contains,
6115            pattern: "test query".to_string(),
6116        });
6117        let result = schema.is_metadata_where_indexing_enabled(&where_clause);
6118        assert!(result.is_ok());
6119    }
6120
6121    #[test]
6122    fn test_builder_pattern_chaining() {
6123        // Test complex chaining scenario
6124        let schema = Schema::new_default(KnnIndex::Hnsw)
6125            .create_index(Some("tag1"), StringInvertedIndexConfig {}.into())
6126            .unwrap()
6127            .create_index(Some("tag2"), StringInvertedIndexConfig {}.into())
6128            .unwrap()
6129            .create_index(Some("tag3"), StringInvertedIndexConfig {}.into())
6130            .unwrap()
6131            .create_index(Some("count"), IntInvertedIndexConfig {}.into())
6132            .unwrap()
6133            .delete_index(Some("tag2"), StringInvertedIndexConfig {}.into())
6134            .unwrap()
6135            .create_index(Some("score"), FloatInvertedIndexConfig {}.into())
6136            .unwrap();
6137
6138        // Verify tag1 is enabled
6139        assert!(
6140            schema
6141                .keys
6142                .get("tag1")
6143                .unwrap()
6144                .string
6145                .as_ref()
6146                .unwrap()
6147                .string_inverted_index
6148                .as_ref()
6149                .unwrap()
6150                .enabled
6151        );
6152
6153        // Verify tag2 is disabled
6154        assert!(
6155            !schema
6156                .keys
6157                .get("tag2")
6158                .unwrap()
6159                .string
6160                .as_ref()
6161                .unwrap()
6162                .string_inverted_index
6163                .as_ref()
6164                .unwrap()
6165                .enabled
6166        );
6167
6168        // Verify tag3 is enabled
6169        assert!(
6170            schema
6171                .keys
6172                .get("tag3")
6173                .unwrap()
6174                .string
6175                .as_ref()
6176                .unwrap()
6177                .string_inverted_index
6178                .as_ref()
6179                .unwrap()
6180                .enabled
6181        );
6182
6183        // Verify count is enabled
6184        assert!(
6185            schema
6186                .keys
6187                .get("count")
6188                .unwrap()
6189                .int
6190                .as_ref()
6191                .unwrap()
6192                .int_inverted_index
6193                .as_ref()
6194                .unwrap()
6195                .enabled
6196        );
6197
6198        // Verify score is enabled
6199        assert!(
6200            schema
6201                .keys
6202                .get("score")
6203                .unwrap()
6204                .float
6205                .as_ref()
6206                .unwrap()
6207                .float_inverted_index
6208                .as_ref()
6209                .unwrap()
6210                .enabled
6211        );
6212    }
6213
6214    #[test]
6215    fn test_schema_default_matches_python() {
6216        // Test that Schema::default() matches Python's Schema() behavior exactly
6217        let schema = Schema::default();
6218
6219        // ============================================================================
6220        // VERIFY DEFAULTS (match Python's _initialize_defaults)
6221        // ============================================================================
6222
6223        // String defaults: FTS disabled, string inverted enabled
6224        assert!(schema.defaults.string.is_some());
6225        let string = schema.defaults.string.as_ref().unwrap();
6226        assert!(!string.fts_index.as_ref().unwrap().enabled);
6227        assert!(string.string_inverted_index.as_ref().unwrap().enabled);
6228
6229        // Float list defaults: vector index disabled
6230        assert!(schema.defaults.float_list.is_some());
6231        let float_list = schema.defaults.float_list.as_ref().unwrap();
6232        assert!(!float_list.vector_index.as_ref().unwrap().enabled);
6233        let vector_config = &float_list.vector_index.as_ref().unwrap().config;
6234        assert_eq!(vector_config.space, None); // Python leaves as None
6235        assert_eq!(vector_config.hnsw, None); // Python doesn't specify
6236        assert_eq!(vector_config.spann, None); // Python doesn't specify
6237        assert_eq!(vector_config.source_key, None);
6238
6239        // Sparse vector defaults: disabled
6240        assert!(schema.defaults.sparse_vector.is_some());
6241        let sparse = schema.defaults.sparse_vector.as_ref().unwrap();
6242        assert!(!sparse.sparse_vector_index.as_ref().unwrap().enabled);
6243
6244        // Int defaults: inverted index enabled
6245        assert!(schema.defaults.int.is_some());
6246        assert!(
6247            schema
6248                .defaults
6249                .int
6250                .as_ref()
6251                .unwrap()
6252                .int_inverted_index
6253                .as_ref()
6254                .unwrap()
6255                .enabled
6256        );
6257
6258        // Float defaults: inverted index enabled
6259        assert!(schema.defaults.float.is_some());
6260        assert!(
6261            schema
6262                .defaults
6263                .float
6264                .as_ref()
6265                .unwrap()
6266                .float_inverted_index
6267                .as_ref()
6268                .unwrap()
6269                .enabled
6270        );
6271
6272        // Bool defaults: inverted index enabled
6273        assert!(schema.defaults.boolean.is_some());
6274        assert!(
6275            schema
6276                .defaults
6277                .boolean
6278                .as_ref()
6279                .unwrap()
6280                .bool_inverted_index
6281                .as_ref()
6282                .unwrap()
6283                .enabled
6284        );
6285
6286        // ============================================================================
6287        // VERIFY SPECIAL KEYS (match Python's _initialize_keys)
6288        // ============================================================================
6289
6290        // #document: FTS enabled, string inverted disabled
6291        assert!(schema.keys.contains_key(DOCUMENT_KEY));
6292        let doc = schema.keys.get(DOCUMENT_KEY).unwrap();
6293        assert!(doc.string.is_some());
6294        assert!(
6295            doc.string
6296                .as_ref()
6297                .unwrap()
6298                .fts_index
6299                .as_ref()
6300                .unwrap()
6301                .enabled
6302        );
6303        assert!(
6304            !doc.string
6305                .as_ref()
6306                .unwrap()
6307                .string_inverted_index
6308                .as_ref()
6309                .unwrap()
6310                .enabled
6311        );
6312
6313        // #embedding: vector index enabled with source_key=#document
6314        assert!(schema.keys.contains_key(EMBEDDING_KEY));
6315        let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
6316        assert!(embedding.float_list.is_some());
6317        let vec_idx = embedding
6318            .float_list
6319            .as_ref()
6320            .unwrap()
6321            .vector_index
6322            .as_ref()
6323            .unwrap();
6324        assert!(vec_idx.enabled);
6325        assert_eq!(vec_idx.config.source_key, Some(DOCUMENT_KEY.to_string()));
6326        assert_eq!(vec_idx.config.space, None); // Python leaves as None
6327        assert_eq!(vec_idx.config.hnsw, None); // Python doesn't specify
6328        assert_eq!(vec_idx.config.spann, None); // Python doesn't specify
6329
6330        // Verify only these two special keys exist
6331        assert_eq!(schema.keys.len(), 2);
6332    }
6333
6334    #[test]
6335    fn test_schema_default_works_with_builder() {
6336        // Test that Schema::default() can be used with builder pattern
6337        let schema = Schema::default()
6338            .create_index(Some("category"), StringInvertedIndexConfig {}.into())
6339            .expect("should succeed");
6340
6341        // Verify the new index was added
6342        assert!(schema.keys.contains_key("category"));
6343        assert!(schema.keys.contains_key(DOCUMENT_KEY));
6344        assert!(schema.keys.contains_key(EMBEDDING_KEY));
6345        assert_eq!(schema.keys.len(), 3);
6346    }
6347
6348    #[cfg(feature = "testing")]
6349    mod proptests {
6350        use super::*;
6351        use crate::strategies::{
6352            embedding_function_strategy, internal_collection_configuration_strategy,
6353            internal_hnsw_configuration_strategy, internal_spann_configuration_strategy,
6354            knn_index_strategy, space_strategy, TEST_NAME_PATTERN,
6355        };
6356        use crate::{
6357            HnswIndexConfig, SpannIndexConfig, VectorIndexConfig, DOCUMENT_KEY, EMBEDDING_KEY,
6358        };
6359        use proptest::prelude::*;
6360        use proptest::strategy::BoxedStrategy;
6361        use proptest::string::string_regex;
6362        use serde_json::json;
6363
6364        fn default_embedding_function_strategy(
6365        ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
6366            proptest::option::of(prop_oneof![
6367                Just(EmbeddingFunctionConfiguration::Unknown),
6368                Just(EmbeddingFunctionConfiguration::Known(
6369                    EmbeddingFunctionNewConfiguration {
6370                        name: "default".to_string(),
6371                        config: json!({ "alpha": 1 }),
6372                    }
6373                )),
6374            ])
6375        }
6376
6377        fn sparse_embedding_function_strategy(
6378        ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
6379            let known_strategy = string_regex(TEST_NAME_PATTERN).unwrap().prop_map(|name| {
6380                EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
6381                    name,
6382                    config: json!({ "alpha": 1 }),
6383                })
6384            });
6385
6386            proptest::option::of(prop_oneof![
6387                Just(EmbeddingFunctionConfiguration::Unknown),
6388                known_strategy,
6389            ])
6390        }
6391
6392        fn non_default_internal_collection_configuration_strategy(
6393        ) -> impl Strategy<Value = InternalCollectionConfiguration> {
6394            internal_collection_configuration_strategy()
6395                .prop_filter("non-default configuration", |config| !config.is_default())
6396        }
6397
6398        fn partial_hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
6399            (
6400                proptest::option::of(1usize..=512),
6401                proptest::option::of(1usize..=128),
6402                proptest::option::of(1usize..=512),
6403                proptest::option::of(1usize..=64),
6404                proptest::option::of(2usize..=4096),
6405                proptest::option::of(2usize..=4096),
6406                proptest::option::of(prop_oneof![
6407                    Just(0.5f64),
6408                    Just(1.0f64),
6409                    Just(1.5f64),
6410                    Just(2.0f64)
6411                ]),
6412            )
6413                .prop_map(
6414                    |(
6415                        ef_construction,
6416                        max_neighbors,
6417                        ef_search,
6418                        num_threads,
6419                        batch_size,
6420                        sync_threshold,
6421                        resize_factor,
6422                    )| HnswIndexConfig {
6423                        ef_construction,
6424                        max_neighbors,
6425                        ef_search,
6426                        num_threads,
6427                        batch_size,
6428                        sync_threshold,
6429                        resize_factor,
6430                    },
6431                )
6432        }
6433
6434        fn partial_spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
6435            let epsilon_strategy = prop_oneof![Just(5.0f32), Just(7.5f32), Just(10.0f32)];
6436            (
6437                (
6438                    proptest::option::of(1u32..=128),               // search_nprobe
6439                    proptest::option::of(Just(1.0f32)), // search_rng_factor (must be 1.0)
6440                    proptest::option::of(epsilon_strategy.clone()), // search_rng_epsilon
6441                    proptest::option::of(1u32..=8),     // nreplica_count
6442                    proptest::option::of(Just(1.0f32)), // write_rng_factor (must be 1.0)
6443                    proptest::option::of(epsilon_strategy), // write_rng_epsilon
6444                    proptest::option::of(50u32..=200),  // split_threshold
6445                    proptest::option::of(1usize..=1000), // num_samples_kmeans
6446                ),
6447                (
6448                    proptest::option::of(Just(100.0f32)), // initial_lambda (must be 100.0)
6449                    proptest::option::of(1u32..=64),      // reassign_neighbor_count
6450                    proptest::option::of(25u32..=100),    // merge_threshold
6451                    proptest::option::of(1u32..=8),       // num_centers_to_merge_to
6452                    proptest::option::of(1u32..=64),      // write_nprobe
6453                    proptest::option::of(1usize..=200),   // ef_construction
6454                    proptest::option::of(1usize..=200),   // ef_search
6455                    proptest::option::of(1usize..=64),    // max_neighbors
6456                ),
6457            )
6458                .prop_map(
6459                    |(
6460                        (
6461                            search_nprobe,
6462                            search_rng_factor,
6463                            search_rng_epsilon,
6464                            nreplica_count,
6465                            write_rng_factor,
6466                            write_rng_epsilon,
6467                            split_threshold,
6468                            num_samples_kmeans,
6469                        ),
6470                        (
6471                            initial_lambda,
6472                            reassign_neighbor_count,
6473                            merge_threshold,
6474                            num_centers_to_merge_to,
6475                            write_nprobe,
6476                            ef_construction,
6477                            ef_search,
6478                            max_neighbors,
6479                        ),
6480                    )| SpannIndexConfig {
6481                        search_nprobe,
6482                        search_rng_factor,
6483                        search_rng_epsilon,
6484                        nreplica_count,
6485                        write_rng_factor,
6486                        write_rng_epsilon,
6487                        split_threshold,
6488                        num_samples_kmeans,
6489                        initial_lambda,
6490                        reassign_neighbor_count,
6491                        merge_threshold,
6492                        num_centers_to_merge_to,
6493                        write_nprobe,
6494                        ef_construction,
6495                        ef_search,
6496                        max_neighbors,
6497                        center_drift_threshold: None,
6498                        quantize: Quantization::None,
6499                    },
6500                )
6501        }
6502
6503        proptest! {
6504            #[test]
6505            fn merge_hnsw_configs_preserves_user_overrides(
6506                base in partial_hnsw_index_config_strategy(),
6507                user in partial_hnsw_index_config_strategy(),
6508            ) {
6509                let merged = Schema::merge_hnsw_configs(Some(&base), Some(&user))
6510                    .expect("merge should return Some when both are Some");
6511
6512                // Property: user values always take precedence when Some
6513                if user.ef_construction.is_some() {
6514                    prop_assert_eq!(merged.ef_construction, user.ef_construction);
6515                }
6516                if user.max_neighbors.is_some() {
6517                    prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
6518                }
6519                if user.ef_search.is_some() {
6520                    prop_assert_eq!(merged.ef_search, user.ef_search);
6521                }
6522                if user.num_threads.is_some() {
6523                    prop_assert_eq!(merged.num_threads, user.num_threads);
6524                }
6525                if user.batch_size.is_some() {
6526                    prop_assert_eq!(merged.batch_size, user.batch_size);
6527                }
6528                if user.sync_threshold.is_some() {
6529                    prop_assert_eq!(merged.sync_threshold, user.sync_threshold);
6530                }
6531                if user.resize_factor.is_some() {
6532                    prop_assert_eq!(merged.resize_factor, user.resize_factor);
6533                }
6534            }
6535
6536            #[test]
6537            fn merge_hnsw_configs_falls_back_to_base_when_user_is_none(
6538                base in partial_hnsw_index_config_strategy(),
6539            ) {
6540                let merged = Schema::merge_hnsw_configs(Some(&base), None)
6541                    .expect("merge should return Some when base is Some");
6542
6543                // Property: when user is None, base values are preserved
6544                prop_assert_eq!(merged, base);
6545            }
6546
6547            #[test]
6548            fn merge_hnsw_configs_returns_user_when_base_is_none(
6549                user in partial_hnsw_index_config_strategy(),
6550            ) {
6551                let merged = Schema::merge_hnsw_configs(None, Some(&user))
6552                    .expect("merge should return Some when user is Some");
6553
6554                // Property: when base is None, user values are preserved
6555                prop_assert_eq!(merged, user);
6556            }
6557
6558            #[test]
6559            fn merge_spann_configs_preserves_user_overrides(
6560                base in partial_spann_index_config_strategy(),
6561                user in partial_spann_index_config_strategy(),
6562            ) {
6563                let merged = Schema::merge_spann_configs(Some(&base), Some(&user))
6564                    .expect("merge should return Ok")
6565                    .expect("merge should return Some when both are Some");
6566
6567                // Property: user values always take precedence when Some
6568                if user.search_nprobe.is_some() {
6569                    prop_assert_eq!(merged.search_nprobe, user.search_nprobe);
6570                }
6571                if user.search_rng_epsilon.is_some() {
6572                    prop_assert_eq!(merged.search_rng_epsilon, user.search_rng_epsilon);
6573                }
6574                if user.split_threshold.is_some() {
6575                    prop_assert_eq!(merged.split_threshold, user.split_threshold);
6576                }
6577                if user.ef_construction.is_some() {
6578                    prop_assert_eq!(merged.ef_construction, user.ef_construction);
6579                }
6580                if user.ef_search.is_some() {
6581                    prop_assert_eq!(merged.ef_search, user.ef_search);
6582                }
6583                if user.max_neighbors.is_some() {
6584                    prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
6585                }
6586            }
6587
6588            #[test]
6589            fn merge_spann_configs_falls_back_to_base_when_user_is_none(
6590                base in partial_spann_index_config_strategy(),
6591            ) {
6592                let merged = Schema::merge_spann_configs(Some(&base), None)
6593                    .expect("merge should return Ok")
6594                    .expect("merge should return Some when base is Some");
6595
6596                // Property: when user is None, base values are preserved
6597                prop_assert_eq!(merged, base);
6598            }
6599
6600            #[test]
6601            fn merge_vector_index_config_preserves_user_overrides(
6602                base in vector_index_config_strategy(),
6603                user in vector_index_config_strategy(),
6604                knn in knn_index_strategy(),
6605            ) {
6606                let merged = Schema::merge_vector_index_config(&base, &user, knn)
6607                    .expect("merge should succeed");
6608
6609                // Property: user values take precedence for top-level fields
6610                if user.space.is_some() {
6611                    prop_assert_eq!(merged.space, user.space);
6612                }
6613                if user.embedding_function.is_some() {
6614                    prop_assert_eq!(merged.embedding_function, user.embedding_function);
6615                }
6616                if user.source_key.is_some() {
6617                    prop_assert_eq!(merged.source_key, user.source_key);
6618                }
6619
6620                // Property: nested configs are merged according to merge rules
6621                match knn {
6622                    KnnIndex::Hnsw => {
6623                        if let (Some(_base_hnsw), Some(user_hnsw)) = (&base.hnsw, &user.hnsw) {
6624                            let merged_hnsw = merged.hnsw.as_ref().expect("hnsw should be Some");
6625                            if user_hnsw.ef_construction.is_some() {
6626                                prop_assert_eq!(merged_hnsw.ef_construction, user_hnsw.ef_construction);
6627                            }
6628                        }
6629                    }
6630                    KnnIndex::Spann => {
6631                        if let (Some(_base_spann), Some(user_spann)) = (&base.spann, &user.spann) {
6632                            let merged_spann = merged.spann.as_ref().expect("spann should be Some");
6633                            if user_spann.search_nprobe.is_some() {
6634                                prop_assert_eq!(merged_spann.search_nprobe, user_spann.search_nprobe);
6635                            }
6636                        }
6637                    }
6638                }
6639            }
6640        }
6641
6642        fn expected_vector_index_config(
6643            config: &InternalCollectionConfiguration,
6644        ) -> VectorIndexConfig {
6645            match &config.vector_index {
6646                VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
6647                    space: Some(hnsw_config.space.clone()),
6648                    embedding_function: config.embedding_function.clone(),
6649                    source_key: None,
6650                    hnsw: Some(HnswIndexConfig {
6651                        ef_construction: Some(hnsw_config.ef_construction),
6652                        max_neighbors: Some(hnsw_config.max_neighbors),
6653                        ef_search: Some(hnsw_config.ef_search),
6654                        num_threads: Some(hnsw_config.num_threads),
6655                        batch_size: Some(hnsw_config.batch_size),
6656                        sync_threshold: Some(hnsw_config.sync_threshold),
6657                        resize_factor: Some(hnsw_config.resize_factor),
6658                    }),
6659                    spann: None,
6660                },
6661                VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
6662                    space: Some(spann_config.space.clone()),
6663                    embedding_function: config.embedding_function.clone(),
6664                    source_key: None,
6665                    hnsw: None,
6666                    spann: Some(SpannIndexConfig {
6667                        search_nprobe: Some(spann_config.search_nprobe),
6668                        search_rng_factor: Some(spann_config.search_rng_factor),
6669                        search_rng_epsilon: Some(spann_config.search_rng_epsilon),
6670                        nreplica_count: Some(spann_config.nreplica_count),
6671                        write_rng_factor: Some(spann_config.write_rng_factor),
6672                        write_rng_epsilon: Some(spann_config.write_rng_epsilon),
6673                        split_threshold: Some(spann_config.split_threshold),
6674                        num_samples_kmeans: Some(spann_config.num_samples_kmeans),
6675                        initial_lambda: Some(spann_config.initial_lambda),
6676                        reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
6677                        merge_threshold: Some(spann_config.merge_threshold),
6678                        num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
6679                        write_nprobe: Some(spann_config.write_nprobe),
6680                        ef_construction: Some(spann_config.ef_construction),
6681                        ef_search: Some(spann_config.ef_search),
6682                        max_neighbors: Some(spann_config.max_neighbors),
6683                        center_drift_threshold: None,
6684                        quantize: Quantization::None,
6685                    }),
6686                },
6687            }
6688        }
6689
6690        fn non_special_key_strategy() -> BoxedStrategy<String> {
6691            string_regex(TEST_NAME_PATTERN)
6692                .unwrap()
6693                .prop_filter("exclude special keys", |key| {
6694                    key != DOCUMENT_KEY && key != EMBEDDING_KEY
6695                })
6696                .boxed()
6697        }
6698
6699        fn source_key_strategy() -> BoxedStrategy<Option<String>> {
6700            proptest::option::of(prop_oneof![
6701                Just(DOCUMENT_KEY.to_string()),
6702                string_regex(TEST_NAME_PATTERN).unwrap(),
6703            ])
6704            .boxed()
6705        }
6706
6707        fn fts_index_type_strategy() -> impl Strategy<Value = FtsIndexType> {
6708            any::<bool>().prop_map(|enabled| FtsIndexType {
6709                enabled,
6710                config: FtsIndexConfig {},
6711            })
6712        }
6713
6714        fn string_inverted_index_type_strategy() -> impl Strategy<Value = StringInvertedIndexType> {
6715            any::<bool>().prop_map(|enabled| StringInvertedIndexType {
6716                enabled,
6717                config: StringInvertedIndexConfig {},
6718            })
6719        }
6720
6721        fn string_value_type_strategy() -> BoxedStrategy<Option<StringValueType>> {
6722            proptest::option::of(
6723                (
6724                    proptest::option::of(string_inverted_index_type_strategy()),
6725                    proptest::option::of(fts_index_type_strategy()),
6726                )
6727                    .prop_map(|(string_inverted_index, fts_index)| {
6728                        StringValueType {
6729                            string_inverted_index,
6730                            fts_index,
6731                        }
6732                    }),
6733            )
6734            .boxed()
6735        }
6736
6737        fn float_inverted_index_type_strategy() -> impl Strategy<Value = FloatInvertedIndexType> {
6738            any::<bool>().prop_map(|enabled| FloatInvertedIndexType {
6739                enabled,
6740                config: FloatInvertedIndexConfig {},
6741            })
6742        }
6743
6744        fn float_value_type_strategy() -> BoxedStrategy<Option<FloatValueType>> {
6745            proptest::option::of(
6746                proptest::option::of(float_inverted_index_type_strategy()).prop_map(
6747                    |float_inverted_index| FloatValueType {
6748                        float_inverted_index,
6749                    },
6750                ),
6751            )
6752            .boxed()
6753        }
6754
6755        fn int_inverted_index_type_strategy() -> impl Strategy<Value = IntInvertedIndexType> {
6756            any::<bool>().prop_map(|enabled| IntInvertedIndexType {
6757                enabled,
6758                config: IntInvertedIndexConfig {},
6759            })
6760        }
6761
6762        fn int_value_type_strategy() -> BoxedStrategy<Option<IntValueType>> {
6763            proptest::option::of(
6764                proptest::option::of(int_inverted_index_type_strategy())
6765                    .prop_map(|int_inverted_index| IntValueType { int_inverted_index }),
6766            )
6767            .boxed()
6768        }
6769
6770        fn bool_inverted_index_type_strategy() -> impl Strategy<Value = BoolInvertedIndexType> {
6771            any::<bool>().prop_map(|enabled| BoolInvertedIndexType {
6772                enabled,
6773                config: BoolInvertedIndexConfig {},
6774            })
6775        }
6776
6777        fn bool_value_type_strategy() -> BoxedStrategy<Option<BoolValueType>> {
6778            proptest::option::of(
6779                proptest::option::of(bool_inverted_index_type_strategy()).prop_map(
6780                    |bool_inverted_index| BoolValueType {
6781                        bool_inverted_index,
6782                    },
6783                ),
6784            )
6785            .boxed()
6786        }
6787
6788        fn sparse_vector_index_config_strategy() -> impl Strategy<Value = SparseVectorIndexConfig> {
6789            (
6790                sparse_embedding_function_strategy(),
6791                source_key_strategy(),
6792                proptest::option::of(any::<bool>()),
6793            )
6794                .prop_map(|(embedding_function, source_key, bm25)| {
6795                    SparseVectorIndexConfig {
6796                        embedding_function,
6797                        source_key,
6798                        bm25,
6799                    }
6800                })
6801        }
6802
6803        fn sparse_vector_value_type_strategy() -> BoxedStrategy<Option<SparseVectorValueType>> {
6804            proptest::option::of(
6805                (
6806                    any::<bool>(),
6807                    proptest::option::of(sparse_vector_index_config_strategy()),
6808                )
6809                    .prop_map(|(enabled, config)| SparseVectorValueType {
6810                        sparse_vector_index: config.map(|cfg| SparseVectorIndexType {
6811                            enabled,
6812                            config: cfg,
6813                        }),
6814                    }),
6815            )
6816            .boxed()
6817        }
6818
6819        fn hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
6820            internal_hnsw_configuration_strategy().prop_map(|config| HnswIndexConfig {
6821                ef_construction: Some(config.ef_construction),
6822                max_neighbors: Some(config.max_neighbors),
6823                ef_search: Some(config.ef_search),
6824                num_threads: Some(config.num_threads),
6825                batch_size: Some(config.batch_size),
6826                sync_threshold: Some(config.sync_threshold),
6827                resize_factor: Some(config.resize_factor),
6828            })
6829        }
6830
6831        fn spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
6832            internal_spann_configuration_strategy().prop_map(|config| SpannIndexConfig {
6833                search_nprobe: Some(config.search_nprobe),
6834                search_rng_factor: Some(config.search_rng_factor),
6835                search_rng_epsilon: Some(config.search_rng_epsilon),
6836                nreplica_count: Some(config.nreplica_count),
6837                write_rng_factor: Some(config.write_rng_factor),
6838                write_rng_epsilon: Some(config.write_rng_epsilon),
6839                split_threshold: Some(config.split_threshold),
6840                num_samples_kmeans: Some(config.num_samples_kmeans),
6841                initial_lambda: Some(config.initial_lambda),
6842                reassign_neighbor_count: Some(config.reassign_neighbor_count),
6843                merge_threshold: Some(config.merge_threshold),
6844                num_centers_to_merge_to: Some(config.num_centers_to_merge_to),
6845                write_nprobe: Some(config.write_nprobe),
6846                ef_construction: Some(config.ef_construction),
6847                ef_search: Some(config.ef_search),
6848                max_neighbors: Some(config.max_neighbors),
6849                center_drift_threshold: None,
6850                quantize: Quantization::None,
6851            })
6852        }
6853
6854        fn vector_index_config_strategy() -> impl Strategy<Value = VectorIndexConfig> {
6855            (
6856                proptest::option::of(space_strategy()),
6857                embedding_function_strategy(),
6858                source_key_strategy(),
6859                proptest::option::of(hnsw_index_config_strategy()),
6860                proptest::option::of(spann_index_config_strategy()),
6861            )
6862                .prop_map(|(space, embedding_function, source_key, hnsw, spann)| {
6863                    VectorIndexConfig {
6864                        space,
6865                        embedding_function,
6866                        source_key,
6867                        hnsw,
6868                        spann,
6869                    }
6870                })
6871        }
6872
6873        fn vector_index_type_strategy() -> impl Strategy<Value = VectorIndexType> {
6874            (any::<bool>(), vector_index_config_strategy())
6875                .prop_map(|(enabled, config)| VectorIndexType { enabled, config })
6876        }
6877
6878        fn float_list_value_type_strategy() -> BoxedStrategy<Option<FloatListValueType>> {
6879            proptest::option::of(
6880                proptest::option::of(vector_index_type_strategy())
6881                    .prop_map(|vector_index| FloatListValueType { vector_index }),
6882            )
6883            .boxed()
6884        }
6885
6886        fn value_types_strategy() -> BoxedStrategy<ValueTypes> {
6887            (
6888                string_value_type_strategy(),
6889                float_list_value_type_strategy(),
6890                sparse_vector_value_type_strategy(),
6891                int_value_type_strategy(),
6892                float_value_type_strategy(),
6893                bool_value_type_strategy(),
6894            )
6895                .prop_map(
6896                    |(string, float_list, sparse_vector, int, float, boolean)| ValueTypes {
6897                        string,
6898                        float_list,
6899                        sparse_vector,
6900                        int,
6901                        float,
6902                        boolean,
6903                    },
6904                )
6905                .boxed()
6906        }
6907
6908        fn schema_strategy() -> BoxedStrategy<Schema> {
6909            (
6910                value_types_strategy(),
6911                proptest::collection::hash_map(
6912                    non_special_key_strategy(),
6913                    value_types_strategy(),
6914                    0..=3,
6915                ),
6916                proptest::option::of(value_types_strategy()),
6917                proptest::option::of(value_types_strategy()),
6918            )
6919                .prop_map(
6920                    |(defaults, mut extra_keys, document_override, embedding_override)| {
6921                        if let Some(doc) = document_override {
6922                            extra_keys.insert(DOCUMENT_KEY.to_string(), doc);
6923                        }
6924                        if let Some(embed) = embedding_override {
6925                            extra_keys.insert(EMBEDDING_KEY.to_string(), embed);
6926                        }
6927                        Schema {
6928                            defaults,
6929                            keys: extra_keys,
6930                            cmek: None,
6931                            source_attached_function_id: None,
6932                        }
6933                    },
6934                )
6935                .boxed()
6936        }
6937
6938        fn force_non_default_schema(mut schema: Schema) -> Schema {
6939            if schema.is_default() {
6940                if let Some(string_value) = schema
6941                    .defaults
6942                    .string
6943                    .as_mut()
6944                    .and_then(|string_value| string_value.string_inverted_index.as_mut())
6945                {
6946                    string_value.enabled = !string_value.enabled;
6947                } else {
6948                    schema.defaults.string = Some(StringValueType {
6949                        string_inverted_index: Some(StringInvertedIndexType {
6950                            enabled: false,
6951                            config: StringInvertedIndexConfig {},
6952                        }),
6953                        fts_index: None,
6954                    });
6955                }
6956            }
6957            schema
6958        }
6959
6960        fn non_default_schema_strategy() -> BoxedStrategy<Schema> {
6961            schema_strategy().prop_map(force_non_default_schema).boxed()
6962        }
6963
6964        fn extract_vector_configs(schema: &Schema) -> (VectorIndexConfig, VectorIndexConfig) {
6965            let defaults = schema
6966                .defaults
6967                .float_list
6968                .as_ref()
6969                .and_then(|fl| fl.vector_index.as_ref())
6970                .map(|vi| vi.config.clone())
6971                .expect("defaults vector index missing");
6972
6973            let embedding = schema
6974                .keys
6975                .get(EMBEDDING_KEY)
6976                .and_then(|value_types| value_types.float_list.as_ref())
6977                .and_then(|fl| fl.vector_index.as_ref())
6978                .map(|vi| vi.config.clone())
6979                .expect("#embedding vector index missing");
6980
6981            (defaults, embedding)
6982        }
6983
6984        proptest! {
6985            #[test]
6986            fn reconcile_schema_and_config_matches_convert_for_config_only(
6987                config in internal_collection_configuration_strategy(),
6988                knn in knn_index_strategy(),
6989            ) {
6990                let result = Schema::reconcile_schema_and_config(None, Some(&config), knn)
6991                    .expect("reconciliation should succeed");
6992
6993                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6994                let expected_config = expected_vector_index_config(&config);
6995
6996                prop_assert_eq!(defaults_vi, expected_config.clone());
6997
6998                let mut expected_embedding_config = expected_config;
6999                expected_embedding_config.source_key = Some(DOCUMENT_KEY.to_string());
7000                prop_assert_eq!(embedding_vi, expected_embedding_config);
7001
7002                prop_assert_eq!(result.keys.len(), 2);
7003            }
7004        }
7005
7006        proptest! {
7007            #[test]
7008            fn reconcile_schema_and_config_errors_when_both_non_default(
7009                config in non_default_internal_collection_configuration_strategy(),
7010                knn in knn_index_strategy(),
7011            ) {
7012                let schema = Schema::try_from(&config)
7013                    .expect("conversion should succeed");
7014                prop_assume!(!schema.is_default());
7015
7016                let result = Schema::reconcile_schema_and_config(Some(&schema), Some(&config), knn);
7017
7018                prop_assert!(matches!(result, Err(SchemaError::ConfigAndSchemaConflict)));
7019            }
7020        }
7021
7022        proptest! {
7023            #[test]
7024            fn reconcile_schema_and_config_matches_schema_only_path(
7025                schema in schema_strategy(),
7026                knn in knn_index_strategy(),
7027            ) {
7028                let result = Schema::reconcile_schema_and_config(Some(&schema), None, knn)
7029                    .expect("reconciliation should succeed");
7030
7031                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
7032
7033                // Property: schema defaults.float_list vector_index config should be merged into defaults
7034                if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
7035                    if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
7036                        // Property: schema values take precedence over defaults
7037                        if let Some(schema_space) = &schema_vi.config.space {
7038                            prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
7039                        }
7040                        if let Some(schema_ef) = &schema_vi.config.embedding_function {
7041                            prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
7042                        }
7043                        // Test nested config merging properties
7044                        match knn {
7045                            KnnIndex::Hnsw => {
7046                                if let Some(schema_hnsw) = &schema_vi.config.hnsw {
7047                                    if let Some(merged_hnsw) = &defaults_vi.hnsw {
7048                                        if let Some(schema_ef_construction) = schema_hnsw.ef_construction {
7049                                            prop_assert_eq!(merged_hnsw.ef_construction, Some(schema_ef_construction));
7050                                        }
7051                                    }
7052                                }
7053                            }
7054                            KnnIndex::Spann => {
7055                                if let Some(schema_spann) = &schema_vi.config.spann {
7056                                    if let Some(merged_spann) = &defaults_vi.spann {
7057                                        if let Some(schema_search_nprobe) = schema_spann.search_nprobe {
7058                                            prop_assert_eq!(merged_spann.search_nprobe, Some(schema_search_nprobe));
7059                                        }
7060                                    }
7061                                }
7062                            }
7063                        }
7064                    }
7065                }
7066
7067                // Property: schema #embedding float_list vector_index config should be merged into embedding
7068                if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
7069                    if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
7070                        if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
7071                            if let Some(schema_space) = &embedding_vi_type.config.space {
7072                                prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
7073                            }
7074                        }
7075                    }
7076                }
7077            }
7078        }
7079
7080        proptest! {
7081            #[test]
7082            fn reconcile_schema_and_config_with_default_schema_and_default_config_applies_embedding_function(
7083                embedding_function in default_embedding_function_strategy(),
7084                knn in knn_index_strategy(),
7085            ) {
7086                let schema = Schema::new_default(knn);
7087                let mut config = match knn {
7088                    KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
7089                    KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
7090                };
7091                config.embedding_function = embedding_function.clone();
7092
7093                let result = Schema::reconcile_schema_and_config(
7094                    Some(&schema),
7095                    Some(&config),
7096                    knn,
7097                )
7098                .expect("reconciliation should succeed");
7099
7100                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
7101
7102                // Property: embedding function from config should be applied to both defaults and embedding
7103                if let Some(ef) = embedding_function {
7104                    prop_assert_eq!(defaults_vi.embedding_function, Some(ef.clone()));
7105                    prop_assert_eq!(embedding_vi.embedding_function, Some(ef));
7106                } else {
7107                    // Property: when embedding function is None, it should remain None
7108                    prop_assert_eq!(defaults_vi.embedding_function, None);
7109                    prop_assert_eq!(embedding_vi.embedding_function, None);
7110                }
7111            }
7112        }
7113
7114        proptest! {
7115            #[test]
7116            fn reconcile_schema_and_config_with_default_config_keeps_non_default_schema(
7117                schema in non_default_schema_strategy(),
7118                knn in knn_index_strategy(),
7119            ) {
7120                let default_config = match knn {
7121                    KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
7122                    KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
7123                };
7124
7125                let result = Schema::reconcile_schema_and_config(
7126                    Some(&schema),
7127                    Some(&default_config),
7128                    knn,
7129                )
7130                .expect("reconciliation should succeed");
7131
7132                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
7133
7134                // Property: when config is default, schema values should be preserved
7135                // Test that schema defaults.float_list vector_index config is applied
7136                if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
7137                    if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
7138                        if let Some(schema_space) = &schema_vi.config.space {
7139                            prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
7140                        }
7141                        if let Some(schema_ef) = &schema_vi.config.embedding_function {
7142                            prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
7143                        }
7144                    }
7145                }
7146
7147                // Property: schema #embedding float_list vector_index config should be applied
7148                if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
7149                    if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
7150                        if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
7151                            if let Some(schema_space) = &embedding_vi_type.config.space {
7152                                prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
7153                            }
7154                        }
7155                    }
7156                }
7157            }
7158        }
7159    }
7160}