chroma_types/
collection_schema.rs

1use chroma_error::{ChromaError, ErrorCodes};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use thiserror::Error;
5use validator::Validate;
6
7use crate::collection_configuration::{
8    EmbeddingFunctionConfiguration, InternalCollectionConfiguration, VectorIndexConfiguration,
9};
10use crate::hnsw_configuration::Space;
11use crate::metadata::{MetadataComparison, MetadataValueType, Where};
12use crate::operator::QueryVector;
13use crate::{
14    default_batch_size, default_construction_ef, default_construction_ef_spann,
15    default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
16    default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
17    default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
18    default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
19    default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
20    default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor,
21    InternalSpannConfiguration, KnnIndex,
22};
23
24impl ChromaError for SchemaError {
25    fn code(&self) -> ErrorCodes {
26        ErrorCodes::Internal
27    }
28}
29
/// Errors produced while inspecting or combining collection schemas.
#[derive(Debug, Error)]
pub enum SchemaError {
    /// No index configuration exists for metadata key `key` under value type `value_type`.
    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
    MissingIndexConfiguration { key: String, value_type: String },
    /// Schemas could not be reconciled or merged; `reason` describes the conflict.
    #[error("Schema reconciliation failed: {reason}")]
    InvalidSchema { reason: String },
}
37
/// Errors produced when validating a metadata filter against a schema.
#[derive(Debug, Error)]
pub enum FilterValidationError {
    /// The filter targets `key` with a value of type `value_type`, but indexing is disabled
    /// for that combination.
    #[error(
        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
    )]
    IndexingDisabled {
        key: String,
        value_type: MetadataValueType,
    },
    /// A wrapped [`SchemaError`]; its message is displayed unchanged (`transparent`).
    #[error(transparent)]
    Schema(#[from] SchemaError),
}
50
51impl ChromaError for FilterValidationError {
52    fn code(&self) -> ErrorCodes {
53        match self {
54            FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
55            FilterValidationError::Schema(_) => ErrorCodes::Internal,
56        }
57    }
58}
59
// ============================================================================
// SCHEMA CONSTANTS
// ============================================================================
// These constants must match the Python constants in chromadb/api/types.py
// The same strings are used as serde field names by the schema structs in this
// file, so a rename here must be mirrored in those `#[serde(rename = ...)]` attributes.

// Value type name constants
/// Name of the string value type.
pub const STRING_VALUE_NAME: &str = "string";
/// Name of the integer value type.
pub const INT_VALUE_NAME: &str = "int";
/// Name of the boolean value type.
pub const BOOL_VALUE_NAME: &str = "bool";
/// Name of the float value type.
pub const FLOAT_VALUE_NAME: &str = "float";
/// Name of the float-list value type.
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
/// Name of the sparse-vector value type.
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// Index type name constants
/// Name of the FTS index (configured under the string value type).
pub const FTS_INDEX_NAME: &str = "fts_index";
/// Name of the vector index (configured under the float-list value type).
pub const VECTOR_INDEX_NAME: &str = "vector_index";
/// Name of the sparse vector index (configured under the sparse-vector value type).
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
/// Name of the inverted index for string values.
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
/// Name of the inverted index for integer values.
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
/// Name of the inverted index for float values.
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
/// Name of the inverted index for boolean values.
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

// Special metadata keys - must match Python constants in chromadb/api/types.py
/// Reserved key under which document-specific overrides are stored in a schema.
pub const DOCUMENT_KEY: &str = "#document";
/// Reserved key under which embedding-specific overrides are stored in a schema.
pub const EMBEDDING_KEY: &str = "#embedding";
85
86// ============================================================================
87// SCHEMA STRUCTURES
88// ============================================================================
89
/// Schema representation for collection index configurations
///
/// This represents the server-side schema structure used for index management
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct Schema {
    /// Default index configurations for each value type
    pub defaults: ValueTypes,
    /// Key-specific index overrides
    ///
    /// Serialized as `keys`; `key_overrides` is also accepted when deserializing.
    // TODO(Sanket): Needed for backwards compatibility. Should remove after deploy.
    // NOTE(review): the TODO presumably refers to the `key_overrides` alias — confirm.
    #[serde(rename = "keys", alias = "key_overrides")]
    pub keys: HashMap<String, ValueTypes>,
}
104
105pub fn is_embedding_function_default(
106    embedding_function: &Option<EmbeddingFunctionConfiguration>,
107) -> bool {
108    match embedding_function {
109        None => true,
110        Some(embedding_function) => embedding_function.is_default(),
111    }
112}
113
114/// Check if space is default (None means default, or if present, should be default space)
115pub fn is_space_default(space: &Option<Space>) -> bool {
116    match space {
117        None => true,                     // None means default
118        Some(s) => *s == default_space(), // If present, check if it's the default space
119    }
120}
121
122/// Check if HNSW config is default
123pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
124    hnsw_config.ef_construction == Some(default_construction_ef())
125        && hnsw_config.ef_search == Some(default_search_ef())
126        && hnsw_config.max_neighbors == Some(default_m())
127        && hnsw_config.num_threads == Some(default_num_threads())
128        && hnsw_config.batch_size == Some(default_batch_size())
129        && hnsw_config.sync_threshold == Some(default_sync_threshold())
130        && hnsw_config.resize_factor == Some(default_resize_factor())
131}
132
133// ============================================================================
134// NEW STRONGLY-TYPED SCHEMA STRUCTURES
135// ============================================================================
136
/// Strongly-typed value type configurations
/// Contains optional configurations for each supported value type
///
/// Every field is optional: `None` fields are omitted when serializing, and each field
/// also accepts a `#`-prefixed alias (e.g. `#string`) when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct ValueTypes {
    /// Index configuration for string values.
    #[serde(
        rename = "string",
        alias = "#string",
        skip_serializing_if = "Option::is_none"
    )] // STRING_VALUE_NAME
    pub string: Option<StringValueType>,

    /// Index configuration for float-list values.
    #[serde(
        rename = "float_list",
        alias = "#float_list",
        skip_serializing_if = "Option::is_none"
    )]
    // FLOAT_LIST_VALUE_NAME
    pub float_list: Option<FloatListValueType>,

    /// Index configuration for sparse-vector values.
    #[serde(
        rename = "sparse_vector",
        alias = "#sparse_vector",
        skip_serializing_if = "Option::is_none"
    )]
    // SPARSE_VECTOR_VALUE_NAME
    pub sparse_vector: Option<SparseVectorValueType>,

    /// Index configuration for integer values.
    #[serde(
        rename = "int",
        alias = "#int",
        skip_serializing_if = "Option::is_none"
    )] // INT_VALUE_NAME
    pub int: Option<IntValueType>,

    /// Index configuration for float values.
    #[serde(
        rename = "float",
        alias = "#float",
        skip_serializing_if = "Option::is_none"
    )] // FLOAT_VALUE_NAME
    pub float: Option<FloatValueType>,

    /// Index configuration for boolean values (serialized as `bool`).
    #[serde(
        rename = "bool",
        alias = "#bool",
        skip_serializing_if = "Option::is_none"
    )] // BOOL_VALUE_NAME
    pub boolean: Option<BoolValueType>,
}
186
/// String value type index configurations
///
/// Each index is optional; `None` entries are omitted when serializing and a
/// `$`-prefixed alias is accepted when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringValueType {
    /// Settings for the `fts_index` on string values.
    #[serde(
        rename = "fts_index",
        alias = "$fts_index",
        skip_serializing_if = "Option::is_none"
    )] // FTS_INDEX_NAME
    pub fts_index: Option<FtsIndexType>,

    /// Settings for the inverted index on string values.
    #[serde(
        rename = "string_inverted_index", // STRING_INVERTED_INDEX_NAME
        alias = "$string_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub string_inverted_index: Option<StringInvertedIndexType>,
}
205
/// Float list value type index configurations (for vectors)
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatListValueType {
    /// Settings for the vector index; omitted from serialized output when `None`,
    /// and also readable from the `$vector_index` alias.
    #[serde(
        rename = "vector_index",
        alias = "$vector_index",
        skip_serializing_if = "Option::is_none"
    )] // VECTOR_INDEX_NAME
    pub vector_index: Option<VectorIndexType>,
}
217
/// Sparse vector value type index configurations
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorValueType {
    /// Settings for the sparse vector index; omitted from serialized output when `None`,
    /// and also readable from the `$sparse_vector_index` alias.
    #[serde(
        rename = "sparse_vector_index", // SPARSE_VECTOR_INDEX_NAME
        alias = "$sparse_vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector_index: Option<SparseVectorIndexType>,
}
229
/// Integer value type index configurations
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntValueType {
    /// Settings for the inverted index on integer values; omitted from serialized output
    /// when `None`, and also readable from the `$int_inverted_index` alias.
    #[serde(
        rename = "int_inverted_index",
        alias = "$int_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    // INT_INVERTED_INDEX_NAME
    pub int_inverted_index: Option<IntInvertedIndexType>,
}
242
/// Float value type index configurations
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatValueType {
    /// Settings for the inverted index on float values; omitted from serialized output
    /// when `None`, and also readable from the `$float_inverted_index` alias.
    #[serde(
        rename = "float_inverted_index", // FLOAT_INVERTED_INDEX_NAME
        alias = "$float_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_inverted_index: Option<FloatInvertedIndexType>,
}
254
/// Boolean value type index configurations
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolValueType {
    /// Settings for the inverted index on boolean values; omitted from serialized output
    /// when `None`, and also readable from the `$bool_inverted_index` alias.
    #[serde(
        rename = "bool_inverted_index", // BOOL_INVERTED_INDEX_NAME
        alias = "$bool_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub bool_inverted_index: Option<BoolInvertedIndexType>,
}
266
// Individual index type structs with enabled status and config
/// Enabled status and configuration of the `fts_index`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FtsIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: FtsIndexConfig,
}
274
/// Enabled status and configuration of the vector index.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct VectorIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: VectorIndexConfig,
}
281
/// Enabled status and configuration of the sparse vector index.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: SparseVectorIndexConfig,
}
288
/// Enabled status and configuration of the string inverted index.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: StringInvertedIndexConfig,
}
295
/// Enabled status and configuration of the integer inverted index.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: IntInvertedIndexConfig,
}
302
/// Enabled status and configuration of the float inverted index.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: FloatInvertedIndexConfig,
}
309
/// Enabled status and configuration of the boolean inverted index.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: BoolInvertedIndexConfig,
}
316
317impl Schema {
318    /// Create a new Schema with strongly-typed default configurations
319    pub fn new_default(default_knn_index: KnnIndex) -> Self {
320        // Vector index disabled on all keys except #embedding.
321        let vector_config = VectorIndexType {
322            enabled: false,
323            config: VectorIndexConfig {
324                space: Some(default_space()),
325                embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
326                source_key: None,
327                hnsw: match default_knn_index {
328                    KnnIndex::Hnsw => Some(HnswIndexConfig {
329                        ef_construction: Some(default_construction_ef()),
330                        max_neighbors: Some(default_m()),
331                        ef_search: Some(default_search_ef()),
332                        num_threads: Some(default_num_threads()),
333                        batch_size: Some(default_batch_size()),
334                        sync_threshold: Some(default_sync_threshold()),
335                        resize_factor: Some(default_resize_factor()),
336                    }),
337                    KnnIndex::Spann => None,
338                },
339                spann: match default_knn_index {
340                    KnnIndex::Hnsw => None,
341                    KnnIndex::Spann => Some(SpannIndexConfig {
342                        search_nprobe: Some(default_search_nprobe()),
343                        search_rng_factor: Some(default_search_rng_factor()),
344                        search_rng_epsilon: Some(default_search_rng_epsilon()),
345                        nreplica_count: Some(default_nreplica_count()),
346                        write_rng_factor: Some(default_write_rng_factor()),
347                        write_rng_epsilon: Some(default_write_rng_epsilon()),
348                        split_threshold: Some(default_split_threshold()),
349                        num_samples_kmeans: Some(default_num_samples_kmeans()),
350                        initial_lambda: Some(default_initial_lambda()),
351                        reassign_neighbor_count: Some(default_reassign_neighbor_count()),
352                        merge_threshold: Some(default_merge_threshold()),
353                        num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
354                        write_nprobe: Some(default_write_nprobe()),
355                        ef_construction: Some(default_construction_ef_spann()),
356                        ef_search: Some(default_search_ef_spann()),
357                        max_neighbors: Some(default_m_spann()),
358                    }),
359                },
360            },
361        };
362
363        // Initialize defaults struct directly instead of using Default::default() + field assignments
364        let defaults = ValueTypes {
365            string: Some(StringValueType {
366                string_inverted_index: Some(StringInvertedIndexType {
367                    enabled: true,
368                    config: StringInvertedIndexConfig {},
369                }),
370                fts_index: Some(FtsIndexType {
371                    enabled: false,
372                    config: FtsIndexConfig {},
373                }),
374            }),
375            float: Some(FloatValueType {
376                float_inverted_index: Some(FloatInvertedIndexType {
377                    enabled: true,
378                    config: FloatInvertedIndexConfig {},
379                }),
380            }),
381            int: Some(IntValueType {
382                int_inverted_index: Some(IntInvertedIndexType {
383                    enabled: true,
384                    config: IntInvertedIndexConfig {},
385                }),
386            }),
387            boolean: Some(BoolValueType {
388                bool_inverted_index: Some(BoolInvertedIndexType {
389                    enabled: true,
390                    config: BoolInvertedIndexConfig {},
391                }),
392            }),
393            float_list: Some(FloatListValueType {
394                vector_index: Some(vector_config),
395            }),
396            sparse_vector: Some(SparseVectorValueType {
397                sparse_vector_index: Some(SparseVectorIndexType {
398                    enabled: false,
399                    config: SparseVectorIndexConfig {
400                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
401                        source_key: None,
402                        bm25: Some(false),
403                    },
404                }),
405            }),
406        };
407
408        // Set up key overrides
409        let mut keys = HashMap::new();
410
411        // Enable vector index for #embedding.
412        let embedding_defaults = ValueTypes {
413            float_list: Some(FloatListValueType {
414                vector_index: Some(VectorIndexType {
415                    enabled: true,
416                    config: VectorIndexConfig {
417                        space: Some(default_space()),
418                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
419                        source_key: Some(DOCUMENT_KEY.to_string()),
420                        hnsw: match default_knn_index {
421                            KnnIndex::Hnsw => Some(HnswIndexConfig {
422                                ef_construction: Some(default_construction_ef()),
423                                max_neighbors: Some(default_m()),
424                                ef_search: Some(default_search_ef()),
425                                num_threads: Some(default_num_threads()),
426                                batch_size: Some(default_batch_size()),
427                                sync_threshold: Some(default_sync_threshold()),
428                                resize_factor: Some(default_resize_factor()),
429                            }),
430                            KnnIndex::Spann => None,
431                        },
432                        spann: match default_knn_index {
433                            KnnIndex::Hnsw => None,
434                            KnnIndex::Spann => Some(SpannIndexConfig {
435                                search_nprobe: Some(default_search_nprobe()),
436                                search_rng_factor: Some(default_search_rng_factor()),
437                                search_rng_epsilon: Some(default_search_rng_epsilon()),
438                                nreplica_count: Some(default_nreplica_count()),
439                                write_rng_factor: Some(default_write_rng_factor()),
440                                write_rng_epsilon: Some(default_write_rng_epsilon()),
441                                split_threshold: Some(default_split_threshold()),
442                                num_samples_kmeans: Some(default_num_samples_kmeans()),
443                                initial_lambda: Some(default_initial_lambda()),
444                                reassign_neighbor_count: Some(default_reassign_neighbor_count()),
445                                merge_threshold: Some(default_merge_threshold()),
446                                num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
447                                write_nprobe: Some(default_write_nprobe()),
448                                ef_construction: Some(default_construction_ef_spann()),
449                                ef_search: Some(default_search_ef_spann()),
450                                max_neighbors: Some(default_m_spann()),
451                            }),
452                        },
453                    },
454                }),
455            }),
456            ..Default::default()
457        };
458        keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
459
460        // Document defaults - initialize directly instead of Default::default() + field assignment
461        let document_defaults = ValueTypes {
462            string: Some(StringValueType {
463                fts_index: Some(FtsIndexType {
464                    enabled: true,
465                    config: FtsIndexConfig {},
466                }),
467                string_inverted_index: Some(StringInvertedIndexType {
468                    enabled: false,
469                    config: StringInvertedIndexConfig {},
470                }),
471            }),
472            ..Default::default()
473        };
474        keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
475
476        Schema { defaults, keys }
477    }
478
479    pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
480        let to_internal = |vector_index: &VectorIndexType| {
481            let space = vector_index.config.space.clone();
482            vector_index
483                .config
484                .spann
485                .clone()
486                .map(|config| (space.as_ref(), &config).into())
487        };
488
489        self.keys
490            .get(EMBEDDING_KEY)
491            .and_then(|value_types| value_types.float_list.as_ref())
492            .and_then(|float_list| float_list.vector_index.as_ref())
493            .and_then(to_internal)
494            .or_else(|| {
495                self.defaults
496                    .float_list
497                    .as_ref()
498                    .and_then(|float_list| float_list.vector_index.as_ref())
499                    .and_then(to_internal)
500            })
501    }
502
503    /// Reconcile user-provided schema with system defaults
504    ///
505    /// This method merges user configurations with system defaults, ensuring that:
506    /// - User overrides take precedence over defaults
507    /// - Missing user configurations fall back to system defaults
508    /// - Field-level merging for complex configurations (Vector, HNSW, SPANN, etc.)
509    pub fn reconcile_with_defaults(user_schema: Option<Schema>) -> Result<Self, String> {
510        let default_schema = Schema::new_default(KnnIndex::Spann);
511
512        match user_schema {
513            Some(user) => {
514                // Merge defaults with user overrides
515                let merged_defaults =
516                    Self::merge_value_types(&default_schema.defaults, &user.defaults)?;
517
518                // Merge key overrides
519                let mut merged_keys = default_schema.keys.clone();
520                for (key, user_value_types) in user.keys {
521                    if let Some(default_value_types) = merged_keys.get(&key) {
522                        // Merge with existing default key override
523                        let merged_value_types =
524                            Self::merge_value_types(default_value_types, &user_value_types)?;
525                        merged_keys.insert(key, merged_value_types);
526                    } else {
527                        // New key override from user
528                        merged_keys.insert(key, user_value_types);
529                    }
530                }
531
532                Ok(Schema {
533                    defaults: merged_defaults,
534                    keys: merged_keys,
535                })
536            }
537            None => Ok(default_schema),
538        }
539    }
540
541    /// Merge two schemas together, combining key overrides when possible.
542    pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
543        if self.defaults != other.defaults {
544            return Err(SchemaError::InvalidSchema {
545                reason: "Cannot merge schemas with differing defaults".to_string(),
546            });
547        }
548
549        let mut keys = self.keys.clone();
550
551        for (key, other_value_types) in &other.keys {
552            if let Some(existing) = keys.get(key).cloned() {
553                let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
554                keys.insert(key.clone(), merged);
555            } else {
556                keys.insert(key.clone(), other_value_types.clone());
557            }
558        }
559
560        Ok(Schema {
561            defaults: self.defaults.clone(),
562            keys,
563        })
564    }
565
566    fn merge_override_value_types(
567        key: &str,
568        left: &ValueTypes,
569        right: &ValueTypes,
570    ) -> Result<ValueTypes, SchemaError> {
571        Ok(ValueTypes {
572            string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
573            float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
574            int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
575            boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
576            float_list: Self::merge_float_list_override(
577                key,
578                left.float_list.as_ref(),
579                right.float_list.as_ref(),
580            )?,
581            sparse_vector: Self::merge_sparse_vector_override(
582                key,
583                left.sparse_vector.as_ref(),
584                right.sparse_vector.as_ref(),
585            )?,
586        })
587    }
588
589    fn merge_string_override(
590        key: &str,
591        left: Option<&StringValueType>,
592        right: Option<&StringValueType>,
593    ) -> Result<Option<StringValueType>, SchemaError> {
594        match (left, right) {
595            (Some(l), Some(r)) => Ok(Some(StringValueType {
596                string_inverted_index: Self::merge_index_or_error(
597                    l.string_inverted_index.as_ref(),
598                    r.string_inverted_index.as_ref(),
599                    &format!("key '{key}' string.string_inverted_index"),
600                )?,
601                fts_index: Self::merge_index_or_error(
602                    l.fts_index.as_ref(),
603                    r.fts_index.as_ref(),
604                    &format!("key '{key}' string.fts_index"),
605                )?,
606            })),
607            (Some(l), None) => Ok(Some(l.clone())),
608            (None, Some(r)) => Ok(Some(r.clone())),
609            (None, None) => Ok(None),
610        }
611    }
612
613    fn merge_float_override(
614        key: &str,
615        left: Option<&FloatValueType>,
616        right: Option<&FloatValueType>,
617    ) -> Result<Option<FloatValueType>, SchemaError> {
618        match (left, right) {
619            (Some(l), Some(r)) => Ok(Some(FloatValueType {
620                float_inverted_index: Self::merge_index_or_error(
621                    l.float_inverted_index.as_ref(),
622                    r.float_inverted_index.as_ref(),
623                    &format!("key '{key}' float.float_inverted_index"),
624                )?,
625            })),
626            (Some(l), None) => Ok(Some(l.clone())),
627            (None, Some(r)) => Ok(Some(r.clone())),
628            (None, None) => Ok(None),
629        }
630    }
631
632    fn merge_int_override(
633        key: &str,
634        left: Option<&IntValueType>,
635        right: Option<&IntValueType>,
636    ) -> Result<Option<IntValueType>, SchemaError> {
637        match (left, right) {
638            (Some(l), Some(r)) => Ok(Some(IntValueType {
639                int_inverted_index: Self::merge_index_or_error(
640                    l.int_inverted_index.as_ref(),
641                    r.int_inverted_index.as_ref(),
642                    &format!("key '{key}' int.int_inverted_index"),
643                )?,
644            })),
645            (Some(l), None) => Ok(Some(l.clone())),
646            (None, Some(r)) => Ok(Some(r.clone())),
647            (None, None) => Ok(None),
648        }
649    }
650
651    fn merge_bool_override(
652        key: &str,
653        left: Option<&BoolValueType>,
654        right: Option<&BoolValueType>,
655    ) -> Result<Option<BoolValueType>, SchemaError> {
656        match (left, right) {
657            (Some(l), Some(r)) => Ok(Some(BoolValueType {
658                bool_inverted_index: Self::merge_index_or_error(
659                    l.bool_inverted_index.as_ref(),
660                    r.bool_inverted_index.as_ref(),
661                    &format!("key '{key}' bool.bool_inverted_index"),
662                )?,
663            })),
664            (Some(l), None) => Ok(Some(l.clone())),
665            (None, Some(r)) => Ok(Some(r.clone())),
666            (None, None) => Ok(None),
667        }
668    }
669
670    fn merge_float_list_override(
671        key: &str,
672        left: Option<&FloatListValueType>,
673        right: Option<&FloatListValueType>,
674    ) -> Result<Option<FloatListValueType>, SchemaError> {
675        match (left, right) {
676            (Some(l), Some(r)) => Ok(Some(FloatListValueType {
677                vector_index: Self::merge_index_or_error(
678                    l.vector_index.as_ref(),
679                    r.vector_index.as_ref(),
680                    &format!("key '{key}' float_list.vector_index"),
681                )?,
682            })),
683            (Some(l), None) => Ok(Some(l.clone())),
684            (None, Some(r)) => Ok(Some(r.clone())),
685            (None, None) => Ok(None),
686        }
687    }
688
689    fn merge_sparse_vector_override(
690        key: &str,
691        left: Option<&SparseVectorValueType>,
692        right: Option<&SparseVectorValueType>,
693    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
694        match (left, right) {
695            (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
696                sparse_vector_index: Self::merge_index_or_error(
697                    l.sparse_vector_index.as_ref(),
698                    r.sparse_vector_index.as_ref(),
699                    &format!("key '{key}' sparse_vector.sparse_vector_index"),
700                )?,
701            })),
702            (Some(l), None) => Ok(Some(l.clone())),
703            (None, Some(r)) => Ok(Some(r.clone())),
704            (None, None) => Ok(None),
705        }
706    }
707
708    fn merge_index_or_error<T: Clone + PartialEq>(
709        left: Option<&T>,
710        right: Option<&T>,
711        context: &str,
712    ) -> Result<Option<T>, SchemaError> {
713        match (left, right) {
714            (Some(l), Some(r)) => {
715                if l == r {
716                    Ok(Some(l.clone()))
717                } else {
718                    Err(SchemaError::InvalidSchema {
719                        reason: format!("Conflicting configuration for {context}"),
720                    })
721                }
722            }
723            (Some(l), None) => Ok(Some(l.clone())),
724            (None, Some(r)) => Ok(Some(r.clone())),
725            (None, None) => Ok(None),
726        }
727    }
728
729    /// Merge two ValueTypes with field-level merging
730    /// User values take precedence over default values
731    fn merge_value_types(default: &ValueTypes, user: &ValueTypes) -> Result<ValueTypes, String> {
732        // Merge float_list first
733        let float_list =
734            Self::merge_float_list_type(default.float_list.as_ref(), user.float_list.as_ref());
735
736        // Validate the merged float_list (covers all merge cases)
737        if let Some(ref fl) = float_list {
738            Self::validate_float_list_value_type(fl)?;
739        }
740
741        Ok(ValueTypes {
742            string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
743            float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
744            int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
745            boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
746            float_list,
747            sparse_vector: Self::merge_sparse_vector_type(
748                default.sparse_vector.as_ref(),
749                user.sparse_vector.as_ref(),
750            )?,
751        })
752    }
753
754    /// Merge StringValueType configurations
755    fn merge_string_type(
756        default: Option<&StringValueType>,
757        user: Option<&StringValueType>,
758    ) -> Result<Option<StringValueType>, String> {
759        match (default, user) {
760            (Some(default), Some(user)) => Ok(Some(StringValueType {
761                string_inverted_index: Self::merge_string_inverted_index_type(
762                    default.string_inverted_index.as_ref(),
763                    user.string_inverted_index.as_ref(),
764                )?,
765                fts_index: Self::merge_fts_index_type(
766                    default.fts_index.as_ref(),
767                    user.fts_index.as_ref(),
768                )?,
769            })),
770            (Some(default), None) => Ok(Some(default.clone())),
771            (None, Some(user)) => Ok(Some(user.clone())),
772            (None, None) => Ok(None),
773        }
774    }
775
776    /// Merge FloatValueType configurations
777    fn merge_float_type(
778        default: Option<&FloatValueType>,
779        user: Option<&FloatValueType>,
780    ) -> Result<Option<FloatValueType>, String> {
781        match (default, user) {
782            (Some(default), Some(user)) => Ok(Some(FloatValueType {
783                float_inverted_index: Self::merge_float_inverted_index_type(
784                    default.float_inverted_index.as_ref(),
785                    user.float_inverted_index.as_ref(),
786                )?,
787            })),
788            (Some(default), None) => Ok(Some(default.clone())),
789            (None, Some(user)) => Ok(Some(user.clone())),
790            (None, None) => Ok(None),
791        }
792    }
793
794    /// Merge IntValueType configurations
795    fn merge_int_type(
796        default: Option<&IntValueType>,
797        user: Option<&IntValueType>,
798    ) -> Result<Option<IntValueType>, String> {
799        match (default, user) {
800            (Some(default), Some(user)) => Ok(Some(IntValueType {
801                int_inverted_index: Self::merge_int_inverted_index_type(
802                    default.int_inverted_index.as_ref(),
803                    user.int_inverted_index.as_ref(),
804                )?,
805            })),
806            (Some(default), None) => Ok(Some(default.clone())),
807            (None, Some(user)) => Ok(Some(user.clone())),
808            (None, None) => Ok(None),
809        }
810    }
811
812    /// Merge BoolValueType configurations
813    fn merge_bool_type(
814        default: Option<&BoolValueType>,
815        user: Option<&BoolValueType>,
816    ) -> Result<Option<BoolValueType>, String> {
817        match (default, user) {
818            (Some(default), Some(user)) => Ok(Some(BoolValueType {
819                bool_inverted_index: Self::merge_bool_inverted_index_type(
820                    default.bool_inverted_index.as_ref(),
821                    user.bool_inverted_index.as_ref(),
822                )?,
823            })),
824            (Some(default), None) => Ok(Some(default.clone())),
825            (None, Some(user)) => Ok(Some(user.clone())),
826            (None, None) => Ok(None),
827        }
828    }
829
830    /// Merge FloatListValueType configurations
831    fn merge_float_list_type(
832        default: Option<&FloatListValueType>,
833        user: Option<&FloatListValueType>,
834    ) -> Option<FloatListValueType> {
835        match (default, user) {
836            (Some(default), Some(user)) => Some(FloatListValueType {
837                vector_index: Self::merge_vector_index_type(
838                    default.vector_index.as_ref(),
839                    user.vector_index.as_ref(),
840                ),
841            }),
842            (Some(default), None) => Some(default.clone()),
843            (None, Some(user)) => Some(user.clone()),
844            (None, None) => None,
845        }
846    }
847
848    /// Merge SparseVectorValueType configurations
849    fn merge_sparse_vector_type(
850        default: Option<&SparseVectorValueType>,
851        user: Option<&SparseVectorValueType>,
852    ) -> Result<Option<SparseVectorValueType>, String> {
853        match (default, user) {
854            (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
855                sparse_vector_index: Self::merge_sparse_vector_index_type(
856                    default.sparse_vector_index.as_ref(),
857                    user.sparse_vector_index.as_ref(),
858                )?,
859            })),
860            (Some(default), None) => Ok(Some(default.clone())),
861            (None, Some(user)) => Ok(Some(user.clone())),
862            (None, None) => Ok(None),
863        }
864    }
865
866    /// Merge individual index type configurations
867    fn merge_string_inverted_index_type(
868        default: Option<&StringInvertedIndexType>,
869        user: Option<&StringInvertedIndexType>,
870    ) -> Result<Option<StringInvertedIndexType>, String> {
871        match (default, user) {
872            (Some(_default), Some(user)) => {
873                Ok(Some(StringInvertedIndexType {
874                    enabled: user.enabled,       // User enabled state takes precedence
875                    config: user.config.clone(), // User config takes precedence
876                }))
877            }
878            (Some(default), None) => Ok(Some(default.clone())),
879            (None, Some(user)) => Ok(Some(user.clone())),
880            (None, None) => Ok(None),
881        }
882    }
883
884    fn merge_fts_index_type(
885        default: Option<&FtsIndexType>,
886        user: Option<&FtsIndexType>,
887    ) -> Result<Option<FtsIndexType>, String> {
888        match (default, user) {
889            (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
890                enabled: user.enabled,
891                config: user.config.clone(),
892            })),
893            (Some(default), None) => Ok(Some(default.clone())),
894            (None, Some(user)) => Ok(Some(user.clone())),
895            (None, None) => Ok(None),
896        }
897    }
898
899    fn merge_float_inverted_index_type(
900        default: Option<&FloatInvertedIndexType>,
901        user: Option<&FloatInvertedIndexType>,
902    ) -> Result<Option<FloatInvertedIndexType>, String> {
903        match (default, user) {
904            (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
905                enabled: user.enabled,
906                config: user.config.clone(),
907            })),
908            (Some(default), None) => Ok(Some(default.clone())),
909            (None, Some(user)) => Ok(Some(user.clone())),
910            (None, None) => Ok(None),
911        }
912    }
913
914    fn merge_int_inverted_index_type(
915        default: Option<&IntInvertedIndexType>,
916        user: Option<&IntInvertedIndexType>,
917    ) -> Result<Option<IntInvertedIndexType>, String> {
918        match (default, user) {
919            (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
920                enabled: user.enabled,
921                config: user.config.clone(),
922            })),
923            (Some(default), None) => Ok(Some(default.clone())),
924            (None, Some(user)) => Ok(Some(user.clone())),
925            (None, None) => Ok(None),
926        }
927    }
928
929    fn merge_bool_inverted_index_type(
930        default: Option<&BoolInvertedIndexType>,
931        user: Option<&BoolInvertedIndexType>,
932    ) -> Result<Option<BoolInvertedIndexType>, String> {
933        match (default, user) {
934            (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
935                enabled: user.enabled,
936                config: user.config.clone(),
937            })),
938            (Some(default), None) => Ok(Some(default.clone())),
939            (None, Some(user)) => Ok(Some(user.clone())),
940            (None, None) => Ok(None),
941        }
942    }
943
944    fn merge_vector_index_type(
945        default: Option<&VectorIndexType>,
946        user: Option<&VectorIndexType>,
947    ) -> Option<VectorIndexType> {
948        match (default, user) {
949            (Some(default), Some(user)) => Some(VectorIndexType {
950                enabled: user.enabled,
951                config: Self::merge_vector_index_config(&default.config, &user.config),
952            }),
953            (Some(default), None) => Some(default.clone()),
954            (None, Some(user)) => Some(user.clone()),
955            (None, None) => None,
956        }
957    }
958
959    fn merge_sparse_vector_index_type(
960        default: Option<&SparseVectorIndexType>,
961        user: Option<&SparseVectorIndexType>,
962    ) -> Result<Option<SparseVectorIndexType>, String> {
963        match (default, user) {
964            (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
965                enabled: user.enabled,
966                config: Self::merge_sparse_vector_index_config(&default.config, &user.config)?,
967            })),
968            (Some(default), None) => Ok(Some(default.clone())),
969            (None, Some(user)) => Ok(Some(user.clone())),
970            (None, None) => Ok(None),
971        }
972    }
973
974    /// Validate FloatListValueType vector index configurations
975    /// This validates HNSW and SPANN configs within the merged float_list
976    fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), String> {
977        if let Some(vector_index) = &float_list.vector_index {
978            if let Some(hnsw) = &vector_index.config.hnsw {
979                hnsw.validate()
980                    .map_err(|e| format!("Invalid HNSW configuration: {}", e))?;
981            }
982            if let Some(spann) = &vector_index.config.spann {
983                spann
984                    .validate()
985                    .map_err(|e| format!("Invalid SPANN configuration: {}", e))?;
986            }
987        }
988        Ok(())
989    }
990
991    /// Merge VectorIndexConfig with field-level merging
992    fn merge_vector_index_config(
993        default: &VectorIndexConfig,
994        user: &VectorIndexConfig,
995    ) -> VectorIndexConfig {
996        VectorIndexConfig {
997            space: user.space.clone().or(default.space.clone()),
998            embedding_function: user
999                .embedding_function
1000                .clone()
1001                .or(default.embedding_function.clone()),
1002            source_key: user.source_key.clone().or(default.source_key.clone()),
1003            hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1004            spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
1005        }
1006    }
1007
1008    /// Merge SparseVectorIndexConfig with field-level merging
1009    fn merge_sparse_vector_index_config(
1010        default: &SparseVectorIndexConfig,
1011        user: &SparseVectorIndexConfig,
1012    ) -> Result<SparseVectorIndexConfig, String> {
1013        Ok(SparseVectorIndexConfig {
1014            embedding_function: user
1015                .embedding_function
1016                .clone()
1017                .or(default.embedding_function.clone()),
1018            source_key: user.source_key.clone().or(default.source_key.clone()),
1019            bm25: user.bm25.or(default.bm25),
1020        })
1021    }
1022
1023    /// Merge HNSW configurations with field-level merging
1024    fn merge_hnsw_configs(
1025        default_hnsw: Option<&HnswIndexConfig>,
1026        user_hnsw: Option<&HnswIndexConfig>,
1027    ) -> Option<HnswIndexConfig> {
1028        match (default_hnsw, user_hnsw) {
1029            (Some(default), Some(user)) => Some(HnswIndexConfig {
1030                ef_construction: user.ef_construction.or(default.ef_construction),
1031                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1032                ef_search: user.ef_search.or(default.ef_search),
1033                num_threads: user.num_threads.or(default.num_threads),
1034                batch_size: user.batch_size.or(default.batch_size),
1035                sync_threshold: user.sync_threshold.or(default.sync_threshold),
1036                resize_factor: user.resize_factor.or(default.resize_factor),
1037            }),
1038            (Some(default), None) => Some(default.clone()),
1039            (None, Some(user)) => Some(user.clone()),
1040            (None, None) => None,
1041        }
1042    }
1043
1044    /// Merge SPANN configurations with field-level merging
1045    fn merge_spann_configs(
1046        default_spann: Option<&SpannIndexConfig>,
1047        user_spann: Option<&SpannIndexConfig>,
1048    ) -> Option<SpannIndexConfig> {
1049        match (default_spann, user_spann) {
1050            (Some(default), Some(user)) => Some(SpannIndexConfig {
1051                search_nprobe: user.search_nprobe.or(default.search_nprobe),
1052                search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1053                search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1054                nreplica_count: user.nreplica_count.or(default.nreplica_count),
1055                write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1056                write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1057                split_threshold: user.split_threshold.or(default.split_threshold),
1058                num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1059                initial_lambda: user.initial_lambda.or(default.initial_lambda),
1060                reassign_neighbor_count: user
1061                    .reassign_neighbor_count
1062                    .or(default.reassign_neighbor_count),
1063                merge_threshold: user.merge_threshold.or(default.merge_threshold),
1064                num_centers_to_merge_to: user
1065                    .num_centers_to_merge_to
1066                    .or(default.num_centers_to_merge_to),
1067                write_nprobe: user.write_nprobe.or(default.write_nprobe),
1068                ef_construction: user.ef_construction.or(default.ef_construction),
1069                ef_search: user.ef_search.or(default.ef_search),
1070                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1071            }),
1072            (Some(default), None) => Some(default.clone()),
1073            (None, Some(user)) => Some(user.clone()),
1074            (None, None) => None,
1075        }
1076    }
1077
1078    /// Reconcile Schema with InternalCollectionConfiguration
1079    ///
1080    /// Simple reconciliation logic:
1081    /// 1. If collection config is default → return schema (schema is source of truth)
1082    /// 2. If collection config is non-default and schema is non-default → error (both set)
1083    /// 3. If collection config is non-default and schema is default → override schema with collection config
1084    pub fn reconcile_with_collection_config(
1085        schema: Schema,
1086        collection_config: InternalCollectionConfiguration,
1087    ) -> Result<Schema, String> {
1088        // 1. Check if collection config is default
1089        if collection_config.is_default() {
1090            // Collection config is default → schema is source of truth
1091            return Ok(schema);
1092        }
1093
1094        // 2. Collection config is non-default, check if schema is also non-default
1095        if !Self::is_schema_default(&schema) {
1096            // Both are non-default → error
1097            return Err(
1098                "Cannot set both collection config and schema at the same time".to_string(),
1099            );
1100        }
1101
1102        // 3. Collection config is non-default, schema is default → override schema with collection config
1103        Self::convert_collection_config_to_schema(collection_config)
1104    }
1105
1106    pub fn reconcile_schema_and_config(
1107        schema: Option<Schema>,
1108        configuration: Option<InternalCollectionConfiguration>,
1109    ) -> Result<Schema, String> {
1110        let reconciled_schema = Self::reconcile_with_defaults(schema)?;
1111        if let Some(config) = configuration {
1112            Self::reconcile_with_collection_config(reconciled_schema, config)
1113        } else {
1114            Ok(reconciled_schema)
1115        }
1116    }
1117
1118    pub fn default_with_embedding_function(
1119        embedding_function: EmbeddingFunctionConfiguration,
1120    ) -> Schema {
1121        let mut schema = Schema::new_default(KnnIndex::Spann);
1122        if let Some(float_list) = &mut schema.defaults.float_list {
1123            if let Some(vector_index) = &mut float_list.vector_index {
1124                vector_index.config.embedding_function = Some(embedding_function.clone());
1125            }
1126        }
1127        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1128            if let Some(float_list) = &mut embedding_types.float_list {
1129                if let Some(vector_index) = &mut float_list.vector_index {
1130                    vector_index.config.embedding_function = Some(embedding_function);
1131                }
1132            }
1133        }
1134        schema
1135    }
1136
1137    /// Check if schema is default by comparing it word-by-word with new_default
1138    fn is_schema_default(schema: &Schema) -> bool {
1139        // Compare with both possible default schemas (HNSW and SPANN)
1140        let default_hnsw = Schema::new_default(KnnIndex::Hnsw);
1141        let default_spann = Schema::new_default(KnnIndex::Spann);
1142
1143        schema == &default_hnsw || schema == &default_spann
1144    }
1145
1146    /// Convert InternalCollectionConfiguration to Schema
1147    fn convert_collection_config_to_schema(
1148        collection_config: InternalCollectionConfiguration,
1149    ) -> Result<Schema, String> {
1150        // Start with a default schema structure
1151        let mut schema = Schema::new_default(KnnIndex::Spann); // Default to HNSW, will be overridden
1152
1153        // Convert vector index configuration
1154        let vector_config = match collection_config.vector_index {
1155            VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
1156                space: Some(hnsw_config.space),
1157                embedding_function: collection_config.embedding_function,
1158                source_key: Some(DOCUMENT_KEY.to_string()), // Default source key
1159                hnsw: Some(HnswIndexConfig {
1160                    ef_construction: Some(hnsw_config.ef_construction),
1161                    max_neighbors: Some(hnsw_config.max_neighbors),
1162                    ef_search: Some(hnsw_config.ef_search),
1163                    num_threads: Some(hnsw_config.num_threads),
1164                    batch_size: Some(hnsw_config.batch_size),
1165                    sync_threshold: Some(hnsw_config.sync_threshold),
1166                    resize_factor: Some(hnsw_config.resize_factor),
1167                }),
1168                spann: None,
1169            },
1170            VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
1171                space: Some(spann_config.space),
1172                embedding_function: collection_config.embedding_function,
1173                source_key: Some(DOCUMENT_KEY.to_string()), // Default source key
1174                hnsw: None,
1175                spann: Some(SpannIndexConfig {
1176                    search_nprobe: Some(spann_config.search_nprobe),
1177                    search_rng_factor: Some(spann_config.search_rng_factor),
1178                    search_rng_epsilon: Some(spann_config.search_rng_epsilon),
1179                    nreplica_count: Some(spann_config.nreplica_count),
1180                    write_rng_factor: Some(spann_config.write_rng_factor),
1181                    write_rng_epsilon: Some(spann_config.write_rng_epsilon),
1182                    split_threshold: Some(spann_config.split_threshold),
1183                    num_samples_kmeans: Some(spann_config.num_samples_kmeans),
1184                    initial_lambda: Some(spann_config.initial_lambda),
1185                    reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
1186                    merge_threshold: Some(spann_config.merge_threshold),
1187                    num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
1188                    write_nprobe: Some(spann_config.write_nprobe),
1189                    ef_construction: Some(spann_config.ef_construction),
1190                    ef_search: Some(spann_config.ef_search),
1191                    max_neighbors: Some(spann_config.max_neighbors),
1192                }),
1193            },
1194        };
1195
1196        // Update defaults (keep enabled=false, just update the config)
1197        // This serves as the template for any new float_list fields
1198        if let Some(float_list) = &mut schema.defaults.float_list {
1199            if let Some(vector_index) = &mut float_list.vector_index {
1200                vector_index.config = vector_config.clone();
1201            }
1202        }
1203
1204        // Update the vector_index in the existing #embedding key override
1205        // Keep enabled=true (already set by new_default) and update the config
1206        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1207            if let Some(float_list) = &mut embedding_types.float_list {
1208                if let Some(vector_index) = &mut float_list.vector_index {
1209                    vector_index.config = vector_config;
1210                }
1211            }
1212        }
1213
1214        Ok(schema)
1215    }
1216
1217    /// Check if a specific metadata key-value should be indexed based on schema configuration
1218    pub fn is_metadata_type_index_enabled(
1219        &self,
1220        key: &str,
1221        value_type: MetadataValueType,
1222    ) -> Result<bool, SchemaError> {
1223        let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1224
1225        match value_type {
1226            MetadataValueType::Bool => match &v_type.boolean {
1227                Some(bool_type) => match &bool_type.bool_inverted_index {
1228                    Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1229                    None => Err(SchemaError::MissingIndexConfiguration {
1230                        key: key.to_string(),
1231                        value_type: "bool".to_string(),
1232                    }),
1233                },
1234                None => match &self.defaults.boolean {
1235                    Some(bool_type) => match &bool_type.bool_inverted_index {
1236                        Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1237                        None => Err(SchemaError::MissingIndexConfiguration {
1238                            key: key.to_string(),
1239                            value_type: "bool".to_string(),
1240                        }),
1241                    },
1242                    None => Err(SchemaError::MissingIndexConfiguration {
1243                        key: key.to_string(),
1244                        value_type: "bool".to_string(),
1245                    }),
1246                },
1247            },
1248            MetadataValueType::Int => match &v_type.int {
1249                Some(int_type) => match &int_type.int_inverted_index {
1250                    Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1251                    None => Err(SchemaError::MissingIndexConfiguration {
1252                        key: key.to_string(),
1253                        value_type: "int".to_string(),
1254                    }),
1255                },
1256                None => match &self.defaults.int {
1257                    Some(int_type) => match &int_type.int_inverted_index {
1258                        Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1259                        None => Err(SchemaError::MissingIndexConfiguration {
1260                            key: key.to_string(),
1261                            value_type: "int".to_string(),
1262                        }),
1263                    },
1264                    None => Err(SchemaError::MissingIndexConfiguration {
1265                        key: key.to_string(),
1266                        value_type: "int".to_string(),
1267                    }),
1268                },
1269            },
1270            MetadataValueType::Float => match &v_type.float {
1271                Some(float_type) => match &float_type.float_inverted_index {
1272                    Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1273                    None => Err(SchemaError::MissingIndexConfiguration {
1274                        key: key.to_string(),
1275                        value_type: "float".to_string(),
1276                    }),
1277                },
1278                None => match &self.defaults.float {
1279                    Some(float_type) => match &float_type.float_inverted_index {
1280                        Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1281                        None => Err(SchemaError::MissingIndexConfiguration {
1282                            key: key.to_string(),
1283                            value_type: "float".to_string(),
1284                        }),
1285                    },
1286                    None => Err(SchemaError::MissingIndexConfiguration {
1287                        key: key.to_string(),
1288                        value_type: "float".to_string(),
1289                    }),
1290                },
1291            },
1292            MetadataValueType::Str => match &v_type.string {
1293                Some(string_type) => match &string_type.string_inverted_index {
1294                    Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1295                    None => Err(SchemaError::MissingIndexConfiguration {
1296                        key: key.to_string(),
1297                        value_type: "string".to_string(),
1298                    }),
1299                },
1300                None => match &self.defaults.string {
1301                    Some(string_type) => match &string_type.string_inverted_index {
1302                        Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1303                        None => Err(SchemaError::MissingIndexConfiguration {
1304                            key: key.to_string(),
1305                            value_type: "string".to_string(),
1306                        }),
1307                    },
1308                    None => Err(SchemaError::MissingIndexConfiguration {
1309                        key: key.to_string(),
1310                        value_type: "string".to_string(),
1311                    }),
1312                },
1313            },
1314            MetadataValueType::SparseVector => match &v_type.sparse_vector {
1315                Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1316                    Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1317                    None => Err(SchemaError::MissingIndexConfiguration {
1318                        key: key.to_string(),
1319                        value_type: "sparse_vector".to_string(),
1320                    }),
1321                },
1322                None => match &self.defaults.sparse_vector {
1323                    Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1324                        Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1325                        None => Err(SchemaError::MissingIndexConfiguration {
1326                            key: key.to_string(),
1327                            value_type: "sparse_vector".to_string(),
1328                        }),
1329                    },
1330                    None => Err(SchemaError::MissingIndexConfiguration {
1331                        key: key.to_string(),
1332                        value_type: "sparse_vector".to_string(),
1333                    }),
1334                },
1335            },
1336        }
1337    }
1338
1339    pub fn is_metadata_where_indexing_enabled(
1340        &self,
1341        where_clause: &Where,
1342    ) -> Result<(), FilterValidationError> {
1343        match where_clause {
1344            Where::Composite(composite) => {
1345                for child in &composite.children {
1346                    self.is_metadata_where_indexing_enabled(child)?;
1347                }
1348                Ok(())
1349            }
1350            Where::Document(_) => Ok(()),
1351            Where::Metadata(expression) => {
1352                let value_type = match &expression.comparison {
1353                    MetadataComparison::Primitive(_, value) => value.value_type(),
1354                    MetadataComparison::Set(_, set_value) => set_value.value_type(),
1355                };
1356                let is_enabled = self
1357                    .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1358                    .map_err(FilterValidationError::Schema)?;
1359                if !is_enabled {
1360                    return Err(FilterValidationError::IndexingDisabled {
1361                        key: expression.key.clone(),
1362                        value_type,
1363                    });
1364                }
1365                Ok(())
1366            }
1367        }
1368    }
1369
1370    pub fn is_knn_key_indexing_enabled(
1371        &self,
1372        key: &str,
1373        query: &QueryVector,
1374    ) -> Result<(), FilterValidationError> {
1375        match query {
1376            QueryVector::Sparse(_) => {
1377                let is_enabled = self
1378                    .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1379                    .map_err(FilterValidationError::Schema)?;
1380                if !is_enabled {
1381                    return Err(FilterValidationError::IndexingDisabled {
1382                        key: key.to_string(),
1383                        value_type: MetadataValueType::SparseVector,
1384                    });
1385                }
1386                Ok(())
1387            }
1388            QueryVector::Dense(_) => {
1389                // TODO: once we allow turning off dense vector indexing, we need to check if the key is enabled
1390                // Dense vectors are always indexed
1391                Ok(())
1392            }
1393        }
1394    }
1395
1396    pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
1397        let value_types = self.keys.entry(key.to_string()).or_default();
1398        match value_type {
1399            MetadataValueType::Bool => {
1400                if value_types.boolean.is_none() {
1401                    value_types.boolean = self.defaults.boolean.clone();
1402                    return true;
1403                }
1404            }
1405            MetadataValueType::Int => {
1406                if value_types.int.is_none() {
1407                    value_types.int = self.defaults.int.clone();
1408                    return true;
1409                }
1410            }
1411            MetadataValueType::Float => {
1412                if value_types.float.is_none() {
1413                    value_types.float = self.defaults.float.clone();
1414                    return true;
1415                }
1416            }
1417            MetadataValueType::Str => {
1418                if value_types.string.is_none() {
1419                    value_types.string = self.defaults.string.clone();
1420                    return true;
1421                }
1422            }
1423            MetadataValueType::SparseVector => {
1424                if value_types.sparse_vector.is_none() {
1425                    value_types.sparse_vector = self.defaults.sparse_vector.clone();
1426                    return true;
1427                }
1428            }
1429        }
1430        false
1431    }
1432}
1433
1434// ============================================================================
1435// INDEX CONFIGURATION STRUCTURES
1436// ============================================================================
1437
/// Configuration for a vector index.
///
/// Every field is optional. A field that is `None` is left out of the
/// serialized form, and deserialization rejects unknown fields.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct VectorIndexConfig {
    /// Vector space for similarity calculation (cosine, l2, ip)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub space: Option<Space>,
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// HNSW algorithm configuration
    // NOTE(review): the tests in this file expect a schema that carries both
    // `hnsw` and `spann` to be rejected when converted to an
    // `InternalCollectionConfiguration`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw: Option<HnswIndexConfig>,
    /// SPANN algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub spann: Option<SpannIndexConfig>,
}
1458
/// Configuration for HNSW vector index algorithm parameters
///
/// Every parameter is optional. `None` values are left out of the serialized
/// form, and deserialization rejects unknown fields. The range checks below
/// are declared through the `Validate` derive.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct HnswIndexConfig {
    /// `ef_construction` parameter of the HNSW index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_construction: Option<usize>,
    /// `max_neighbors` parameter of the HNSW index.
    // NOTE(review): the tests in this file expect `default_m()` here after
    // merging with the HNSW defaults, so this is presumably the HNSW `M` value.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_neighbors: Option<usize>,
    /// `ef_search` parameter of the HNSW index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_search: Option<usize>,
    /// `num_threads` parameter of the HNSW index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_threads: Option<usize>,
    /// `batch_size` parameter; validation requires a value of at least 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub batch_size: Option<usize>,
    /// `sync_threshold` parameter; validation requires a value of at least 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub sync_threshold: Option<usize>,
    /// `resize_factor` parameter of the HNSW index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resize_factor: Option<f64>,
}
1481
/// Configuration for SPANN vector index algorithm parameters
///
/// Every parameter is optional. `None` values are left out of the serialized
/// form, and deserialization rejects unknown fields. Each field documents the
/// range declared for it through the `Validate` derive.
// NOTE(review): with the `validator` crate, range checks on `Option` fields
// are expected to apply only when a value is present — confirm against the
// crate version in use.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SpannIndexConfig {
    /// `search_nprobe` parameter; valid values are at most 128.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 128))]
    pub search_nprobe: Option<u32>,
    /// `search_rng_factor` parameter; the only value passing validation is 1.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub search_rng_factor: Option<f32>,
    /// `search_rng_epsilon` parameter; valid values lie in 5.0..=10.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub search_rng_epsilon: Option<f32>,
    /// `nreplica_count` parameter; valid values are at most 8.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub nreplica_count: Option<u32>,
    /// `write_rng_factor` parameter; the only value passing validation is 1.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub write_rng_factor: Option<f32>,
    /// `write_rng_epsilon` parameter; valid values lie in 5.0..=10.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub write_rng_epsilon: Option<f32>,
    /// `split_threshold` parameter; valid values lie in 50..=200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 50, max = 200))]
    pub split_threshold: Option<u32>,
    /// `num_samples_kmeans` parameter; valid values are at most 1000.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 1000))]
    pub num_samples_kmeans: Option<usize>,
    /// `initial_lambda` parameter; the only value passing validation is 100.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 100.0, max = 100.0))]
    pub initial_lambda: Option<f32>,
    /// `reassign_neighbor_count` parameter; valid values are at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub reassign_neighbor_count: Option<u32>,
    /// `merge_threshold` parameter; valid values lie in 25..=100.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 25, max = 100))]
    pub merge_threshold: Option<u32>,
    /// `num_centers_to_merge_to` parameter; valid values are at most 8.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub num_centers_to_merge_to: Option<u32>,
    /// `write_nprobe` parameter; valid values are at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub write_nprobe: Option<u32>,
    /// `ef_construction` parameter; valid values are at most 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_construction: Option<usize>,
    /// `ef_search` parameter; valid values are at most 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_search: Option<usize>,
    /// `max_neighbors` parameter; valid values are at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub max_neighbors: Option<usize>,
}
1536
/// Configuration for a sparse vector index.
///
/// Every field is optional. A field that is `None` is left out of the
/// serialized form, and deserialization rejects unknown fields.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SparseVectorIndexConfig {
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the sparse vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// Whether this embedding is BM25
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bm25: Option<bool>,
}
1551
/// Configuration for the FTS index.
///
/// The struct has no fields; because of `deny_unknown_fields`, deserialization
/// rejects any input that carries a field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FtsIndexConfig {
    // FTS index typically has no additional parameters
}
1558
/// Configuration for the string inverted index.
///
/// The struct has no fields; because of `deny_unknown_fields`, deserialization
/// rejects any input that carries a field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct StringInvertedIndexConfig {
    // String inverted index typically has no additional parameters
}
1565
/// Configuration for the integer inverted index.
///
/// The struct has no fields; because of `deny_unknown_fields`, deserialization
/// rejects any input that carries a field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct IntInvertedIndexConfig {
    // Integer inverted index typically has no additional parameters
}
1572
/// Configuration for the float inverted index.
///
/// The struct has no fields; because of `deny_unknown_fields`, deserialization
/// rejects any input that carries a field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FloatInvertedIndexConfig {
    // Float inverted index typically has no additional parameters
}
1579
/// Configuration for the boolean inverted index.
///
/// The struct has no fields; because of `deny_unknown_fields`, deserialization
/// rejects any input that carries a field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct BoolInvertedIndexConfig {
    // Boolean inverted index typically has no additional parameters
}
1586
1587#[cfg(test)]
1588mod tests {
1589    use super::*;
1590    use crate::hnsw_configuration::Space;
1591    use crate::metadata::SparseVector;
1592    use crate::{
1593        EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
1594    };
1595    use serde_json::json;
1596
1597    #[test]
1598    fn test_reconcile_with_defaults_none_user_schema() {
1599        // Test that when no user schema is provided, we get the default schema
1600        let result = Schema::reconcile_with_defaults(None).unwrap();
1601        let expected = Schema::new_default(KnnIndex::Spann);
1602        assert_eq!(result, expected);
1603    }
1604
1605    #[test]
1606    fn test_reconcile_with_defaults_empty_user_schema() {
1607        // Test merging with an empty user schema
1608        let user_schema = Schema {
1609            defaults: ValueTypes::default(),
1610            keys: HashMap::new(),
1611        };
1612
1613        let result = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();
1614        let expected = Schema::new_default(KnnIndex::Spann);
1615        assert_eq!(result, expected);
1616    }
1617
1618    #[test]
1619    fn test_reconcile_with_defaults_user_overrides_string_enabled() {
1620        // Test that user can override string inverted index enabled state
1621        let mut user_schema = Schema {
1622            defaults: ValueTypes::default(),
1623            keys: HashMap::new(),
1624        };
1625
1626        user_schema.defaults.string = Some(StringValueType {
1627            string_inverted_index: Some(StringInvertedIndexType {
1628                enabled: false, // Override default (true) to false
1629                config: StringInvertedIndexConfig {},
1630            }),
1631            fts_index: None,
1632        });
1633
1634        let result = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();
1635
1636        // Check that the user override took precedence
1637        assert!(
1638            !result
1639                .defaults
1640                .string
1641                .as_ref()
1642                .unwrap()
1643                .string_inverted_index
1644                .as_ref()
1645                .unwrap()
1646                .enabled
1647        );
1648        // Check that other defaults are still present
1649        assert!(result.defaults.float.is_some());
1650        assert!(result.defaults.int.is_some());
1651    }
1652
1653    #[test]
1654    fn test_reconcile_with_defaults_user_overrides_vector_config() {
1655        // Test field-level merging for vector configurations
1656        let mut user_schema = Schema {
1657            defaults: ValueTypes::default(),
1658            keys: HashMap::new(),
1659        };
1660
1661        user_schema.defaults.float_list = Some(FloatListValueType {
1662            vector_index: Some(VectorIndexType {
1663                enabled: true, // Enable vector index (default is false)
1664                config: VectorIndexConfig {
1665                    space: Some(Space::L2),                     // Override default space
1666                    embedding_function: None,                   // Will use default
1667                    source_key: Some("custom_key".to_string()), // Override default
1668                    hnsw: Some(HnswIndexConfig {
1669                        ef_construction: Some(500), // Override default
1670                        max_neighbors: None,        // Will use default
1671                        ef_search: None,            // Will use default
1672                        num_threads: None,
1673                        batch_size: None,
1674                        sync_threshold: None,
1675                        resize_factor: None,
1676                    }),
1677                    spann: None,
1678                },
1679            }),
1680        });
1681
1682        // Use HNSW defaults for this test so we have HNSW config to merge with
1683        let result = {
1684            let default_schema = Schema::new_default(KnnIndex::Hnsw);
1685            let merged_defaults =
1686                Schema::merge_value_types(&default_schema.defaults, &user_schema.defaults).unwrap();
1687            let mut merged_keys = default_schema.keys.clone();
1688            for (key, user_value_types) in user_schema.keys {
1689                if let Some(default_value_types) = merged_keys.get(&key) {
1690                    let merged_value_types =
1691                        Schema::merge_value_types(default_value_types, &user_value_types).unwrap();
1692                    merged_keys.insert(key, merged_value_types);
1693                } else {
1694                    merged_keys.insert(key, user_value_types);
1695                }
1696            }
1697            Schema {
1698                defaults: merged_defaults,
1699                keys: merged_keys,
1700            }
1701        };
1702
1703        let vector_config = &result
1704            .defaults
1705            .float_list
1706            .as_ref()
1707            .unwrap()
1708            .vector_index
1709            .as_ref()
1710            .unwrap()
1711            .config;
1712
1713        // Check user overrides took precedence
1714        assert_eq!(vector_config.space, Some(Space::L2));
1715        assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
1716        assert_eq!(
1717            vector_config.hnsw.as_ref().unwrap().ef_construction,
1718            Some(500)
1719        );
1720
1721        // Check defaults were preserved for unspecified fields
1722        assert_eq!(
1723            vector_config.embedding_function,
1724            Some(EmbeddingFunctionConfiguration::Legacy)
1725        );
1726        // Since user provided HNSW config, the default max_neighbors should be merged in
1727        assert_eq!(
1728            vector_config.hnsw.as_ref().unwrap().max_neighbors,
1729            Some(default_m())
1730        );
1731    }
1732
1733    #[test]
1734    fn test_reconcile_with_defaults_keys() {
1735        // Test that key overrides are properly merged
1736        let mut user_schema = Schema {
1737            defaults: ValueTypes::default(),
1738            keys: HashMap::new(),
1739        };
1740
1741        // Add a custom key override
1742        let custom_key_types = ValueTypes {
1743            string: Some(StringValueType {
1744                fts_index: Some(FtsIndexType {
1745                    enabled: true,
1746                    config: FtsIndexConfig {},
1747                }),
1748                string_inverted_index: Some(StringInvertedIndexType {
1749                    enabled: false,
1750                    config: StringInvertedIndexConfig {},
1751                }),
1752            }),
1753            ..Default::default()
1754        };
1755        user_schema
1756            .keys
1757            .insert("custom_key".to_string(), custom_key_types);
1758
1759        let result = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();
1760
1761        // Check that default key overrides are preserved
1762        assert!(result.keys.contains_key(EMBEDDING_KEY));
1763        assert!(result.keys.contains_key(DOCUMENT_KEY));
1764
1765        // Check that user key override was added
1766        assert!(result.keys.contains_key("custom_key"));
1767        let custom_override = result.keys.get("custom_key").unwrap();
1768        assert!(
1769            custom_override
1770                .string
1771                .as_ref()
1772                .unwrap()
1773                .fts_index
1774                .as_ref()
1775                .unwrap()
1776                .enabled
1777        );
1778    }
1779
1780    #[test]
1781    fn test_reconcile_with_defaults_override_existing_key() {
1782        // Test overriding an existing key override (like #embedding)
1783        let mut user_schema = Schema {
1784            defaults: ValueTypes::default(),
1785            keys: HashMap::new(),
1786        };
1787
1788        // Override the #embedding key with custom settings
1789        let embedding_override = ValueTypes {
1790            float_list: Some(FloatListValueType {
1791                vector_index: Some(VectorIndexType {
1792                    enabled: false, // Override default enabled=true to false
1793                    config: VectorIndexConfig {
1794                        space: Some(Space::Ip), // Override default space
1795                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
1796                        source_key: Some("custom_embedding_key".to_string()),
1797                        hnsw: None,
1798                        spann: None,
1799                    },
1800                }),
1801            }),
1802            ..Default::default()
1803        };
1804        user_schema
1805            .keys
1806            .insert(EMBEDDING_KEY.to_string(), embedding_override);
1807
1808        let result = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();
1809
1810        let embedding_config = result.keys.get(EMBEDDING_KEY).unwrap();
1811        let vector_config = &embedding_config
1812            .float_list
1813            .as_ref()
1814            .unwrap()
1815            .vector_index
1816            .as_ref()
1817            .unwrap();
1818
1819        // Check user overrides took precedence
1820        assert!(!vector_config.enabled);
1821        assert_eq!(vector_config.config.space, Some(Space::Ip));
1822        assert_eq!(
1823            vector_config.config.source_key,
1824            Some("custom_embedding_key".to_string())
1825        );
1826    }
1827
1828    #[test]
1829    fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
1830        let collection_config = InternalCollectionConfiguration {
1831            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
1832                space: Space::Cosine,
1833                ef_construction: 128,
1834                ef_search: 96,
1835                max_neighbors: 42,
1836                num_threads: 8,
1837                resize_factor: 1.5,
1838                sync_threshold: 2_000,
1839                batch_size: 256,
1840            }),
1841            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
1842                EmbeddingFunctionNewConfiguration {
1843                    name: "custom".to_string(),
1844                    config: json!({"alpha": 1}),
1845                },
1846            )),
1847        };
1848
1849        let schema =
1850            Schema::convert_collection_config_to_schema(collection_config.clone()).unwrap();
1851        let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
1852
1853        assert_eq!(reconstructed, collection_config);
1854    }
1855
1856    #[test]
1857    fn test_convert_schema_to_collection_config_spann_roundtrip() {
1858        let spann_config = InternalSpannConfiguration {
1859            space: Space::Cosine,
1860            search_nprobe: 11,
1861            search_rng_factor: 1.7,
1862            write_nprobe: 5,
1863            nreplica_count: 3,
1864            split_threshold: 150,
1865            merge_threshold: 80,
1866            ef_construction: 120,
1867            ef_search: 90,
1868            max_neighbors: 40,
1869            ..Default::default()
1870        };
1871
1872        let collection_config = InternalCollectionConfiguration {
1873            vector_index: VectorIndexConfiguration::Spann(spann_config.clone()),
1874            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
1875                EmbeddingFunctionNewConfiguration {
1876                    name: "custom".to_string(),
1877                    config: json!({"beta": true}),
1878                },
1879            )),
1880        };
1881
1882        let schema =
1883            Schema::convert_collection_config_to_schema(collection_config.clone()).unwrap();
1884        let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
1885
1886        assert_eq!(reconstructed, collection_config);
1887    }
1888
1889    #[test]
1890    fn test_convert_schema_to_collection_config_rejects_mixed_index() {
1891        let mut schema = Schema::new_default(KnnIndex::Hnsw);
1892        if let Some(embedding) = schema.keys.get_mut(EMBEDDING_KEY) {
1893            if let Some(float_list) = &mut embedding.float_list {
1894                if let Some(vector_index) = &mut float_list.vector_index {
1895                    vector_index.config.spann = Some(SpannIndexConfig {
1896                        search_nprobe: Some(1),
1897                        search_rng_factor: Some(1.0),
1898                        search_rng_epsilon: Some(0.1),
1899                        nreplica_count: Some(1),
1900                        write_rng_factor: Some(1.0),
1901                        write_rng_epsilon: Some(0.1),
1902                        split_threshold: Some(100),
1903                        num_samples_kmeans: Some(10),
1904                        initial_lambda: Some(0.5),
1905                        reassign_neighbor_count: Some(10),
1906                        merge_threshold: Some(50),
1907                        num_centers_to_merge_to: Some(3),
1908                        write_nprobe: Some(1),
1909                        ef_construction: Some(50),
1910                        ef_search: Some(40),
1911                        max_neighbors: Some(20),
1912                    });
1913                }
1914            }
1915        }
1916
1917        let result = InternalCollectionConfiguration::try_from(&schema);
1918        assert!(result.is_err());
1919    }
1920
1921    #[test]
1922    fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
1923        let mut schema = Schema::new_default(KnnIndex::Hnsw);
1924        let before = schema.clone();
1925        let modified = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);
1926        assert!(!modified);
1927        assert_eq!(schema, before);
1928    }
1929
1930    #[test]
1931    fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
1932        let mut schema = Schema::new_default(KnnIndex::Hnsw);
1933        assert!(!schema.keys.contains_key("custom_field"));
1934
1935        let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
1936
1937        assert!(modified);
1938        let entry = schema
1939            .keys
1940            .get("custom_field")
1941            .expect("expected new key override to be inserted");
1942        assert_eq!(entry.boolean, schema.defaults.boolean);
1943        assert!(entry.string.is_none());
1944        assert!(entry.int.is_none());
1945        assert!(entry.float.is_none());
1946        assert!(entry.float_list.is_none());
1947        assert!(entry.sparse_vector.is_none());
1948    }
1949
1950    #[test]
1951    fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
1952        let mut schema = Schema::new_default(KnnIndex::Hnsw);
1953        let initial_len = schema.keys.len();
1954        schema.keys.insert(
1955            "custom_field".to_string(),
1956            ValueTypes {
1957                string: schema.defaults.string.clone(),
1958                ..Default::default()
1959            },
1960        );
1961
1962        let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
1963
1964        assert!(modified);
1965        assert_eq!(schema.keys.len(), initial_len + 1);
1966        let entry = schema
1967            .keys
1968            .get("custom_field")
1969            .expect("expected key override to exist after ensure call");
1970        assert!(entry.string.is_some());
1971        assert_eq!(entry.boolean, schema.defaults.boolean);
1972    }
1973
1974    #[test]
1975    fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
1976        let schema = Schema::new_default(KnnIndex::Spann);
1977        let result = schema.is_knn_key_indexing_enabled(
1978            "custom_sparse",
1979            &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32])),
1980        );
1981
1982        let err = result.expect_err("expected indexing disabled error");
1983        match err {
1984            FilterValidationError::IndexingDisabled { key, value_type } => {
1985                assert_eq!(key, "custom_sparse");
1986                assert_eq!(value_type, crate::metadata::MetadataValueType::SparseVector);
1987            }
1988            other => panic!("unexpected error variant: {other:?}"),
1989        }
1990    }
1991
1992    #[test]
1993    fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
1994        let mut schema = Schema::new_default(KnnIndex::Spann);
1995        schema.keys.insert(
1996            "sparse_enabled".to_string(),
1997            ValueTypes {
1998                sparse_vector: Some(SparseVectorValueType {
1999                    sparse_vector_index: Some(SparseVectorIndexType {
2000                        enabled: true,
2001                        config: SparseVectorIndexConfig {
2002                            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
2003                            source_key: None,
2004                            bm25: None,
2005                        },
2006                    }),
2007                }),
2008                ..Default::default()
2009            },
2010        );
2011
2012        let result = schema.is_knn_key_indexing_enabled(
2013            "sparse_enabled",
2014            &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32])),
2015        );
2016
2017        assert!(result.is_ok());
2018    }
2019
2020    #[test]
2021    fn test_is_knn_key_indexing_enabled_dense_succeeds() {
2022        let schema = Schema::new_default(KnnIndex::Spann);
2023        let result = schema.is_knn_key_indexing_enabled(
2024            EMBEDDING_KEY,
2025            &QueryVector::Dense(vec![0.1_f32, 0.2_f32]),
2026        );
2027
2028        assert!(result.is_ok());
2029    }
2030
2031    #[test]
2032    fn test_merge_hnsw_configs_field_level() {
2033        // Test field-level merging for HNSW configurations
2034        let default_hnsw = HnswIndexConfig {
2035            ef_construction: Some(200),
2036            max_neighbors: Some(16),
2037            ef_search: Some(10),
2038            num_threads: Some(4),
2039            batch_size: Some(100),
2040            sync_threshold: Some(1000),
2041            resize_factor: Some(1.2),
2042        };
2043
2044        let user_hnsw = HnswIndexConfig {
2045            ef_construction: Some(300), // Override
2046            max_neighbors: None,        // Will use default
2047            ef_search: Some(20),        // Override
2048            num_threads: None,          // Will use default
2049            batch_size: None,           // Will use default
2050            sync_threshold: Some(2000), // Override
2051            resize_factor: None,        // Will use default
2052        };
2053
2054        let result = Schema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();
2055
2056        // Check user overrides
2057        assert_eq!(result.ef_construction, Some(300));
2058        assert_eq!(result.ef_search, Some(20));
2059        assert_eq!(result.sync_threshold, Some(2000));
2060
2061        // Check defaults preserved
2062        assert_eq!(result.max_neighbors, Some(16));
2063        assert_eq!(result.num_threads, Some(4));
2064        assert_eq!(result.batch_size, Some(100));
2065        assert_eq!(result.resize_factor, Some(1.2));
2066    }
2067
2068    #[test]
2069    fn test_merge_spann_configs_field_level() {
2070        // Test field-level merging for SPANN configurations
2071        let default_spann = SpannIndexConfig {
2072            search_nprobe: Some(10),
2073            search_rng_factor: Some(1.0),  // Must be exactly 1.0
2074            search_rng_epsilon: Some(7.0), // Must be 5.0-10.0
2075            nreplica_count: Some(3),
2076            write_rng_factor: Some(1.0),  // Must be exactly 1.0
2077            write_rng_epsilon: Some(6.0), // Must be 5.0-10.0
2078            split_threshold: Some(100),   // Must be 50-200
2079            num_samples_kmeans: Some(100),
2080            initial_lambda: Some(100.0), // Must be exactly 100.0
2081            reassign_neighbor_count: Some(50),
2082            merge_threshold: Some(50),        // Must be 25-100
2083            num_centers_to_merge_to: Some(4), // Max is 8
2084            write_nprobe: Some(5),
2085            ef_construction: Some(100),
2086            ef_search: Some(10),
2087            max_neighbors: Some(16),
2088        };
2089
2090        let user_spann = SpannIndexConfig {
2091            search_nprobe: Some(20),       // Override
2092            search_rng_factor: None,       // Will use default
2093            search_rng_epsilon: Some(8.0), // Override (valid: 5.0-10.0)
2094            nreplica_count: None,          // Will use default
2095            write_rng_factor: None,
2096            write_rng_epsilon: None,
2097            split_threshold: Some(150), // Override (valid: 50-200)
2098            num_samples_kmeans: None,
2099            initial_lambda: None,
2100            reassign_neighbor_count: None,
2101            merge_threshold: None,
2102            num_centers_to_merge_to: None,
2103            write_nprobe: None,
2104            ef_construction: None,
2105            ef_search: None,
2106            max_neighbors: None,
2107        };
2108
2109        let result = Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann)).unwrap();
2110
2111        // Check user overrides
2112        assert_eq!(result.search_nprobe, Some(20));
2113        assert_eq!(result.search_rng_epsilon, Some(8.0));
2114        assert_eq!(result.split_threshold, Some(150));
2115
2116        // Check defaults preserved
2117        assert_eq!(result.search_rng_factor, Some(1.0));
2118        assert_eq!(result.nreplica_count, Some(3));
2119        assert_eq!(result.initial_lambda, Some(100.0));
2120    }
2121
2122    #[test]
2123    fn test_spann_index_config_into_internal_configuration() {
2124        let config = SpannIndexConfig {
2125            search_nprobe: Some(33),
2126            search_rng_factor: Some(1.2),
2127            search_rng_epsilon: None,
2128            nreplica_count: None,
2129            write_rng_factor: Some(1.5),
2130            write_rng_epsilon: None,
2131            split_threshold: Some(75),
2132            num_samples_kmeans: None,
2133            initial_lambda: Some(0.9),
2134            reassign_neighbor_count: Some(40),
2135            merge_threshold: None,
2136            num_centers_to_merge_to: Some(4),
2137            write_nprobe: Some(60),
2138            ef_construction: Some(180),
2139            ef_search: Some(170),
2140            max_neighbors: Some(32),
2141        };
2142
2143        let with_space: InternalSpannConfiguration = (Some(&Space::Cosine), &config).into();
2144        assert_eq!(with_space.space, Space::Cosine);
2145        assert_eq!(with_space.search_nprobe, 33);
2146        assert_eq!(with_space.search_rng_factor, 1.2);
2147        assert_eq!(with_space.search_rng_epsilon, default_search_rng_epsilon());
2148        assert_eq!(with_space.write_rng_factor, 1.5);
2149        assert_eq!(with_space.write_nprobe, 60);
2150        assert_eq!(with_space.ef_construction, 180);
2151        assert_eq!(with_space.ef_search, 170);
2152        assert_eq!(with_space.max_neighbors, 32);
2153        assert_eq!(with_space.merge_threshold, default_merge_threshold());
2154
2155        let default_space_config: InternalSpannConfiguration = (None, &config).into();
2156        assert_eq!(default_space_config.space, default_space());
2157    }
2158
2159    #[test]
2160    fn test_merge_string_type_combinations() {
2161        // Test all combinations of default and user StringValueType
2162
2163        // Both Some - should merge
2164        let default = StringValueType {
2165            string_inverted_index: Some(StringInvertedIndexType {
2166                enabled: true,
2167                config: StringInvertedIndexConfig {},
2168            }),
2169            fts_index: Some(FtsIndexType {
2170                enabled: false,
2171                config: FtsIndexConfig {},
2172            }),
2173        };
2174
2175        let user = StringValueType {
2176            string_inverted_index: Some(StringInvertedIndexType {
2177                enabled: false, // Override
2178                config: StringInvertedIndexConfig {},
2179            }),
2180            fts_index: None, // Will use default
2181        };
2182
2183        let result = Schema::merge_string_type(Some(&default), Some(&user))
2184            .unwrap()
2185            .unwrap();
2186        assert!(!result.string_inverted_index.as_ref().unwrap().enabled); // User override
2187        assert!(!result.fts_index.as_ref().unwrap().enabled); // Default preserved
2188
2189        // Default Some, User None - should return default
2190        let result = Schema::merge_string_type(Some(&default), None)
2191            .unwrap()
2192            .unwrap();
2193        assert!(result.string_inverted_index.as_ref().unwrap().enabled);
2194
2195        // Default None, User Some - should return user
2196        let result = Schema::merge_string_type(None, Some(&user))
2197            .unwrap()
2198            .unwrap();
2199        assert!(!result.string_inverted_index.as_ref().unwrap().enabled);
2200
2201        // Both None - should return None
2202        let result = Schema::merge_string_type(None, None).unwrap();
2203        assert!(result.is_none());
2204    }
2205
2206    #[test]
2207    fn test_merge_vector_index_config_comprehensive() {
2208        // Test comprehensive vector index config merging
2209        let default_config = VectorIndexConfig {
2210            space: Some(Space::Cosine),
2211            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
2212            source_key: Some("default_key".to_string()),
2213            hnsw: Some(HnswIndexConfig {
2214                ef_construction: Some(200),
2215                max_neighbors: Some(16),
2216                ef_search: Some(10),
2217                num_threads: Some(4),
2218                batch_size: Some(100),
2219                sync_threshold: Some(1000),
2220                resize_factor: Some(1.2),
2221            }),
2222            spann: None,
2223        };
2224
2225        let user_config = VectorIndexConfig {
2226            space: Some(Space::L2),                   // Override
2227            embedding_function: None,                 // Will use default
2228            source_key: Some("user_key".to_string()), // Override
2229            hnsw: Some(HnswIndexConfig {
2230                ef_construction: Some(300), // Override
2231                max_neighbors: None,        // Will use default
2232                ef_search: None,            // Will use default
2233                num_threads: None,
2234                batch_size: None,
2235                sync_threshold: None,
2236                resize_factor: None,
2237            }),
2238            spann: Some(SpannIndexConfig {
2239                search_nprobe: Some(15),
2240                search_rng_factor: None,
2241                search_rng_epsilon: None,
2242                nreplica_count: None,
2243                write_rng_factor: None,
2244                write_rng_epsilon: None,
2245                split_threshold: None,
2246                num_samples_kmeans: None,
2247                initial_lambda: None,
2248                reassign_neighbor_count: None,
2249                merge_threshold: None,
2250                num_centers_to_merge_to: None,
2251                write_nprobe: None,
2252                ef_construction: None,
2253                ef_search: None,
2254                max_neighbors: None,
2255            }), // Add SPANN config
2256        };
2257
2258        let result = Schema::merge_vector_index_config(&default_config, &user_config);
2259
2260        // Check field-level merging
2261        assert_eq!(result.space, Some(Space::L2)); // User override
2262        assert_eq!(
2263            result.embedding_function,
2264            Some(EmbeddingFunctionConfiguration::Legacy)
2265        ); // Default preserved
2266        assert_eq!(result.source_key, Some("user_key".to_string())); // User override
2267
2268        // Check HNSW merging
2269        assert_eq!(result.hnsw.as_ref().unwrap().ef_construction, Some(300)); // User override
2270        assert_eq!(result.hnsw.as_ref().unwrap().max_neighbors, Some(16)); // Default preserved
2271
2272        // Check SPANN was added from user
2273        assert!(result.spann.is_some());
2274        assert_eq!(result.spann.as_ref().unwrap().search_nprobe, Some(15));
2275    }
2276
2277    #[test]
2278    fn test_merge_sparse_vector_index_config() {
2279        // Test sparse vector index config merging
2280        let default_config = SparseVectorIndexConfig {
2281            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
2282            source_key: Some("default_sparse_key".to_string()),
2283            bm25: None,
2284        };
2285
2286        let user_config = SparseVectorIndexConfig {
2287            embedding_function: None,                        // Will use default
2288            source_key: Some("user_sparse_key".to_string()), // Override
2289            bm25: None,
2290        };
2291
2292        let result =
2293            Schema::merge_sparse_vector_index_config(&default_config, &user_config).unwrap();
2294
2295        // Check user override
2296        assert_eq!(result.source_key, Some("user_sparse_key".to_string()));
2297        // Check default preserved
2298        assert_eq!(
2299            result.embedding_function,
2300            Some(EmbeddingFunctionConfiguration::Legacy)
2301        );
2302    }
2303
2304    #[test]
2305    fn test_complex_nested_merging_scenario() {
2306        // Test a complex scenario with multiple levels of merging
2307        let mut user_schema = Schema {
2308            defaults: ValueTypes::default(),
2309            keys: HashMap::new(),
2310        };
2311
2312        // Set up complex user defaults
2313        user_schema.defaults.string = Some(StringValueType {
2314            string_inverted_index: Some(StringInvertedIndexType {
2315                enabled: false,
2316                config: StringInvertedIndexConfig {},
2317            }),
2318            fts_index: Some(FtsIndexType {
2319                enabled: true,
2320                config: FtsIndexConfig {},
2321            }),
2322        });
2323
2324        user_schema.defaults.float_list = Some(FloatListValueType {
2325            vector_index: Some(VectorIndexType {
2326                enabled: true,
2327                config: VectorIndexConfig {
2328                    space: Some(Space::Ip),
2329                    embedding_function: None, // Will use default
2330                    source_key: Some("custom_vector_key".to_string()),
2331                    hnsw: Some(HnswIndexConfig {
2332                        ef_construction: Some(400),
2333                        max_neighbors: Some(32),
2334                        ef_search: None, // Will use default
2335                        num_threads: None,
2336                        batch_size: None,
2337                        sync_threshold: None,
2338                        resize_factor: None,
2339                    }),
2340                    spann: None,
2341                },
2342            }),
2343        });
2344
2345        // Set up key overrides
2346        let custom_key_override = ValueTypes {
2347            string: Some(StringValueType {
2348                fts_index: Some(FtsIndexType {
2349                    enabled: true,
2350                    config: FtsIndexConfig {},
2351                }),
2352                string_inverted_index: None,
2353            }),
2354            ..Default::default()
2355        };
2356        user_schema
2357            .keys
2358            .insert("custom_field".to_string(), custom_key_override);
2359
2360        // Use HNSW defaults for this test so we have HNSW config to merge with
2361        let result = {
2362            let default_schema = Schema::new_default(KnnIndex::Hnsw);
2363            let merged_defaults =
2364                Schema::merge_value_types(&default_schema.defaults, &user_schema.defaults).unwrap();
2365            let mut merged_keys = default_schema.keys.clone();
2366            for (key, user_value_types) in user_schema.keys {
2367                if let Some(default_value_types) = merged_keys.get(&key) {
2368                    let merged_value_types =
2369                        Schema::merge_value_types(default_value_types, &user_value_types).unwrap();
2370                    merged_keys.insert(key, merged_value_types);
2371                } else {
2372                    merged_keys.insert(key, user_value_types);
2373                }
2374            }
2375            Schema {
2376                defaults: merged_defaults,
2377                keys: merged_keys,
2378            }
2379        };
2380
2381        // Verify complex merging worked correctly
2382
2383        // Check defaults merging
2384        assert!(
2385            !result
2386                .defaults
2387                .string
2388                .as_ref()
2389                .unwrap()
2390                .string_inverted_index
2391                .as_ref()
2392                .unwrap()
2393                .enabled
2394        );
2395        assert!(
2396            result
2397                .defaults
2398                .string
2399                .as_ref()
2400                .unwrap()
2401                .fts_index
2402                .as_ref()
2403                .unwrap()
2404                .enabled
2405        );
2406
2407        let vector_config = &result
2408            .defaults
2409            .float_list
2410            .as_ref()
2411            .unwrap()
2412            .vector_index
2413            .as_ref()
2414            .unwrap()
2415            .config;
2416        assert_eq!(vector_config.space, Some(Space::Ip));
2417        assert_eq!(
2418            vector_config.embedding_function,
2419            Some(EmbeddingFunctionConfiguration::Legacy)
2420        ); // Default preserved
2421        assert_eq!(
2422            vector_config.source_key,
2423            Some("custom_vector_key".to_string())
2424        );
2425        assert_eq!(
2426            vector_config.hnsw.as_ref().unwrap().ef_construction,
2427            Some(400)
2428        );
2429        assert_eq!(vector_config.hnsw.as_ref().unwrap().max_neighbors, Some(32));
2430        assert_eq!(
2431            vector_config.hnsw.as_ref().unwrap().ef_search,
2432            Some(default_search_ef())
2433        ); // Default preserved
2434
2435        // Check key overrides
2436        assert!(result.keys.contains_key(EMBEDDING_KEY)); // Default preserved
2437        assert!(result.keys.contains_key(DOCUMENT_KEY)); // Default preserved
2438        assert!(result.keys.contains_key("custom_field")); // User added
2439
2440        let custom_override = result.keys.get("custom_field").unwrap();
2441        assert!(
2442            custom_override
2443                .string
2444                .as_ref()
2445                .unwrap()
2446                .fts_index
2447                .as_ref()
2448                .unwrap()
2449                .enabled
2450        );
2451        assert!(custom_override
2452            .string
2453            .as_ref()
2454            .unwrap()
2455            .string_inverted_index
2456            .is_none());
2457    }
2458
2459    #[test]
2460    fn test_reconcile_with_collection_config_default_config() {
2461        // Test that when collection config is default, schema is returned as-is
2462        let schema = Schema::new_default(KnnIndex::Hnsw);
2463        let collection_config = InternalCollectionConfiguration::default_hnsw();
2464
2465        let result =
2466            Schema::reconcile_with_collection_config(schema.clone(), collection_config).unwrap();
2467        assert_eq!(result, schema);
2468    }
2469
2470    #[test]
2471    fn test_reconcile_with_collection_config_both_non_default() {
2472        // Test that when both schema and collection config are non-default, it returns an error
2473        let mut schema = Schema::new_default(KnnIndex::Hnsw);
2474        schema.defaults.string = Some(StringValueType {
2475            fts_index: Some(FtsIndexType {
2476                enabled: true,
2477                config: FtsIndexConfig {},
2478            }),
2479            string_inverted_index: None,
2480        });
2481
2482        let mut collection_config = InternalCollectionConfiguration::default_hnsw();
2483        // Make collection config non-default by changing a parameter
2484        if let VectorIndexConfiguration::Hnsw(ref mut hnsw_config) = collection_config.vector_index
2485        {
2486            hnsw_config.ef_construction = 500; // Non-default value
2487        }
2488
2489        let result = Schema::reconcile_with_collection_config(schema, collection_config);
2490        assert!(result.is_err());
2491        assert_eq!(
2492            result.unwrap_err(),
2493            "Cannot set both collection config and schema at the same time"
2494        );
2495    }
2496
2497    #[test]
2498    fn test_reconcile_with_collection_config_hnsw_override() {
2499        // Test that non-default HNSW collection config overrides default schema
2500        let schema = Schema::new_default(KnnIndex::Hnsw); // Use actual default schema
2501
2502        let collection_config = InternalCollectionConfiguration {
2503            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
2504                ef_construction: 300,
2505                max_neighbors: 32,
2506                ef_search: 50,
2507                num_threads: 8,
2508                batch_size: 200,
2509                sync_threshold: 2000,
2510                resize_factor: 1.5,
2511                space: Space::L2,
2512            }),
2513            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
2514        };
2515
2516        let result = Schema::reconcile_with_collection_config(schema, collection_config).unwrap();
2517
2518        // Check that #embedding key override was created with the collection config settings
2519        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
2520        let vector_index = embedding_override
2521            .float_list
2522            .as_ref()
2523            .unwrap()
2524            .vector_index
2525            .as_ref()
2526            .unwrap();
2527
2528        assert!(vector_index.enabled);
2529        assert_eq!(vector_index.config.space, Some(Space::L2));
2530        assert_eq!(
2531            vector_index.config.embedding_function,
2532            Some(EmbeddingFunctionConfiguration::Legacy)
2533        );
2534        assert_eq!(
2535            vector_index.config.source_key,
2536            Some(DOCUMENT_KEY.to_string())
2537        );
2538
2539        let hnsw_config = vector_index.config.hnsw.as_ref().unwrap();
2540        assert_eq!(hnsw_config.ef_construction, Some(300));
2541        assert_eq!(hnsw_config.max_neighbors, Some(32));
2542        assert_eq!(hnsw_config.ef_search, Some(50));
2543        assert_eq!(hnsw_config.num_threads, Some(8));
2544        assert_eq!(hnsw_config.batch_size, Some(200));
2545        assert_eq!(hnsw_config.sync_threshold, Some(2000));
2546        assert_eq!(hnsw_config.resize_factor, Some(1.5));
2547
2548        assert!(vector_index.config.spann.is_none());
2549    }
2550
2551    #[test]
2552    fn test_reconcile_with_collection_config_spann_override() {
2553        // Test that non-default SPANN collection config overrides default schema
2554        let schema = Schema::new_default(KnnIndex::Spann); // Use actual default schema
2555
2556        let collection_config = InternalCollectionConfiguration {
2557            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
2558                search_nprobe: 20,
2559                search_rng_factor: 3.0,
2560                search_rng_epsilon: 0.2,
2561                nreplica_count: 5,
2562                write_rng_factor: 2.0,
2563                write_rng_epsilon: 0.1,
2564                split_threshold: 2000,
2565                num_samples_kmeans: 200,
2566                initial_lambda: 0.8,
2567                reassign_neighbor_count: 100,
2568                merge_threshold: 800,
2569                num_centers_to_merge_to: 20,
2570                write_nprobe: 10,
2571                ef_construction: 400,
2572                ef_search: 60,
2573                max_neighbors: 24,
2574                space: Space::Cosine,
2575            }),
2576            embedding_function: None,
2577        };
2578
2579        let result = Schema::reconcile_with_collection_config(schema, collection_config).unwrap();
2580
2581        // Check that #embedding key override was created with the collection config settings
2582        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
2583        let vector_index = embedding_override
2584            .float_list
2585            .as_ref()
2586            .unwrap()
2587            .vector_index
2588            .as_ref()
2589            .unwrap();
2590
2591        assert!(vector_index.enabled);
2592        assert_eq!(vector_index.config.space, Some(Space::Cosine));
2593        assert_eq!(vector_index.config.embedding_function, None);
2594        assert_eq!(
2595            vector_index.config.source_key,
2596            Some(DOCUMENT_KEY.to_string())
2597        );
2598
2599        assert!(vector_index.config.hnsw.is_none());
2600
2601        let spann_config = vector_index.config.spann.as_ref().unwrap();
2602        assert_eq!(spann_config.search_nprobe, Some(20));
2603        assert_eq!(spann_config.search_rng_factor, Some(3.0));
2604        assert_eq!(spann_config.search_rng_epsilon, Some(0.2));
2605        assert_eq!(spann_config.nreplica_count, Some(5));
2606        assert_eq!(spann_config.write_rng_factor, Some(2.0));
2607        assert_eq!(spann_config.write_rng_epsilon, Some(0.1));
2608        assert_eq!(spann_config.split_threshold, Some(2000));
2609        assert_eq!(spann_config.num_samples_kmeans, Some(200));
2610        assert_eq!(spann_config.initial_lambda, Some(0.8));
2611        assert_eq!(spann_config.reassign_neighbor_count, Some(100));
2612        assert_eq!(spann_config.merge_threshold, Some(800));
2613        assert_eq!(spann_config.num_centers_to_merge_to, Some(20));
2614        assert_eq!(spann_config.write_nprobe, Some(10));
2615        assert_eq!(spann_config.ef_construction, Some(400));
2616        assert_eq!(spann_config.ef_search, Some(60));
2617        assert_eq!(spann_config.max_neighbors, Some(24));
2618    }
2619
2620    #[test]
2621    fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
2622        // Test that collection config updates BOTH defaults.float_list.vector_index
2623        // AND keys["embedding"].float_list.vector_index
2624        let schema = Schema::new_default(KnnIndex::Hnsw);
2625
2626        let collection_config = InternalCollectionConfiguration {
2627            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
2628                ef_construction: 300,
2629                max_neighbors: 32,
2630                ef_search: 50,
2631                num_threads: 8,
2632                batch_size: 200,
2633                sync_threshold: 2000,
2634                resize_factor: 1.5,
2635                space: Space::L2,
2636            }),
2637            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
2638        };
2639
2640        let result = Schema::reconcile_with_collection_config(schema, collection_config).unwrap();
2641
2642        // Check that defaults.float_list.vector_index was updated
2643        let defaults_vector_index = result
2644            .defaults
2645            .float_list
2646            .as_ref()
2647            .unwrap()
2648            .vector_index
2649            .as_ref()
2650            .unwrap();
2651
2652        // Should be disabled in defaults (template for new keys)
2653        assert!(!defaults_vector_index.enabled);
2654        // But config should be updated
2655        assert_eq!(defaults_vector_index.config.space, Some(Space::L2));
2656        assert_eq!(
2657            defaults_vector_index.config.embedding_function,
2658            Some(EmbeddingFunctionConfiguration::Legacy)
2659        );
2660        assert_eq!(
2661            defaults_vector_index.config.source_key,
2662            Some(DOCUMENT_KEY.to_string())
2663        );
2664        let defaults_hnsw = defaults_vector_index.config.hnsw.as_ref().unwrap();
2665        assert_eq!(defaults_hnsw.ef_construction, Some(300));
2666        assert_eq!(defaults_hnsw.max_neighbors, Some(32));
2667
2668        // Check that #embedding key override was also updated
2669        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
2670        let embedding_vector_index = embedding_override
2671            .float_list
2672            .as_ref()
2673            .unwrap()
2674            .vector_index
2675            .as_ref()
2676            .unwrap();
2677
2678        // Should be enabled on #embedding
2679        assert!(embedding_vector_index.enabled);
2680        // Config should match defaults
2681        assert_eq!(embedding_vector_index.config.space, Some(Space::L2));
2682        assert_eq!(
2683            embedding_vector_index.config.embedding_function,
2684            Some(EmbeddingFunctionConfiguration::Legacy)
2685        );
2686        assert_eq!(
2687            embedding_vector_index.config.source_key,
2688            Some(DOCUMENT_KEY.to_string())
2689        );
2690        let embedding_hnsw = embedding_vector_index.config.hnsw.as_ref().unwrap();
2691        assert_eq!(embedding_hnsw.ef_construction, Some(300));
2692        assert_eq!(embedding_hnsw.max_neighbors, Some(32));
2693    }
2694
2695    #[test]
2696    fn test_is_schema_default() {
2697        // Test that actual default schemas are correctly identified
2698        let default_hnsw_schema = Schema::new_default(KnnIndex::Hnsw);
2699        assert!(Schema::is_schema_default(&default_hnsw_schema));
2700
2701        let default_spann_schema = Schema::new_default(KnnIndex::Spann);
2702        assert!(Schema::is_schema_default(&default_spann_schema));
2703
2704        // Test that an empty schema is NOT considered default (since it doesn't match new_default structure)
2705        let empty_schema = Schema {
2706            defaults: ValueTypes::default(),
2707            keys: HashMap::new(),
2708        };
2709        assert!(!Schema::is_schema_default(&empty_schema));
2710
2711        // Test that a modified default schema is not considered default
2712        let mut modified_schema = Schema::new_default(KnnIndex::Hnsw);
2713        // Make a clear modification - change the string inverted index enabled state
2714        if let Some(ref mut string_type) = modified_schema.defaults.string {
2715            if let Some(ref mut string_inverted) = string_type.string_inverted_index {
2716                string_inverted.enabled = false; // Default is true, so this should make it non-default
2717            }
2718        }
2719        assert!(!Schema::is_schema_default(&modified_schema));
2720
2721        // Test that schema with additional key overrides is not default
2722        let mut schema_with_extra_overrides = Schema::new_default(KnnIndex::Hnsw);
2723        schema_with_extra_overrides
2724            .keys
2725            .insert("custom_key".to_string(), ValueTypes::default());
2726        assert!(!Schema::is_schema_default(&schema_with_extra_overrides));
2727    }
2728
2729    #[test]
2730    fn test_add_merges_keys_by_value_type() {
2731        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
2732        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
2733
2734        let string_override = ValueTypes {
2735            string: Some(StringValueType {
2736                string_inverted_index: Some(StringInvertedIndexType {
2737                    enabled: true,
2738                    config: StringInvertedIndexConfig {},
2739                }),
2740                fts_index: None,
2741            }),
2742            ..Default::default()
2743        };
2744        schema_a
2745            .keys
2746            .insert("custom_field".to_string(), string_override);
2747
2748        let float_override = ValueTypes {
2749            float: Some(FloatValueType {
2750                float_inverted_index: Some(FloatInvertedIndexType {
2751                    enabled: true,
2752                    config: FloatInvertedIndexConfig {},
2753                }),
2754            }),
2755            ..Default::default()
2756        };
2757        schema_b
2758            .keys
2759            .insert("custom_field".to_string(), float_override);
2760
2761        let merged = schema_a.merge(&schema_b).unwrap();
2762        let merged_override = merged.keys.get("custom_field").unwrap();
2763
2764        assert!(merged_override.string.is_some());
2765        assert!(merged_override.float.is_some());
2766        assert!(
2767            merged_override
2768                .string
2769                .as_ref()
2770                .unwrap()
2771                .string_inverted_index
2772                .as_ref()
2773                .unwrap()
2774                .enabled
2775        );
2776        assert!(
2777            merged_override
2778                .float
2779                .as_ref()
2780                .unwrap()
2781                .float_inverted_index
2782                .as_ref()
2783                .unwrap()
2784                .enabled
2785        );
2786    }
2787
2788    #[test]
2789    fn test_add_rejects_different_defaults() {
2790        let schema_a = Schema::new_default(KnnIndex::Hnsw);
2791        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
2792
2793        if let Some(string_type) = schema_b.defaults.string.as_mut() {
2794            if let Some(string_index) = string_type.string_inverted_index.as_mut() {
2795                string_index.enabled = false;
2796            }
2797        }
2798
2799        let err = schema_a.merge(&schema_b).unwrap_err();
2800        match err {
2801            SchemaError::InvalidSchema { reason } => {
2802                assert_eq!(reason, "Cannot merge schemas with differing defaults")
2803            }
2804            _ => panic!("Expected InvalidSchema error"),
2805        }
2806    }
2807
2808    #[test]
2809    fn test_add_detects_conflicting_value_type_configuration() {
2810        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
2811        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
2812
2813        let string_override_enabled = ValueTypes {
2814            string: Some(StringValueType {
2815                string_inverted_index: Some(StringInvertedIndexType {
2816                    enabled: true,
2817                    config: StringInvertedIndexConfig {},
2818                }),
2819                fts_index: None,
2820            }),
2821            ..Default::default()
2822        };
2823        schema_a
2824            .keys
2825            .insert("custom_field".to_string(), string_override_enabled);
2826
2827        let string_override_disabled = ValueTypes {
2828            string: Some(StringValueType {
2829                string_inverted_index: Some(StringInvertedIndexType {
2830                    enabled: false,
2831                    config: StringInvertedIndexConfig {},
2832                }),
2833                fts_index: None,
2834            }),
2835            ..Default::default()
2836        };
2837        schema_b
2838            .keys
2839            .insert("custom_field".to_string(), string_override_disabled);
2840
2841        let err = schema_a.merge(&schema_b).unwrap_err();
2842        match err {
2843            SchemaError::InvalidSchema { reason } => {
2844                assert!(reason.contains("Conflicting configuration"));
2845            }
2846            _ => panic!("Expected InvalidSchema error"),
2847        }
2848    }
2849
2850    // TODO(Sanket): Remove this test once deployed
2851    #[test]
2852    fn test_backward_compatibility_aliases() {
2853        // Test that old format with # and $ prefixes and key_overrides can be deserialized
2854        let old_format_json = r###"{
2855            "defaults": {
2856                "#string": {
2857                    "$fts_index": {
2858                        "enabled": true,
2859                        "config": {}
2860                    }
2861                },
2862                "#int": {
2863                    "$int_inverted_index": {
2864                        "enabled": true,
2865                        "config": {}
2866                    }
2867                },
2868                "#float_list": {
2869                    "$vector_index": {
2870                        "enabled": true,
2871                        "config": {
2872                            "spann": {
2873                                "search_nprobe": 10
2874                            }
2875                        }
2876                    }
2877                }
2878            },
2879            "key_overrides": {
2880                "#document": {
2881                    "#string": {
2882                        "$fts_index": {
2883                            "enabled": false,
2884                            "config": {}
2885                        }
2886                    }
2887                }
2888            }
2889        }"###;
2890
2891        let schema_from_old: Schema = serde_json::from_str(old_format_json).unwrap();
2892
2893        // Test that new format without prefixes and keys can be deserialized
2894        let new_format_json = r###"{
2895            "defaults": {
2896                "string": {
2897                    "fts_index": {
2898                        "enabled": true,
2899                        "config": {}
2900                    }
2901                },
2902                "int": {
2903                    "int_inverted_index": {
2904                        "enabled": true,
2905                        "config": {}
2906                    }
2907                },
2908                "float_list": {
2909                    "vector_index": {
2910                        "enabled": true,
2911                        "config": {
2912                            "spann": {
2913                                "search_nprobe": 10
2914                            }
2915                        }
2916                    }
2917                }
2918            },
2919            "keys": {
2920                "#document": {
2921                    "string": {
2922                        "fts_index": {
2923                            "enabled": false,
2924                            "config": {}
2925                        }
2926                    }
2927                }
2928            }
2929        }"###;
2930
2931        let schema_from_new: Schema = serde_json::from_str(new_format_json).unwrap();
2932
2933        // Both should deserialize to the same structure
2934        assert_eq!(schema_from_old, schema_from_new);
2935
2936        // Verify the deserialized content is correct
2937        assert!(schema_from_old.defaults.string.is_some());
2938        assert!(schema_from_old
2939            .defaults
2940            .string
2941            .as_ref()
2942            .unwrap()
2943            .fts_index
2944            .is_some());
2945        assert!(
2946            schema_from_old
2947                .defaults
2948                .string
2949                .as_ref()
2950                .unwrap()
2951                .fts_index
2952                .as_ref()
2953                .unwrap()
2954                .enabled
2955        );
2956
2957        assert!(schema_from_old.defaults.int.is_some());
2958        assert!(schema_from_old
2959            .defaults
2960            .int
2961            .as_ref()
2962            .unwrap()
2963            .int_inverted_index
2964            .is_some());
2965
2966        assert!(schema_from_old.defaults.float_list.is_some());
2967        assert!(schema_from_old
2968            .defaults
2969            .float_list
2970            .as_ref()
2971            .unwrap()
2972            .vector_index
2973            .is_some());
2974
2975        assert!(schema_from_old.keys.contains_key(DOCUMENT_KEY));
2976        let doc_override = schema_from_old.keys.get(DOCUMENT_KEY).unwrap();
2977        assert!(doc_override.string.is_some());
2978        assert!(
2979            !doc_override
2980                .string
2981                .as_ref()
2982                .unwrap()
2983                .fts_index
2984                .as_ref()
2985                .unwrap()
2986                .enabled
2987        );
2988
2989        // Test that serialization always outputs the new format (without prefixes)
2990        let serialized = serde_json::to_string(&schema_from_old).unwrap();
2991
2992        // Should contain new format keys
2993        assert!(serialized.contains(r#""keys":"#));
2994        assert!(serialized.contains(r#""string":"#));
2995        assert!(serialized.contains(r#""fts_index":"#));
2996        assert!(serialized.contains(r#""int_inverted_index":"#));
2997        assert!(serialized.contains(r#""vector_index":"#));
2998
2999        // Should NOT contain old format keys
3000        assert!(!serialized.contains(r#""key_overrides":"#));
3001        assert!(!serialized.contains(r###""#string":"###));
3002        assert!(!serialized.contains(r###""$fts_index":"###));
3003        assert!(!serialized.contains(r###""$int_inverted_index":"###));
3004        assert!(!serialized.contains(r###""$vector_index":"###));
3005    }
3006
3007    #[test]
3008    fn test_hnsw_index_config_validation() {
3009        use validator::Validate;
3010
3011        // Valid configuration - should pass
3012        let valid_config = HnswIndexConfig {
3013            batch_size: Some(10),
3014            sync_threshold: Some(100),
3015            ef_construction: Some(100),
3016            max_neighbors: Some(16),
3017            ..Default::default()
3018        };
3019        assert!(valid_config.validate().is_ok());
3020
3021        // Invalid: batch_size too small (min 2)
3022        let invalid_batch_size = HnswIndexConfig {
3023            batch_size: Some(1),
3024            ..Default::default()
3025        };
3026        assert!(invalid_batch_size.validate().is_err());
3027
3028        // Invalid: sync_threshold too small (min 2)
3029        let invalid_sync_threshold = HnswIndexConfig {
3030            sync_threshold: Some(1),
3031            ..Default::default()
3032        };
3033        assert!(invalid_sync_threshold.validate().is_err());
3034
3035        // Valid: boundary values (exactly 2) should pass
3036        let boundary_config = HnswIndexConfig {
3037            batch_size: Some(2),
3038            sync_threshold: Some(2),
3039            ..Default::default()
3040        };
3041        assert!(boundary_config.validate().is_ok());
3042
3043        // Valid: None values should pass validation
3044        let all_none_config = HnswIndexConfig {
3045            ..Default::default()
3046        };
3047        assert!(all_none_config.validate().is_ok());
3048
3049        // Valid: fields without validation can be any value
3050        let other_fields_config = HnswIndexConfig {
3051            ef_construction: Some(1),
3052            max_neighbors: Some(1),
3053            ef_search: Some(1),
3054            num_threads: Some(1),
3055            resize_factor: Some(0.1),
3056            ..Default::default()
3057        };
3058        assert!(other_fields_config.validate().is_ok());
3059    }
3060
3061    #[test]
3062    fn test_spann_index_config_validation() {
3063        use validator::Validate;
3064
3065        // Valid configuration - should pass
3066        let valid_config = SpannIndexConfig {
3067            write_nprobe: Some(32),
3068            nreplica_count: Some(4),
3069            split_threshold: Some(100),
3070            merge_threshold: Some(50),
3071            reassign_neighbor_count: Some(32),
3072            num_centers_to_merge_to: Some(4),
3073            ef_construction: Some(100),
3074            ef_search: Some(100),
3075            max_neighbors: Some(32),
3076            search_rng_factor: Some(1.0),
3077            write_rng_factor: Some(1.0),
3078            search_rng_epsilon: Some(7.5),
3079            write_rng_epsilon: Some(7.5),
3080            ..Default::default()
3081        };
3082        assert!(valid_config.validate().is_ok());
3083
3084        // Invalid: write_nprobe too large (max 64)
3085        let invalid_write_nprobe = SpannIndexConfig {
3086            write_nprobe: Some(200),
3087            ..Default::default()
3088        };
3089        assert!(invalid_write_nprobe.validate().is_err());
3090
3091        // Invalid: split_threshold too small (min 50)
3092        let invalid_split_threshold = SpannIndexConfig {
3093            split_threshold: Some(10),
3094            ..Default::default()
3095        };
3096        assert!(invalid_split_threshold.validate().is_err());
3097
3098        // Invalid: split_threshold too large (max 200)
3099        let invalid_split_threshold_high = SpannIndexConfig {
3100            split_threshold: Some(250),
3101            ..Default::default()
3102        };
3103        assert!(invalid_split_threshold_high.validate().is_err());
3104
3105        // Invalid: nreplica_count too large (max 8)
3106        let invalid_nreplica = SpannIndexConfig {
3107            nreplica_count: Some(10),
3108            ..Default::default()
3109        };
3110        assert!(invalid_nreplica.validate().is_err());
3111
3112        // Invalid: reassign_neighbor_count too large (max 64)
3113        let invalid_reassign = SpannIndexConfig {
3114            reassign_neighbor_count: Some(100),
3115            ..Default::default()
3116        };
3117        assert!(invalid_reassign.validate().is_err());
3118
3119        // Invalid: merge_threshold out of range (min 25, max 100)
3120        let invalid_merge_threshold_low = SpannIndexConfig {
3121            merge_threshold: Some(5),
3122            ..Default::default()
3123        };
3124        assert!(invalid_merge_threshold_low.validate().is_err());
3125
3126        let invalid_merge_threshold_high = SpannIndexConfig {
3127            merge_threshold: Some(150),
3128            ..Default::default()
3129        };
3130        assert!(invalid_merge_threshold_high.validate().is_err());
3131
3132        // Invalid: num_centers_to_merge_to too large (max 8)
3133        let invalid_num_centers = SpannIndexConfig {
3134            num_centers_to_merge_to: Some(10),
3135            ..Default::default()
3136        };
3137        assert!(invalid_num_centers.validate().is_err());
3138
3139        // Invalid: ef_construction too large (max 200)
3140        let invalid_ef_construction = SpannIndexConfig {
3141            ef_construction: Some(300),
3142            ..Default::default()
3143        };
3144        assert!(invalid_ef_construction.validate().is_err());
3145
3146        // Invalid: ef_search too large (max 200)
3147        let invalid_ef_search = SpannIndexConfig {
3148            ef_search: Some(300),
3149            ..Default::default()
3150        };
3151        assert!(invalid_ef_search.validate().is_err());
3152
3153        // Invalid: max_neighbors too large (max 64)
3154        let invalid_max_neighbors = SpannIndexConfig {
3155            max_neighbors: Some(100),
3156            ..Default::default()
3157        };
3158        assert!(invalid_max_neighbors.validate().is_err());
3159
3160        // Invalid: search_nprobe too large (max 128)
3161        let invalid_search_nprobe = SpannIndexConfig {
3162            search_nprobe: Some(200),
3163            ..Default::default()
3164        };
3165        assert!(invalid_search_nprobe.validate().is_err());
3166
3167        // Invalid: search_rng_factor not exactly 1.0 (min 1.0, max 1.0)
3168        let invalid_search_rng_factor_low = SpannIndexConfig {
3169            search_rng_factor: Some(0.9),
3170            ..Default::default()
3171        };
3172        assert!(invalid_search_rng_factor_low.validate().is_err());
3173
3174        let invalid_search_rng_factor_high = SpannIndexConfig {
3175            search_rng_factor: Some(1.1),
3176            ..Default::default()
3177        };
3178        assert!(invalid_search_rng_factor_high.validate().is_err());
3179
3180        // Valid: search_rng_factor exactly 1.0
3181        let valid_search_rng_factor = SpannIndexConfig {
3182            search_rng_factor: Some(1.0),
3183            ..Default::default()
3184        };
3185        assert!(valid_search_rng_factor.validate().is_ok());
3186
3187        // Invalid: search_rng_epsilon out of range (min 5.0, max 10.0)
3188        let invalid_search_rng_epsilon_low = SpannIndexConfig {
3189            search_rng_epsilon: Some(4.0),
3190            ..Default::default()
3191        };
3192        assert!(invalid_search_rng_epsilon_low.validate().is_err());
3193
3194        let invalid_search_rng_epsilon_high = SpannIndexConfig {
3195            search_rng_epsilon: Some(11.0),
3196            ..Default::default()
3197        };
3198        assert!(invalid_search_rng_epsilon_high.validate().is_err());
3199
3200        // Valid: search_rng_epsilon within range
3201        let valid_search_rng_epsilon = SpannIndexConfig {
3202            search_rng_epsilon: Some(7.5),
3203            ..Default::default()
3204        };
3205        assert!(valid_search_rng_epsilon.validate().is_ok());
3206
3207        // Invalid: write_rng_factor not exactly 1.0 (min 1.0, max 1.0)
3208        let invalid_write_rng_factor_low = SpannIndexConfig {
3209            write_rng_factor: Some(0.9),
3210            ..Default::default()
3211        };
3212        assert!(invalid_write_rng_factor_low.validate().is_err());
3213
3214        let invalid_write_rng_factor_high = SpannIndexConfig {
3215            write_rng_factor: Some(1.1),
3216            ..Default::default()
3217        };
3218        assert!(invalid_write_rng_factor_high.validate().is_err());
3219
3220        // Valid: write_rng_factor exactly 1.0
3221        let valid_write_rng_factor = SpannIndexConfig {
3222            write_rng_factor: Some(1.0),
3223            ..Default::default()
3224        };
3225        assert!(valid_write_rng_factor.validate().is_ok());
3226
3227        // Invalid: write_rng_epsilon out of range (min 5.0, max 10.0)
3228        let invalid_write_rng_epsilon_low = SpannIndexConfig {
3229            write_rng_epsilon: Some(4.0),
3230            ..Default::default()
3231        };
3232        assert!(invalid_write_rng_epsilon_low.validate().is_err());
3233
3234        let invalid_write_rng_epsilon_high = SpannIndexConfig {
3235            write_rng_epsilon: Some(11.0),
3236            ..Default::default()
3237        };
3238        assert!(invalid_write_rng_epsilon_high.validate().is_err());
3239
3240        // Valid: write_rng_epsilon within range
3241        let valid_write_rng_epsilon = SpannIndexConfig {
3242            write_rng_epsilon: Some(7.5),
3243            ..Default::default()
3244        };
3245        assert!(valid_write_rng_epsilon.validate().is_ok());
3246
3247        // Invalid: num_samples_kmeans too large (max 1000)
3248        let invalid_num_samples_kmeans = SpannIndexConfig {
3249            num_samples_kmeans: Some(1500),
3250            ..Default::default()
3251        };
3252        assert!(invalid_num_samples_kmeans.validate().is_err());
3253
3254        // Valid: num_samples_kmeans within range
3255        let valid_num_samples_kmeans = SpannIndexConfig {
3256            num_samples_kmeans: Some(500),
3257            ..Default::default()
3258        };
3259        assert!(valid_num_samples_kmeans.validate().is_ok());
3260
3261        // Invalid: initial_lambda not exactly 100.0 (min 100.0, max 100.0)
3262        let invalid_initial_lambda_high = SpannIndexConfig {
3263            initial_lambda: Some(150.0),
3264            ..Default::default()
3265        };
3266        assert!(invalid_initial_lambda_high.validate().is_err());
3267
3268        let invalid_initial_lambda_low = SpannIndexConfig {
3269            initial_lambda: Some(50.0),
3270            ..Default::default()
3271        };
3272        assert!(invalid_initial_lambda_low.validate().is_err());
3273
3274        // Valid: initial_lambda exactly 100.0
3275        let valid_initial_lambda = SpannIndexConfig {
3276            initial_lambda: Some(100.0),
3277            ..Default::default()
3278        };
3279        assert!(valid_initial_lambda.validate().is_ok());
3280
3281        // Valid: None values should pass validation
3282        let all_none_config = SpannIndexConfig {
3283            ..Default::default()
3284        };
3285        assert!(all_none_config.validate().is_ok());
3286    }
3287}