chroma_types/
collection_schema.rs

1use chroma_error::{ChromaError, ErrorCodes};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use thiserror::Error;
5use validator::Validate;
6
7use crate::collection_configuration::{
8    EmbeddingFunctionConfiguration, InternalCollectionConfiguration, VectorIndexConfiguration,
9};
10use crate::hnsw_configuration::Space;
11use crate::metadata::{MetadataComparison, MetadataValueType, Where};
12use crate::operator::QueryVector;
13use crate::{
14    default_batch_size, default_construction_ef, default_construction_ef_spann,
15    default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
16    default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
17    default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
18    default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
19    default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
20    default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor,
21    InternalSpannConfiguration, KnnIndex,
22};
23
24impl ChromaError for SchemaError {
25    fn code(&self) -> ErrorCodes {
26        ErrorCodes::Internal
27    }
28}
29
/// Errors produced while interpreting or combining collection schemas.
#[derive(Debug, Error)]
pub enum SchemaError {
    /// The schema has no index configuration for `key` under the value type
    /// named by `value_type`.
    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
    MissingIndexConfiguration { key: String, value_type: String },
    /// Two schemas (or two overrides for the same key) could not be combined;
    /// `reason` carries the human-readable explanation.
    #[error("Schema reconciliation failed: {reason}")]
    InvalidSchema { reason: String },
}
37
/// Errors produced when validating a metadata filter against a schema.
#[derive(Debug, Error)]
pub enum FilterValidationError {
    /// The filter targets `key` with a value of type `value_type`, but the
    /// corresponding index is disabled.
    #[error(
        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
    )]
    IndexingDisabled {
        key: String,
        value_type: MetadataValueType,
    },
    /// A schema-level failure; displayed exactly as the wrapped error and
    /// convertible from [`SchemaError`] via `?`.
    #[error(transparent)]
    Schema(#[from] SchemaError),
}
50
51impl ChromaError for FilterValidationError {
52    fn code(&self) -> ErrorCodes {
53        match self {
54            FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
55            FilterValidationError::Schema(_) => ErrorCodes::Internal,
56        }
57    }
58}
59
60// ============================================================================
61// SCHEMA CONSTANTS
62// ============================================================================
// These constants must match the Python constants in chromadb/api/types.py

// Value type name constants
// (these are the same strings used as serde field names on `ValueTypes`)
pub const STRING_VALUE_NAME: &str = "string";
pub const INT_VALUE_NAME: &str = "int";
pub const BOOL_VALUE_NAME: &str = "bool";
pub const FLOAT_VALUE_NAME: &str = "float";
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// Index type name constants
// (these are the same strings used as serde field names on the per-value-type structs)
pub const FTS_INDEX_NAME: &str = "fts_index";
pub const VECTOR_INDEX_NAME: &str = "vector_index";
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

// Special metadata keys - must match Python constants in chromadb/api/types.py
/// Reserved schema key under which the default schema stores the document
/// overrides (FTS index on, string inverted index off).
pub const DOCUMENT_KEY: &str = "#document";
/// Reserved schema key under which the default schema enables the vector index.
pub const EMBEDDING_KEY: &str = "#embedding";
85
86// ============================================================================
87// SCHEMA STRUCTURES
88// ============================================================================
89
/// Schema representation for collection index configurations
/// This represents the server-side schema structure used for index management
///
/// A schema is a set of per-value-type defaults plus a map of per-key
/// overrides (for example the reserved `#document` and `#embedding` keys).

#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct Schema {
    /// Default index configurations for each value type
    pub defaults: ValueTypes,
    /// Key-specific index overrides.
    ///
    /// Serialized as `keys`; `key_overrides` is also accepted when deserializing.
    // TODO(Sanket): Needed for backwards compatibility. Should remove after deploy.
    // NOTE(review): the TODO presumably refers to the `key_overrides` alias - confirm.
    #[serde(rename = "keys", alias = "key_overrides")]
    pub keys: HashMap<String, ValueTypes>,
}
103
104pub fn is_embedding_function_default(
105    embedding_function: &Option<EmbeddingFunctionConfiguration>,
106) -> bool {
107    match embedding_function {
108        None => true,
109        Some(embedding_function) => embedding_function.is_default(),
110    }
111}
112
113/// Check if space is default (None means default, or if present, should be default space)
114pub fn is_space_default(space: &Option<Space>) -> bool {
115    match space {
116        None => true,                     // None means default
117        Some(s) => *s == default_space(), // If present, check if it's the default space
118    }
119}
120
121/// Check if HNSW config is default
122pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
123    hnsw_config.ef_construction == Some(default_construction_ef())
124        && hnsw_config.ef_search == Some(default_search_ef())
125        && hnsw_config.max_neighbors == Some(default_m())
126        && hnsw_config.num_threads == Some(default_num_threads())
127        && hnsw_config.batch_size == Some(default_batch_size())
128        && hnsw_config.sync_threshold == Some(default_sync_threshold())
129        && hnsw_config.resize_factor == Some(default_resize_factor())
130}
131
132// ============================================================================
133// NEW STRONGLY-TYPED SCHEMA STRUCTURES
134// ============================================================================
135
/// Strongly-typed value type configurations
/// Contains optional configurations for each supported value type
///
/// Every field is optional and is omitted from serialized output when `None`.
/// On deserialization each field also accepts a `#`-prefixed alias
/// (e.g. `#string` for `string`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct ValueTypes {
    /// Index settings for string values.
    #[serde(
        rename = "string",
        alias = "#string",
        skip_serializing_if = "Option::is_none"
    )] // STRING_VALUE_NAME
    pub string: Option<StringValueType>,

    /// Index settings for float-list values (the vector index lives here).
    #[serde(
        rename = "float_list",
        alias = "#float_list",
        skip_serializing_if = "Option::is_none"
    )]
    // FLOAT_LIST_VALUE_NAME
    pub float_list: Option<FloatListValueType>,

    /// Index settings for sparse-vector values.
    #[serde(
        rename = "sparse_vector",
        alias = "#sparse_vector",
        skip_serializing_if = "Option::is_none"
    )]
    // SPARSE_VECTOR_VALUE_NAME
    pub sparse_vector: Option<SparseVectorValueType>,

    /// Index settings for integer values.
    #[serde(
        rename = "int",
        alias = "#int",
        skip_serializing_if = "Option::is_none"
    )] // INT_VALUE_NAME
    pub int: Option<IntValueType>,

    /// Index settings for float values.
    #[serde(
        rename = "float",
        alias = "#float",
        skip_serializing_if = "Option::is_none"
    )] // FLOAT_VALUE_NAME
    pub float: Option<FloatValueType>,

    /// Index settings for boolean values. Note the Rust field is `boolean`
    /// while the serialized name is `bool`.
    #[serde(
        rename = "bool",
        alias = "#bool",
        skip_serializing_if = "Option::is_none"
    )] // BOOL_VALUE_NAME
    pub boolean: Option<BoolValueType>,
}
185
/// String value type index configurations
///
/// Both indexes are optional and omitted from serialized output when `None`;
/// `$`-prefixed field names are accepted as aliases when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringValueType {
    /// FTS index settings for this string value.
    #[serde(
        rename = "fts_index",
        alias = "$fts_index",
        skip_serializing_if = "Option::is_none"
    )] // FTS_INDEX_NAME
    pub fts_index: Option<FtsIndexType>,

    /// Inverted index settings for this string value.
    #[serde(
        rename = "string_inverted_index", // STRING_INVERTED_INDEX_NAME
        alias = "$string_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub string_inverted_index: Option<StringInvertedIndexType>,
}
204
/// Float list value type index configurations (for vectors)
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatListValueType {
    /// Vector index settings; omitted from serialized output when `None`,
    /// and also readable from the `$vector_index` alias.
    #[serde(
        rename = "vector_index",
        alias = "$vector_index",
        skip_serializing_if = "Option::is_none"
    )] // VECTOR_INDEX_NAME
    pub vector_index: Option<VectorIndexType>,
}
216
/// Sparse vector value type index configurations
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorValueType {
    /// Sparse vector index settings; omitted from serialized output when
    /// `None`, and also readable from the `$sparse_vector_index` alias.
    #[serde(
        rename = "sparse_vector_index", // SPARSE_VECTOR_INDEX_NAME
        alias = "$sparse_vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector_index: Option<SparseVectorIndexType>,
}
228
/// Integer value type index configurations
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntValueType {
    /// Integer inverted index settings; omitted from serialized output when
    /// `None`, and also readable from the `$int_inverted_index` alias.
    #[serde(
        rename = "int_inverted_index",
        alias = "$int_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    // INT_INVERTED_INDEX_NAME
    pub int_inverted_index: Option<IntInvertedIndexType>,
}
241
/// Float value type index configurations
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatValueType {
    /// Float inverted index settings; omitted from serialized output when
    /// `None`, and also readable from the `$float_inverted_index` alias.
    #[serde(
        rename = "float_inverted_index", // FLOAT_INVERTED_INDEX_NAME
        alias = "$float_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_inverted_index: Option<FloatInvertedIndexType>,
}
253
/// Boolean value type index configurations
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolValueType {
    /// Boolean inverted index settings; omitted from serialized output when
    /// `None`, and also readable from the `$bool_inverted_index` alias.
    #[serde(
        rename = "bool_inverted_index", // BOOL_INVERTED_INDEX_NAME
        alias = "$bool_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub bool_inverted_index: Option<BoolInvertedIndexType>,
}
265
// Individual index type structs with enabled status and config
/// FTS index entry: an on/off switch paired with its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FtsIndexType {
    /// Whether the index is turned on.
    pub enabled: bool,
    /// Index-specific settings.
    pub config: FtsIndexConfig,
}
273
/// Vector index entry: an on/off switch paired with its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct VectorIndexType {
    /// Whether the index is turned on.
    pub enabled: bool,
    /// Index-specific settings (space, embedding function, source key, HNSW/SPANN parameters).
    pub config: VectorIndexConfig,
}
280
/// Sparse vector index entry: an on/off switch paired with its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorIndexType {
    /// Whether the index is turned on.
    pub enabled: bool,
    /// Index-specific settings.
    pub config: SparseVectorIndexConfig,
}
287
/// String inverted index entry: an on/off switch paired with its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringInvertedIndexType {
    /// Whether the index is turned on.
    pub enabled: bool,
    /// Index-specific settings.
    pub config: StringInvertedIndexConfig,
}
294
/// Integer inverted index entry: an on/off switch paired with its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntInvertedIndexType {
    /// Whether the index is turned on.
    pub enabled: bool,
    /// Index-specific settings.
    pub config: IntInvertedIndexConfig,
}
301
/// Float inverted index entry: an on/off switch paired with its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatInvertedIndexType {
    /// Whether the index is turned on.
    pub enabled: bool,
    /// Index-specific settings.
    pub config: FloatInvertedIndexConfig,
}
308
/// Boolean inverted index entry: an on/off switch paired with its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolInvertedIndexType {
    /// Whether the index is turned on.
    pub enabled: bool,
    /// Index-specific settings.
    pub config: BoolInvertedIndexConfig,
}
315
316impl Schema {
317    /// Create a new Schema with strongly-typed default configurations
318    pub fn new_default(default_knn_index: KnnIndex) -> Self {
319        // Vector index disabled on all keys except #embedding.
320        let vector_config = VectorIndexType {
321            enabled: false,
322            config: VectorIndexConfig {
323                space: Some(default_space()),
324                embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
325                source_key: None,
326                hnsw: match default_knn_index {
327                    KnnIndex::Hnsw => Some(HnswIndexConfig {
328                        ef_construction: Some(default_construction_ef()),
329                        max_neighbors: Some(default_m()),
330                        ef_search: Some(default_search_ef()),
331                        num_threads: Some(default_num_threads()),
332                        batch_size: Some(default_batch_size()),
333                        sync_threshold: Some(default_sync_threshold()),
334                        resize_factor: Some(default_resize_factor()),
335                    }),
336                    KnnIndex::Spann => None,
337                },
338                spann: match default_knn_index {
339                    KnnIndex::Hnsw => None,
340                    KnnIndex::Spann => Some(SpannIndexConfig {
341                        search_nprobe: Some(default_search_nprobe()),
342                        search_rng_factor: Some(default_search_rng_factor()),
343                        search_rng_epsilon: Some(default_search_rng_epsilon()),
344                        nreplica_count: Some(default_nreplica_count()),
345                        write_rng_factor: Some(default_write_rng_factor()),
346                        write_rng_epsilon: Some(default_write_rng_epsilon()),
347                        split_threshold: Some(default_split_threshold()),
348                        num_samples_kmeans: Some(default_num_samples_kmeans()),
349                        initial_lambda: Some(default_initial_lambda()),
350                        reassign_neighbor_count: Some(default_reassign_neighbor_count()),
351                        merge_threshold: Some(default_merge_threshold()),
352                        num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
353                        write_nprobe: Some(default_write_nprobe()),
354                        ef_construction: Some(default_construction_ef_spann()),
355                        ef_search: Some(default_search_ef_spann()),
356                        max_neighbors: Some(default_m_spann()),
357                    }),
358                },
359            },
360        };
361
362        // Initialize defaults struct directly instead of using Default::default() + field assignments
363        let defaults = ValueTypes {
364            string: Some(StringValueType {
365                string_inverted_index: Some(StringInvertedIndexType {
366                    enabled: true,
367                    config: StringInvertedIndexConfig {},
368                }),
369                fts_index: Some(FtsIndexType {
370                    enabled: false,
371                    config: FtsIndexConfig {},
372                }),
373            }),
374            float: Some(FloatValueType {
375                float_inverted_index: Some(FloatInvertedIndexType {
376                    enabled: true,
377                    config: FloatInvertedIndexConfig {},
378                }),
379            }),
380            int: Some(IntValueType {
381                int_inverted_index: Some(IntInvertedIndexType {
382                    enabled: true,
383                    config: IntInvertedIndexConfig {},
384                }),
385            }),
386            boolean: Some(BoolValueType {
387                bool_inverted_index: Some(BoolInvertedIndexType {
388                    enabled: true,
389                    config: BoolInvertedIndexConfig {},
390                }),
391            }),
392            float_list: Some(FloatListValueType {
393                vector_index: Some(vector_config),
394            }),
395            sparse_vector: Some(SparseVectorValueType {
396                sparse_vector_index: Some(SparseVectorIndexType {
397                    enabled: false,
398                    config: SparseVectorIndexConfig {
399                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
400                        source_key: None,
401                        bm25: Some(false),
402                    },
403                }),
404            }),
405        };
406
407        // Set up key overrides
408        let mut keys = HashMap::new();
409
410        // Enable vector index for #embedding.
411        let embedding_defaults = ValueTypes {
412            float_list: Some(FloatListValueType {
413                vector_index: Some(VectorIndexType {
414                    enabled: true,
415                    config: VectorIndexConfig {
416                        space: Some(default_space()),
417                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
418                        source_key: Some(DOCUMENT_KEY.to_string()),
419                        hnsw: match default_knn_index {
420                            KnnIndex::Hnsw => Some(HnswIndexConfig {
421                                ef_construction: Some(default_construction_ef()),
422                                max_neighbors: Some(default_m()),
423                                ef_search: Some(default_search_ef()),
424                                num_threads: Some(default_num_threads()),
425                                batch_size: Some(default_batch_size()),
426                                sync_threshold: Some(default_sync_threshold()),
427                                resize_factor: Some(default_resize_factor()),
428                            }),
429                            KnnIndex::Spann => None,
430                        },
431                        spann: match default_knn_index {
432                            KnnIndex::Hnsw => None,
433                            KnnIndex::Spann => Some(SpannIndexConfig {
434                                search_nprobe: Some(default_search_nprobe()),
435                                search_rng_factor: Some(default_search_rng_factor()),
436                                search_rng_epsilon: Some(default_search_rng_epsilon()),
437                                nreplica_count: Some(default_nreplica_count()),
438                                write_rng_factor: Some(default_write_rng_factor()),
439                                write_rng_epsilon: Some(default_write_rng_epsilon()),
440                                split_threshold: Some(default_split_threshold()),
441                                num_samples_kmeans: Some(default_num_samples_kmeans()),
442                                initial_lambda: Some(default_initial_lambda()),
443                                reassign_neighbor_count: Some(default_reassign_neighbor_count()),
444                                merge_threshold: Some(default_merge_threshold()),
445                                num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
446                                write_nprobe: Some(default_write_nprobe()),
447                                ef_construction: Some(default_construction_ef_spann()),
448                                ef_search: Some(default_search_ef_spann()),
449                                max_neighbors: Some(default_m_spann()),
450                            }),
451                        },
452                    },
453                }),
454            }),
455            ..Default::default()
456        };
457        keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
458
459        // Document defaults - initialize directly instead of Default::default() + field assignment
460        let document_defaults = ValueTypes {
461            string: Some(StringValueType {
462                fts_index: Some(FtsIndexType {
463                    enabled: true,
464                    config: FtsIndexConfig {},
465                }),
466                string_inverted_index: Some(StringInvertedIndexType {
467                    enabled: false,
468                    config: StringInvertedIndexConfig {},
469                }),
470            }),
471            ..Default::default()
472        };
473        keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
474
475        Schema { defaults, keys }
476    }
477
478    pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
479        let to_internal = |vector_index: &VectorIndexType| {
480            let space = vector_index.config.space.clone();
481            vector_index
482                .config
483                .spann
484                .clone()
485                .map(|config| (space.as_ref(), &config).into())
486        };
487
488        self.keys
489            .get(EMBEDDING_KEY)
490            .and_then(|value_types| value_types.float_list.as_ref())
491            .and_then(|float_list| float_list.vector_index.as_ref())
492            .and_then(to_internal)
493            .or_else(|| {
494                self.defaults
495                    .float_list
496                    .as_ref()
497                    .and_then(|float_list| float_list.vector_index.as_ref())
498                    .and_then(to_internal)
499            })
500    }
501
502    /// Reconcile user-provided schema with system defaults
503    ///
504    /// This method merges user configurations with system defaults, ensuring that:
505    /// - User overrides take precedence over defaults
506    /// - Missing user configurations fall back to system defaults
507    /// - Field-level merging for complex configurations (Vector, HNSW, SPANN, etc.)
508    pub fn reconcile_with_defaults(user_schema: Option<Schema>) -> Result<Self, String> {
509        let default_schema = Schema::new_default(KnnIndex::Spann);
510
511        match user_schema {
512            Some(user) => {
513                // Merge defaults with user overrides
514                let merged_defaults =
515                    Self::merge_value_types(&default_schema.defaults, &user.defaults)?;
516
517                // Merge key overrides
518                let mut merged_keys = default_schema.keys.clone();
519                for (key, user_value_types) in user.keys {
520                    if let Some(default_value_types) = merged_keys.get(&key) {
521                        // Merge with existing default key override
522                        let merged_value_types =
523                            Self::merge_value_types(default_value_types, &user_value_types)?;
524                        merged_keys.insert(key, merged_value_types);
525                    } else {
526                        // New key override from user
527                        merged_keys.insert(key, user_value_types);
528                    }
529                }
530
531                Ok(Schema {
532                    defaults: merged_defaults,
533                    keys: merged_keys,
534                })
535            }
536            None => Ok(default_schema),
537        }
538    }
539
540    /// Merge two schemas together, combining key overrides when possible.
541    pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
542        if self.defaults != other.defaults {
543            return Err(SchemaError::InvalidSchema {
544                reason: "Cannot merge schemas with differing defaults".to_string(),
545            });
546        }
547
548        let mut keys = self.keys.clone();
549
550        for (key, other_value_types) in &other.keys {
551            if let Some(existing) = keys.get(key).cloned() {
552                let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
553                keys.insert(key.clone(), merged);
554            } else {
555                keys.insert(key.clone(), other_value_types.clone());
556            }
557        }
558
559        Ok(Schema {
560            defaults: self.defaults.clone(),
561            keys,
562        })
563    }
564
565    fn merge_override_value_types(
566        key: &str,
567        left: &ValueTypes,
568        right: &ValueTypes,
569    ) -> Result<ValueTypes, SchemaError> {
570        Ok(ValueTypes {
571            string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
572            float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
573            int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
574            boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
575            float_list: Self::merge_float_list_override(
576                key,
577                left.float_list.as_ref(),
578                right.float_list.as_ref(),
579            )?,
580            sparse_vector: Self::merge_sparse_vector_override(
581                key,
582                left.sparse_vector.as_ref(),
583                right.sparse_vector.as_ref(),
584            )?,
585        })
586    }
587
588    fn merge_string_override(
589        key: &str,
590        left: Option<&StringValueType>,
591        right: Option<&StringValueType>,
592    ) -> Result<Option<StringValueType>, SchemaError> {
593        match (left, right) {
594            (Some(l), Some(r)) => Ok(Some(StringValueType {
595                string_inverted_index: Self::merge_index_or_error(
596                    l.string_inverted_index.as_ref(),
597                    r.string_inverted_index.as_ref(),
598                    &format!("key '{key}' string.string_inverted_index"),
599                )?,
600                fts_index: Self::merge_index_or_error(
601                    l.fts_index.as_ref(),
602                    r.fts_index.as_ref(),
603                    &format!("key '{key}' string.fts_index"),
604                )?,
605            })),
606            (Some(l), None) => Ok(Some(l.clone())),
607            (None, Some(r)) => Ok(Some(r.clone())),
608            (None, None) => Ok(None),
609        }
610    }
611
612    fn merge_float_override(
613        key: &str,
614        left: Option<&FloatValueType>,
615        right: Option<&FloatValueType>,
616    ) -> Result<Option<FloatValueType>, SchemaError> {
617        match (left, right) {
618            (Some(l), Some(r)) => Ok(Some(FloatValueType {
619                float_inverted_index: Self::merge_index_or_error(
620                    l.float_inverted_index.as_ref(),
621                    r.float_inverted_index.as_ref(),
622                    &format!("key '{key}' float.float_inverted_index"),
623                )?,
624            })),
625            (Some(l), None) => Ok(Some(l.clone())),
626            (None, Some(r)) => Ok(Some(r.clone())),
627            (None, None) => Ok(None),
628        }
629    }
630
631    fn merge_int_override(
632        key: &str,
633        left: Option<&IntValueType>,
634        right: Option<&IntValueType>,
635    ) -> Result<Option<IntValueType>, SchemaError> {
636        match (left, right) {
637            (Some(l), Some(r)) => Ok(Some(IntValueType {
638                int_inverted_index: Self::merge_index_or_error(
639                    l.int_inverted_index.as_ref(),
640                    r.int_inverted_index.as_ref(),
641                    &format!("key '{key}' int.int_inverted_index"),
642                )?,
643            })),
644            (Some(l), None) => Ok(Some(l.clone())),
645            (None, Some(r)) => Ok(Some(r.clone())),
646            (None, None) => Ok(None),
647        }
648    }
649
650    fn merge_bool_override(
651        key: &str,
652        left: Option<&BoolValueType>,
653        right: Option<&BoolValueType>,
654    ) -> Result<Option<BoolValueType>, SchemaError> {
655        match (left, right) {
656            (Some(l), Some(r)) => Ok(Some(BoolValueType {
657                bool_inverted_index: Self::merge_index_or_error(
658                    l.bool_inverted_index.as_ref(),
659                    r.bool_inverted_index.as_ref(),
660                    &format!("key '{key}' bool.bool_inverted_index"),
661                )?,
662            })),
663            (Some(l), None) => Ok(Some(l.clone())),
664            (None, Some(r)) => Ok(Some(r.clone())),
665            (None, None) => Ok(None),
666        }
667    }
668
669    fn merge_float_list_override(
670        key: &str,
671        left: Option<&FloatListValueType>,
672        right: Option<&FloatListValueType>,
673    ) -> Result<Option<FloatListValueType>, SchemaError> {
674        match (left, right) {
675            (Some(l), Some(r)) => Ok(Some(FloatListValueType {
676                vector_index: Self::merge_index_or_error(
677                    l.vector_index.as_ref(),
678                    r.vector_index.as_ref(),
679                    &format!("key '{key}' float_list.vector_index"),
680                )?,
681            })),
682            (Some(l), None) => Ok(Some(l.clone())),
683            (None, Some(r)) => Ok(Some(r.clone())),
684            (None, None) => Ok(None),
685        }
686    }
687
688    fn merge_sparse_vector_override(
689        key: &str,
690        left: Option<&SparseVectorValueType>,
691        right: Option<&SparseVectorValueType>,
692    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
693        match (left, right) {
694            (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
695                sparse_vector_index: Self::merge_index_or_error(
696                    l.sparse_vector_index.as_ref(),
697                    r.sparse_vector_index.as_ref(),
698                    &format!("key '{key}' sparse_vector.sparse_vector_index"),
699                )?,
700            })),
701            (Some(l), None) => Ok(Some(l.clone())),
702            (None, Some(r)) => Ok(Some(r.clone())),
703            (None, None) => Ok(None),
704        }
705    }
706
707    fn merge_index_or_error<T: Clone + PartialEq>(
708        left: Option<&T>,
709        right: Option<&T>,
710        context: &str,
711    ) -> Result<Option<T>, SchemaError> {
712        match (left, right) {
713            (Some(l), Some(r)) => {
714                if l == r {
715                    Ok(Some(l.clone()))
716                } else {
717                    Err(SchemaError::InvalidSchema {
718                        reason: format!("Conflicting configuration for {context}"),
719                    })
720                }
721            }
722            (Some(l), None) => Ok(Some(l.clone())),
723            (None, Some(r)) => Ok(Some(r.clone())),
724            (None, None) => Ok(None),
725        }
726    }
727
728    /// Merge two ValueTypes with field-level merging
729    /// User values take precedence over default values
730    fn merge_value_types(default: &ValueTypes, user: &ValueTypes) -> Result<ValueTypes, String> {
731        // Merge float_list first
732        let float_list =
733            Self::merge_float_list_type(default.float_list.as_ref(), user.float_list.as_ref());
734
735        // Validate the merged float_list (covers all merge cases)
736        if let Some(ref fl) = float_list {
737            Self::validate_float_list_value_type(fl)?;
738        }
739
740        Ok(ValueTypes {
741            string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
742            float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
743            int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
744            boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
745            float_list,
746            sparse_vector: Self::merge_sparse_vector_type(
747                default.sparse_vector.as_ref(),
748                user.sparse_vector.as_ref(),
749            )?,
750        })
751    }
752
753    /// Merge StringValueType configurations
754    fn merge_string_type(
755        default: Option<&StringValueType>,
756        user: Option<&StringValueType>,
757    ) -> Result<Option<StringValueType>, String> {
758        match (default, user) {
759            (Some(default), Some(user)) => Ok(Some(StringValueType {
760                string_inverted_index: Self::merge_string_inverted_index_type(
761                    default.string_inverted_index.as_ref(),
762                    user.string_inverted_index.as_ref(),
763                )?,
764                fts_index: Self::merge_fts_index_type(
765                    default.fts_index.as_ref(),
766                    user.fts_index.as_ref(),
767                )?,
768            })),
769            (Some(default), None) => Ok(Some(default.clone())),
770            (None, Some(user)) => Ok(Some(user.clone())),
771            (None, None) => Ok(None),
772        }
773    }
774
775    /// Merge FloatValueType configurations
776    fn merge_float_type(
777        default: Option<&FloatValueType>,
778        user: Option<&FloatValueType>,
779    ) -> Result<Option<FloatValueType>, String> {
780        match (default, user) {
781            (Some(default), Some(user)) => Ok(Some(FloatValueType {
782                float_inverted_index: Self::merge_float_inverted_index_type(
783                    default.float_inverted_index.as_ref(),
784                    user.float_inverted_index.as_ref(),
785                )?,
786            })),
787            (Some(default), None) => Ok(Some(default.clone())),
788            (None, Some(user)) => Ok(Some(user.clone())),
789            (None, None) => Ok(None),
790        }
791    }
792
793    /// Merge IntValueType configurations
794    fn merge_int_type(
795        default: Option<&IntValueType>,
796        user: Option<&IntValueType>,
797    ) -> Result<Option<IntValueType>, String> {
798        match (default, user) {
799            (Some(default), Some(user)) => Ok(Some(IntValueType {
800                int_inverted_index: Self::merge_int_inverted_index_type(
801                    default.int_inverted_index.as_ref(),
802                    user.int_inverted_index.as_ref(),
803                )?,
804            })),
805            (Some(default), None) => Ok(Some(default.clone())),
806            (None, Some(user)) => Ok(Some(user.clone())),
807            (None, None) => Ok(None),
808        }
809    }
810
811    /// Merge BoolValueType configurations
812    fn merge_bool_type(
813        default: Option<&BoolValueType>,
814        user: Option<&BoolValueType>,
815    ) -> Result<Option<BoolValueType>, String> {
816        match (default, user) {
817            (Some(default), Some(user)) => Ok(Some(BoolValueType {
818                bool_inverted_index: Self::merge_bool_inverted_index_type(
819                    default.bool_inverted_index.as_ref(),
820                    user.bool_inverted_index.as_ref(),
821                )?,
822            })),
823            (Some(default), None) => Ok(Some(default.clone())),
824            (None, Some(user)) => Ok(Some(user.clone())),
825            (None, None) => Ok(None),
826        }
827    }
828
829    /// Merge FloatListValueType configurations
830    fn merge_float_list_type(
831        default: Option<&FloatListValueType>,
832        user: Option<&FloatListValueType>,
833    ) -> Option<FloatListValueType> {
834        match (default, user) {
835            (Some(default), Some(user)) => Some(FloatListValueType {
836                vector_index: Self::merge_vector_index_type(
837                    default.vector_index.as_ref(),
838                    user.vector_index.as_ref(),
839                ),
840            }),
841            (Some(default), None) => Some(default.clone()),
842            (None, Some(user)) => Some(user.clone()),
843            (None, None) => None,
844        }
845    }
846
847    /// Merge SparseVectorValueType configurations
848    fn merge_sparse_vector_type(
849        default: Option<&SparseVectorValueType>,
850        user: Option<&SparseVectorValueType>,
851    ) -> Result<Option<SparseVectorValueType>, String> {
852        match (default, user) {
853            (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
854                sparse_vector_index: Self::merge_sparse_vector_index_type(
855                    default.sparse_vector_index.as_ref(),
856                    user.sparse_vector_index.as_ref(),
857                )?,
858            })),
859            (Some(default), None) => Ok(Some(default.clone())),
860            (None, Some(user)) => Ok(Some(user.clone())),
861            (None, None) => Ok(None),
862        }
863    }
864
865    /// Merge individual index type configurations
866    fn merge_string_inverted_index_type(
867        default: Option<&StringInvertedIndexType>,
868        user: Option<&StringInvertedIndexType>,
869    ) -> Result<Option<StringInvertedIndexType>, String> {
870        match (default, user) {
871            (Some(_default), Some(user)) => {
872                Ok(Some(StringInvertedIndexType {
873                    enabled: user.enabled,       // User enabled state takes precedence
874                    config: user.config.clone(), // User config takes precedence
875                }))
876            }
877            (Some(default), None) => Ok(Some(default.clone())),
878            (None, Some(user)) => Ok(Some(user.clone())),
879            (None, None) => Ok(None),
880        }
881    }
882
883    fn merge_fts_index_type(
884        default: Option<&FtsIndexType>,
885        user: Option<&FtsIndexType>,
886    ) -> Result<Option<FtsIndexType>, String> {
887        match (default, user) {
888            (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
889                enabled: user.enabled,
890                config: user.config.clone(),
891            })),
892            (Some(default), None) => Ok(Some(default.clone())),
893            (None, Some(user)) => Ok(Some(user.clone())),
894            (None, None) => Ok(None),
895        }
896    }
897
898    fn merge_float_inverted_index_type(
899        default: Option<&FloatInvertedIndexType>,
900        user: Option<&FloatInvertedIndexType>,
901    ) -> Result<Option<FloatInvertedIndexType>, String> {
902        match (default, user) {
903            (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
904                enabled: user.enabled,
905                config: user.config.clone(),
906            })),
907            (Some(default), None) => Ok(Some(default.clone())),
908            (None, Some(user)) => Ok(Some(user.clone())),
909            (None, None) => Ok(None),
910        }
911    }
912
913    fn merge_int_inverted_index_type(
914        default: Option<&IntInvertedIndexType>,
915        user: Option<&IntInvertedIndexType>,
916    ) -> Result<Option<IntInvertedIndexType>, String> {
917        match (default, user) {
918            (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
919                enabled: user.enabled,
920                config: user.config.clone(),
921            })),
922            (Some(default), None) => Ok(Some(default.clone())),
923            (None, Some(user)) => Ok(Some(user.clone())),
924            (None, None) => Ok(None),
925        }
926    }
927
928    fn merge_bool_inverted_index_type(
929        default: Option<&BoolInvertedIndexType>,
930        user: Option<&BoolInvertedIndexType>,
931    ) -> Result<Option<BoolInvertedIndexType>, String> {
932        match (default, user) {
933            (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
934                enabled: user.enabled,
935                config: user.config.clone(),
936            })),
937            (Some(default), None) => Ok(Some(default.clone())),
938            (None, Some(user)) => Ok(Some(user.clone())),
939            (None, None) => Ok(None),
940        }
941    }
942
943    fn merge_vector_index_type(
944        default: Option<&VectorIndexType>,
945        user: Option<&VectorIndexType>,
946    ) -> Option<VectorIndexType> {
947        match (default, user) {
948            (Some(default), Some(user)) => Some(VectorIndexType {
949                enabled: user.enabled,
950                config: Self::merge_vector_index_config(&default.config, &user.config),
951            }),
952            (Some(default), None) => Some(default.clone()),
953            (None, Some(user)) => Some(user.clone()),
954            (None, None) => None,
955        }
956    }
957
958    fn merge_sparse_vector_index_type(
959        default: Option<&SparseVectorIndexType>,
960        user: Option<&SparseVectorIndexType>,
961    ) -> Result<Option<SparseVectorIndexType>, String> {
962        match (default, user) {
963            (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
964                enabled: user.enabled,
965                config: Self::merge_sparse_vector_index_config(&default.config, &user.config)?,
966            })),
967            (Some(default), None) => Ok(Some(default.clone())),
968            (None, Some(user)) => Ok(Some(user.clone())),
969            (None, None) => Ok(None),
970        }
971    }
972
973    /// Validate FloatListValueType vector index configurations
974    /// This validates HNSW and SPANN configs within the merged float_list
975    fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), String> {
976        if let Some(vector_index) = &float_list.vector_index {
977            if let Some(hnsw) = &vector_index.config.hnsw {
978                hnsw.validate()
979                    .map_err(|e| format!("Invalid HNSW configuration: {}", e))?;
980            }
981            if let Some(spann) = &vector_index.config.spann {
982                spann
983                    .validate()
984                    .map_err(|e| format!("Invalid SPANN configuration: {}", e))?;
985            }
986        }
987        Ok(())
988    }
989
990    /// Merge VectorIndexConfig with field-level merging
991    fn merge_vector_index_config(
992        default: &VectorIndexConfig,
993        user: &VectorIndexConfig,
994    ) -> VectorIndexConfig {
995        VectorIndexConfig {
996            space: user.space.clone().or(default.space.clone()),
997            embedding_function: user
998                .embedding_function
999                .clone()
1000                .or(default.embedding_function.clone()),
1001            source_key: user.source_key.clone().or(default.source_key.clone()),
1002            hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1003            spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
1004        }
1005    }
1006
1007    /// Merge SparseVectorIndexConfig with field-level merging
1008    fn merge_sparse_vector_index_config(
1009        default: &SparseVectorIndexConfig,
1010        user: &SparseVectorIndexConfig,
1011    ) -> Result<SparseVectorIndexConfig, String> {
1012        Ok(SparseVectorIndexConfig {
1013            embedding_function: user
1014                .embedding_function
1015                .clone()
1016                .or(default.embedding_function.clone()),
1017            source_key: user.source_key.clone().or(default.source_key.clone()),
1018            bm25: user.bm25.or(default.bm25),
1019        })
1020    }
1021
1022    /// Merge HNSW configurations with field-level merging
1023    fn merge_hnsw_configs(
1024        default_hnsw: Option<&HnswIndexConfig>,
1025        user_hnsw: Option<&HnswIndexConfig>,
1026    ) -> Option<HnswIndexConfig> {
1027        match (default_hnsw, user_hnsw) {
1028            (Some(default), Some(user)) => Some(HnswIndexConfig {
1029                ef_construction: user.ef_construction.or(default.ef_construction),
1030                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1031                ef_search: user.ef_search.or(default.ef_search),
1032                num_threads: user.num_threads.or(default.num_threads),
1033                batch_size: user.batch_size.or(default.batch_size),
1034                sync_threshold: user.sync_threshold.or(default.sync_threshold),
1035                resize_factor: user.resize_factor.or(default.resize_factor),
1036            }),
1037            (Some(default), None) => Some(default.clone()),
1038            (None, Some(user)) => Some(user.clone()),
1039            (None, None) => None,
1040        }
1041    }
1042
1043    /// Merge SPANN configurations with field-level merging
1044    fn merge_spann_configs(
1045        default_spann: Option<&SpannIndexConfig>,
1046        user_spann: Option<&SpannIndexConfig>,
1047    ) -> Option<SpannIndexConfig> {
1048        match (default_spann, user_spann) {
1049            (Some(default), Some(user)) => Some(SpannIndexConfig {
1050                search_nprobe: user.search_nprobe.or(default.search_nprobe),
1051                search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1052                search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1053                nreplica_count: user.nreplica_count.or(default.nreplica_count),
1054                write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1055                write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1056                split_threshold: user.split_threshold.or(default.split_threshold),
1057                num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1058                initial_lambda: user.initial_lambda.or(default.initial_lambda),
1059                reassign_neighbor_count: user
1060                    .reassign_neighbor_count
1061                    .or(default.reassign_neighbor_count),
1062                merge_threshold: user.merge_threshold.or(default.merge_threshold),
1063                num_centers_to_merge_to: user
1064                    .num_centers_to_merge_to
1065                    .or(default.num_centers_to_merge_to),
1066                write_nprobe: user.write_nprobe.or(default.write_nprobe),
1067                ef_construction: user.ef_construction.or(default.ef_construction),
1068                ef_search: user.ef_search.or(default.ef_search),
1069                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1070            }),
1071            (Some(default), None) => Some(default.clone()),
1072            (None, Some(user)) => Some(user.clone()),
1073            (None, None) => None,
1074        }
1075    }
1076
1077    /// Reconcile Schema with InternalCollectionConfiguration
1078    ///
1079    /// Simple reconciliation logic:
1080    /// 1. If collection config is default → return schema (schema is source of truth)
1081    /// 2. If collection config is non-default and schema is non-default → error (both set)
1082    /// 3. If collection config is non-default and schema is default → override schema with collection config
1083    pub fn reconcile_with_collection_config(
1084        schema: Schema,
1085        collection_config: InternalCollectionConfiguration,
1086    ) -> Result<Schema, String> {
1087        // 1. Check if collection config is default
1088        if collection_config.is_default() {
1089            // Collection config is default → schema is source of truth
1090            return Ok(schema);
1091        }
1092
1093        // 2. Collection config is non-default, check if schema is also non-default
1094        if !Self::is_schema_default(&schema) {
1095            // Both are non-default → error
1096            return Err(
1097                "Cannot set both collection config and schema at the same time".to_string(),
1098            );
1099        }
1100
1101        // 3. Collection config is non-default, schema is default → override schema with collection config
1102        Self::convert_collection_config_to_schema(collection_config)
1103    }
1104
1105    pub fn reconcile_schema_and_config(
1106        schema: Option<Schema>,
1107        configuration: Option<InternalCollectionConfiguration>,
1108    ) -> Result<Schema, String> {
1109        let reconciled_schema = Self::reconcile_with_defaults(schema)?;
1110        if let Some(config) = configuration {
1111            Self::reconcile_with_collection_config(reconciled_schema, config)
1112        } else {
1113            Ok(reconciled_schema)
1114        }
1115    }
1116
1117    /// Check if schema is default by comparing it word-by-word with new_default
1118    fn is_schema_default(schema: &Schema) -> bool {
1119        // Compare with both possible default schemas (HNSW and SPANN)
1120        let default_hnsw = Schema::new_default(KnnIndex::Hnsw);
1121        let default_spann = Schema::new_default(KnnIndex::Spann);
1122
1123        schema == &default_hnsw || schema == &default_spann
1124    }
1125
1126    /// Convert InternalCollectionConfiguration to Schema
1127    fn convert_collection_config_to_schema(
1128        collection_config: InternalCollectionConfiguration,
1129    ) -> Result<Schema, String> {
1130        // Start with a default schema structure
1131        let mut schema = Schema::new_default(KnnIndex::Spann); // Default to HNSW, will be overridden
1132
1133        // Convert vector index configuration
1134        let vector_config = match collection_config.vector_index {
1135            VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
1136                space: Some(hnsw_config.space),
1137                embedding_function: collection_config.embedding_function,
1138                source_key: Some(DOCUMENT_KEY.to_string()), // Default source key
1139                hnsw: Some(HnswIndexConfig {
1140                    ef_construction: Some(hnsw_config.ef_construction),
1141                    max_neighbors: Some(hnsw_config.max_neighbors),
1142                    ef_search: Some(hnsw_config.ef_search),
1143                    num_threads: Some(hnsw_config.num_threads),
1144                    batch_size: Some(hnsw_config.batch_size),
1145                    sync_threshold: Some(hnsw_config.sync_threshold),
1146                    resize_factor: Some(hnsw_config.resize_factor),
1147                }),
1148                spann: None,
1149            },
1150            VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
1151                space: Some(spann_config.space),
1152                embedding_function: collection_config.embedding_function,
1153                source_key: Some(DOCUMENT_KEY.to_string()), // Default source key
1154                hnsw: None,
1155                spann: Some(SpannIndexConfig {
1156                    search_nprobe: Some(spann_config.search_nprobe),
1157                    search_rng_factor: Some(spann_config.search_rng_factor),
1158                    search_rng_epsilon: Some(spann_config.search_rng_epsilon),
1159                    nreplica_count: Some(spann_config.nreplica_count),
1160                    write_rng_factor: Some(spann_config.write_rng_factor),
1161                    write_rng_epsilon: Some(spann_config.write_rng_epsilon),
1162                    split_threshold: Some(spann_config.split_threshold),
1163                    num_samples_kmeans: Some(spann_config.num_samples_kmeans),
1164                    initial_lambda: Some(spann_config.initial_lambda),
1165                    reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
1166                    merge_threshold: Some(spann_config.merge_threshold),
1167                    num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
1168                    write_nprobe: Some(spann_config.write_nprobe),
1169                    ef_construction: Some(spann_config.ef_construction),
1170                    ef_search: Some(spann_config.ef_search),
1171                    max_neighbors: Some(spann_config.max_neighbors),
1172                }),
1173            },
1174        };
1175
1176        // Update defaults (keep enabled=false, just update the config)
1177        // This serves as the template for any new float_list fields
1178        if let Some(float_list) = &mut schema.defaults.float_list {
1179            if let Some(vector_index) = &mut float_list.vector_index {
1180                vector_index.config = vector_config.clone();
1181            }
1182        }
1183
1184        // Update the vector_index in the existing #embedding key override
1185        // Keep enabled=true (already set by new_default) and update the config
1186        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1187            if let Some(float_list) = &mut embedding_types.float_list {
1188                if let Some(vector_index) = &mut float_list.vector_index {
1189                    vector_index.config = vector_config;
1190                }
1191            }
1192        }
1193
1194        Ok(schema)
1195    }
1196
1197    /// Check if a specific metadata key-value should be indexed based on schema configuration
1198    pub fn is_metadata_type_index_enabled(
1199        &self,
1200        key: &str,
1201        value_type: MetadataValueType,
1202    ) -> Result<bool, SchemaError> {
1203        let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1204
1205        match value_type {
1206            MetadataValueType::Bool => match &v_type.boolean {
1207                Some(bool_type) => match &bool_type.bool_inverted_index {
1208                    Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1209                    None => Err(SchemaError::MissingIndexConfiguration {
1210                        key: key.to_string(),
1211                        value_type: "bool".to_string(),
1212                    }),
1213                },
1214                None => match &self.defaults.boolean {
1215                    Some(bool_type) => match &bool_type.bool_inverted_index {
1216                        Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1217                        None => Err(SchemaError::MissingIndexConfiguration {
1218                            key: key.to_string(),
1219                            value_type: "bool".to_string(),
1220                        }),
1221                    },
1222                    None => Err(SchemaError::MissingIndexConfiguration {
1223                        key: key.to_string(),
1224                        value_type: "bool".to_string(),
1225                    }),
1226                },
1227            },
1228            MetadataValueType::Int => match &v_type.int {
1229                Some(int_type) => match &int_type.int_inverted_index {
1230                    Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1231                    None => Err(SchemaError::MissingIndexConfiguration {
1232                        key: key.to_string(),
1233                        value_type: "int".to_string(),
1234                    }),
1235                },
1236                None => match &self.defaults.int {
1237                    Some(int_type) => match &int_type.int_inverted_index {
1238                        Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1239                        None => Err(SchemaError::MissingIndexConfiguration {
1240                            key: key.to_string(),
1241                            value_type: "int".to_string(),
1242                        }),
1243                    },
1244                    None => Err(SchemaError::MissingIndexConfiguration {
1245                        key: key.to_string(),
1246                        value_type: "int".to_string(),
1247                    }),
1248                },
1249            },
1250            MetadataValueType::Float => match &v_type.float {
1251                Some(float_type) => match &float_type.float_inverted_index {
1252                    Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1253                    None => Err(SchemaError::MissingIndexConfiguration {
1254                        key: key.to_string(),
1255                        value_type: "float".to_string(),
1256                    }),
1257                },
1258                None => match &self.defaults.float {
1259                    Some(float_type) => match &float_type.float_inverted_index {
1260                        Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1261                        None => Err(SchemaError::MissingIndexConfiguration {
1262                            key: key.to_string(),
1263                            value_type: "float".to_string(),
1264                        }),
1265                    },
1266                    None => Err(SchemaError::MissingIndexConfiguration {
1267                        key: key.to_string(),
1268                        value_type: "float".to_string(),
1269                    }),
1270                },
1271            },
1272            MetadataValueType::Str => match &v_type.string {
1273                Some(string_type) => match &string_type.string_inverted_index {
1274                    Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1275                    None => Err(SchemaError::MissingIndexConfiguration {
1276                        key: key.to_string(),
1277                        value_type: "string".to_string(),
1278                    }),
1279                },
1280                None => match &self.defaults.string {
1281                    Some(string_type) => match &string_type.string_inverted_index {
1282                        Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1283                        None => Err(SchemaError::MissingIndexConfiguration {
1284                            key: key.to_string(),
1285                            value_type: "string".to_string(),
1286                        }),
1287                    },
1288                    None => Err(SchemaError::MissingIndexConfiguration {
1289                        key: key.to_string(),
1290                        value_type: "string".to_string(),
1291                    }),
1292                },
1293            },
1294            MetadataValueType::SparseVector => match &v_type.sparse_vector {
1295                Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1296                    Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1297                    None => Err(SchemaError::MissingIndexConfiguration {
1298                        key: key.to_string(),
1299                        value_type: "sparse_vector".to_string(),
1300                    }),
1301                },
1302                None => match &self.defaults.sparse_vector {
1303                    Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1304                        Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1305                        None => Err(SchemaError::MissingIndexConfiguration {
1306                            key: key.to_string(),
1307                            value_type: "sparse_vector".to_string(),
1308                        }),
1309                    },
1310                    None => Err(SchemaError::MissingIndexConfiguration {
1311                        key: key.to_string(),
1312                        value_type: "sparse_vector".to_string(),
1313                    }),
1314                },
1315            },
1316        }
1317    }
1318
1319    pub fn is_metadata_where_indexing_enabled(
1320        &self,
1321        where_clause: &Where,
1322    ) -> Result<(), FilterValidationError> {
1323        match where_clause {
1324            Where::Composite(composite) => {
1325                for child in &composite.children {
1326                    self.is_metadata_where_indexing_enabled(child)?;
1327                }
1328                Ok(())
1329            }
1330            Where::Document(_) => Ok(()),
1331            Where::Metadata(expression) => {
1332                let value_type = match &expression.comparison {
1333                    MetadataComparison::Primitive(_, value) => value.value_type(),
1334                    MetadataComparison::Set(_, set_value) => set_value.value_type(),
1335                };
1336                let is_enabled = self
1337                    .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1338                    .map_err(FilterValidationError::Schema)?;
1339                if !is_enabled {
1340                    return Err(FilterValidationError::IndexingDisabled {
1341                        key: expression.key.clone(),
1342                        value_type,
1343                    });
1344                }
1345                Ok(())
1346            }
1347        }
1348    }
1349
1350    pub fn is_knn_key_indexing_enabled(
1351        &self,
1352        key: &str,
1353        query: &QueryVector,
1354    ) -> Result<(), FilterValidationError> {
1355        match query {
1356            QueryVector::Sparse(_) => {
1357                let is_enabled = self
1358                    .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1359                    .map_err(FilterValidationError::Schema)?;
1360                if !is_enabled {
1361                    return Err(FilterValidationError::IndexingDisabled {
1362                        key: key.to_string(),
1363                        value_type: MetadataValueType::SparseVector,
1364                    });
1365                }
1366                Ok(())
1367            }
1368            QueryVector::Dense(_) => {
1369                // TODO: once we allow turning off dense vector indexing, we need to check if the key is enabled
1370                // Dense vectors are always indexed
1371                Ok(())
1372            }
1373        }
1374    }
1375
1376    pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
1377        let value_types = self.keys.entry(key.to_string()).or_default();
1378        match value_type {
1379            MetadataValueType::Bool => {
1380                if value_types.boolean.is_none() {
1381                    value_types.boolean = self.defaults.boolean.clone();
1382                    return true;
1383                }
1384            }
1385            MetadataValueType::Int => {
1386                if value_types.int.is_none() {
1387                    value_types.int = self.defaults.int.clone();
1388                    return true;
1389                }
1390            }
1391            MetadataValueType::Float => {
1392                if value_types.float.is_none() {
1393                    value_types.float = self.defaults.float.clone();
1394                    return true;
1395                }
1396            }
1397            MetadataValueType::Str => {
1398                if value_types.string.is_none() {
1399                    value_types.string = self.defaults.string.clone();
1400                    return true;
1401                }
1402            }
1403            MetadataValueType::SparseVector => {
1404                if value_types.sparse_vector.is_none() {
1405                    value_types.sparse_vector = self.defaults.sparse_vector.clone();
1406                    return true;
1407                }
1408            }
1409        }
1410        false
1411    }
1412}
1413
1414// ============================================================================
1415// INDEX CONFIGURATION STRUCTURES
1416// ============================================================================
1417
/// Configuration payload carried by a vector index (the `config` of a `VectorIndexType`).
///
/// Every field is optional. Fields that are `None` are left out of the
/// serialized form, and deserialization rejects unknown fields.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct VectorIndexConfig {
    /// Vector space for similarity calculation (cosine, l2, ip)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub space: Option<Space>,
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// HNSW algorithm configuration
    // NOTE(review): schemas converted from a collection config set exactly one
    // of `hnsw` / `spann`; whether both may be set elsewhere is not visible here.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw: Option<HnswIndexConfig>,
    /// SPANN algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub spann: Option<SpannIndexConfig>,
}
1438
/// Configuration for HNSW vector index algorithm parameters
///
/// Every field is optional and is omitted from the serialized form when it is
/// `None`. Deserialization rejects fields that are not declared here. The
/// `Default` value has every field set to `None`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct HnswIndexConfig {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_construction: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_neighbors: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_search: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_threads: Option<usize>,
    /// Range-validated: must be at least 2 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub batch_size: Option<usize>,
    /// Range-validated: must be at least 2 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub sync_threshold: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resize_factor: Option<f64>,
}
1461
/// Configuration for SPANN vector index algorithm parameters
///
/// Every field is optional and is omitted from the serialized form when it is
/// `None`. Deserialization rejects fields that are not declared here. The
/// `Default` value has every field set to `None`. Each field carries a
/// `validator` range constraint, summarized on the field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SpannIndexConfig {
    /// Range-validated: at most 128 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 128))]
    pub search_nprobe: Option<u32>,
    /// Range-validated: min and max are both 1.0, so only 1.0 passes when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub search_rng_factor: Option<f32>,
    /// Range-validated: between 5.0 and 10.0 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub search_rng_epsilon: Option<f32>,
    /// Range-validated: at most 8 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub nreplica_count: Option<u32>,
    /// Range-validated: min and max are both 1.0, so only 1.0 passes when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub write_rng_factor: Option<f32>,
    /// Range-validated: between 5.0 and 10.0 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub write_rng_epsilon: Option<f32>,
    /// Range-validated: between 50 and 200 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 50, max = 200))]
    pub split_threshold: Option<u32>,
    /// Range-validated: at most 1000 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 1000))]
    pub num_samples_kmeans: Option<usize>,
    /// Range-validated: min and max are both 100.0, so only 100.0 passes when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 100.0, max = 100.0))]
    pub initial_lambda: Option<f32>,
    /// Range-validated: at most 64 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub reassign_neighbor_count: Option<u32>,
    /// Range-validated: between 25 and 100 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 25, max = 100))]
    pub merge_threshold: Option<u32>,
    /// Range-validated: at most 8 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub num_centers_to_merge_to: Option<u32>,
    /// Range-validated: at most 64 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub write_nprobe: Option<u32>,
    /// Range-validated: at most 200 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_construction: Option<usize>,
    /// Range-validated: at most 200 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_search: Option<usize>,
    /// Range-validated: at most 64 when set.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub max_neighbors: Option<usize>,
}
1516
/// Configuration for a sparse vector index.
///
/// Every field is optional and is omitted from the serialized form when it is
/// `None`. Deserialization rejects fields that are not declared here.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SparseVectorIndexConfig {
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the sparse vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// Whether this embedding is BM25
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bm25: Option<bool>,
}
1531
/// Configuration for an FTS index.
///
/// Declares no fields; because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FtsIndexConfig {
    // FTS index typically has no additional parameters
}
1538
/// Configuration for a string inverted index.
///
/// Declares no fields; because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct StringInvertedIndexConfig {
    // String inverted index typically has no additional parameters
}
1545
/// Configuration for an integer inverted index.
///
/// Declares no fields; because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct IntInvertedIndexConfig {
    // Integer inverted index typically has no additional parameters
}
1552
/// Configuration for a float inverted index.
///
/// Declares no fields; because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FloatInvertedIndexConfig {
    // Float inverted index typically has no additional parameters
}
1559
/// Configuration for a boolean inverted index.
///
/// Declares no fields; because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct BoolInvertedIndexConfig {
    // Boolean inverted index typically has no additional parameters
}
1566
1567#[cfg(test)]
1568mod tests {
1569    use super::*;
1570    use crate::hnsw_configuration::Space;
1571    use crate::metadata::SparseVector;
1572    use crate::{
1573        EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
1574    };
1575    use serde_json::json;
1576
1577    #[test]
1578    fn test_reconcile_with_defaults_none_user_schema() {
1579        // Test that when no user schema is provided, we get the default schema
1580        let result = Schema::reconcile_with_defaults(None).unwrap();
1581        let expected = Schema::new_default(KnnIndex::Spann);
1582        assert_eq!(result, expected);
1583    }
1584
1585    #[test]
1586    fn test_reconcile_with_defaults_empty_user_schema() {
1587        // Test merging with an empty user schema
1588        let user_schema = Schema {
1589            defaults: ValueTypes::default(),
1590            keys: HashMap::new(),
1591        };
1592
1593        let result = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();
1594        let expected = Schema::new_default(KnnIndex::Spann);
1595        assert_eq!(result, expected);
1596    }
1597
1598    #[test]
1599    fn test_reconcile_with_defaults_user_overrides_string_enabled() {
1600        // Test that user can override string inverted index enabled state
1601        let mut user_schema = Schema {
1602            defaults: ValueTypes::default(),
1603            keys: HashMap::new(),
1604        };
1605
1606        user_schema.defaults.string = Some(StringValueType {
1607            string_inverted_index: Some(StringInvertedIndexType {
1608                enabled: false, // Override default (true) to false
1609                config: StringInvertedIndexConfig {},
1610            }),
1611            fts_index: None,
1612        });
1613
1614        let result = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();
1615
1616        // Check that the user override took precedence
1617        assert!(
1618            !result
1619                .defaults
1620                .string
1621                .as_ref()
1622                .unwrap()
1623                .string_inverted_index
1624                .as_ref()
1625                .unwrap()
1626                .enabled
1627        );
1628        // Check that other defaults are still present
1629        assert!(result.defaults.float.is_some());
1630        assert!(result.defaults.int.is_some());
1631    }
1632
1633    #[test]
1634    fn test_reconcile_with_defaults_user_overrides_vector_config() {
1635        // Test field-level merging for vector configurations
1636        let mut user_schema = Schema {
1637            defaults: ValueTypes::default(),
1638            keys: HashMap::new(),
1639        };
1640
1641        user_schema.defaults.float_list = Some(FloatListValueType {
1642            vector_index: Some(VectorIndexType {
1643                enabled: true, // Enable vector index (default is false)
1644                config: VectorIndexConfig {
1645                    space: Some(Space::L2),                     // Override default space
1646                    embedding_function: None,                   // Will use default
1647                    source_key: Some("custom_key".to_string()), // Override default
1648                    hnsw: Some(HnswIndexConfig {
1649                        ef_construction: Some(500), // Override default
1650                        max_neighbors: None,        // Will use default
1651                        ef_search: None,            // Will use default
1652                        num_threads: None,
1653                        batch_size: None,
1654                        sync_threshold: None,
1655                        resize_factor: None,
1656                    }),
1657                    spann: None,
1658                },
1659            }),
1660        });
1661
1662        // Use HNSW defaults for this test so we have HNSW config to merge with
1663        let result = {
1664            let default_schema = Schema::new_default(KnnIndex::Hnsw);
1665            let merged_defaults =
1666                Schema::merge_value_types(&default_schema.defaults, &user_schema.defaults).unwrap();
1667            let mut merged_keys = default_schema.keys.clone();
1668            for (key, user_value_types) in user_schema.keys {
1669                if let Some(default_value_types) = merged_keys.get(&key) {
1670                    let merged_value_types =
1671                        Schema::merge_value_types(default_value_types, &user_value_types).unwrap();
1672                    merged_keys.insert(key, merged_value_types);
1673                } else {
1674                    merged_keys.insert(key, user_value_types);
1675                }
1676            }
1677            Schema {
1678                defaults: merged_defaults,
1679                keys: merged_keys,
1680            }
1681        };
1682
1683        let vector_config = &result
1684            .defaults
1685            .float_list
1686            .as_ref()
1687            .unwrap()
1688            .vector_index
1689            .as_ref()
1690            .unwrap()
1691            .config;
1692
1693        // Check user overrides took precedence
1694        assert_eq!(vector_config.space, Some(Space::L2));
1695        assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
1696        assert_eq!(
1697            vector_config.hnsw.as_ref().unwrap().ef_construction,
1698            Some(500)
1699        );
1700
1701        // Check defaults were preserved for unspecified fields
1702        assert_eq!(
1703            vector_config.embedding_function,
1704            Some(EmbeddingFunctionConfiguration::Legacy)
1705        );
1706        // Since user provided HNSW config, the default max_neighbors should be merged in
1707        assert_eq!(
1708            vector_config.hnsw.as_ref().unwrap().max_neighbors,
1709            Some(default_m())
1710        );
1711    }
1712
1713    #[test]
1714    fn test_reconcile_with_defaults_keys() {
1715        // Test that key overrides are properly merged
1716        let mut user_schema = Schema {
1717            defaults: ValueTypes::default(),
1718            keys: HashMap::new(),
1719        };
1720
1721        // Add a custom key override
1722        let custom_key_types = ValueTypes {
1723            string: Some(StringValueType {
1724                fts_index: Some(FtsIndexType {
1725                    enabled: true,
1726                    config: FtsIndexConfig {},
1727                }),
1728                string_inverted_index: Some(StringInvertedIndexType {
1729                    enabled: false,
1730                    config: StringInvertedIndexConfig {},
1731                }),
1732            }),
1733            ..Default::default()
1734        };
1735        user_schema
1736            .keys
1737            .insert("custom_key".to_string(), custom_key_types);
1738
1739        let result = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();
1740
1741        // Check that default key overrides are preserved
1742        assert!(result.keys.contains_key(EMBEDDING_KEY));
1743        assert!(result.keys.contains_key(DOCUMENT_KEY));
1744
1745        // Check that user key override was added
1746        assert!(result.keys.contains_key("custom_key"));
1747        let custom_override = result.keys.get("custom_key").unwrap();
1748        assert!(
1749            custom_override
1750                .string
1751                .as_ref()
1752                .unwrap()
1753                .fts_index
1754                .as_ref()
1755                .unwrap()
1756                .enabled
1757        );
1758    }
1759
1760    #[test]
1761    fn test_reconcile_with_defaults_override_existing_key() {
1762        // Test overriding an existing key override (like #embedding)
1763        let mut user_schema = Schema {
1764            defaults: ValueTypes::default(),
1765            keys: HashMap::new(),
1766        };
1767
1768        // Override the #embedding key with custom settings
1769        let embedding_override = ValueTypes {
1770            float_list: Some(FloatListValueType {
1771                vector_index: Some(VectorIndexType {
1772                    enabled: false, // Override default enabled=true to false
1773                    config: VectorIndexConfig {
1774                        space: Some(Space::Ip), // Override default space
1775                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
1776                        source_key: Some("custom_embedding_key".to_string()),
1777                        hnsw: None,
1778                        spann: None,
1779                    },
1780                }),
1781            }),
1782            ..Default::default()
1783        };
1784        user_schema
1785            .keys
1786            .insert(EMBEDDING_KEY.to_string(), embedding_override);
1787
1788        let result = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();
1789
1790        let embedding_config = result.keys.get(EMBEDDING_KEY).unwrap();
1791        let vector_config = &embedding_config
1792            .float_list
1793            .as_ref()
1794            .unwrap()
1795            .vector_index
1796            .as_ref()
1797            .unwrap();
1798
1799        // Check user overrides took precedence
1800        assert!(!vector_config.enabled);
1801        assert_eq!(vector_config.config.space, Some(Space::Ip));
1802        assert_eq!(
1803            vector_config.config.source_key,
1804            Some("custom_embedding_key".to_string())
1805        );
1806    }
1807
1808    #[test]
1809    fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
1810        let collection_config = InternalCollectionConfiguration {
1811            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
1812                space: Space::Cosine,
1813                ef_construction: 128,
1814                ef_search: 96,
1815                max_neighbors: 42,
1816                num_threads: 8,
1817                resize_factor: 1.5,
1818                sync_threshold: 2_000,
1819                batch_size: 256,
1820            }),
1821            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
1822                EmbeddingFunctionNewConfiguration {
1823                    name: "custom".to_string(),
1824                    config: json!({"alpha": 1}),
1825                },
1826            )),
1827        };
1828
1829        let schema =
1830            Schema::convert_collection_config_to_schema(collection_config.clone()).unwrap();
1831        let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
1832
1833        assert_eq!(reconstructed, collection_config);
1834    }
1835
1836    #[test]
1837    fn test_convert_schema_to_collection_config_spann_roundtrip() {
1838        let spann_config = InternalSpannConfiguration {
1839            space: Space::Cosine,
1840            search_nprobe: 11,
1841            search_rng_factor: 1.7,
1842            write_nprobe: 5,
1843            nreplica_count: 3,
1844            split_threshold: 150,
1845            merge_threshold: 80,
1846            ef_construction: 120,
1847            ef_search: 90,
1848            max_neighbors: 40,
1849            ..Default::default()
1850        };
1851
1852        let collection_config = InternalCollectionConfiguration {
1853            vector_index: VectorIndexConfiguration::Spann(spann_config.clone()),
1854            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
1855                EmbeddingFunctionNewConfiguration {
1856                    name: "custom".to_string(),
1857                    config: json!({"beta": true}),
1858                },
1859            )),
1860        };
1861
1862        let schema =
1863            Schema::convert_collection_config_to_schema(collection_config.clone()).unwrap();
1864        let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
1865
1866        assert_eq!(reconstructed, collection_config);
1867    }
1868
1869    #[test]
1870    fn test_convert_schema_to_collection_config_rejects_mixed_index() {
1871        let mut schema = Schema::new_default(KnnIndex::Hnsw);
1872        if let Some(embedding) = schema.keys.get_mut(EMBEDDING_KEY) {
1873            if let Some(float_list) = &mut embedding.float_list {
1874                if let Some(vector_index) = &mut float_list.vector_index {
1875                    vector_index.config.spann = Some(SpannIndexConfig {
1876                        search_nprobe: Some(1),
1877                        search_rng_factor: Some(1.0),
1878                        search_rng_epsilon: Some(0.1),
1879                        nreplica_count: Some(1),
1880                        write_rng_factor: Some(1.0),
1881                        write_rng_epsilon: Some(0.1),
1882                        split_threshold: Some(100),
1883                        num_samples_kmeans: Some(10),
1884                        initial_lambda: Some(0.5),
1885                        reassign_neighbor_count: Some(10),
1886                        merge_threshold: Some(50),
1887                        num_centers_to_merge_to: Some(3),
1888                        write_nprobe: Some(1),
1889                        ef_construction: Some(50),
1890                        ef_search: Some(40),
1891                        max_neighbors: Some(20),
1892                    });
1893                }
1894            }
1895        }
1896
1897        let result = InternalCollectionConfiguration::try_from(&schema);
1898        assert!(result.is_err());
1899    }
1900
1901    #[test]
1902    fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
1903        let mut schema = Schema::new_default(KnnIndex::Hnsw);
1904        let before = schema.clone();
1905        let modified = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);
1906        assert!(!modified);
1907        assert_eq!(schema, before);
1908    }
1909
1910    #[test]
1911    fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
1912        let mut schema = Schema::new_default(KnnIndex::Hnsw);
1913        assert!(!schema.keys.contains_key("custom_field"));
1914
1915        let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
1916
1917        assert!(modified);
1918        let entry = schema
1919            .keys
1920            .get("custom_field")
1921            .expect("expected new key override to be inserted");
1922        assert_eq!(entry.boolean, schema.defaults.boolean);
1923        assert!(entry.string.is_none());
1924        assert!(entry.int.is_none());
1925        assert!(entry.float.is_none());
1926        assert!(entry.float_list.is_none());
1927        assert!(entry.sparse_vector.is_none());
1928    }
1929
1930    #[test]
1931    fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
1932        let mut schema = Schema::new_default(KnnIndex::Hnsw);
1933        let initial_len = schema.keys.len();
1934        schema.keys.insert(
1935            "custom_field".to_string(),
1936            ValueTypes {
1937                string: schema.defaults.string.clone(),
1938                ..Default::default()
1939            },
1940        );
1941
1942        let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
1943
1944        assert!(modified);
1945        assert_eq!(schema.keys.len(), initial_len + 1);
1946        let entry = schema
1947            .keys
1948            .get("custom_field")
1949            .expect("expected key override to exist after ensure call");
1950        assert!(entry.string.is_some());
1951        assert_eq!(entry.boolean, schema.defaults.boolean);
1952    }
1953
1954    #[test]
1955    fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
1956        let schema = Schema::new_default(KnnIndex::Spann);
1957        let result = schema.is_knn_key_indexing_enabled(
1958            "custom_sparse",
1959            &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32])),
1960        );
1961
1962        let err = result.expect_err("expected indexing disabled error");
1963        match err {
1964            FilterValidationError::IndexingDisabled { key, value_type } => {
1965                assert_eq!(key, "custom_sparse");
1966                assert_eq!(value_type, crate::metadata::MetadataValueType::SparseVector);
1967            }
1968            other => panic!("unexpected error variant: {other:?}"),
1969        }
1970    }
1971
1972    #[test]
1973    fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
1974        let mut schema = Schema::new_default(KnnIndex::Spann);
1975        schema.keys.insert(
1976            "sparse_enabled".to_string(),
1977            ValueTypes {
1978                sparse_vector: Some(SparseVectorValueType {
1979                    sparse_vector_index: Some(SparseVectorIndexType {
1980                        enabled: true,
1981                        config: SparseVectorIndexConfig {
1982                            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
1983                            source_key: None,
1984                            bm25: None,
1985                        },
1986                    }),
1987                }),
1988                ..Default::default()
1989            },
1990        );
1991
1992        let result = schema.is_knn_key_indexing_enabled(
1993            "sparse_enabled",
1994            &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32])),
1995        );
1996
1997        assert!(result.is_ok());
1998    }
1999
2000    #[test]
2001    fn test_is_knn_key_indexing_enabled_dense_succeeds() {
2002        let schema = Schema::new_default(KnnIndex::Spann);
2003        let result = schema.is_knn_key_indexing_enabled(
2004            EMBEDDING_KEY,
2005            &QueryVector::Dense(vec![0.1_f32, 0.2_f32]),
2006        );
2007
2008        assert!(result.is_ok());
2009    }
2010
2011    #[test]
2012    fn test_merge_hnsw_configs_field_level() {
2013        // Test field-level merging for HNSW configurations
2014        let default_hnsw = HnswIndexConfig {
2015            ef_construction: Some(200),
2016            max_neighbors: Some(16),
2017            ef_search: Some(10),
2018            num_threads: Some(4),
2019            batch_size: Some(100),
2020            sync_threshold: Some(1000),
2021            resize_factor: Some(1.2),
2022        };
2023
2024        let user_hnsw = HnswIndexConfig {
2025            ef_construction: Some(300), // Override
2026            max_neighbors: None,        // Will use default
2027            ef_search: Some(20),        // Override
2028            num_threads: None,          // Will use default
2029            batch_size: None,           // Will use default
2030            sync_threshold: Some(2000), // Override
2031            resize_factor: None,        // Will use default
2032        };
2033
2034        let result = Schema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();
2035
2036        // Check user overrides
2037        assert_eq!(result.ef_construction, Some(300));
2038        assert_eq!(result.ef_search, Some(20));
2039        assert_eq!(result.sync_threshold, Some(2000));
2040
2041        // Check defaults preserved
2042        assert_eq!(result.max_neighbors, Some(16));
2043        assert_eq!(result.num_threads, Some(4));
2044        assert_eq!(result.batch_size, Some(100));
2045        assert_eq!(result.resize_factor, Some(1.2));
2046    }
2047
2048    #[test]
2049    fn test_merge_spann_configs_field_level() {
2050        // Test field-level merging for SPANN configurations
2051        let default_spann = SpannIndexConfig {
2052            search_nprobe: Some(10),
2053            search_rng_factor: Some(1.0),  // Must be exactly 1.0
2054            search_rng_epsilon: Some(7.0), // Must be 5.0-10.0
2055            nreplica_count: Some(3),
2056            write_rng_factor: Some(1.0),  // Must be exactly 1.0
2057            write_rng_epsilon: Some(6.0), // Must be 5.0-10.0
2058            split_threshold: Some(100),   // Must be 50-200
2059            num_samples_kmeans: Some(100),
2060            initial_lambda: Some(100.0), // Must be exactly 100.0
2061            reassign_neighbor_count: Some(50),
2062            merge_threshold: Some(50),        // Must be 25-100
2063            num_centers_to_merge_to: Some(4), // Max is 8
2064            write_nprobe: Some(5),
2065            ef_construction: Some(100),
2066            ef_search: Some(10),
2067            max_neighbors: Some(16),
2068        };
2069
2070        let user_spann = SpannIndexConfig {
2071            search_nprobe: Some(20),       // Override
2072            search_rng_factor: None,       // Will use default
2073            search_rng_epsilon: Some(8.0), // Override (valid: 5.0-10.0)
2074            nreplica_count: None,          // Will use default
2075            write_rng_factor: None,
2076            write_rng_epsilon: None,
2077            split_threshold: Some(150), // Override (valid: 50-200)
2078            num_samples_kmeans: None,
2079            initial_lambda: None,
2080            reassign_neighbor_count: None,
2081            merge_threshold: None,
2082            num_centers_to_merge_to: None,
2083            write_nprobe: None,
2084            ef_construction: None,
2085            ef_search: None,
2086            max_neighbors: None,
2087        };
2088
2089        let result = Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann)).unwrap();
2090
2091        // Check user overrides
2092        assert_eq!(result.search_nprobe, Some(20));
2093        assert_eq!(result.search_rng_epsilon, Some(8.0));
2094        assert_eq!(result.split_threshold, Some(150));
2095
2096        // Check defaults preserved
2097        assert_eq!(result.search_rng_factor, Some(1.0));
2098        assert_eq!(result.nreplica_count, Some(3));
2099        assert_eq!(result.initial_lambda, Some(100.0));
2100    }
2101
2102    #[test]
2103    fn test_spann_index_config_into_internal_configuration() {
2104        let config = SpannIndexConfig {
2105            search_nprobe: Some(33),
2106            search_rng_factor: Some(1.2),
2107            search_rng_epsilon: None,
2108            nreplica_count: None,
2109            write_rng_factor: Some(1.5),
2110            write_rng_epsilon: None,
2111            split_threshold: Some(75),
2112            num_samples_kmeans: None,
2113            initial_lambda: Some(0.9),
2114            reassign_neighbor_count: Some(40),
2115            merge_threshold: None,
2116            num_centers_to_merge_to: Some(4),
2117            write_nprobe: Some(60),
2118            ef_construction: Some(180),
2119            ef_search: Some(170),
2120            max_neighbors: Some(32),
2121        };
2122
2123        let with_space: InternalSpannConfiguration = (Some(&Space::Cosine), &config).into();
2124        assert_eq!(with_space.space, Space::Cosine);
2125        assert_eq!(with_space.search_nprobe, 33);
2126        assert_eq!(with_space.search_rng_factor, 1.2);
2127        assert_eq!(with_space.search_rng_epsilon, default_search_rng_epsilon());
2128        assert_eq!(with_space.write_rng_factor, 1.5);
2129        assert_eq!(with_space.write_nprobe, 60);
2130        assert_eq!(with_space.ef_construction, 180);
2131        assert_eq!(with_space.ef_search, 170);
2132        assert_eq!(with_space.max_neighbors, 32);
2133        assert_eq!(with_space.merge_threshold, default_merge_threshold());
2134
2135        let default_space_config: InternalSpannConfiguration = (None, &config).into();
2136        assert_eq!(default_space_config.space, default_space());
2137    }
2138
2139    #[test]
2140    fn test_merge_string_type_combinations() {
2141        // Test all combinations of default and user StringValueType
2142
2143        // Both Some - should merge
2144        let default = StringValueType {
2145            string_inverted_index: Some(StringInvertedIndexType {
2146                enabled: true,
2147                config: StringInvertedIndexConfig {},
2148            }),
2149            fts_index: Some(FtsIndexType {
2150                enabled: false,
2151                config: FtsIndexConfig {},
2152            }),
2153        };
2154
2155        let user = StringValueType {
2156            string_inverted_index: Some(StringInvertedIndexType {
2157                enabled: false, // Override
2158                config: StringInvertedIndexConfig {},
2159            }),
2160            fts_index: None, // Will use default
2161        };
2162
2163        let result = Schema::merge_string_type(Some(&default), Some(&user))
2164            .unwrap()
2165            .unwrap();
2166        assert!(!result.string_inverted_index.as_ref().unwrap().enabled); // User override
2167        assert!(!result.fts_index.as_ref().unwrap().enabled); // Default preserved
2168
2169        // Default Some, User None - should return default
2170        let result = Schema::merge_string_type(Some(&default), None)
2171            .unwrap()
2172            .unwrap();
2173        assert!(result.string_inverted_index.as_ref().unwrap().enabled);
2174
2175        // Default None, User Some - should return user
2176        let result = Schema::merge_string_type(None, Some(&user))
2177            .unwrap()
2178            .unwrap();
2179        assert!(!result.string_inverted_index.as_ref().unwrap().enabled);
2180
2181        // Both None - should return None
2182        let result = Schema::merge_string_type(None, None).unwrap();
2183        assert!(result.is_none());
2184    }
2185
2186    #[test]
2187    fn test_merge_vector_index_config_comprehensive() {
2188        // Test comprehensive vector index config merging
2189        let default_config = VectorIndexConfig {
2190            space: Some(Space::Cosine),
2191            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
2192            source_key: Some("default_key".to_string()),
2193            hnsw: Some(HnswIndexConfig {
2194                ef_construction: Some(200),
2195                max_neighbors: Some(16),
2196                ef_search: Some(10),
2197                num_threads: Some(4),
2198                batch_size: Some(100),
2199                sync_threshold: Some(1000),
2200                resize_factor: Some(1.2),
2201            }),
2202            spann: None,
2203        };
2204
2205        let user_config = VectorIndexConfig {
2206            space: Some(Space::L2),                   // Override
2207            embedding_function: None,                 // Will use default
2208            source_key: Some("user_key".to_string()), // Override
2209            hnsw: Some(HnswIndexConfig {
2210                ef_construction: Some(300), // Override
2211                max_neighbors: None,        // Will use default
2212                ef_search: None,            // Will use default
2213                num_threads: None,
2214                batch_size: None,
2215                sync_threshold: None,
2216                resize_factor: None,
2217            }),
2218            spann: Some(SpannIndexConfig {
2219                search_nprobe: Some(15),
2220                search_rng_factor: None,
2221                search_rng_epsilon: None,
2222                nreplica_count: None,
2223                write_rng_factor: None,
2224                write_rng_epsilon: None,
2225                split_threshold: None,
2226                num_samples_kmeans: None,
2227                initial_lambda: None,
2228                reassign_neighbor_count: None,
2229                merge_threshold: None,
2230                num_centers_to_merge_to: None,
2231                write_nprobe: None,
2232                ef_construction: None,
2233                ef_search: None,
2234                max_neighbors: None,
2235            }), // Add SPANN config
2236        };
2237
2238        let result = Schema::merge_vector_index_config(&default_config, &user_config);
2239
2240        // Check field-level merging
2241        assert_eq!(result.space, Some(Space::L2)); // User override
2242        assert_eq!(
2243            result.embedding_function,
2244            Some(EmbeddingFunctionConfiguration::Legacy)
2245        ); // Default preserved
2246        assert_eq!(result.source_key, Some("user_key".to_string())); // User override
2247
2248        // Check HNSW merging
2249        assert_eq!(result.hnsw.as_ref().unwrap().ef_construction, Some(300)); // User override
2250        assert_eq!(result.hnsw.as_ref().unwrap().max_neighbors, Some(16)); // Default preserved
2251
2252        // Check SPANN was added from user
2253        assert!(result.spann.is_some());
2254        assert_eq!(result.spann.as_ref().unwrap().search_nprobe, Some(15));
2255    }
2256
2257    #[test]
2258    fn test_merge_sparse_vector_index_config() {
2259        // Test sparse vector index config merging
2260        let default_config = SparseVectorIndexConfig {
2261            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
2262            source_key: Some("default_sparse_key".to_string()),
2263            bm25: None,
2264        };
2265
2266        let user_config = SparseVectorIndexConfig {
2267            embedding_function: None,                        // Will use default
2268            source_key: Some("user_sparse_key".to_string()), // Override
2269            bm25: None,
2270        };
2271
2272        let result =
2273            Schema::merge_sparse_vector_index_config(&default_config, &user_config).unwrap();
2274
2275        // Check user override
2276        assert_eq!(result.source_key, Some("user_sparse_key".to_string()));
2277        // Check default preserved
2278        assert_eq!(
2279            result.embedding_function,
2280            Some(EmbeddingFunctionConfiguration::Legacy)
2281        );
2282    }
2283
2284    #[test]
2285    fn test_complex_nested_merging_scenario() {
2286        // Test a complex scenario with multiple levels of merging
2287        let mut user_schema = Schema {
2288            defaults: ValueTypes::default(),
2289            keys: HashMap::new(),
2290        };
2291
2292        // Set up complex user defaults
2293        user_schema.defaults.string = Some(StringValueType {
2294            string_inverted_index: Some(StringInvertedIndexType {
2295                enabled: false,
2296                config: StringInvertedIndexConfig {},
2297            }),
2298            fts_index: Some(FtsIndexType {
2299                enabled: true,
2300                config: FtsIndexConfig {},
2301            }),
2302        });
2303
2304        user_schema.defaults.float_list = Some(FloatListValueType {
2305            vector_index: Some(VectorIndexType {
2306                enabled: true,
2307                config: VectorIndexConfig {
2308                    space: Some(Space::Ip),
2309                    embedding_function: None, // Will use default
2310                    source_key: Some("custom_vector_key".to_string()),
2311                    hnsw: Some(HnswIndexConfig {
2312                        ef_construction: Some(400),
2313                        max_neighbors: Some(32),
2314                        ef_search: None, // Will use default
2315                        num_threads: None,
2316                        batch_size: None,
2317                        sync_threshold: None,
2318                        resize_factor: None,
2319                    }),
2320                    spann: None,
2321                },
2322            }),
2323        });
2324
2325        // Set up key overrides
2326        let custom_key_override = ValueTypes {
2327            string: Some(StringValueType {
2328                fts_index: Some(FtsIndexType {
2329                    enabled: true,
2330                    config: FtsIndexConfig {},
2331                }),
2332                string_inverted_index: None,
2333            }),
2334            ..Default::default()
2335        };
2336        user_schema
2337            .keys
2338            .insert("custom_field".to_string(), custom_key_override);
2339
2340        // Use HNSW defaults for this test so we have HNSW config to merge with
2341        let result = {
2342            let default_schema = Schema::new_default(KnnIndex::Hnsw);
2343            let merged_defaults =
2344                Schema::merge_value_types(&default_schema.defaults, &user_schema.defaults).unwrap();
2345            let mut merged_keys = default_schema.keys.clone();
2346            for (key, user_value_types) in user_schema.keys {
2347                if let Some(default_value_types) = merged_keys.get(&key) {
2348                    let merged_value_types =
2349                        Schema::merge_value_types(default_value_types, &user_value_types).unwrap();
2350                    merged_keys.insert(key, merged_value_types);
2351                } else {
2352                    merged_keys.insert(key, user_value_types);
2353                }
2354            }
2355            Schema {
2356                defaults: merged_defaults,
2357                keys: merged_keys,
2358            }
2359        };
2360
2361        // Verify complex merging worked correctly
2362
2363        // Check defaults merging
2364        assert!(
2365            !result
2366                .defaults
2367                .string
2368                .as_ref()
2369                .unwrap()
2370                .string_inverted_index
2371                .as_ref()
2372                .unwrap()
2373                .enabled
2374        );
2375        assert!(
2376            result
2377                .defaults
2378                .string
2379                .as_ref()
2380                .unwrap()
2381                .fts_index
2382                .as_ref()
2383                .unwrap()
2384                .enabled
2385        );
2386
2387        let vector_config = &result
2388            .defaults
2389            .float_list
2390            .as_ref()
2391            .unwrap()
2392            .vector_index
2393            .as_ref()
2394            .unwrap()
2395            .config;
2396        assert_eq!(vector_config.space, Some(Space::Ip));
2397        assert_eq!(
2398            vector_config.embedding_function,
2399            Some(EmbeddingFunctionConfiguration::Legacy)
2400        ); // Default preserved
2401        assert_eq!(
2402            vector_config.source_key,
2403            Some("custom_vector_key".to_string())
2404        );
2405        assert_eq!(
2406            vector_config.hnsw.as_ref().unwrap().ef_construction,
2407            Some(400)
2408        );
2409        assert_eq!(vector_config.hnsw.as_ref().unwrap().max_neighbors, Some(32));
2410        assert_eq!(
2411            vector_config.hnsw.as_ref().unwrap().ef_search,
2412            Some(default_search_ef())
2413        ); // Default preserved
2414
2415        // Check key overrides
2416        assert!(result.keys.contains_key(EMBEDDING_KEY)); // Default preserved
2417        assert!(result.keys.contains_key(DOCUMENT_KEY)); // Default preserved
2418        assert!(result.keys.contains_key("custom_field")); // User added
2419
2420        let custom_override = result.keys.get("custom_field").unwrap();
2421        assert!(
2422            custom_override
2423                .string
2424                .as_ref()
2425                .unwrap()
2426                .fts_index
2427                .as_ref()
2428                .unwrap()
2429                .enabled
2430        );
2431        assert!(custom_override
2432            .string
2433            .as_ref()
2434            .unwrap()
2435            .string_inverted_index
2436            .is_none());
2437    }
2438
2439    #[test]
2440    fn test_reconcile_with_collection_config_default_config() {
2441        // Test that when collection config is default, schema is returned as-is
2442        let schema = Schema::new_default(KnnIndex::Hnsw);
2443        let collection_config = InternalCollectionConfiguration::default_hnsw();
2444
2445        let result =
2446            Schema::reconcile_with_collection_config(schema.clone(), collection_config).unwrap();
2447        assert_eq!(result, schema);
2448    }
2449
2450    #[test]
2451    fn test_reconcile_with_collection_config_both_non_default() {
2452        // Test that when both schema and collection config are non-default, it returns an error
2453        let mut schema = Schema::new_default(KnnIndex::Hnsw);
2454        schema.defaults.string = Some(StringValueType {
2455            fts_index: Some(FtsIndexType {
2456                enabled: true,
2457                config: FtsIndexConfig {},
2458            }),
2459            string_inverted_index: None,
2460        });
2461
2462        let mut collection_config = InternalCollectionConfiguration::default_hnsw();
2463        // Make collection config non-default by changing a parameter
2464        if let VectorIndexConfiguration::Hnsw(ref mut hnsw_config) = collection_config.vector_index
2465        {
2466            hnsw_config.ef_construction = 500; // Non-default value
2467        }
2468
2469        let result = Schema::reconcile_with_collection_config(schema, collection_config);
2470        assert!(result.is_err());
2471        assert_eq!(
2472            result.unwrap_err(),
2473            "Cannot set both collection config and schema at the same time"
2474        );
2475    }
2476
2477    #[test]
2478    fn test_reconcile_with_collection_config_hnsw_override() {
2479        // Test that non-default HNSW collection config overrides default schema
2480        let schema = Schema::new_default(KnnIndex::Hnsw); // Use actual default schema
2481
2482        let collection_config = InternalCollectionConfiguration {
2483            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
2484                ef_construction: 300,
2485                max_neighbors: 32,
2486                ef_search: 50,
2487                num_threads: 8,
2488                batch_size: 200,
2489                sync_threshold: 2000,
2490                resize_factor: 1.5,
2491                space: Space::L2,
2492            }),
2493            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
2494        };
2495
2496        let result = Schema::reconcile_with_collection_config(schema, collection_config).unwrap();
2497
2498        // Check that #embedding key override was created with the collection config settings
2499        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
2500        let vector_index = embedding_override
2501            .float_list
2502            .as_ref()
2503            .unwrap()
2504            .vector_index
2505            .as_ref()
2506            .unwrap();
2507
2508        assert!(vector_index.enabled);
2509        assert_eq!(vector_index.config.space, Some(Space::L2));
2510        assert_eq!(
2511            vector_index.config.embedding_function,
2512            Some(EmbeddingFunctionConfiguration::Legacy)
2513        );
2514        assert_eq!(
2515            vector_index.config.source_key,
2516            Some(DOCUMENT_KEY.to_string())
2517        );
2518
2519        let hnsw_config = vector_index.config.hnsw.as_ref().unwrap();
2520        assert_eq!(hnsw_config.ef_construction, Some(300));
2521        assert_eq!(hnsw_config.max_neighbors, Some(32));
2522        assert_eq!(hnsw_config.ef_search, Some(50));
2523        assert_eq!(hnsw_config.num_threads, Some(8));
2524        assert_eq!(hnsw_config.batch_size, Some(200));
2525        assert_eq!(hnsw_config.sync_threshold, Some(2000));
2526        assert_eq!(hnsw_config.resize_factor, Some(1.5));
2527
2528        assert!(vector_index.config.spann.is_none());
2529    }
2530
2531    #[test]
2532    fn test_reconcile_with_collection_config_spann_override() {
2533        // Test that non-default SPANN collection config overrides default schema
2534        let schema = Schema::new_default(KnnIndex::Spann); // Use actual default schema
2535
2536        let collection_config = InternalCollectionConfiguration {
2537            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
2538                search_nprobe: 20,
2539                search_rng_factor: 3.0,
2540                search_rng_epsilon: 0.2,
2541                nreplica_count: 5,
2542                write_rng_factor: 2.0,
2543                write_rng_epsilon: 0.1,
2544                split_threshold: 2000,
2545                num_samples_kmeans: 200,
2546                initial_lambda: 0.8,
2547                reassign_neighbor_count: 100,
2548                merge_threshold: 800,
2549                num_centers_to_merge_to: 20,
2550                write_nprobe: 10,
2551                ef_construction: 400,
2552                ef_search: 60,
2553                max_neighbors: 24,
2554                space: Space::Cosine,
2555            }),
2556            embedding_function: None,
2557        };
2558
2559        let result = Schema::reconcile_with_collection_config(schema, collection_config).unwrap();
2560
2561        // Check that #embedding key override was created with the collection config settings
2562        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
2563        let vector_index = embedding_override
2564            .float_list
2565            .as_ref()
2566            .unwrap()
2567            .vector_index
2568            .as_ref()
2569            .unwrap();
2570
2571        assert!(vector_index.enabled);
2572        assert_eq!(vector_index.config.space, Some(Space::Cosine));
2573        assert_eq!(vector_index.config.embedding_function, None);
2574        assert_eq!(
2575            vector_index.config.source_key,
2576            Some(DOCUMENT_KEY.to_string())
2577        );
2578
2579        assert!(vector_index.config.hnsw.is_none());
2580
2581        let spann_config = vector_index.config.spann.as_ref().unwrap();
2582        assert_eq!(spann_config.search_nprobe, Some(20));
2583        assert_eq!(spann_config.search_rng_factor, Some(3.0));
2584        assert_eq!(spann_config.search_rng_epsilon, Some(0.2));
2585        assert_eq!(spann_config.nreplica_count, Some(5));
2586        assert_eq!(spann_config.write_rng_factor, Some(2.0));
2587        assert_eq!(spann_config.write_rng_epsilon, Some(0.1));
2588        assert_eq!(spann_config.split_threshold, Some(2000));
2589        assert_eq!(spann_config.num_samples_kmeans, Some(200));
2590        assert_eq!(spann_config.initial_lambda, Some(0.8));
2591        assert_eq!(spann_config.reassign_neighbor_count, Some(100));
2592        assert_eq!(spann_config.merge_threshold, Some(800));
2593        assert_eq!(spann_config.num_centers_to_merge_to, Some(20));
2594        assert_eq!(spann_config.write_nprobe, Some(10));
2595        assert_eq!(spann_config.ef_construction, Some(400));
2596        assert_eq!(spann_config.ef_search, Some(60));
2597        assert_eq!(spann_config.max_neighbors, Some(24));
2598    }
2599
2600    #[test]
2601    fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
2602        // Test that collection config updates BOTH defaults.float_list.vector_index
2603        // AND keys["embedding"].float_list.vector_index
2604        let schema = Schema::new_default(KnnIndex::Hnsw);
2605
2606        let collection_config = InternalCollectionConfiguration {
2607            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
2608                ef_construction: 300,
2609                max_neighbors: 32,
2610                ef_search: 50,
2611                num_threads: 8,
2612                batch_size: 200,
2613                sync_threshold: 2000,
2614                resize_factor: 1.5,
2615                space: Space::L2,
2616            }),
2617            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
2618        };
2619
2620        let result = Schema::reconcile_with_collection_config(schema, collection_config).unwrap();
2621
2622        // Check that defaults.float_list.vector_index was updated
2623        let defaults_vector_index = result
2624            .defaults
2625            .float_list
2626            .as_ref()
2627            .unwrap()
2628            .vector_index
2629            .as_ref()
2630            .unwrap();
2631
2632        // Should be disabled in defaults (template for new keys)
2633        assert!(!defaults_vector_index.enabled);
2634        // But config should be updated
2635        assert_eq!(defaults_vector_index.config.space, Some(Space::L2));
2636        assert_eq!(
2637            defaults_vector_index.config.embedding_function,
2638            Some(EmbeddingFunctionConfiguration::Legacy)
2639        );
2640        assert_eq!(
2641            defaults_vector_index.config.source_key,
2642            Some(DOCUMENT_KEY.to_string())
2643        );
2644        let defaults_hnsw = defaults_vector_index.config.hnsw.as_ref().unwrap();
2645        assert_eq!(defaults_hnsw.ef_construction, Some(300));
2646        assert_eq!(defaults_hnsw.max_neighbors, Some(32));
2647
2648        // Check that #embedding key override was also updated
2649        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
2650        let embedding_vector_index = embedding_override
2651            .float_list
2652            .as_ref()
2653            .unwrap()
2654            .vector_index
2655            .as_ref()
2656            .unwrap();
2657
2658        // Should be enabled on #embedding
2659        assert!(embedding_vector_index.enabled);
2660        // Config should match defaults
2661        assert_eq!(embedding_vector_index.config.space, Some(Space::L2));
2662        assert_eq!(
2663            embedding_vector_index.config.embedding_function,
2664            Some(EmbeddingFunctionConfiguration::Legacy)
2665        );
2666        assert_eq!(
2667            embedding_vector_index.config.source_key,
2668            Some(DOCUMENT_KEY.to_string())
2669        );
2670        let embedding_hnsw = embedding_vector_index.config.hnsw.as_ref().unwrap();
2671        assert_eq!(embedding_hnsw.ef_construction, Some(300));
2672        assert_eq!(embedding_hnsw.max_neighbors, Some(32));
2673    }
2674
2675    #[test]
2676    fn test_is_schema_default() {
2677        // Test that actual default schemas are correctly identified
2678        let default_hnsw_schema = Schema::new_default(KnnIndex::Hnsw);
2679        assert!(Schema::is_schema_default(&default_hnsw_schema));
2680
2681        let default_spann_schema = Schema::new_default(KnnIndex::Spann);
2682        assert!(Schema::is_schema_default(&default_spann_schema));
2683
2684        // Test that an empty schema is NOT considered default (since it doesn't match new_default structure)
2685        let empty_schema = Schema {
2686            defaults: ValueTypes::default(),
2687            keys: HashMap::new(),
2688        };
2689        assert!(!Schema::is_schema_default(&empty_schema));
2690
2691        // Test that a modified default schema is not considered default
2692        let mut modified_schema = Schema::new_default(KnnIndex::Hnsw);
2693        // Make a clear modification - change the string inverted index enabled state
2694        if let Some(ref mut string_type) = modified_schema.defaults.string {
2695            if let Some(ref mut string_inverted) = string_type.string_inverted_index {
2696                string_inverted.enabled = false; // Default is true, so this should make it non-default
2697            }
2698        }
2699        assert!(!Schema::is_schema_default(&modified_schema));
2700
2701        // Test that schema with additional key overrides is not default
2702        let mut schema_with_extra_overrides = Schema::new_default(KnnIndex::Hnsw);
2703        schema_with_extra_overrides
2704            .keys
2705            .insert("custom_key".to_string(), ValueTypes::default());
2706        assert!(!Schema::is_schema_default(&schema_with_extra_overrides));
2707    }
2708
2709    #[test]
2710    fn test_add_merges_keys_by_value_type() {
2711        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
2712        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
2713
2714        let string_override = ValueTypes {
2715            string: Some(StringValueType {
2716                string_inverted_index: Some(StringInvertedIndexType {
2717                    enabled: true,
2718                    config: StringInvertedIndexConfig {},
2719                }),
2720                fts_index: None,
2721            }),
2722            ..Default::default()
2723        };
2724        schema_a
2725            .keys
2726            .insert("custom_field".to_string(), string_override);
2727
2728        let float_override = ValueTypes {
2729            float: Some(FloatValueType {
2730                float_inverted_index: Some(FloatInvertedIndexType {
2731                    enabled: true,
2732                    config: FloatInvertedIndexConfig {},
2733                }),
2734            }),
2735            ..Default::default()
2736        };
2737        schema_b
2738            .keys
2739            .insert("custom_field".to_string(), float_override);
2740
2741        let merged = schema_a.merge(&schema_b).unwrap();
2742        let merged_override = merged.keys.get("custom_field").unwrap();
2743
2744        assert!(merged_override.string.is_some());
2745        assert!(merged_override.float.is_some());
2746        assert!(
2747            merged_override
2748                .string
2749                .as_ref()
2750                .unwrap()
2751                .string_inverted_index
2752                .as_ref()
2753                .unwrap()
2754                .enabled
2755        );
2756        assert!(
2757            merged_override
2758                .float
2759                .as_ref()
2760                .unwrap()
2761                .float_inverted_index
2762                .as_ref()
2763                .unwrap()
2764                .enabled
2765        );
2766    }
2767
2768    #[test]
2769    fn test_add_rejects_different_defaults() {
2770        let schema_a = Schema::new_default(KnnIndex::Hnsw);
2771        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
2772
2773        if let Some(string_type) = schema_b.defaults.string.as_mut() {
2774            if let Some(string_index) = string_type.string_inverted_index.as_mut() {
2775                string_index.enabled = false;
2776            }
2777        }
2778
2779        let err = schema_a.merge(&schema_b).unwrap_err();
2780        match err {
2781            SchemaError::InvalidSchema { reason } => {
2782                assert_eq!(reason, "Cannot merge schemas with differing defaults")
2783            }
2784            _ => panic!("Expected InvalidSchema error"),
2785        }
2786    }
2787
2788    #[test]
2789    fn test_add_detects_conflicting_value_type_configuration() {
2790        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
2791        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
2792
2793        let string_override_enabled = ValueTypes {
2794            string: Some(StringValueType {
2795                string_inverted_index: Some(StringInvertedIndexType {
2796                    enabled: true,
2797                    config: StringInvertedIndexConfig {},
2798                }),
2799                fts_index: None,
2800            }),
2801            ..Default::default()
2802        };
2803        schema_a
2804            .keys
2805            .insert("custom_field".to_string(), string_override_enabled);
2806
2807        let string_override_disabled = ValueTypes {
2808            string: Some(StringValueType {
2809                string_inverted_index: Some(StringInvertedIndexType {
2810                    enabled: false,
2811                    config: StringInvertedIndexConfig {},
2812                }),
2813                fts_index: None,
2814            }),
2815            ..Default::default()
2816        };
2817        schema_b
2818            .keys
2819            .insert("custom_field".to_string(), string_override_disabled);
2820
2821        let err = schema_a.merge(&schema_b).unwrap_err();
2822        match err {
2823            SchemaError::InvalidSchema { reason } => {
2824                assert!(reason.contains("Conflicting configuration"));
2825            }
2826            _ => panic!("Expected InvalidSchema error"),
2827        }
2828    }
2829
2830    // TODO(Sanket): Remove this test once deployed
2831    #[test]
2832    fn test_backward_compatibility_aliases() {
2833        // Test that old format with # and $ prefixes and key_overrides can be deserialized
2834        let old_format_json = r###"{
2835            "defaults": {
2836                "#string": {
2837                    "$fts_index": {
2838                        "enabled": true,
2839                        "config": {}
2840                    }
2841                },
2842                "#int": {
2843                    "$int_inverted_index": {
2844                        "enabled": true,
2845                        "config": {}
2846                    }
2847                },
2848                "#float_list": {
2849                    "$vector_index": {
2850                        "enabled": true,
2851                        "config": {
2852                            "spann": {
2853                                "search_nprobe": 10
2854                            }
2855                        }
2856                    }
2857                }
2858            },
2859            "key_overrides": {
2860                "#document": {
2861                    "#string": {
2862                        "$fts_index": {
2863                            "enabled": false,
2864                            "config": {}
2865                        }
2866                    }
2867                }
2868            }
2869        }"###;
2870
2871        let schema_from_old: Schema = serde_json::from_str(old_format_json).unwrap();
2872
2873        // Test that new format without prefixes and keys can be deserialized
2874        let new_format_json = r###"{
2875            "defaults": {
2876                "string": {
2877                    "fts_index": {
2878                        "enabled": true,
2879                        "config": {}
2880                    }
2881                },
2882                "int": {
2883                    "int_inverted_index": {
2884                        "enabled": true,
2885                        "config": {}
2886                    }
2887                },
2888                "float_list": {
2889                    "vector_index": {
2890                        "enabled": true,
2891                        "config": {
2892                            "spann": {
2893                                "search_nprobe": 10
2894                            }
2895                        }
2896                    }
2897                }
2898            },
2899            "keys": {
2900                "#document": {
2901                    "string": {
2902                        "fts_index": {
2903                            "enabled": false,
2904                            "config": {}
2905                        }
2906                    }
2907                }
2908            }
2909        }"###;
2910
2911        let schema_from_new: Schema = serde_json::from_str(new_format_json).unwrap();
2912
2913        // Both should deserialize to the same structure
2914        assert_eq!(schema_from_old, schema_from_new);
2915
2916        // Verify the deserialized content is correct
2917        assert!(schema_from_old.defaults.string.is_some());
2918        assert!(schema_from_old
2919            .defaults
2920            .string
2921            .as_ref()
2922            .unwrap()
2923            .fts_index
2924            .is_some());
2925        assert!(
2926            schema_from_old
2927                .defaults
2928                .string
2929                .as_ref()
2930                .unwrap()
2931                .fts_index
2932                .as_ref()
2933                .unwrap()
2934                .enabled
2935        );
2936
2937        assert!(schema_from_old.defaults.int.is_some());
2938        assert!(schema_from_old
2939            .defaults
2940            .int
2941            .as_ref()
2942            .unwrap()
2943            .int_inverted_index
2944            .is_some());
2945
2946        assert!(schema_from_old.defaults.float_list.is_some());
2947        assert!(schema_from_old
2948            .defaults
2949            .float_list
2950            .as_ref()
2951            .unwrap()
2952            .vector_index
2953            .is_some());
2954
2955        assert!(schema_from_old.keys.contains_key(DOCUMENT_KEY));
2956        let doc_override = schema_from_old.keys.get(DOCUMENT_KEY).unwrap();
2957        assert!(doc_override.string.is_some());
2958        assert!(
2959            !doc_override
2960                .string
2961                .as_ref()
2962                .unwrap()
2963                .fts_index
2964                .as_ref()
2965                .unwrap()
2966                .enabled
2967        );
2968
2969        // Test that serialization always outputs the new format (without prefixes)
2970        let serialized = serde_json::to_string(&schema_from_old).unwrap();
2971
2972        // Should contain new format keys
2973        assert!(serialized.contains(r#""keys":"#));
2974        assert!(serialized.contains(r#""string":"#));
2975        assert!(serialized.contains(r#""fts_index":"#));
2976        assert!(serialized.contains(r#""int_inverted_index":"#));
2977        assert!(serialized.contains(r#""vector_index":"#));
2978
2979        // Should NOT contain old format keys
2980        assert!(!serialized.contains(r#""key_overrides":"#));
2981        assert!(!serialized.contains(r###""#string":"###));
2982        assert!(!serialized.contains(r###""$fts_index":"###));
2983        assert!(!serialized.contains(r###""$int_inverted_index":"###));
2984        assert!(!serialized.contains(r###""$vector_index":"###));
2985    }
2986
2987    #[test]
2988    fn test_hnsw_index_config_validation() {
2989        use validator::Validate;
2990
2991        // Valid configuration - should pass
2992        let valid_config = HnswIndexConfig {
2993            batch_size: Some(10),
2994            sync_threshold: Some(100),
2995            ef_construction: Some(100),
2996            max_neighbors: Some(16),
2997            ..Default::default()
2998        };
2999        assert!(valid_config.validate().is_ok());
3000
3001        // Invalid: batch_size too small (min 2)
3002        let invalid_batch_size = HnswIndexConfig {
3003            batch_size: Some(1),
3004            ..Default::default()
3005        };
3006        assert!(invalid_batch_size.validate().is_err());
3007
3008        // Invalid: sync_threshold too small (min 2)
3009        let invalid_sync_threshold = HnswIndexConfig {
3010            sync_threshold: Some(1),
3011            ..Default::default()
3012        };
3013        assert!(invalid_sync_threshold.validate().is_err());
3014
3015        // Valid: boundary values (exactly 2) should pass
3016        let boundary_config = HnswIndexConfig {
3017            batch_size: Some(2),
3018            sync_threshold: Some(2),
3019            ..Default::default()
3020        };
3021        assert!(boundary_config.validate().is_ok());
3022
3023        // Valid: None values should pass validation
3024        let all_none_config = HnswIndexConfig {
3025            ..Default::default()
3026        };
3027        assert!(all_none_config.validate().is_ok());
3028
3029        // Valid: fields without validation can be any value
3030        let other_fields_config = HnswIndexConfig {
3031            ef_construction: Some(1),
3032            max_neighbors: Some(1),
3033            ef_search: Some(1),
3034            num_threads: Some(1),
3035            resize_factor: Some(0.1),
3036            ..Default::default()
3037        };
3038        assert!(other_fields_config.validate().is_ok());
3039    }
3040
3041    #[test]
3042    fn test_spann_index_config_validation() {
3043        use validator::Validate;
3044
3045        // Valid configuration - should pass
3046        let valid_config = SpannIndexConfig {
3047            write_nprobe: Some(32),
3048            nreplica_count: Some(4),
3049            split_threshold: Some(100),
3050            merge_threshold: Some(50),
3051            reassign_neighbor_count: Some(32),
3052            num_centers_to_merge_to: Some(4),
3053            ef_construction: Some(100),
3054            ef_search: Some(100),
3055            max_neighbors: Some(32),
3056            search_rng_factor: Some(1.0),
3057            write_rng_factor: Some(1.0),
3058            search_rng_epsilon: Some(7.5),
3059            write_rng_epsilon: Some(7.5),
3060            ..Default::default()
3061        };
3062        assert!(valid_config.validate().is_ok());
3063
3064        // Invalid: write_nprobe too large (max 64)
3065        let invalid_write_nprobe = SpannIndexConfig {
3066            write_nprobe: Some(200),
3067            ..Default::default()
3068        };
3069        assert!(invalid_write_nprobe.validate().is_err());
3070
3071        // Invalid: split_threshold too small (min 50)
3072        let invalid_split_threshold = SpannIndexConfig {
3073            split_threshold: Some(10),
3074            ..Default::default()
3075        };
3076        assert!(invalid_split_threshold.validate().is_err());
3077
3078        // Invalid: split_threshold too large (max 200)
3079        let invalid_split_threshold_high = SpannIndexConfig {
3080            split_threshold: Some(250),
3081            ..Default::default()
3082        };
3083        assert!(invalid_split_threshold_high.validate().is_err());
3084
3085        // Invalid: nreplica_count too large (max 8)
3086        let invalid_nreplica = SpannIndexConfig {
3087            nreplica_count: Some(10),
3088            ..Default::default()
3089        };
3090        assert!(invalid_nreplica.validate().is_err());
3091
3092        // Invalid: reassign_neighbor_count too large (max 64)
3093        let invalid_reassign = SpannIndexConfig {
3094            reassign_neighbor_count: Some(100),
3095            ..Default::default()
3096        };
3097        assert!(invalid_reassign.validate().is_err());
3098
3099        // Invalid: merge_threshold out of range (min 25, max 100)
3100        let invalid_merge_threshold_low = SpannIndexConfig {
3101            merge_threshold: Some(5),
3102            ..Default::default()
3103        };
3104        assert!(invalid_merge_threshold_low.validate().is_err());
3105
3106        let invalid_merge_threshold_high = SpannIndexConfig {
3107            merge_threshold: Some(150),
3108            ..Default::default()
3109        };
3110        assert!(invalid_merge_threshold_high.validate().is_err());
3111
3112        // Invalid: num_centers_to_merge_to too large (max 8)
3113        let invalid_num_centers = SpannIndexConfig {
3114            num_centers_to_merge_to: Some(10),
3115            ..Default::default()
3116        };
3117        assert!(invalid_num_centers.validate().is_err());
3118
3119        // Invalid: ef_construction too large (max 200)
3120        let invalid_ef_construction = SpannIndexConfig {
3121            ef_construction: Some(300),
3122            ..Default::default()
3123        };
3124        assert!(invalid_ef_construction.validate().is_err());
3125
3126        // Invalid: ef_search too large (max 200)
3127        let invalid_ef_search = SpannIndexConfig {
3128            ef_search: Some(300),
3129            ..Default::default()
3130        };
3131        assert!(invalid_ef_search.validate().is_err());
3132
3133        // Invalid: max_neighbors too large (max 64)
3134        let invalid_max_neighbors = SpannIndexConfig {
3135            max_neighbors: Some(100),
3136            ..Default::default()
3137        };
3138        assert!(invalid_max_neighbors.validate().is_err());
3139
3140        // Invalid: search_nprobe too large (max 128)
3141        let invalid_search_nprobe = SpannIndexConfig {
3142            search_nprobe: Some(200),
3143            ..Default::default()
3144        };
3145        assert!(invalid_search_nprobe.validate().is_err());
3146
3147        // Invalid: search_rng_factor not exactly 1.0 (min 1.0, max 1.0)
3148        let invalid_search_rng_factor_low = SpannIndexConfig {
3149            search_rng_factor: Some(0.9),
3150            ..Default::default()
3151        };
3152        assert!(invalid_search_rng_factor_low.validate().is_err());
3153
3154        let invalid_search_rng_factor_high = SpannIndexConfig {
3155            search_rng_factor: Some(1.1),
3156            ..Default::default()
3157        };
3158        assert!(invalid_search_rng_factor_high.validate().is_err());
3159
3160        // Valid: search_rng_factor exactly 1.0
3161        let valid_search_rng_factor = SpannIndexConfig {
3162            search_rng_factor: Some(1.0),
3163            ..Default::default()
3164        };
3165        assert!(valid_search_rng_factor.validate().is_ok());
3166
3167        // Invalid: search_rng_epsilon out of range (min 5.0, max 10.0)
3168        let invalid_search_rng_epsilon_low = SpannIndexConfig {
3169            search_rng_epsilon: Some(4.0),
3170            ..Default::default()
3171        };
3172        assert!(invalid_search_rng_epsilon_low.validate().is_err());
3173
3174        let invalid_search_rng_epsilon_high = SpannIndexConfig {
3175            search_rng_epsilon: Some(11.0),
3176            ..Default::default()
3177        };
3178        assert!(invalid_search_rng_epsilon_high.validate().is_err());
3179
3180        // Valid: search_rng_epsilon within range
3181        let valid_search_rng_epsilon = SpannIndexConfig {
3182            search_rng_epsilon: Some(7.5),
3183            ..Default::default()
3184        };
3185        assert!(valid_search_rng_epsilon.validate().is_ok());
3186
3187        // Invalid: write_rng_factor not exactly 1.0 (min 1.0, max 1.0)
3188        let invalid_write_rng_factor_low = SpannIndexConfig {
3189            write_rng_factor: Some(0.9),
3190            ..Default::default()
3191        };
3192        assert!(invalid_write_rng_factor_low.validate().is_err());
3193
3194        let invalid_write_rng_factor_high = SpannIndexConfig {
3195            write_rng_factor: Some(1.1),
3196            ..Default::default()
3197        };
3198        assert!(invalid_write_rng_factor_high.validate().is_err());
3199
3200        // Valid: write_rng_factor exactly 1.0
3201        let valid_write_rng_factor = SpannIndexConfig {
3202            write_rng_factor: Some(1.0),
3203            ..Default::default()
3204        };
3205        assert!(valid_write_rng_factor.validate().is_ok());
3206
3207        // Invalid: write_rng_epsilon out of range (min 5.0, max 10.0)
3208        let invalid_write_rng_epsilon_low = SpannIndexConfig {
3209            write_rng_epsilon: Some(4.0),
3210            ..Default::default()
3211        };
3212        assert!(invalid_write_rng_epsilon_low.validate().is_err());
3213
3214        let invalid_write_rng_epsilon_high = SpannIndexConfig {
3215            write_rng_epsilon: Some(11.0),
3216            ..Default::default()
3217        };
3218        assert!(invalid_write_rng_epsilon_high.validate().is_err());
3219
3220        // Valid: write_rng_epsilon within range
3221        let valid_write_rng_epsilon = SpannIndexConfig {
3222            write_rng_epsilon: Some(7.5),
3223            ..Default::default()
3224        };
3225        assert!(valid_write_rng_epsilon.validate().is_ok());
3226
3227        // Invalid: num_samples_kmeans too large (max 1000)
3228        let invalid_num_samples_kmeans = SpannIndexConfig {
3229            num_samples_kmeans: Some(1500),
3230            ..Default::default()
3231        };
3232        assert!(invalid_num_samples_kmeans.validate().is_err());
3233
3234        // Valid: num_samples_kmeans within range
3235        let valid_num_samples_kmeans = SpannIndexConfig {
3236            num_samples_kmeans: Some(500),
3237            ..Default::default()
3238        };
3239        assert!(valid_num_samples_kmeans.validate().is_ok());
3240
3241        // Invalid: initial_lambda not exactly 100.0 (min 100.0, max 100.0)
3242        let invalid_initial_lambda_high = SpannIndexConfig {
3243            initial_lambda: Some(150.0),
3244            ..Default::default()
3245        };
3246        assert!(invalid_initial_lambda_high.validate().is_err());
3247
3248        let invalid_initial_lambda_low = SpannIndexConfig {
3249            initial_lambda: Some(50.0),
3250            ..Default::default()
3251        };
3252        assert!(invalid_initial_lambda_low.validate().is_err());
3253
3254        // Valid: initial_lambda exactly 100.0
3255        let valid_initial_lambda = SpannIndexConfig {
3256            initial_lambda: Some(100.0),
3257            ..Default::default()
3258        };
3259        assert!(valid_initial_lambda.validate().is_ok());
3260
3261        // Valid: None values should pass validation
3262        let all_none_config = SpannIndexConfig {
3263            ..Default::default()
3264        };
3265        assert!(all_none_config.validate().is_ok());
3266    }
3267}