1use chroma_error::{ChromaError, ErrorCodes};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::sync::{Arc, LazyLock};
6use thiserror::Error;
7use validator::Validate;
8
9use crate::chroma_proto;
10use crate::collection_configuration::{
11 EmbeddingFunctionConfiguration, InternalCollectionConfiguration,
12 UpdateVectorIndexConfiguration, VectorIndexConfiguration,
13};
14use crate::hnsw_configuration::Space;
15use crate::metadata::{MetadataComparison, MetadataValueType, Where};
16use crate::operator::QueryVector;
17use crate::{
18 default_batch_size, default_center_drift_threshold, default_construction_ef,
19 default_construction_ef_spann, default_initial_lambda, default_m, default_m_spann,
20 default_merge_threshold, default_nreplica_count, default_num_centers_to_merge_to,
21 default_num_samples_kmeans, default_num_threads, default_quantize,
22 default_reassign_neighbor_count, default_resize_factor, default_search_ef,
23 default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
24 default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
25 default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor, ConversionError,
26 HnswParametersFromSegmentError, InternalHnswConfiguration, InternalSpannConfiguration,
27 InternalUpdateCollectionConfiguration, KnnIndex, Segment, UpdateCollectionConfiguration,
28 CHROMA_KEY,
29};
30
31impl ChromaError for SchemaError {
32 fn code(&self) -> ErrorCodes {
33 match self {
34 SchemaError::MissingIndexConfiguration { .. } => ErrorCodes::Internal,
37 SchemaError::InvalidSchema { .. } => ErrorCodes::Internal,
38 SchemaError::DefaultsMismatch => ErrorCodes::Internal,
41 SchemaError::ConfigurationConflict { .. } => ErrorCodes::Internal,
42 SchemaError::InvalidConfigurationUpdate { .. } => ErrorCodes::Internal,
43
44 SchemaError::InvalidUserInput { .. } => ErrorCodes::InvalidArgument,
47 SchemaError::ConfigAndSchemaConflict => ErrorCodes::InvalidArgument,
48 SchemaError::InvalidHnswConfig(_) => ErrorCodes::InvalidArgument,
49 SchemaError::InvalidSpannConfig(_) => ErrorCodes::InvalidArgument,
50 SchemaError::Builder(e) => e.code(),
51 }
52 }
53}
54
55#[derive(Debug, Error)]
56pub enum SchemaError {
57 #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
58 MissingIndexConfiguration { key: String, value_type: String },
59 #[error("Schema reconciliation failed: {reason}")]
60 InvalidSchema { reason: String },
61 #[error("Cannot set both collection config and schema simultaneously")]
62 ConfigAndSchemaConflict,
63 #[error("Cannot merge schemas with differing defaults")]
64 DefaultsMismatch,
65 #[error("Conflicting configuration for {context}")]
66 ConfigurationConflict { context: String },
67 #[error("Invalid HNSW configuration: {0}")]
68 InvalidHnswConfig(validator::ValidationErrors),
69 #[error("Invalid SPANN configuration: {0}")]
70 InvalidSpannConfig(validator::ValidationErrors),
71 #[error("Invalid schema input: {reason}")]
72 InvalidUserInput { reason: String },
73 #[error("Invalid configuration update: {message}")]
74 InvalidConfigurationUpdate { message: String },
75 #[error(transparent)]
76 Builder(#[from] SchemaBuilderError),
77}
78
79#[derive(Debug, Error)]
80pub enum SchemaBuilderError {
81 #[error("Vector index must be configured globally using create_index(None, config), not on specific key '{key}'")]
82 VectorIndexMustBeGlobal { key: String },
83 #[error("FTS index must be configured globally using create_index(None, config), not on specific key '{key}'")]
84 FtsIndexMustBeGlobal { key: String },
85 #[error("Cannot modify special key '{key}' - it is managed automatically by the system. To customize vector search, modify the global vector config instead.")]
86 SpecialKeyModificationNotAllowed { key: String },
87 #[error("Sparse vector index requires a specific key. Use create_index(Some(\"key_name\"), config) instead of create_index(None, config)")]
88 SparseVectorRequiresKey,
89 #[error("Only one sparse vector index allowed per collection. Key '{existing_key}' already has a sparse vector index. Remove it first or use that key.")]
90 MultipleSparseVectorIndexes { existing_key: String },
91 #[error("Vector index deletion not supported. The vector index is always enabled on #embedding. To disable vector search, disable the collection instead.")]
92 VectorIndexDeletionNotSupported,
93 #[error("FTS index deletion not supported. The FTS index is always enabled on #document. To disable full-text search, use a different collection without FTS.")]
94 FtsIndexDeletionNotSupported,
95 #[error("Sparse vector index deletion not supported yet. Sparse vector indexes cannot be removed once created.")]
96 SparseVectorIndexDeletionNotSupported,
97}
98
99#[derive(Debug, Error)]
100pub enum FilterValidationError {
101 #[error(
102 "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
103 )]
104 IndexingDisabled {
105 key: String,
106 value_type: MetadataValueType,
107 },
108 #[error(transparent)]
109 Schema(#[from] SchemaError),
110}
111
112impl ChromaError for SchemaBuilderError {
113 fn code(&self) -> ErrorCodes {
114 ErrorCodes::InvalidArgument
115 }
116}
117
118impl ChromaError for FilterValidationError {
119 fn code(&self) -> ErrorCodes {
120 match self {
121 FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
122 FilterValidationError::Schema(_) => ErrorCodes::Internal,
123 }
124 }
125}
126
// Value-type names. Each string equals the serialized field name of the corresponding
// `ValueTypes` field.
pub const STRING_VALUE_NAME: &str = "string";
pub const INT_VALUE_NAME: &str = "int";
pub const BOOL_VALUE_NAME: &str = "bool";
pub const FLOAT_VALUE_NAME: &str = "float";
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// Index names. Each string equals the serialized field name of the corresponding index
// inside its value-type struct.
pub const FTS_INDEX_NAME: &str = "fts_index";
pub const VECTOR_INDEX_NAME: &str = "vector_index";
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

/// Key in `Schema::keys` under which the document (full-text) overrides are stored.
pub const DOCUMENT_KEY: &str = "#document";
/// Key in `Schema::keys` under which the embedding (vector index) overrides are stored.
pub const EMBEDDING_KEY: &str = "#embedding";
152
153static CMEK_GCP_RE: LazyLock<Regex> = LazyLock::new(|| {
155 Regex::new(r"^projects/.+/locations/.+/keyRings/.+/cryptoKeys/.+$")
156 .expect("The CMEK pattern for GCP should be valid")
157});
158
159#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
164#[serde(rename_all = "snake_case")]
165pub enum Cmek {
166 Gcp(Arc<String>),
170}
171
172impl Cmek {
173 pub fn gcp(resource: String) -> Self {
183 Cmek::Gcp(Arc::new(resource))
184 }
185
186 pub fn validate_pattern(&self) -> bool {
192 match self {
193 Cmek::Gcp(resource) => CMEK_GCP_RE.is_match(resource),
194 }
195 }
196}
197
198impl TryFrom<chroma_proto::Cmek> for Cmek {
199 type Error = ConversionError;
200
201 fn try_from(proto: chroma_proto::Cmek) -> Result<Self, Self::Error> {
202 match proto.provider {
203 Some(chroma_proto::cmek::Provider::Gcp(resource)) => Ok(Cmek::gcp(resource)),
204 None => Err(ConversionError::DecodeError),
205 }
206 }
207}
208
209impl From<Cmek> for chroma_proto::Cmek {
210 fn from(cmek: Cmek) -> Self {
211 match cmek {
212 Cmek::Gcp(resource) => chroma_proto::Cmek {
213 provider: Some(chroma_proto::cmek::Provider::Gcp((*resource).clone())),
214 },
215 }
216 }
217}
218
219#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
228#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
229pub struct Schema {
230 pub defaults: ValueTypes,
232 #[serde(rename = "keys", alias = "key_overrides")]
235 pub keys: HashMap<String, ValueTypes>,
236 #[serde(skip_serializing_if = "Option::is_none")]
238 #[cfg_attr(feature = "utoipa", schema(value_type = Option<Object>))]
239 pub cmek: Option<Cmek>,
240 #[serde(skip_serializing_if = "Option::is_none")]
242 pub source_attached_function_id: Option<String>,
243}
244
245impl Schema {
246 pub fn update(&mut self, configuration: &InternalUpdateCollectionConfiguration) {
247 if let Some(vector_update) = &configuration.vector_index {
248 if let Some(default_vector_index) = self.defaults_vector_index_mut() {
249 Self::apply_vector_index_update(default_vector_index, vector_update);
250 }
251 if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
252 Self::apply_vector_index_update(embedding_vector_index, vector_update);
253 }
254 }
255
256 if let Some(embedding_function) = configuration.embedding_function.as_ref() {
257 if let Some(default_vector_index) = self.defaults_vector_index_mut() {
258 default_vector_index.config.embedding_function = Some(embedding_function.clone());
259 }
260 if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
261 embedding_vector_index.config.embedding_function = Some(embedding_function.clone());
262 }
263 }
264 }
265
266 pub fn apply_update_configuration(
276 &mut self,
277 config: &UpdateCollectionConfiguration,
278 ) -> Result<(), SchemaError> {
279 if config.hnsw.is_some() {
281 return Err(SchemaError::InvalidConfigurationUpdate {
282 message: "HNSW configuration updates are not supported".to_string(),
283 });
284 }
285
286 if let Some(ref spann_update) = config.spann {
288 let defaults_spann = self
289 .defaults_vector_index_mut()
290 .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
291 message: "schema missing defaults.float_list.vector_index".to_string(),
292 })?
293 .config
294 .spann
295 .as_mut()
296 .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
297 message: "schema missing defaults spann config".to_string(),
298 })?;
299
300 if let Some(search_nprobe) = spann_update.search_nprobe {
301 defaults_spann.search_nprobe = Some(search_nprobe);
302 }
303 if let Some(ef_search) = spann_update.ef_search {
304 defaults_spann.ef_search = Some(ef_search);
305 }
306
307 let embedding_spann = self
308 .embedding_vector_index_mut()
309 .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
310 message: "schema missing keys[#embedding].float_list.vector_index".to_string(),
311 })?
312 .config
313 .spann
314 .as_mut()
315 .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
316 message: "schema missing #embedding spann config".to_string(),
317 })?;
318
319 if let Some(search_nprobe) = spann_update.search_nprobe {
320 embedding_spann.search_nprobe = Some(search_nprobe);
321 }
322 if let Some(ef_search) = spann_update.ef_search {
323 embedding_spann.ef_search = Some(ef_search);
324 }
325 }
326
327 if let Some(ref ef) = config.embedding_function {
329 self.defaults_vector_index_mut()
330 .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
331 message: "schema missing defaults.float_list.vector_index".to_string(),
332 })?
333 .config
334 .embedding_function = Some(ef.clone());
335
336 self.embedding_vector_index_mut()
337 .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
338 message: "schema missing keys[#embedding].float_list.vector_index".to_string(),
339 })?
340 .config
341 .embedding_function = Some(ef.clone());
342 }
343
344 Ok(())
345 }
346
347 fn defaults_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
348 self.defaults
349 .float_list
350 .as_mut()
351 .and_then(|float_list| float_list.vector_index.as_mut())
352 }
353
354 fn embedding_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
355 self.keys
356 .get_mut(EMBEDDING_KEY)
357 .and_then(|value_types| value_types.float_list.as_mut())
358 .and_then(|float_list| float_list.vector_index.as_mut())
359 }
360
361 fn apply_vector_index_update(
362 vector_index: &mut VectorIndexType,
363 update: &UpdateVectorIndexConfiguration,
364 ) {
365 match update {
366 UpdateVectorIndexConfiguration::Hnsw(Some(hnsw_update)) => {
367 if let Some(hnsw_config) = vector_index.config.hnsw.as_mut() {
368 if let Some(ef_search) = hnsw_update.ef_search {
369 hnsw_config.ef_search = Some(ef_search);
370 }
371 if let Some(max_neighbors) = hnsw_update.max_neighbors {
372 hnsw_config.max_neighbors = Some(max_neighbors);
373 }
374 if let Some(num_threads) = hnsw_update.num_threads {
375 hnsw_config.num_threads = Some(num_threads);
376 }
377 if let Some(resize_factor) = hnsw_update.resize_factor {
378 hnsw_config.resize_factor = Some(resize_factor);
379 }
380 if let Some(sync_threshold) = hnsw_update.sync_threshold {
381 hnsw_config.sync_threshold = Some(sync_threshold);
382 }
383 if let Some(batch_size) = hnsw_update.batch_size {
384 hnsw_config.batch_size = Some(batch_size);
385 }
386 }
387 }
388 UpdateVectorIndexConfiguration::Hnsw(None) => {}
389 UpdateVectorIndexConfiguration::Spann(Some(spann_update)) => {
390 if let Some(spann_config) = vector_index.config.spann.as_mut() {
391 if let Some(search_nprobe) = spann_update.search_nprobe {
392 spann_config.search_nprobe = Some(search_nprobe);
393 }
394 if let Some(ef_search) = spann_update.ef_search {
395 spann_config.ef_search = Some(ef_search);
396 }
397 }
398 }
399 UpdateVectorIndexConfiguration::Spann(None) => {}
400 }
401 }
402
403 pub fn is_sparse_index_enabled(&self) -> bool {
404 let defaults_enabled = self
405 .defaults
406 .sparse_vector
407 .as_ref()
408 .and_then(|sv| sv.sparse_vector_index.as_ref())
409 .is_some_and(|idx| idx.enabled);
410 let key_enabled = self.keys.values().any(|value_types| {
411 value_types
412 .sparse_vector
413 .as_ref()
414 .and_then(|sv| sv.sparse_vector_index.as_ref())
415 .is_some_and(|idx| idx.enabled)
416 });
417 defaults_enabled || key_enabled
418 }
419}
420
421impl Default for Schema {
422 fn default() -> Self {
439 let defaults = ValueTypes {
441 string: Some(StringValueType {
442 fts_index: Some(FtsIndexType {
443 enabled: false,
444 config: FtsIndexConfig {},
445 }),
446 string_inverted_index: Some(StringInvertedIndexType {
447 enabled: true,
448 config: StringInvertedIndexConfig {},
449 }),
450 }),
451 float_list: Some(FloatListValueType {
452 vector_index: Some(VectorIndexType {
453 enabled: false,
454 config: VectorIndexConfig {
455 space: None, embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
457 source_key: None,
458 hnsw: None, spann: None, },
461 }),
462 }),
463 sparse_vector: Some(SparseVectorValueType {
464 sparse_vector_index: Some(SparseVectorIndexType {
465 enabled: false,
466 config: SparseVectorIndexConfig {
467 embedding_function: None,
468 source_key: None,
469 bm25: None,
470 },
471 }),
472 }),
473 int: Some(IntValueType {
474 int_inverted_index: Some(IntInvertedIndexType {
475 enabled: true,
476 config: IntInvertedIndexConfig {},
477 }),
478 }),
479 float: Some(FloatValueType {
480 float_inverted_index: Some(FloatInvertedIndexType {
481 enabled: true,
482 config: FloatInvertedIndexConfig {},
483 }),
484 }),
485 boolean: Some(BoolValueType {
486 bool_inverted_index: Some(BoolInvertedIndexType {
487 enabled: true,
488 config: BoolInvertedIndexConfig {},
489 }),
490 }),
491 };
492
493 let mut keys = HashMap::new();
495
496 keys.insert(
498 DOCUMENT_KEY.to_string(),
499 ValueTypes {
500 string: Some(StringValueType {
501 fts_index: Some(FtsIndexType {
502 enabled: true,
503 config: FtsIndexConfig {},
504 }),
505 string_inverted_index: Some(StringInvertedIndexType {
506 enabled: false,
507 config: StringInvertedIndexConfig {},
508 }),
509 }),
510 ..Default::default()
511 },
512 );
513
514 keys.insert(
516 EMBEDDING_KEY.to_string(),
517 ValueTypes {
518 float_list: Some(FloatListValueType {
519 vector_index: Some(VectorIndexType {
520 enabled: true,
521 config: VectorIndexConfig {
522 space: None, embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
524 source_key: Some(DOCUMENT_KEY.to_string()),
525 hnsw: None, spann: None, },
528 }),
529 }),
530 ..Default::default()
531 },
532 );
533
534 Schema {
535 defaults,
536 keys,
537 cmek: None,
538 source_attached_function_id: None,
539 }
540 }
541}
542
543pub fn is_embedding_function_default(
544 embedding_function: &Option<EmbeddingFunctionConfiguration>,
545) -> bool {
546 match embedding_function {
547 None => true,
548 Some(embedding_function) => embedding_function.is_default(),
549 }
550}
551
552pub fn is_space_default(space: &Option<Space>) -> bool {
554 match space {
555 None => true, Some(s) => *s == default_space(), }
558}
559
560pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
562 hnsw_config.ef_construction == Some(default_construction_ef())
563 && hnsw_config.ef_search == Some(default_search_ef())
564 && hnsw_config.max_neighbors == Some(default_m())
565 && hnsw_config.num_threads == Some(default_num_threads())
566 && hnsw_config.batch_size == Some(default_batch_size())
567 && hnsw_config.sync_threshold == Some(default_sync_threshold())
568 && hnsw_config.resize_factor == Some(default_resize_factor())
569}
570
571#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
578#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
579pub struct ValueTypes {
580 #[serde(
581 rename = "string",
582 alias = "#string",
583 skip_serializing_if = "Option::is_none"
584 )] pub string: Option<StringValueType>,
586
587 #[serde(
588 rename = "float_list",
589 alias = "#float_list",
590 skip_serializing_if = "Option::is_none"
591 )]
592 pub float_list: Option<FloatListValueType>,
594
595 #[serde(
596 rename = "sparse_vector",
597 alias = "#sparse_vector",
598 skip_serializing_if = "Option::is_none"
599 )]
600 pub sparse_vector: Option<SparseVectorValueType>,
602
603 #[serde(
604 rename = "int",
605 alias = "#int",
606 skip_serializing_if = "Option::is_none"
607 )] pub int: Option<IntValueType>,
609
610 #[serde(
611 rename = "float",
612 alias = "#float",
613 skip_serializing_if = "Option::is_none"
614 )] pub float: Option<FloatValueType>,
616
617 #[serde(
618 rename = "bool",
619 alias = "#bool",
620 skip_serializing_if = "Option::is_none"
621 )] pub boolean: Option<BoolValueType>,
623}
624
625#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
627#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
628pub struct StringValueType {
629 #[serde(
630 rename = "fts_index",
631 alias = "$fts_index",
632 skip_serializing_if = "Option::is_none"
633 )] pub fts_index: Option<FtsIndexType>,
635
636 #[serde(
637 rename = "string_inverted_index", alias = "$string_inverted_index",
639 skip_serializing_if = "Option::is_none"
640 )]
641 pub string_inverted_index: Option<StringInvertedIndexType>,
642}
643
644#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
646#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
647pub struct FloatListValueType {
648 #[serde(
649 rename = "vector_index",
650 alias = "$vector_index",
651 skip_serializing_if = "Option::is_none"
652 )] pub vector_index: Option<VectorIndexType>,
654}
655
656#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
658#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
659pub struct SparseVectorValueType {
660 #[serde(
661 rename = "sparse_vector_index", alias = "$sparse_vector_index",
663 skip_serializing_if = "Option::is_none"
664 )]
665 pub sparse_vector_index: Option<SparseVectorIndexType>,
666}
667
668#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
670#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
671pub struct IntValueType {
672 #[serde(
673 rename = "int_inverted_index",
674 alias = "$int_inverted_index",
675 skip_serializing_if = "Option::is_none"
676 )]
677 pub int_inverted_index: Option<IntInvertedIndexType>,
679}
680
681#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
683#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
684pub struct FloatValueType {
685 #[serde(
686 rename = "float_inverted_index", alias = "$float_inverted_index",
688 skip_serializing_if = "Option::is_none"
689 )]
690 pub float_inverted_index: Option<FloatInvertedIndexType>,
691}
692
693#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
695#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
696pub struct BoolValueType {
697 #[serde(
698 rename = "bool_inverted_index", alias = "$bool_inverted_index",
700 skip_serializing_if = "Option::is_none"
701 )]
702 pub bool_inverted_index: Option<BoolInvertedIndexType>,
703}
704
705#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
707#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
708pub struct FtsIndexType {
709 pub enabled: bool,
710 pub config: FtsIndexConfig,
711}
712
713#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
714#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
715pub struct VectorIndexType {
716 pub enabled: bool,
717 pub config: VectorIndexConfig,
718}
719
720#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
721#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
722pub struct SparseVectorIndexType {
723 pub enabled: bool,
724 pub config: SparseVectorIndexConfig,
725}
726
727#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
728#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
729pub struct StringInvertedIndexType {
730 pub enabled: bool,
731 pub config: StringInvertedIndexConfig,
732}
733
734#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
735#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
736pub struct IntInvertedIndexType {
737 pub enabled: bool,
738 pub config: IntInvertedIndexConfig,
739}
740
741#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
742#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
743pub struct FloatInvertedIndexType {
744 pub enabled: bool,
745 pub config: FloatInvertedIndexConfig,
746}
747
748#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
749#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
750pub struct BoolInvertedIndexType {
751 pub enabled: bool,
752 pub config: BoolInvertedIndexConfig,
753}
754
755impl Schema {
756 pub fn new_default(default_knn_index: KnnIndex) -> Self {
758 let vector_config = VectorIndexType {
760 enabled: false,
761 config: VectorIndexConfig {
762 space: Some(default_space()),
763 embedding_function: None,
764 source_key: None,
765 hnsw: match default_knn_index {
766 KnnIndex::Hnsw => Some(HnswIndexConfig {
767 ef_construction: Some(default_construction_ef()),
768 max_neighbors: Some(default_m()),
769 ef_search: Some(default_search_ef()),
770 num_threads: Some(default_num_threads()),
771 batch_size: Some(default_batch_size()),
772 sync_threshold: Some(default_sync_threshold()),
773 resize_factor: Some(default_resize_factor()),
774 }),
775 KnnIndex::Spann => None,
776 },
777 spann: match default_knn_index {
778 KnnIndex::Hnsw => None,
779 KnnIndex::Spann => Some(SpannIndexConfig {
780 search_nprobe: Some(default_search_nprobe()),
781 search_rng_factor: Some(default_search_rng_factor()),
782 search_rng_epsilon: Some(default_search_rng_epsilon()),
783 nreplica_count: Some(default_nreplica_count()),
784 write_rng_factor: Some(default_write_rng_factor()),
785 write_rng_epsilon: Some(default_write_rng_epsilon()),
786 split_threshold: Some(default_split_threshold()),
787 num_samples_kmeans: Some(default_num_samples_kmeans()),
788 initial_lambda: Some(default_initial_lambda()),
789 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
790 merge_threshold: Some(default_merge_threshold()),
791 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
792 write_nprobe: Some(default_write_nprobe()),
793 ef_construction: Some(default_construction_ef_spann()),
794 ef_search: Some(default_search_ef_spann()),
795 max_neighbors: Some(default_m_spann()),
796 center_drift_threshold: None,
797 quantize: default_quantize(),
798 }),
799 },
800 },
801 };
802
803 let defaults = ValueTypes {
805 string: Some(StringValueType {
806 string_inverted_index: Some(StringInvertedIndexType {
807 enabled: true,
808 config: StringInvertedIndexConfig {},
809 }),
810 fts_index: Some(FtsIndexType {
811 enabled: false,
812 config: FtsIndexConfig {},
813 }),
814 }),
815 float: Some(FloatValueType {
816 float_inverted_index: Some(FloatInvertedIndexType {
817 enabled: true,
818 config: FloatInvertedIndexConfig {},
819 }),
820 }),
821 int: Some(IntValueType {
822 int_inverted_index: Some(IntInvertedIndexType {
823 enabled: true,
824 config: IntInvertedIndexConfig {},
825 }),
826 }),
827 boolean: Some(BoolValueType {
828 bool_inverted_index: Some(BoolInvertedIndexType {
829 enabled: true,
830 config: BoolInvertedIndexConfig {},
831 }),
832 }),
833 float_list: Some(FloatListValueType {
834 vector_index: Some(vector_config),
835 }),
836 sparse_vector: Some(SparseVectorValueType {
837 sparse_vector_index: Some(SparseVectorIndexType {
838 enabled: false,
839 config: SparseVectorIndexConfig {
840 embedding_function: Some(EmbeddingFunctionConfiguration::Unknown),
841 source_key: None,
842 bm25: Some(false),
843 },
844 }),
845 }),
846 };
847
848 let mut keys = HashMap::new();
850
851 let embedding_defaults = ValueTypes {
853 float_list: Some(FloatListValueType {
854 vector_index: Some(VectorIndexType {
855 enabled: true,
856 config: VectorIndexConfig {
857 space: Some(default_space()),
858 embedding_function: None,
859 source_key: Some(DOCUMENT_KEY.to_string()),
860 hnsw: match default_knn_index {
861 KnnIndex::Hnsw => Some(HnswIndexConfig {
862 ef_construction: Some(default_construction_ef()),
863 max_neighbors: Some(default_m()),
864 ef_search: Some(default_search_ef()),
865 num_threads: Some(default_num_threads()),
866 batch_size: Some(default_batch_size()),
867 sync_threshold: Some(default_sync_threshold()),
868 resize_factor: Some(default_resize_factor()),
869 }),
870 KnnIndex::Spann => None,
871 },
872 spann: match default_knn_index {
873 KnnIndex::Hnsw => None,
874 KnnIndex::Spann => Some(SpannIndexConfig {
875 search_nprobe: Some(default_search_nprobe()),
876 search_rng_factor: Some(default_search_rng_factor()),
877 search_rng_epsilon: Some(default_search_rng_epsilon()),
878 nreplica_count: Some(default_nreplica_count()),
879 write_rng_factor: Some(default_write_rng_factor()),
880 write_rng_epsilon: Some(default_write_rng_epsilon()),
881 split_threshold: Some(default_split_threshold()),
882 num_samples_kmeans: Some(default_num_samples_kmeans()),
883 initial_lambda: Some(default_initial_lambda()),
884 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
885 merge_threshold: Some(default_merge_threshold()),
886 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
887 write_nprobe: Some(default_write_nprobe()),
888 ef_construction: Some(default_construction_ef_spann()),
889 ef_search: Some(default_search_ef_spann()),
890 max_neighbors: Some(default_m_spann()),
891 center_drift_threshold: None,
892 quantize: default_quantize(),
893 }),
894 },
895 },
896 }),
897 }),
898 ..Default::default()
899 };
900 keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
901
902 let document_defaults = ValueTypes {
904 string: Some(StringValueType {
905 fts_index: Some(FtsIndexType {
906 enabled: true,
907 config: FtsIndexConfig {},
908 }),
909 string_inverted_index: Some(StringInvertedIndexType {
910 enabled: false,
911 config: StringInvertedIndexConfig {},
912 }),
913 }),
914 ..Default::default()
915 };
916 keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
917
918 Schema {
919 defaults,
920 keys,
921 cmek: None,
922 source_attached_function_id: None,
923 }
924 }
925
926 pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
927 let to_internal = |vector_index: &VectorIndexType| {
928 let space = vector_index.config.space.clone();
929 vector_index
930 .config
931 .spann
932 .clone()
933 .map(|config| (space.as_ref(), &config).into())
934 };
935
936 self.keys
937 .get(EMBEDDING_KEY)
938 .and_then(|value_types| value_types.float_list.as_ref())
939 .and_then(|float_list| float_list.vector_index.as_ref())
940 .and_then(to_internal)
941 .or_else(|| {
942 self.defaults
943 .float_list
944 .as_ref()
945 .and_then(|float_list| float_list.vector_index.as_ref())
946 .and_then(to_internal)
947 })
948 }
949
950 pub fn is_quantization_enabled(&self) -> bool {
952 let check_spann = |vector_index: &VectorIndexType| {
953 vector_index
954 .config
955 .spann
956 .as_ref()
957 .map(|config| config.quantize)
958 .unwrap_or(false)
959 };
960
961 self.keys
962 .get(EMBEDDING_KEY)
963 .and_then(|value_types| value_types.float_list.as_ref())
964 .and_then(|float_list| float_list.vector_index.as_ref())
965 .map(check_spann)
966 .unwrap_or_else(|| {
967 self.defaults
968 .float_list
969 .as_ref()
970 .and_then(|float_list| float_list.vector_index.as_ref())
971 .map(check_spann)
972 .unwrap_or(false)
973 })
974 }
975
976 pub fn get_spann_config_mut(&mut self) -> Option<&mut SpannIndexConfig> {
979 if let Some(value_types) = self.keys.get_mut(EMBEDDING_KEY) {
981 if let Some(float_list) = &mut value_types.float_list {
982 if let Some(vector_index) = &mut float_list.vector_index {
983 if let Some(spann_config) = &mut vector_index.config.spann {
984 return Some(spann_config);
985 }
986 }
987 }
988 }
989
990 if let Some(float_list) = &mut self.defaults.float_list {
992 if let Some(vector_index) = &mut float_list.vector_index {
993 if let Some(spann_config) = &mut vector_index.config.spann {
994 return Some(spann_config);
995 }
996 }
997 }
998
999 None
1000 }
1001
1002 pub fn get_internal_hnsw_config(&self) -> Option<InternalHnswConfiguration> {
1003 let to_internal = |vector_index: &VectorIndexType| {
1004 if vector_index.config.spann.is_some() {
1005 return None;
1006 }
1007 let space = vector_index.config.space.as_ref();
1008 let hnsw_config = vector_index.config.hnsw.as_ref();
1009 Some((space, hnsw_config).into())
1010 };
1011
1012 self.keys
1013 .get(EMBEDDING_KEY)
1014 .and_then(|value_types| value_types.float_list.as_ref())
1015 .and_then(|float_list| float_list.vector_index.as_ref())
1016 .and_then(to_internal)
1017 .or_else(|| {
1018 self.defaults
1019 .float_list
1020 .as_ref()
1021 .and_then(|float_list| float_list.vector_index.as_ref())
1022 .and_then(to_internal)
1023 })
1024 }
1025
1026 pub fn get_internal_hnsw_config_with_legacy_fallback(
1027 &self,
1028 segment: &Segment,
1029 ) -> Result<Option<InternalHnswConfiguration>, HnswParametersFromSegmentError> {
1030 if let Some(config) = self.get_internal_hnsw_config() {
1031 let config_from_metadata =
1032 InternalHnswConfiguration::from_legacy_segment_metadata(&segment.metadata)?;
1033
1034 if config == InternalHnswConfiguration::default() && config != config_from_metadata {
1035 return Ok(Some(config_from_metadata));
1036 }
1037
1038 return Ok(Some(config));
1039 }
1040
1041 Ok(None)
1042 }
1043
1044 pub fn reconcile_with_defaults(
1051 user_schema: Option<&Schema>,
1052 knn_index: KnnIndex,
1053 ) -> Result<Self, SchemaError> {
1054 let default_schema = Schema::new_default(knn_index);
1055
1056 match user_schema {
1057 Some(user) => {
1058 let merged_defaults =
1060 Self::merge_value_types(&default_schema.defaults, &user.defaults, knn_index)?;
1061
1062 let mut merged_keys = default_schema.keys.clone();
1064 for (key, user_value_types) in &user.keys {
1065 if let Some(default_value_types) = merged_keys.get(key) {
1066 let merged_value_types = Self::merge_value_types(
1068 default_value_types,
1069 user_value_types,
1070 knn_index,
1071 )?;
1072 merged_keys.insert(key.clone(), merged_value_types);
1073 } else {
1074 merged_keys.insert(key.clone(), user_value_types.clone());
1076 }
1077 }
1078
1079 Ok(Schema {
1080 defaults: merged_defaults,
1081 keys: merged_keys,
1082 cmek: user.cmek.clone().or(default_schema.cmek.clone()),
1083 source_attached_function_id: user
1084 .source_attached_function_id
1085 .clone()
1086 .or(default_schema.source_attached_function_id.clone()),
1087 })
1088 }
1089 None => Ok(default_schema),
1090 }
1091 }
1092
1093 pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
1095 if self.defaults != other.defaults {
1096 return Err(SchemaError::DefaultsMismatch);
1097 }
1098
1099 let mut keys = self.keys.clone();
1100
1101 for (key, other_value_types) in &other.keys {
1102 if let Some(existing) = keys.get(key).cloned() {
1103 let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
1104 keys.insert(key.clone(), merged);
1105 } else {
1106 keys.insert(key.clone(), other_value_types.clone());
1107 }
1108 }
1109
1110 Ok(Schema {
1111 defaults: self.defaults.clone(),
1112 keys,
1113 cmek: other.cmek.clone().or(self.cmek.clone()),
1114 source_attached_function_id: other
1115 .source_attached_function_id
1116 .clone()
1117 .or(self.source_attached_function_id.clone()),
1118 })
1119 }
1120
1121 fn merge_override_value_types(
1122 key: &str,
1123 left: &ValueTypes,
1124 right: &ValueTypes,
1125 ) -> Result<ValueTypes, SchemaError> {
1126 Ok(ValueTypes {
1127 string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
1128 float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
1129 int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
1130 boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
1131 float_list: Self::merge_float_list_override(
1132 key,
1133 left.float_list.as_ref(),
1134 right.float_list.as_ref(),
1135 )?,
1136 sparse_vector: Self::merge_sparse_vector_override(
1137 key,
1138 left.sparse_vector.as_ref(),
1139 right.sparse_vector.as_ref(),
1140 )?,
1141 })
1142 }
1143
1144 fn merge_string_override(
1145 key: &str,
1146 left: Option<&StringValueType>,
1147 right: Option<&StringValueType>,
1148 ) -> Result<Option<StringValueType>, SchemaError> {
1149 match (left, right) {
1150 (Some(l), Some(r)) => Ok(Some(StringValueType {
1151 string_inverted_index: Self::merge_index_or_error(
1152 l.string_inverted_index.as_ref(),
1153 r.string_inverted_index.as_ref(),
1154 &format!("key '{key}' string.string_inverted_index"),
1155 )?,
1156 fts_index: Self::merge_index_or_error(
1157 l.fts_index.as_ref(),
1158 r.fts_index.as_ref(),
1159 &format!("key '{key}' string.fts_index"),
1160 )?,
1161 })),
1162 (Some(l), None) => Ok(Some(l.clone())),
1163 (None, Some(r)) => Ok(Some(r.clone())),
1164 (None, None) => Ok(None),
1165 }
1166 }
1167
1168 fn merge_float_override(
1169 key: &str,
1170 left: Option<&FloatValueType>,
1171 right: Option<&FloatValueType>,
1172 ) -> Result<Option<FloatValueType>, SchemaError> {
1173 match (left, right) {
1174 (Some(l), Some(r)) => Ok(Some(FloatValueType {
1175 float_inverted_index: Self::merge_index_or_error(
1176 l.float_inverted_index.as_ref(),
1177 r.float_inverted_index.as_ref(),
1178 &format!("key '{key}' float.float_inverted_index"),
1179 )?,
1180 })),
1181 (Some(l), None) => Ok(Some(l.clone())),
1182 (None, Some(r)) => Ok(Some(r.clone())),
1183 (None, None) => Ok(None),
1184 }
1185 }
1186
1187 fn merge_int_override(
1188 key: &str,
1189 left: Option<&IntValueType>,
1190 right: Option<&IntValueType>,
1191 ) -> Result<Option<IntValueType>, SchemaError> {
1192 match (left, right) {
1193 (Some(l), Some(r)) => Ok(Some(IntValueType {
1194 int_inverted_index: Self::merge_index_or_error(
1195 l.int_inverted_index.as_ref(),
1196 r.int_inverted_index.as_ref(),
1197 &format!("key '{key}' int.int_inverted_index"),
1198 )?,
1199 })),
1200 (Some(l), None) => Ok(Some(l.clone())),
1201 (None, Some(r)) => Ok(Some(r.clone())),
1202 (None, None) => Ok(None),
1203 }
1204 }
1205
1206 fn merge_bool_override(
1207 key: &str,
1208 left: Option<&BoolValueType>,
1209 right: Option<&BoolValueType>,
1210 ) -> Result<Option<BoolValueType>, SchemaError> {
1211 match (left, right) {
1212 (Some(l), Some(r)) => Ok(Some(BoolValueType {
1213 bool_inverted_index: Self::merge_index_or_error(
1214 l.bool_inverted_index.as_ref(),
1215 r.bool_inverted_index.as_ref(),
1216 &format!("key '{key}' bool.bool_inverted_index"),
1217 )?,
1218 })),
1219 (Some(l), None) => Ok(Some(l.clone())),
1220 (None, Some(r)) => Ok(Some(r.clone())),
1221 (None, None) => Ok(None),
1222 }
1223 }
1224
1225 fn merge_float_list_override(
1226 key: &str,
1227 left: Option<&FloatListValueType>,
1228 right: Option<&FloatListValueType>,
1229 ) -> Result<Option<FloatListValueType>, SchemaError> {
1230 match (left, right) {
1231 (Some(l), Some(r)) => Ok(Some(FloatListValueType {
1232 vector_index: Self::merge_index_or_error(
1233 l.vector_index.as_ref(),
1234 r.vector_index.as_ref(),
1235 &format!("key '{key}' float_list.vector_index"),
1236 )?,
1237 })),
1238 (Some(l), None) => Ok(Some(l.clone())),
1239 (None, Some(r)) => Ok(Some(r.clone())),
1240 (None, None) => Ok(None),
1241 }
1242 }
1243
1244 fn merge_sparse_vector_override(
1245 key: &str,
1246 left: Option<&SparseVectorValueType>,
1247 right: Option<&SparseVectorValueType>,
1248 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1249 match (left, right) {
1250 (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
1251 sparse_vector_index: Self::merge_index_or_error(
1252 l.sparse_vector_index.as_ref(),
1253 r.sparse_vector_index.as_ref(),
1254 &format!("key '{key}' sparse_vector.sparse_vector_index"),
1255 )?,
1256 })),
1257 (Some(l), None) => Ok(Some(l.clone())),
1258 (None, Some(r)) => Ok(Some(r.clone())),
1259 (None, None) => Ok(None),
1260 }
1261 }
1262
1263 fn merge_index_or_error<T: Clone + PartialEq>(
1264 left: Option<&T>,
1265 right: Option<&T>,
1266 context: &str,
1267 ) -> Result<Option<T>, SchemaError> {
1268 match (left, right) {
1269 (Some(l), Some(r)) => {
1270 if l == r {
1271 Ok(Some(l.clone()))
1272 } else {
1273 Err(SchemaError::ConfigurationConflict {
1274 context: context.to_string(),
1275 })
1276 }
1277 }
1278 (Some(l), None) => Ok(Some(l.clone())),
1279 (None, Some(r)) => Ok(Some(r.clone())),
1280 (None, None) => Ok(None),
1281 }
1282 }
1283
1284 fn merge_value_types(
1287 default: &ValueTypes,
1288 user: &ValueTypes,
1289 knn_index: KnnIndex,
1290 ) -> Result<ValueTypes, SchemaError> {
1291 let float_list = Self::merge_float_list_type(
1293 default.float_list.as_ref(),
1294 user.float_list.as_ref(),
1295 knn_index,
1296 )?;
1297
1298 if let Some(ref fl) = float_list {
1300 Self::validate_float_list_value_type(fl)?;
1301 }
1302
1303 Ok(ValueTypes {
1304 string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
1305 float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
1306 int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
1307 boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
1308 float_list,
1309 sparse_vector: Self::merge_sparse_vector_type(
1310 default.sparse_vector.as_ref(),
1311 user.sparse_vector.as_ref(),
1312 )?,
1313 })
1314 }
1315
1316 fn merge_string_type(
1318 default: Option<&StringValueType>,
1319 user: Option<&StringValueType>,
1320 ) -> Result<Option<StringValueType>, SchemaError> {
1321 match (default, user) {
1322 (Some(default), Some(user)) => Ok(Some(StringValueType {
1323 string_inverted_index: Self::merge_string_inverted_index_type(
1324 default.string_inverted_index.as_ref(),
1325 user.string_inverted_index.as_ref(),
1326 )?,
1327 fts_index: Self::merge_fts_index_type(
1328 default.fts_index.as_ref(),
1329 user.fts_index.as_ref(),
1330 )?,
1331 })),
1332 (Some(default), None) => Ok(Some(default.clone())),
1333 (None, Some(user)) => Ok(Some(user.clone())),
1334 (None, None) => Ok(None),
1335 }
1336 }
1337
1338 fn merge_float_type(
1340 default: Option<&FloatValueType>,
1341 user: Option<&FloatValueType>,
1342 ) -> Result<Option<FloatValueType>, SchemaError> {
1343 match (default, user) {
1344 (Some(default), Some(user)) => Ok(Some(FloatValueType {
1345 float_inverted_index: Self::merge_float_inverted_index_type(
1346 default.float_inverted_index.as_ref(),
1347 user.float_inverted_index.as_ref(),
1348 )?,
1349 })),
1350 (Some(default), None) => Ok(Some(default.clone())),
1351 (None, Some(user)) => Ok(Some(user.clone())),
1352 (None, None) => Ok(None),
1353 }
1354 }
1355
1356 fn merge_int_type(
1358 default: Option<&IntValueType>,
1359 user: Option<&IntValueType>,
1360 ) -> Result<Option<IntValueType>, SchemaError> {
1361 match (default, user) {
1362 (Some(default), Some(user)) => Ok(Some(IntValueType {
1363 int_inverted_index: Self::merge_int_inverted_index_type(
1364 default.int_inverted_index.as_ref(),
1365 user.int_inverted_index.as_ref(),
1366 )?,
1367 })),
1368 (Some(default), None) => Ok(Some(default.clone())),
1369 (None, Some(user)) => Ok(Some(user.clone())),
1370 (None, None) => Ok(None),
1371 }
1372 }
1373
1374 fn merge_bool_type(
1376 default: Option<&BoolValueType>,
1377 user: Option<&BoolValueType>,
1378 ) -> Result<Option<BoolValueType>, SchemaError> {
1379 match (default, user) {
1380 (Some(default), Some(user)) => Ok(Some(BoolValueType {
1381 bool_inverted_index: Self::merge_bool_inverted_index_type(
1382 default.bool_inverted_index.as_ref(),
1383 user.bool_inverted_index.as_ref(),
1384 )?,
1385 })),
1386 (Some(default), None) => Ok(Some(default.clone())),
1387 (None, Some(user)) => Ok(Some(user.clone())),
1388 (None, None) => Ok(None),
1389 }
1390 }
1391
1392 fn merge_float_list_type(
1394 default: Option<&FloatListValueType>,
1395 user: Option<&FloatListValueType>,
1396 knn_index: KnnIndex,
1397 ) -> Result<Option<FloatListValueType>, SchemaError> {
1398 match (default, user) {
1399 (Some(default), Some(user)) => Ok(Some(FloatListValueType {
1400 vector_index: Self::merge_vector_index_type(
1401 default.vector_index.as_ref(),
1402 user.vector_index.as_ref(),
1403 knn_index,
1404 )?,
1405 })),
1406 (Some(default), None) => Ok(Some(default.clone())),
1407 (None, Some(user)) => Ok(Some(user.clone())),
1408 (None, None) => Ok(None),
1409 }
1410 }
1411
1412 fn merge_sparse_vector_type(
1414 default: Option<&SparseVectorValueType>,
1415 user: Option<&SparseVectorValueType>,
1416 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1417 match (default, user) {
1418 (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
1419 sparse_vector_index: Self::merge_sparse_vector_index_type(
1420 default.sparse_vector_index.as_ref(),
1421 user.sparse_vector_index.as_ref(),
1422 )?,
1423 })),
1424 (Some(default), None) => Ok(Some(default.clone())),
1425 (None, Some(user)) => Ok(Some(user.clone())),
1426 (None, None) => Ok(None),
1427 }
1428 }
1429
1430 fn merge_string_inverted_index_type(
1432 default: Option<&StringInvertedIndexType>,
1433 user: Option<&StringInvertedIndexType>,
1434 ) -> Result<Option<StringInvertedIndexType>, SchemaError> {
1435 match (default, user) {
1436 (Some(_default), Some(user)) => {
1437 Ok(Some(StringInvertedIndexType {
1438 enabled: user.enabled, config: user.config.clone(), }))
1441 }
1442 (Some(default), None) => Ok(Some(default.clone())),
1443 (None, Some(user)) => Ok(Some(user.clone())),
1444 (None, None) => Ok(None),
1445 }
1446 }
1447
1448 fn merge_fts_index_type(
1449 default: Option<&FtsIndexType>,
1450 user: Option<&FtsIndexType>,
1451 ) -> Result<Option<FtsIndexType>, SchemaError> {
1452 match (default, user) {
1453 (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
1454 enabled: user.enabled,
1455 config: user.config.clone(),
1456 })),
1457 (Some(default), None) => Ok(Some(default.clone())),
1458 (None, Some(user)) => Ok(Some(user.clone())),
1459 (None, None) => Ok(None),
1460 }
1461 }
1462
1463 fn merge_float_inverted_index_type(
1464 default: Option<&FloatInvertedIndexType>,
1465 user: Option<&FloatInvertedIndexType>,
1466 ) -> Result<Option<FloatInvertedIndexType>, SchemaError> {
1467 match (default, user) {
1468 (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
1469 enabled: user.enabled,
1470 config: user.config.clone(),
1471 })),
1472 (Some(default), None) => Ok(Some(default.clone())),
1473 (None, Some(user)) => Ok(Some(user.clone())),
1474 (None, None) => Ok(None),
1475 }
1476 }
1477
1478 fn merge_int_inverted_index_type(
1479 default: Option<&IntInvertedIndexType>,
1480 user: Option<&IntInvertedIndexType>,
1481 ) -> Result<Option<IntInvertedIndexType>, SchemaError> {
1482 match (default, user) {
1483 (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
1484 enabled: user.enabled,
1485 config: user.config.clone(),
1486 })),
1487 (Some(default), None) => Ok(Some(default.clone())),
1488 (None, Some(user)) => Ok(Some(user.clone())),
1489 (None, None) => Ok(None),
1490 }
1491 }
1492
1493 fn merge_bool_inverted_index_type(
1494 default: Option<&BoolInvertedIndexType>,
1495 user: Option<&BoolInvertedIndexType>,
1496 ) -> Result<Option<BoolInvertedIndexType>, SchemaError> {
1497 match (default, user) {
1498 (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
1499 enabled: user.enabled,
1500 config: user.config.clone(),
1501 })),
1502 (Some(default), None) => Ok(Some(default.clone())),
1503 (None, Some(user)) => Ok(Some(user.clone())),
1504 (None, None) => Ok(None),
1505 }
1506 }
1507
1508 fn merge_vector_index_type(
1509 default: Option<&VectorIndexType>,
1510 user: Option<&VectorIndexType>,
1511 knn_index: KnnIndex,
1512 ) -> Result<Option<VectorIndexType>, SchemaError> {
1513 match (default, user) {
1514 (Some(default), Some(user)) => Ok(Some(VectorIndexType {
1515 enabled: user.enabled,
1516 config: Self::merge_vector_index_config(&default.config, &user.config, knn_index)?,
1517 })),
1518 (Some(default), None) => Ok(Some(default.clone())),
1519 (None, Some(user)) => Ok(Some(user.clone())),
1520 (None, None) => Ok(None),
1521 }
1522 }
1523
1524 fn merge_sparse_vector_index_type(
1525 default: Option<&SparseVectorIndexType>,
1526 user: Option<&SparseVectorIndexType>,
1527 ) -> Result<Option<SparseVectorIndexType>, SchemaError> {
1528 match (default, user) {
1529 (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
1530 enabled: user.enabled,
1531 config: Self::merge_sparse_vector_index_config(&default.config, &user.config),
1532 })),
1533 (Some(default), None) => Ok(Some(default.clone())),
1534 (None, Some(user)) => Ok(Some(user.clone())),
1535 (None, None) => Ok(None),
1536 }
1537 }
1538
1539 fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), SchemaError> {
1542 if let Some(vector_index) = &float_list.vector_index {
1543 if let Some(hnsw) = &vector_index.config.hnsw {
1544 hnsw.validate().map_err(SchemaError::InvalidHnswConfig)?;
1545 }
1546 if let Some(spann) = &vector_index.config.spann {
1547 spann.validate().map_err(SchemaError::InvalidSpannConfig)?;
1548 }
1549 }
1550 Ok(())
1551 }
1552
1553 fn merge_vector_index_config(
1555 default: &VectorIndexConfig,
1556 user: &VectorIndexConfig,
1557 knn_index: KnnIndex,
1558 ) -> Result<VectorIndexConfig, SchemaError> {
1559 match knn_index {
1560 KnnIndex::Hnsw => Ok(VectorIndexConfig {
1561 space: user.space.clone().or(default.space.clone()),
1562 embedding_function: user
1563 .embedding_function
1564 .clone()
1565 .or(default.embedding_function.clone()),
1566 source_key: user.source_key.clone().or(default.source_key.clone()),
1567 hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1568 spann: None,
1569 }),
1570 KnnIndex::Spann => Ok(VectorIndexConfig {
1571 space: user.space.clone().or(default.space.clone()),
1572 embedding_function: user
1573 .embedding_function
1574 .clone()
1575 .or(default.embedding_function.clone()),
1576 source_key: user.source_key.clone().or(default.source_key.clone()),
1577 hnsw: None,
1578 spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref())?,
1579 }),
1580 }
1581 }
1582
1583 fn merge_sparse_vector_index_config(
1585 default: &SparseVectorIndexConfig,
1586 user: &SparseVectorIndexConfig,
1587 ) -> SparseVectorIndexConfig {
1588 SparseVectorIndexConfig {
1589 embedding_function: user
1590 .embedding_function
1591 .clone()
1592 .or(default.embedding_function.clone()),
1593 source_key: user.source_key.clone().or(default.source_key.clone()),
1594 bm25: user.bm25.or(default.bm25),
1595 }
1596 }
1597
1598 fn merge_hnsw_configs(
1600 default_hnsw: Option<&HnswIndexConfig>,
1601 user_hnsw: Option<&HnswIndexConfig>,
1602 ) -> Option<HnswIndexConfig> {
1603 match (default_hnsw, user_hnsw) {
1604 (Some(default), Some(user)) => Some(HnswIndexConfig {
1605 ef_construction: user.ef_construction.or(default.ef_construction),
1606 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1607 ef_search: user.ef_search.or(default.ef_search),
1608 num_threads: user.num_threads.or(default.num_threads),
1609 batch_size: user.batch_size.or(default.batch_size),
1610 sync_threshold: user.sync_threshold.or(default.sync_threshold),
1611 resize_factor: user.resize_factor.or(default.resize_factor),
1612 }),
1613 (Some(default), None) => Some(default.clone()),
1614 (None, Some(user)) => Some(user.clone()),
1615 (None, None) => None,
1616 }
1617 }
1618
1619 fn merge_spann_configs(
1621 default_spann: Option<&SpannIndexConfig>,
1622 user_spann: Option<&SpannIndexConfig>,
1623 ) -> Result<Option<SpannIndexConfig>, SchemaError> {
1624 match (default_spann, user_spann) {
1625 (Some(default), Some(user)) => {
1626 if user.quantize != default_quantize() || default.quantize != default_quantize() {
1628 return Err(SchemaError::InvalidUserInput {
1629 reason: "quantize field cannot be set to true in user schema. Quantization can only be enabled via frontend configuration.".to_string(),
1630 });
1631 }
1632 Ok(Some(SpannIndexConfig {
1633 search_nprobe: user.search_nprobe.or(default.search_nprobe),
1634 search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1635 search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1636 nreplica_count: user.nreplica_count.or(default.nreplica_count),
1637 write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1638 write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1639 split_threshold: user.split_threshold.or(default.split_threshold),
1640 num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1641 initial_lambda: user.initial_lambda.or(default.initial_lambda),
1642 reassign_neighbor_count: user
1643 .reassign_neighbor_count
1644 .or(default.reassign_neighbor_count),
1645 merge_threshold: user.merge_threshold.or(default.merge_threshold),
1646 num_centers_to_merge_to: user
1647 .num_centers_to_merge_to
1648 .or(default.num_centers_to_merge_to),
1649 write_nprobe: user.write_nprobe.or(default.write_nprobe),
1650 ef_construction: user.ef_construction.or(default.ef_construction),
1651 ef_search: user.ef_search.or(default.ef_search),
1652 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1653 center_drift_threshold: user
1654 .center_drift_threshold
1655 .or(default.center_drift_threshold),
1656 quantize: default_quantize(), }))
1658 }
1659 (Some(default), None) => {
1660 if default.quantize != default_quantize() {
1662 return Err(SchemaError::InvalidUserInput {
1663 reason: "quantize field cannot be set to true in default schema. Quantization can only be enabled via frontend configuration.".to_string(),
1664 });
1665 }
1666 Ok(Some(default.clone()))
1667 }
1668 (None, Some(user)) => {
1669 if user.quantize != default_quantize() {
1671 return Err(SchemaError::InvalidUserInput {
1672 reason: "quantize field cannot be set to true in user schema. Quantization can only be enabled via frontend configuration.".to_string(),
1673 });
1674 }
1675 Ok(Some(user.clone()))
1676 }
1677 (None, None) => Ok(None),
1678 }
1679 }
1680
1681 pub fn reconcile_with_collection_config(
1689 schema: &Schema,
1690 collection_config: &InternalCollectionConfiguration,
1691 default_knn_index: KnnIndex,
1692 ) -> Result<Schema, SchemaError> {
1693 if collection_config.is_default() {
1695 if schema.is_default() {
1696 let mut new_schema = Schema::new_default(default_knn_index);
1699
1700 if collection_config.embedding_function.is_some() {
1701 if let Some(float_list) = &mut new_schema.defaults.float_list {
1702 if let Some(vector_index) = &mut float_list.vector_index {
1703 vector_index.config.embedding_function =
1704 collection_config.embedding_function.clone();
1705 }
1706 }
1707 if let Some(embedding_types) = new_schema.keys.get_mut(EMBEDDING_KEY) {
1708 if let Some(float_list) = &mut embedding_types.float_list {
1709 if let Some(vector_index) = &mut float_list.vector_index {
1710 vector_index.config.embedding_function =
1711 collection_config.embedding_function.clone();
1712 }
1713 }
1714 }
1715 }
1716 return Ok(new_schema);
1717 } else {
1718 return Ok(schema.clone());
1720 }
1721 }
1722
1723 Self::try_from(collection_config)
1726 }
1727
1728 pub fn reconcile_schema_and_config(
1729 schema: Option<&Schema>,
1730 configuration: Option<&InternalCollectionConfiguration>,
1731 knn_index: KnnIndex,
1732 ) -> Result<Schema, SchemaError> {
1733 if let (Some(user_schema), Some(config)) = (schema, configuration) {
1735 if !user_schema.is_default() && !config.is_default() {
1736 return Err(SchemaError::ConfigAndSchemaConflict);
1737 }
1738 }
1739
1740 let reconciled_schema = Self::reconcile_with_defaults(schema, knn_index)?;
1741 if let Some(config) = configuration {
1742 Self::reconcile_with_collection_config(&reconciled_schema, config, knn_index)
1743 } else {
1744 Ok(reconciled_schema)
1745 }
1746 }
1747
1748 pub fn default_with_embedding_function(
1749 embedding_function: EmbeddingFunctionConfiguration,
1750 ) -> Schema {
1751 let mut schema = Schema::new_default(KnnIndex::Spann);
1752 if let Some(float_list) = &mut schema.defaults.float_list {
1753 if let Some(vector_index) = &mut float_list.vector_index {
1754 vector_index.config.embedding_function = Some(embedding_function.clone());
1755 }
1756 }
1757 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1758 if let Some(float_list) = &mut embedding_types.float_list {
1759 if let Some(vector_index) = &mut float_list.vector_index {
1760 vector_index.config.embedding_function = Some(embedding_function);
1761 }
1762 }
1763 }
1764 schema
1765 }
1766
1767 pub fn is_default(&self) -> bool {
1769 if !Self::is_value_types_default(&self.defaults) {
1771 return false;
1772 }
1773
1774 for key in self.keys.keys() {
1775 if key != EMBEDDING_KEY && key != DOCUMENT_KEY {
1776 return false;
1777 }
1778 }
1779
1780 if let Some(embedding_value) = self.keys.get(EMBEDDING_KEY) {
1782 if !Self::is_embedding_value_types_default(embedding_value) {
1783 return false;
1784 }
1785 }
1786
1787 if let Some(document_value) = self.keys.get(DOCUMENT_KEY) {
1789 if !Self::is_document_value_types_default(document_value) {
1790 return false;
1791 }
1792 }
1793
1794 if self.cmek.is_some() {
1796 return false;
1797 }
1798
1799 true
1800 }
1801
1802 fn is_value_types_default(value_types: &ValueTypes) -> bool {
1804 if let Some(string) = &value_types.string {
1806 if let Some(string_inverted) = &string.string_inverted_index {
1807 if !string_inverted.enabled {
1808 return false;
1809 }
1810 }
1812 if let Some(fts) = &string.fts_index {
1813 if fts.enabled {
1814 return false;
1815 }
1816 }
1818 }
1819
1820 if let Some(float) = &value_types.float {
1822 if let Some(float_inverted) = &float.float_inverted_index {
1823 if !float_inverted.enabled {
1824 return false;
1825 }
1826 }
1828 }
1829
1830 if let Some(int) = &value_types.int {
1832 if let Some(int_inverted) = &int.int_inverted_index {
1833 if !int_inverted.enabled {
1834 return false;
1835 }
1836 }
1838 }
1839
1840 if let Some(boolean) = &value_types.boolean {
1842 if let Some(bool_inverted) = &boolean.bool_inverted_index {
1843 if !bool_inverted.enabled {
1844 return false;
1845 }
1846 }
1848 }
1849
1850 if let Some(float_list) = &value_types.float_list {
1852 if let Some(vector_index) = &float_list.vector_index {
1853 if vector_index.enabled {
1854 return false;
1855 }
1856 if !is_embedding_function_default(&vector_index.config.embedding_function) {
1857 return false;
1858 }
1859 if !is_space_default(&vector_index.config.space) {
1860 return false;
1861 }
1862 if vector_index.config.source_key.is_some() {
1864 return false;
1865 }
1866 match (&vector_index.config.hnsw, &vector_index.config.spann) {
1869 (Some(hnsw_config), None) => {
1870 if !hnsw_config.is_default() {
1871 return false;
1872 }
1873 }
1874 (None, Some(spann_config)) => {
1875 if !spann_config.is_default() {
1876 return false;
1877 }
1878 }
1879 (Some(_), Some(_)) => return false, (None, None) => {}
1881 }
1882 }
1883 }
1884
1885 if let Some(sparse_vector) = &value_types.sparse_vector {
1887 if let Some(sparse_index) = &sparse_vector.sparse_vector_index {
1888 if sparse_index.enabled {
1889 return false;
1890 }
1891 if !is_embedding_function_default(&sparse_index.config.embedding_function) {
1893 return false;
1894 }
1895 if sparse_index.config.source_key.is_some() {
1896 return false;
1897 }
1898 if let Some(bm25) = &sparse_index.config.bm25 {
1899 if bm25 != &false {
1900 return false;
1901 }
1902 }
1903 }
1904 }
1905
1906 true
1907 }
1908
1909 fn is_embedding_value_types_default(value_types: &ValueTypes) -> bool {
1911 if value_types.string.is_some()
1913 || value_types.float.is_some()
1914 || value_types.int.is_some()
1915 || value_types.boolean.is_some()
1916 || value_types.sparse_vector.is_some()
1917 {
1918 return false;
1919 }
1920
1921 if let Some(float_list) = &value_types.float_list {
1923 if let Some(vector_index) = &float_list.vector_index {
1924 if !vector_index.enabled {
1925 return false;
1926 }
1927 if !is_space_default(&vector_index.config.space) {
1928 return false;
1929 }
1930 if !is_embedding_function_default(&vector_index.config.embedding_function) {
1932 return false;
1933 }
1934 if vector_index.config.source_key.as_deref() != Some(DOCUMENT_KEY) {
1936 return false;
1937 }
1938 match (&vector_index.config.hnsw, &vector_index.config.spann) {
1941 (Some(hnsw_config), None) => {
1942 if !hnsw_config.is_default() {
1943 return false;
1944 }
1945 }
1946 (None, Some(spann_config)) => {
1947 if !spann_config.is_default() {
1948 return false;
1949 }
1950 }
1951 (Some(_), Some(_)) => return false, (None, None) => {}
1953 }
1954 }
1955 }
1956
1957 true
1958 }
1959
1960 fn is_document_value_types_default(value_types: &ValueTypes) -> bool {
1962 if value_types.float_list.is_some()
1964 || value_types.float.is_some()
1965 || value_types.int.is_some()
1966 || value_types.boolean.is_some()
1967 || value_types.sparse_vector.is_some()
1968 {
1969 return false;
1970 }
1971
1972 if let Some(string) = &value_types.string {
1974 if let Some(fts) = &string.fts_index {
1975 if !fts.enabled {
1976 return false;
1977 }
1978 }
1980 if let Some(string_inverted) = &string.string_inverted_index {
1981 if string_inverted.enabled {
1982 return false;
1983 }
1984 }
1986 }
1987
1988 true
1989 }
1990
1991 pub fn is_metadata_type_index_enabled(
1993 &self,
1994 key: &str,
1995 value_type: MetadataValueType,
1996 ) -> Result<bool, SchemaError> {
1997 let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1998
1999 match value_type {
2000 MetadataValueType::Bool => match &v_type.boolean {
2001 Some(bool_type) => match &bool_type.bool_inverted_index {
2002 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
2003 None => Err(SchemaError::MissingIndexConfiguration {
2004 key: key.to_string(),
2005 value_type: "bool".to_string(),
2006 }),
2007 },
2008 None => match &self.defaults.boolean {
2009 Some(bool_type) => match &bool_type.bool_inverted_index {
2010 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
2011 None => Err(SchemaError::MissingIndexConfiguration {
2012 key: key.to_string(),
2013 value_type: "bool".to_string(),
2014 }),
2015 },
2016 None => Err(SchemaError::MissingIndexConfiguration {
2017 key: key.to_string(),
2018 value_type: "bool".to_string(),
2019 }),
2020 },
2021 },
2022 MetadataValueType::Int => match &v_type.int {
2023 Some(int_type) => match &int_type.int_inverted_index {
2024 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
2025 None => Err(SchemaError::MissingIndexConfiguration {
2026 key: key.to_string(),
2027 value_type: "int".to_string(),
2028 }),
2029 },
2030 None => match &self.defaults.int {
2031 Some(int_type) => match &int_type.int_inverted_index {
2032 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
2033 None => Err(SchemaError::MissingIndexConfiguration {
2034 key: key.to_string(),
2035 value_type: "int".to_string(),
2036 }),
2037 },
2038 None => Err(SchemaError::MissingIndexConfiguration {
2039 key: key.to_string(),
2040 value_type: "int".to_string(),
2041 }),
2042 },
2043 },
2044 MetadataValueType::Float => match &v_type.float {
2045 Some(float_type) => match &float_type.float_inverted_index {
2046 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
2047 None => Err(SchemaError::MissingIndexConfiguration {
2048 key: key.to_string(),
2049 value_type: "float".to_string(),
2050 }),
2051 },
2052 None => match &self.defaults.float {
2053 Some(float_type) => match &float_type.float_inverted_index {
2054 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
2055 None => Err(SchemaError::MissingIndexConfiguration {
2056 key: key.to_string(),
2057 value_type: "float".to_string(),
2058 }),
2059 },
2060 None => Err(SchemaError::MissingIndexConfiguration {
2061 key: key.to_string(),
2062 value_type: "float".to_string(),
2063 }),
2064 },
2065 },
2066 MetadataValueType::Str => match &v_type.string {
2067 Some(string_type) => match &string_type.string_inverted_index {
2068 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
2069 None => Err(SchemaError::MissingIndexConfiguration {
2070 key: key.to_string(),
2071 value_type: "string".to_string(),
2072 }),
2073 },
2074 None => match &self.defaults.string {
2075 Some(string_type) => match &string_type.string_inverted_index {
2076 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
2077 None => Err(SchemaError::MissingIndexConfiguration {
2078 key: key.to_string(),
2079 value_type: "string".to_string(),
2080 }),
2081 },
2082 None => Err(SchemaError::MissingIndexConfiguration {
2083 key: key.to_string(),
2084 value_type: "string".to_string(),
2085 }),
2086 },
2087 },
2088 MetadataValueType::SparseVector => match &v_type.sparse_vector {
2089 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
2090 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
2091 None => Err(SchemaError::MissingIndexConfiguration {
2092 key: key.to_string(),
2093 value_type: "sparse_vector".to_string(),
2094 }),
2095 },
2096 None => match &self.defaults.sparse_vector {
2097 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
2098 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
2099 None => Err(SchemaError::MissingIndexConfiguration {
2100 key: key.to_string(),
2101 value_type: "sparse_vector".to_string(),
2102 }),
2103 },
2104 None => Err(SchemaError::MissingIndexConfiguration {
2105 key: key.to_string(),
2106 value_type: "sparse_vector".to_string(),
2107 }),
2108 },
2109 },
2110 MetadataValueType::BoolArray => {
2112 self.is_metadata_type_index_enabled(key, MetadataValueType::Bool)
2113 }
2114 MetadataValueType::IntArray => {
2115 self.is_metadata_type_index_enabled(key, MetadataValueType::Int)
2116 }
2117 MetadataValueType::FloatArray => {
2118 self.is_metadata_type_index_enabled(key, MetadataValueType::Float)
2119 }
2120 MetadataValueType::StringArray => {
2121 self.is_metadata_type_index_enabled(key, MetadataValueType::Str)
2122 }
2123 }
2124 }
2125
2126 pub fn is_metadata_where_indexing_enabled(
2127 &self,
2128 where_clause: &Where,
2129 ) -> Result<(), FilterValidationError> {
2130 match where_clause {
2131 Where::Composite(composite) => {
2132 for child in &composite.children {
2133 self.is_metadata_where_indexing_enabled(child)?;
2134 }
2135 Ok(())
2136 }
2137 Where::Document(_) => Ok(()),
2138 Where::Metadata(expression) => {
2139 let value_type = match &expression.comparison {
2140 MetadataComparison::Primitive(_, value) => value.value_type(),
2141 MetadataComparison::Set(_, set_value) => set_value.value_type(),
2142 MetadataComparison::ArrayContains(_, value) => value.value_type(),
2143 };
2144 let is_enabled = self
2145 .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
2146 .map_err(FilterValidationError::Schema)?;
2147 if !is_enabled {
2148 return Err(FilterValidationError::IndexingDisabled {
2149 key: expression.key.clone(),
2150 value_type,
2151 });
2152 }
2153 Ok(())
2154 }
2155 }
2156 }
2157
2158 pub fn is_knn_key_indexing_enabled(
2159 &self,
2160 key: &str,
2161 query: &QueryVector,
2162 ) -> Result<(), FilterValidationError> {
2163 match query {
2164 QueryVector::Sparse(_) => {
2165 let is_enabled = self
2166 .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
2167 .map_err(FilterValidationError::Schema)?;
2168 if !is_enabled {
2169 return Err(FilterValidationError::IndexingDisabled {
2170 key: key.to_string(),
2171 value_type: MetadataValueType::SparseVector,
2172 });
2173 }
2174 Ok(())
2175 }
2176 QueryVector::Dense(_) => {
2177 Ok(())
2180 }
2181 }
2182 }
2183
2184 pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
2185 if key.starts_with(CHROMA_KEY) {
2186 return false;
2187 }
2188 let value_types = self.keys.entry(key.to_string()).or_default();
2189 match value_type {
2190 MetadataValueType::Bool => {
2191 if value_types.boolean.is_none() {
2192 value_types.boolean = self.defaults.boolean.clone();
2193 return true;
2194 }
2195 }
2196 MetadataValueType::Int => {
2197 if value_types.int.is_none() {
2198 value_types.int = self.defaults.int.clone();
2199 return true;
2200 }
2201 }
2202 MetadataValueType::Float => {
2203 if value_types.float.is_none() {
2204 value_types.float = self.defaults.float.clone();
2205 return true;
2206 }
2207 }
2208 MetadataValueType::Str => {
2209 if value_types.string.is_none() {
2210 value_types.string = self.defaults.string.clone();
2211 return true;
2212 }
2213 }
2214 MetadataValueType::SparseVector => {
2215 if value_types.sparse_vector.is_none() {
2216 value_types.sparse_vector = self.defaults.sparse_vector.clone();
2217 return true;
2218 }
2219 }
2220 MetadataValueType::BoolArray => {
2222 if value_types.boolean.is_none() {
2223 value_types.boolean = self.defaults.boolean.clone();
2224 return true;
2225 }
2226 }
2227 MetadataValueType::IntArray => {
2228 if value_types.int.is_none() {
2229 value_types.int = self.defaults.int.clone();
2230 return true;
2231 }
2232 }
2233 MetadataValueType::FloatArray => {
2234 if value_types.float.is_none() {
2235 value_types.float = self.defaults.float.clone();
2236 return true;
2237 }
2238 }
2239 MetadataValueType::StringArray => {
2240 if value_types.string.is_none() {
2241 value_types.string = self.defaults.string.clone();
2242 return true;
2243 }
2244 }
2245 }
2246 false
2247 }
2248
2249 pub fn create_index(
2289 mut self,
2290 key: Option<&str>,
2291 config: IndexConfig,
2292 ) -> Result<Self, SchemaBuilderError> {
2293 match (&key, &config) {
2295 (None, IndexConfig::Vector(cfg)) => {
2296 self._set_vector_index_config_builder(cfg.clone());
2297 return Ok(self);
2298 }
2299 (None, IndexConfig::Fts(cfg)) => {
2300 self._set_fts_index_config_builder(cfg.clone());
2301 return Ok(self);
2302 }
2303 (Some(k), IndexConfig::Vector(_)) => {
2304 return Err(SchemaBuilderError::VectorIndexMustBeGlobal { key: k.to_string() });
2305 }
2306 (Some(k), IndexConfig::Fts(_)) => {
2307 return Err(SchemaBuilderError::FtsIndexMustBeGlobal { key: k.to_string() });
2308 }
2309 _ => {}
2310 }
2311
2312 if let Some(k) = key {
2314 if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2315 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2316 key: k.to_string(),
2317 });
2318 }
2319 }
2320
2321 if key.is_none() && matches!(config, IndexConfig::SparseVector(_)) {
2323 return Err(SchemaBuilderError::SparseVectorRequiresKey);
2324 }
2325
2326 match key {
2328 Some(k) => self._set_index_for_key_builder(k, config, true)?,
2329 None => self._set_index_in_defaults_builder(config, true)?,
2330 }
2331
2332 Ok(self)
2333 }
2334
2335 pub fn delete_index(
2363 mut self,
2364 key: Option<&str>,
2365 config: IndexConfig,
2366 ) -> Result<Self, SchemaBuilderError> {
2367 if let Some(k) = key {
2369 if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2370 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2371 key: k.to_string(),
2372 });
2373 }
2374 }
2375
2376 match &config {
2378 IndexConfig::Vector(_) => {
2379 return Err(SchemaBuilderError::VectorIndexDeletionNotSupported);
2380 }
2381 IndexConfig::Fts(_) => {
2382 return Err(SchemaBuilderError::FtsIndexDeletionNotSupported);
2383 }
2384 IndexConfig::SparseVector(_) => {
2385 return Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported);
2386 }
2387 _ => {}
2388 }
2389
2390 match key {
2392 Some(k) => self._set_index_for_key_builder(k, config, false)?,
2393 None => self._set_index_in_defaults_builder(config, false)?,
2394 }
2395
2396 Ok(self)
2397 }
2398
2399 pub fn with_cmek(mut self, cmek: Cmek) -> Self {
2417 self.cmek = Some(cmek);
2418 self
2419 }
2420
2421 fn _set_vector_index_config_builder(&mut self, config: VectorIndexConfig) {
2423 if let Some(float_list) = &mut self.defaults.float_list {
2425 if let Some(vector_index) = &mut float_list.vector_index {
2426 vector_index.config = config.clone();
2427 }
2428 }
2429
2430 if let Some(embedding_types) = self.keys.get_mut(EMBEDDING_KEY) {
2432 if let Some(float_list) = &mut embedding_types.float_list {
2433 if let Some(vector_index) = &mut float_list.vector_index {
2434 let mut updated_config = config;
2435 updated_config.source_key = Some(DOCUMENT_KEY.to_string());
2437 vector_index.config = updated_config;
2438 }
2439 }
2440 }
2441 }
2442
2443 fn _set_fts_index_config_builder(&mut self, config: FtsIndexConfig) {
2445 if let Some(string) = &mut self.defaults.string {
2447 if let Some(fts_index) = &mut string.fts_index {
2448 fts_index.config = config.clone();
2449 }
2450 }
2451
2452 if let Some(document_types) = self.keys.get_mut(DOCUMENT_KEY) {
2454 if let Some(string) = &mut document_types.string {
2455 if let Some(fts_index) = &mut string.fts_index {
2456 fts_index.config = config;
2457 }
2458 }
2459 }
2460 }
2461
2462 fn _set_index_for_key_builder(
2464 &mut self,
2465 key: &str,
2466 config: IndexConfig,
2467 enabled: bool,
2468 ) -> Result<(), SchemaBuilderError> {
2469 if enabled && matches!(config, IndexConfig::SparseVector(_)) {
2471 let existing_key = self
2473 .keys
2474 .iter()
2475 .find(|(k, v)| {
2476 k.as_str() != key
2477 && v.sparse_vector
2478 .as_ref()
2479 .and_then(|sv| sv.sparse_vector_index.as_ref())
2480 .map(|idx| idx.enabled)
2481 .unwrap_or(false)
2482 })
2483 .map(|(k, _)| k.clone());
2484
2485 if let Some(existing_key) = existing_key {
2486 return Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key });
2487 }
2488 }
2489
2490 let value_types = self.keys.entry(key.to_string()).or_default();
2492
2493 match config {
2495 IndexConfig::Vector(_) => {
2496 return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2497 key: key.to_string(),
2498 });
2499 }
2500 IndexConfig::Fts(_) => {
2501 return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2502 key: key.to_string(),
2503 });
2504 }
2505 IndexConfig::SparseVector(cfg) => {
2506 value_types.sparse_vector = Some(SparseVectorValueType {
2507 sparse_vector_index: Some(SparseVectorIndexType {
2508 enabled,
2509 config: cfg,
2510 }),
2511 });
2512 }
2513 IndexConfig::StringInverted(cfg) => {
2514 if value_types.string.is_none() {
2515 value_types.string = Some(StringValueType {
2516 fts_index: None,
2517 string_inverted_index: None,
2518 });
2519 }
2520 if let Some(string) = &mut value_types.string {
2521 string.string_inverted_index = Some(StringInvertedIndexType {
2522 enabled,
2523 config: cfg,
2524 });
2525 }
2526 }
2527 IndexConfig::IntInverted(cfg) => {
2528 value_types.int = Some(IntValueType {
2529 int_inverted_index: Some(IntInvertedIndexType {
2530 enabled,
2531 config: cfg,
2532 }),
2533 });
2534 }
2535 IndexConfig::FloatInverted(cfg) => {
2536 value_types.float = Some(FloatValueType {
2537 float_inverted_index: Some(FloatInvertedIndexType {
2538 enabled,
2539 config: cfg,
2540 }),
2541 });
2542 }
2543 IndexConfig::BoolInverted(cfg) => {
2544 value_types.boolean = Some(BoolValueType {
2545 bool_inverted_index: Some(BoolInvertedIndexType {
2546 enabled,
2547 config: cfg,
2548 }),
2549 });
2550 }
2551 }
2552
2553 Ok(())
2554 }
2555
2556 fn _set_index_in_defaults_builder(
2558 &mut self,
2559 config: IndexConfig,
2560 enabled: bool,
2561 ) -> Result<(), SchemaBuilderError> {
2562 match config {
2563 IndexConfig::Vector(_) => {
2564 return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2565 key: "defaults".to_string(),
2566 });
2567 }
2568 IndexConfig::Fts(_) => {
2569 return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2570 key: "defaults".to_string(),
2571 });
2572 }
2573 IndexConfig::SparseVector(cfg) => {
2574 self.defaults.sparse_vector = Some(SparseVectorValueType {
2575 sparse_vector_index: Some(SparseVectorIndexType {
2576 enabled,
2577 config: cfg,
2578 }),
2579 });
2580 }
2581 IndexConfig::StringInverted(cfg) => {
2582 if self.defaults.string.is_none() {
2583 self.defaults.string = Some(StringValueType {
2584 fts_index: None,
2585 string_inverted_index: None,
2586 });
2587 }
2588 if let Some(string) = &mut self.defaults.string {
2589 string.string_inverted_index = Some(StringInvertedIndexType {
2590 enabled,
2591 config: cfg,
2592 });
2593 }
2594 }
2595 IndexConfig::IntInverted(cfg) => {
2596 self.defaults.int = Some(IntValueType {
2597 int_inverted_index: Some(IntInvertedIndexType {
2598 enabled,
2599 config: cfg,
2600 }),
2601 });
2602 }
2603 IndexConfig::FloatInverted(cfg) => {
2604 self.defaults.float = Some(FloatValueType {
2605 float_inverted_index: Some(FloatInvertedIndexType {
2606 enabled,
2607 config: cfg,
2608 }),
2609 });
2610 }
2611 IndexConfig::BoolInverted(cfg) => {
2612 self.defaults.boolean = Some(BoolValueType {
2613 bool_inverted_index: Some(BoolInvertedIndexType {
2614 enabled,
2615 config: cfg,
2616 }),
2617 });
2618 }
2619 }
2620
2621 Ok(())
2622 }
2623}
2624
/// Configuration of a dense vector index.
///
/// Every field is optional. `None` fields are omitted when serializing, and
/// unknown fields are rejected when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct VectorIndexConfig {
    /// Distance space used by the index (a [`Space`] value).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub space: Option<Space>,
    /// Embedding function configuration attached to the index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Name of the source key for this index. The schema builder sets it to
    /// `DOCUMENT_KEY` on the `EMBEDDING_KEY` entry.
    // NOTE(review): presumably the key whose content gets embedded; confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// HNSW-specific parameters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw: Option<HnswIndexConfig>,
    /// SPANN-specific parameters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub spann: Option<SpannIndexConfig>,
}
2649
/// Optional HNSW parameters of a vector index.
///
/// Every field is optional. `None` fields are omitted when serializing, and
/// unknown fields are rejected when deserializing. `Default` yields a config
/// with every field unset.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct HnswIndexConfig {
    /// HNSW `ef_construction` setting.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_construction: Option<usize>,
    /// HNSW maximum-neighbors setting.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_neighbors: Option<usize>,
    /// HNSW `ef_search` setting.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_search: Option<usize>,
    /// Number of threads setting.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_threads: Option<usize>,
    /// Batch size; validation requires a value of at least 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub batch_size: Option<usize>,
    /// Sync threshold; validation requires a value of at least 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub sync_threshold: Option<usize>,
    /// Resize factor setting.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resize_factor: Option<f64>,
}
2672
2673impl HnswIndexConfig {
2674 pub fn is_default(&self) -> bool {
2678 if let Some(ef_construction) = self.ef_construction {
2679 if ef_construction != default_construction_ef() {
2680 return false;
2681 }
2682 }
2683 if let Some(max_neighbors) = self.max_neighbors {
2684 if max_neighbors != default_m() {
2685 return false;
2686 }
2687 }
2688 if let Some(ef_search) = self.ef_search {
2689 if ef_search != default_search_ef() {
2690 return false;
2691 }
2692 }
2693 if let Some(batch_size) = self.batch_size {
2694 if batch_size != default_batch_size() {
2695 return false;
2696 }
2697 }
2698 if let Some(sync_threshold) = self.sync_threshold {
2699 if sync_threshold != default_sync_threshold() {
2700 return false;
2701 }
2702 }
2703 if let Some(resize_factor) = self.resize_factor {
2704 if resize_factor != default_resize_factor() {
2705 return false;
2706 }
2707 }
2708 true
2710 }
2711}
2712
/// Optional SPANN parameters of a vector index.
///
/// Every `Option` field is omitted from the serialized form when `None`, and
/// unknown fields are rejected when deserializing. The `#[validate]` ranges
/// below are inclusive bounds enforced by the derived `Validate` impl; a
/// range with `min == max` accepts exactly that single value.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SpannIndexConfig {
    /// Search `nprobe`; at most 128.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 128))]
    pub search_nprobe: Option<u32>,
    /// Search RNG factor; only 1.0 passes validation.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub search_rng_factor: Option<f32>,
    /// Search RNG epsilon; between 5.0 and 10.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub search_rng_epsilon: Option<f32>,
    /// Replica count; at most 8.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub nreplica_count: Option<u32>,
    /// Write RNG factor; only 1.0 passes validation.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub write_rng_factor: Option<f32>,
    /// Write RNG epsilon; between 5.0 and 10.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub write_rng_epsilon: Option<f32>,
    /// Split threshold; between 50 and 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 50, max = 200))]
    pub split_threshold: Option<u32>,
    /// Number of k-means samples; at most 1000.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 1000))]
    pub num_samples_kmeans: Option<usize>,
    /// Initial lambda; only 100.0 passes validation.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 100.0, max = 100.0))]
    pub initial_lambda: Option<f32>,
    /// Reassign neighbor count; at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub reassign_neighbor_count: Option<u32>,
    /// Merge threshold; between 25 and 100.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 25, max = 100))]
    pub merge_threshold: Option<u32>,
    /// Number of centers to merge to; at most 8.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub num_centers_to_merge_to: Option<u32>,
    /// Write `nprobe`; at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub write_nprobe: Option<u32>,
    /// `ef_construction` setting; at most 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_construction: Option<usize>,
    /// `ef_search` setting; at most 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_search: Option<usize>,
    /// Maximum-neighbors setting; at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub max_neighbors: Option<usize>,
    /// Center drift threshold; between 0.1 and 1.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 0.1, max = 1.0))]
    pub center_drift_threshold: Option<f32>,
    /// Quantization flag. Falls back to `default_quantize()` when absent from
    /// the input and is omitted from the output when `false`.
    #[serde(default = "default_quantize", skip_serializing_if = "is_false")]
    pub quantize: bool,
}
2773
/// Serde `skip_serializing_if` predicate: `true` exactly when `*v` is `false`.
///
/// The `&bool` parameter is required by serde's predicate signature.
fn is_false(v: &bool) -> bool {
    matches!(*v, false)
}
2777
2778impl SpannIndexConfig {
2779 pub fn is_default(&self) -> bool {
2782 if let Some(search_nprobe) = self.search_nprobe {
2783 if search_nprobe != default_search_nprobe() {
2784 return false;
2785 }
2786 }
2787 if let Some(search_rng_factor) = self.search_rng_factor {
2788 if search_rng_factor != default_search_rng_factor() {
2789 return false;
2790 }
2791 }
2792 if let Some(search_rng_epsilon) = self.search_rng_epsilon {
2793 if search_rng_epsilon != default_search_rng_epsilon() {
2794 return false;
2795 }
2796 }
2797 if let Some(nreplica_count) = self.nreplica_count {
2798 if nreplica_count != default_nreplica_count() {
2799 return false;
2800 }
2801 }
2802 if let Some(write_rng_factor) = self.write_rng_factor {
2803 if write_rng_factor != default_write_rng_factor() {
2804 return false;
2805 }
2806 }
2807 if let Some(write_rng_epsilon) = self.write_rng_epsilon {
2808 if write_rng_epsilon != default_write_rng_epsilon() {
2809 return false;
2810 }
2811 }
2812 if let Some(split_threshold) = self.split_threshold {
2813 if split_threshold != default_split_threshold() {
2814 return false;
2815 }
2816 }
2817 if let Some(num_samples_kmeans) = self.num_samples_kmeans {
2818 if num_samples_kmeans != default_num_samples_kmeans() {
2819 return false;
2820 }
2821 }
2822 if let Some(initial_lambda) = self.initial_lambda {
2823 if initial_lambda != default_initial_lambda() {
2824 return false;
2825 }
2826 }
2827 if let Some(reassign_neighbor_count) = self.reassign_neighbor_count {
2828 if reassign_neighbor_count != default_reassign_neighbor_count() {
2829 return false;
2830 }
2831 }
2832 if let Some(merge_threshold) = self.merge_threshold {
2833 if merge_threshold != default_merge_threshold() {
2834 return false;
2835 }
2836 }
2837 if let Some(num_centers_to_merge_to) = self.num_centers_to_merge_to {
2838 if num_centers_to_merge_to != default_num_centers_to_merge_to() {
2839 return false;
2840 }
2841 }
2842 if let Some(write_nprobe) = self.write_nprobe {
2843 if write_nprobe != default_write_nprobe() {
2844 return false;
2845 }
2846 }
2847 if let Some(ef_construction) = self.ef_construction {
2848 if ef_construction != default_construction_ef_spann() {
2849 return false;
2850 }
2851 }
2852 if let Some(ef_search) = self.ef_search {
2853 if ef_search != default_search_ef_spann() {
2854 return false;
2855 }
2856 }
2857 if let Some(max_neighbors) = self.max_neighbors {
2858 if max_neighbors != default_m_spann() {
2859 return false;
2860 }
2861 }
2862 if let Some(center_drift_threshold) = self.center_drift_threshold {
2863 if center_drift_threshold != default_center_drift_threshold() {
2864 return false;
2865 }
2866 }
2867 if self.quantize != default_quantize() {
2868 return false;
2869 }
2870 true
2871 }
2872}
2873
/// Configuration of a sparse vector index.
///
/// Every field is optional. `None` fields are omitted when serializing, and
/// unknown fields are rejected when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SparseVectorIndexConfig {
    /// Embedding function configuration attached to the index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Name of the source key for this index.
    // NOTE(review): presumably the key whose content gets embedded; confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// BM25 flag.
    // NOTE(review): exact effect is not visible in this part of the file.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bm25: Option<bool>,
}
2888
/// Configuration of a full-text search index.
///
/// Currently has no parameters; any field in the input is rejected when
/// deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FtsIndexConfig {
    // Intentionally empty: no tunable parameters.
}
2895
/// Configuration of an inverted index over string metadata values.
///
/// Currently has no parameters; any field in the input is rejected when
/// deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct StringInvertedIndexConfig {
    // Intentionally empty: no tunable parameters.
}
2902
/// Configuration of an inverted index over integer metadata values.
///
/// Currently has no parameters; any field in the input is rejected when
/// deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct IntInvertedIndexConfig {
    // Intentionally empty: no tunable parameters.
}
2909
/// Configuration of an inverted index over float metadata values.
///
/// Currently has no parameters; any field in the input is rejected when
/// deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FloatInvertedIndexConfig {
    // Intentionally empty: no tunable parameters.
}
2916
/// Configuration of an inverted index over boolean metadata values.
///
/// Currently has no parameters; any field in the input is rejected when
/// deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct BoolInvertedIndexConfig {
    // Intentionally empty: no tunable parameters.
}
2923
/// One index configuration of any supported kind.
///
/// This is the argument type of `create_index` / `delete_index`; each concrete
/// config type converts into it through a `From` impl.
#[derive(Clone, Debug)]
// The variants differ a lot in size (`VectorIndexConfig` is large, several
// others are empty structs); the lint is silenced rather than boxing.
#[allow(clippy::large_enum_variant)]
pub enum IndexConfig {
    /// Dense vector index; only accepted without a key by the builder.
    Vector(VectorIndexConfig),
    /// Sparse vector index; requires a key when created.
    SparseVector(SparseVectorIndexConfig),
    /// Full-text search index; only accepted without a key by the builder.
    Fts(FtsIndexConfig),
    /// Inverted index over string values.
    StringInverted(StringInvertedIndexConfig),
    /// Inverted index over integer values.
    IntInverted(IntInvertedIndexConfig),
    /// Inverted index over float values.
    FloatInverted(FloatInvertedIndexConfig),
    /// Inverted index over boolean values.
    BoolInverted(BoolInvertedIndexConfig),
}
2940
2941impl From<VectorIndexConfig> for IndexConfig {
2943 fn from(config: VectorIndexConfig) -> Self {
2944 IndexConfig::Vector(config)
2945 }
2946}
2947
2948impl From<SparseVectorIndexConfig> for IndexConfig {
2949 fn from(config: SparseVectorIndexConfig) -> Self {
2950 IndexConfig::SparseVector(config)
2951 }
2952}
2953
2954impl From<FtsIndexConfig> for IndexConfig {
2955 fn from(config: FtsIndexConfig) -> Self {
2956 IndexConfig::Fts(config)
2957 }
2958}
2959
2960impl From<StringInvertedIndexConfig> for IndexConfig {
2961 fn from(config: StringInvertedIndexConfig) -> Self {
2962 IndexConfig::StringInverted(config)
2963 }
2964}
2965
2966impl From<IntInvertedIndexConfig> for IndexConfig {
2967 fn from(config: IntInvertedIndexConfig) -> Self {
2968 IndexConfig::IntInverted(config)
2969 }
2970}
2971
2972impl From<FloatInvertedIndexConfig> for IndexConfig {
2973 fn from(config: FloatInvertedIndexConfig) -> Self {
2974 IndexConfig::FloatInverted(config)
2975 }
2976}
2977
2978impl From<BoolInvertedIndexConfig> for IndexConfig {
2979 fn from(config: BoolInvertedIndexConfig) -> Self {
2980 IndexConfig::BoolInverted(config)
2981 }
2982}
2983
2984impl TryFrom<&InternalCollectionConfiguration> for Schema {
2985 type Error = SchemaError;
2986
2987 fn try_from(config: &InternalCollectionConfiguration) -> Result<Self, Self::Error> {
2988 let mut schema = match &config.vector_index {
2990 VectorIndexConfiguration::Hnsw(_) => Schema::new_default(KnnIndex::Hnsw),
2991 VectorIndexConfiguration::Spann(_) => Schema::new_default(KnnIndex::Spann),
2992 };
2993 let vector_config = match &config.vector_index {
2995 VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
2996 space: Some(hnsw_config.space.clone()),
2997 embedding_function: config.embedding_function.clone(),
2998 source_key: None,
2999 hnsw: Some(HnswIndexConfig {
3000 ef_construction: Some(hnsw_config.ef_construction),
3001 max_neighbors: Some(hnsw_config.max_neighbors),
3002 ef_search: Some(hnsw_config.ef_search),
3003 num_threads: Some(hnsw_config.num_threads),
3004 batch_size: Some(hnsw_config.batch_size),
3005 sync_threshold: Some(hnsw_config.sync_threshold),
3006 resize_factor: Some(hnsw_config.resize_factor),
3007 }),
3008 spann: None,
3009 },
3010 VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
3011 space: Some(spann_config.space.clone()),
3012 embedding_function: config.embedding_function.clone(),
3013 source_key: None,
3014 hnsw: None,
3015 spann: Some(SpannIndexConfig {
3016 search_nprobe: Some(spann_config.search_nprobe),
3017 search_rng_factor: Some(spann_config.search_rng_factor),
3018 search_rng_epsilon: Some(spann_config.search_rng_epsilon),
3019 nreplica_count: Some(spann_config.nreplica_count),
3020 write_rng_factor: Some(spann_config.write_rng_factor),
3021 write_rng_epsilon: Some(spann_config.write_rng_epsilon),
3022 split_threshold: Some(spann_config.split_threshold),
3023 num_samples_kmeans: Some(spann_config.num_samples_kmeans),
3024 initial_lambda: Some(spann_config.initial_lambda),
3025 reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
3026 merge_threshold: Some(spann_config.merge_threshold),
3027 num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
3028 write_nprobe: Some(spann_config.write_nprobe),
3029 ef_construction: Some(spann_config.ef_construction),
3030 ef_search: Some(spann_config.ef_search),
3031 max_neighbors: Some(spann_config.max_neighbors),
3032 center_drift_threshold: None,
3033 quantize: default_quantize(),
3034 }),
3035 },
3036 };
3037
3038 if let Some(float_list) = &mut schema.defaults.float_list {
3041 if let Some(vector_index) = &mut float_list.vector_index {
3042 vector_index.config = vector_config.clone();
3043 }
3044 }
3045
3046 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
3050 if let Some(float_list) = &mut embedding_types.float_list {
3051 if let Some(vector_index) = &mut float_list.vector_index {
3052 let mut vector_config = vector_config;
3053 vector_config.source_key = Some(DOCUMENT_KEY.to_string());
3054 vector_index.config = vector_config;
3055 }
3056 }
3057 }
3058
3059 Ok(schema)
3060 }
3061}
3062
3063#[cfg(test)]
3064mod tests {
3065 use super::*;
3066 use crate::hnsw_configuration::Space;
3067 use crate::metadata::SparseVector;
3068 use crate::{
3069 EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
3070 };
3071 use serde_json::json;
3072
#[test]
fn test_reconcile_with_defaults_none_user_schema() {
    // Without a user schema, reconciliation must yield the plain SPANN default.
    let reconciled = Schema::reconcile_with_defaults(None, KnnIndex::Spann).unwrap();
    assert_eq!(reconciled, Schema::new_default(KnnIndex::Spann));
}
3080
#[test]
fn test_reconcile_with_defaults_empty_user_schema() {
    // A user schema that sets nothing must reconcile to the plain SPANN default.
    let empty_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::new(),
        cmek: None,
        source_attached_function_id: None,
    };

    let reconciled =
        Schema::reconcile_with_defaults(Some(&empty_schema), KnnIndex::Spann).unwrap();
    assert_eq!(reconciled, Schema::new_default(KnnIndex::Spann));
}
3095
#[test]
fn test_reconcile_with_defaults_user_overrides_string_enabled() {
    // The user disables the default string inverted index and sets nothing else.
    let user_schema = Schema {
        defaults: ValueTypes {
            string: Some(StringValueType {
                string_inverted_index: Some(StringInvertedIndexType {
                    enabled: false,
                    config: StringInvertedIndexConfig {},
                }),
                fts_index: None,
            }),
            ..Default::default()
        },
        keys: HashMap::new(),
        cmek: None,
        source_attached_function_id: None,
    };

    let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();

    // The user's override survives reconciliation...
    let string_defaults = result.defaults.string.as_ref().unwrap();
    let inverted_index = string_defaults.string_inverted_index.as_ref().unwrap();
    assert!(!inverted_index.enabled);
    // ...while value types the user left alone are still filled in.
    assert!(result.defaults.float.is_some());
    assert!(result.defaults.int.is_some());
}
3132
#[test]
fn test_reconcile_with_defaults_user_overrides_vector_config() {
    // The user overrides space, source key and one HNSW parameter; everything
    // else in the HNSW config is left unset.
    let user_vector_config = VectorIndexConfig {
        space: Some(Space::L2),
        embedding_function: None,
        source_key: Some("custom_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(500),
            ..Default::default()
        }),
        spann: None,
    };
    let user_schema = Schema {
        defaults: ValueTypes {
            float_list: Some(FloatListValueType {
                vector_index: Some(VectorIndexType {
                    enabled: true,
                    config: user_vector_config,
                }),
            }),
            ..Default::default()
        },
        keys: HashMap::new(),
        cmek: None,
        source_attached_function_id: None,
    };

    // Merge by hand against the HNSW defaults instead of going through
    // `reconcile_with_defaults`.
    let baseline = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults =
        Schema::merge_value_types(&baseline.defaults, &user_schema.defaults, KnnIndex::Hnsw)
            .unwrap();
    let mut merged_keys = baseline.keys.clone();
    for (key, overrides) in user_schema.keys {
        let merged = match merged_keys.get(&key) {
            Some(existing) => {
                Schema::merge_value_types(existing, &overrides, KnnIndex::Hnsw).unwrap()
            }
            None => overrides,
        };
        merged_keys.insert(key, merged);
    }
    let result = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
        cmek: None,
        source_attached_function_id: None,
    };

    let float_list = result.defaults.float_list.as_ref().unwrap();
    let vector_config = &float_list.vector_index.as_ref().unwrap().config;

    // User-provided values win.
    assert_eq!(vector_config.space, Some(Space::L2));
    assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
    let merged_hnsw = vector_config.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(500));

    // Values the user left unset come from the defaults.
    assert_eq!(vector_config.embedding_function, None);
    assert_eq!(merged_hnsw.max_neighbors, Some(default_m()));
}
3221
#[test]
fn test_reconcile_with_defaults_keys() {
    // A user-defined key with FTS enabled and the string inverted index disabled.
    let custom_string = StringValueType {
        fts_index: Some(FtsIndexType {
            enabled: true,
            config: FtsIndexConfig {},
        }),
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
    };
    let user_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::from([(
            "custom_key".to_string(),
            ValueTypes {
                string: Some(custom_string),
                ..Default::default()
            },
        )]),
        cmek: None,
        source_attached_function_id: None,
    };

    let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();

    // The built-in keys are still present after reconciliation.
    assert!(result.keys.contains_key(EMBEDDING_KEY));
    assert!(result.keys.contains_key(DOCUMENT_KEY));

    // The user's key is kept together with its FTS setting.
    assert!(result.keys.contains_key("custom_key"));
    let custom_entry = result.keys.get("custom_key").unwrap();
    let custom_fts = custom_entry.string.as_ref().unwrap().fts_index.as_ref().unwrap();
    assert!(custom_fts.enabled);
}
3270
#[test]
fn test_reconcile_with_defaults_override_existing_key() {
    // The user overrides the vector index stored under the built-in embedding key.
    let embedding_index = VectorIndexType {
        enabled: false,
        config: VectorIndexConfig {
            space: Some(Space::Ip),
            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
            source_key: Some("custom_embedding_key".to_string()),
            hnsw: None,
            spann: None,
        },
    };
    let user_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::from([(
            EMBEDDING_KEY.to_string(),
            ValueTypes {
                float_list: Some(FloatListValueType {
                    vector_index: Some(embedding_index),
                }),
                ..Default::default()
            },
        )]),
        cmek: None,
        source_attached_function_id: None,
    };

    let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();

    let embedding_entry = result.keys.get(EMBEDDING_KEY).unwrap();
    let float_list = embedding_entry.float_list.as_ref().unwrap();
    let vector_index = float_list.vector_index.as_ref().unwrap();

    // Every user-provided value must survive reconciliation.
    assert!(!vector_index.enabled);
    assert_eq!(vector_index.config.space, Some(Space::Ip));
    assert_eq!(
        vector_index.config.source_key,
        Some("custom_embedding_key".to_string())
    );
}
3320
#[test]
fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
    // Config -> Schema -> config must give back the original HNSW config.
    let hnsw = InternalHnswConfiguration {
        space: Space::Cosine,
        ef_construction: 128,
        ef_search: 96,
        max_neighbors: 42,
        num_threads: 8,
        resize_factor: 1.5,
        sync_threshold: 2_000,
        batch_size: 256,
    };
    let embedding_function =
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "custom".to_string(),
            config: json!({"alpha": 1}),
        });
    let collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(hnsw),
        embedding_function: Some(embedding_function),
    };

    let schema = Schema::try_from(&collection_config).unwrap();
    let round_tripped = InternalCollectionConfiguration::try_from(&schema).unwrap();

    assert_eq!(round_tripped, collection_config);
}
3347
#[test]
fn test_convert_schema_to_collection_config_spann_roundtrip() {
    // Config -> Schema -> config must give back the original SPANN config.
    // Fields not listed here keep their `Default` values.
    let spann = InternalSpannConfiguration {
        space: Space::Cosine,
        search_nprobe: 11,
        search_rng_factor: 1.7,
        write_nprobe: 5,
        nreplica_count: 3,
        split_threshold: 150,
        merge_threshold: 80,
        ef_construction: 120,
        ef_search: 90,
        max_neighbors: 40,
        ..Default::default()
    };
    let embedding_function =
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "custom".to_string(),
            config: json!({"beta": true}),
        });
    let collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(spann),
        embedding_function: Some(embedding_function),
    };

    let schema = Schema::try_from(&collection_config).unwrap();
    let round_tripped = InternalCollectionConfiguration::try_from(&schema).unwrap();

    assert_eq!(round_tripped, collection_config);
}
3379
3380 #[test]
3381 fn test_convert_schema_to_collection_config_rejects_mixed_index() {
3382 let mut schema = Schema::new_default(KnnIndex::Hnsw);
3383 if let Some(embedding) = schema.keys.get_mut(EMBEDDING_KEY) {
3384 if let Some(float_list) = &mut embedding.float_list {
3385 if let Some(vector_index) = &mut float_list.vector_index {
3386 vector_index.config.spann = Some(SpannIndexConfig {
3387 search_nprobe: Some(1),
3388 search_rng_factor: Some(1.0),
3389 search_rng_epsilon: Some(0.1),
3390 nreplica_count: Some(1),
3391 write_rng_factor: Some(1.0),
3392 write_rng_epsilon: Some(0.1),
3393 split_threshold: Some(100),
3394 num_samples_kmeans: Some(10),
3395 initial_lambda: Some(0.5),
3396 reassign_neighbor_count: Some(10),
3397 merge_threshold: Some(50),
3398 num_centers_to_merge_to: Some(3),
3399 write_nprobe: Some(1),
3400 ef_construction: Some(50),
3401 ef_search: Some(40),
3402 max_neighbors: Some(20),
3403 center_drift_threshold: None,
3404 quantize: false,
3405 });
3406 }
3407 }
3408 }
3409
3410 let result = InternalCollectionConfiguration::try_from(&schema);
3411 assert!(result.is_err());
3412 }
3413
3414 #[test]
3415 fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
3416 let mut schema = Schema::new_default(KnnIndex::Hnsw);
3417 let before = schema.clone();
3418 let modified = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);
3419 assert!(!modified);
3420 assert_eq!(schema, before);
3421 }
3422
3423 #[test]
3424 fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
3425 let mut schema = Schema::new_default(KnnIndex::Hnsw);
3426 assert!(!schema.keys.contains_key("custom_field"));
3427
3428 let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3429
3430 assert!(modified);
3431 let entry = schema
3432 .keys
3433 .get("custom_field")
3434 .expect("expected new key override to be inserted");
3435 assert_eq!(entry.boolean, schema.defaults.boolean);
3436 assert!(entry.string.is_none());
3437 assert!(entry.int.is_none());
3438 assert!(entry.float.is_none());
3439 assert!(entry.float_list.is_none());
3440 assert!(entry.sparse_vector.is_none());
3441 }
3442
3443 #[test]
3444 fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
3445 let mut schema = Schema::new_default(KnnIndex::Hnsw);
3446 let initial_len = schema.keys.len();
3447 schema.keys.insert(
3448 "custom_field".to_string(),
3449 ValueTypes {
3450 string: schema.defaults.string.clone(),
3451 ..Default::default()
3452 },
3453 );
3454
3455 let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3456
3457 assert!(modified);
3458 assert_eq!(schema.keys.len(), initial_len + 1);
3459 let entry = schema
3460 .keys
3461 .get("custom_field")
3462 .expect("expected key override to exist after ensure call");
3463 assert!(entry.string.is_some());
3464 assert_eq!(entry.boolean, schema.defaults.boolean);
3465 }
3466
3467 #[test]
3468 fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
3469 let schema = Schema::new_default(KnnIndex::Spann);
3470 let result = schema.is_knn_key_indexing_enabled(
3471 "custom_sparse",
3472 &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap()),
3473 );
3474
3475 let err = result.expect_err("expected indexing disabled error");
3476 match err {
3477 FilterValidationError::IndexingDisabled { key, value_type } => {
3478 assert_eq!(key, "custom_sparse");
3479 assert_eq!(value_type, crate::metadata::MetadataValueType::SparseVector);
3480 }
3481 other => panic!("unexpected error variant: {other:?}"),
3482 }
3483 }
3484
3485 #[test]
3486 fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
3487 let mut schema = Schema::new_default(KnnIndex::Spann);
3488 schema.keys.insert(
3489 "sparse_enabled".to_string(),
3490 ValueTypes {
3491 sparse_vector: Some(SparseVectorValueType {
3492 sparse_vector_index: Some(SparseVectorIndexType {
3493 enabled: true,
3494 config: SparseVectorIndexConfig {
3495 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3496 source_key: None,
3497 bm25: None,
3498 },
3499 }),
3500 }),
3501 ..Default::default()
3502 },
3503 );
3504
3505 let result = schema.is_knn_key_indexing_enabled(
3506 "sparse_enabled",
3507 &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap()),
3508 );
3509
3510 assert!(result.is_ok());
3511 }
3512
3513 #[test]
3514 fn test_is_knn_key_indexing_enabled_dense_succeeds() {
3515 let schema = Schema::new_default(KnnIndex::Spann);
3516 let result = schema.is_knn_key_indexing_enabled(
3517 EMBEDDING_KEY,
3518 &QueryVector::Dense(vec![0.1_f32, 0.2_f32]),
3519 );
3520
3521 assert!(result.is_ok());
3522 }
3523
3524 #[test]
3525 fn test_merge_hnsw_configs_field_level() {
3526 let default_hnsw = HnswIndexConfig {
3528 ef_construction: Some(200),
3529 max_neighbors: Some(16),
3530 ef_search: Some(10),
3531 num_threads: Some(4),
3532 batch_size: Some(100),
3533 sync_threshold: Some(1000),
3534 resize_factor: Some(1.2),
3535 };
3536
3537 let user_hnsw = HnswIndexConfig {
3538 ef_construction: Some(300), max_neighbors: None, ef_search: Some(20), num_threads: None, batch_size: None, sync_threshold: Some(2000), resize_factor: None, };
3546
3547 let result = Schema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();
3548
3549 assert_eq!(result.ef_construction, Some(300));
3551 assert_eq!(result.ef_search, Some(20));
3552 assert_eq!(result.sync_threshold, Some(2000));
3553
3554 assert_eq!(result.max_neighbors, Some(16));
3556 assert_eq!(result.num_threads, Some(4));
3557 assert_eq!(result.batch_size, Some(100));
3558 assert_eq!(result.resize_factor, Some(1.2));
3559 }
3560
3561 #[test]
3562 fn test_merge_spann_configs_field_level() {
3563 let default_spann = SpannIndexConfig {
3565 search_nprobe: Some(10),
3566 search_rng_factor: Some(1.0), search_rng_epsilon: Some(7.0), nreplica_count: Some(3),
3569 write_rng_factor: Some(1.0), write_rng_epsilon: Some(6.0), split_threshold: Some(100), num_samples_kmeans: Some(100),
3573 initial_lambda: Some(100.0), reassign_neighbor_count: Some(50),
3575 merge_threshold: Some(50), num_centers_to_merge_to: Some(4), write_nprobe: Some(5),
3578 ef_construction: Some(100),
3579 ef_search: Some(10),
3580 max_neighbors: Some(16),
3581 center_drift_threshold: None,
3582 quantize: false,
3583 };
3584
3585 let user_spann = SpannIndexConfig {
3586 search_nprobe: Some(20), search_rng_factor: None, search_rng_epsilon: Some(8.0), nreplica_count: None, write_rng_factor: None,
3591 write_rng_epsilon: None,
3592 split_threshold: Some(150), num_samples_kmeans: None,
3594 initial_lambda: None,
3595 reassign_neighbor_count: None,
3596 merge_threshold: None,
3597 num_centers_to_merge_to: None,
3598 write_nprobe: None,
3599 ef_construction: None,
3600 ef_search: None,
3601 max_neighbors: None,
3602 center_drift_threshold: None,
3603 quantize: false,
3604 };
3605
3606 let result = Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann))
3607 .unwrap()
3608 .unwrap();
3609
3610 assert_eq!(result.search_nprobe, Some(20));
3612 assert_eq!(result.search_rng_epsilon, Some(8.0));
3613 assert_eq!(result.split_threshold, Some(150));
3614
3615 assert_eq!(result.search_rng_factor, Some(1.0));
3617 assert_eq!(result.nreplica_count, Some(3));
3618 assert_eq!(result.initial_lambda, Some(100.0));
3619 }
3620
3621 #[test]
3622 fn test_merge_spann_configs_rejects_quantize_true() {
3623 let default_spann = SpannIndexConfig {
3625 search_nprobe: Some(10),
3626 search_rng_factor: Some(1.0),
3627 search_rng_epsilon: Some(7.0),
3628 nreplica_count: Some(3),
3629 write_rng_factor: Some(1.0),
3630 write_rng_epsilon: Some(6.0),
3631 split_threshold: Some(100),
3632 num_samples_kmeans: Some(100),
3633 initial_lambda: Some(100.0),
3634 reassign_neighbor_count: Some(50),
3635 merge_threshold: Some(50),
3636 num_centers_to_merge_to: Some(4),
3637 write_nprobe: Some(5),
3638 ef_construction: Some(100),
3639 ef_search: Some(10),
3640 max_neighbors: Some(16),
3641 center_drift_threshold: None,
3642 quantize: false,
3643 };
3644
3645 let user_spann_with_quantize = SpannIndexConfig {
3646 search_nprobe: Some(20),
3647 search_rng_factor: None,
3648 search_rng_epsilon: Some(8.0),
3649 nreplica_count: None,
3650 write_rng_factor: None,
3651 write_rng_epsilon: None,
3652 split_threshold: Some(150),
3653 num_samples_kmeans: None,
3654 initial_lambda: None,
3655 reassign_neighbor_count: None,
3656 merge_threshold: None,
3657 num_centers_to_merge_to: None,
3658 write_nprobe: None,
3659 ef_construction: None,
3660 ef_search: None,
3661 max_neighbors: None,
3662 center_drift_threshold: None,
3663 quantize: true, };
3665
3666 let result =
3668 Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann_with_quantize));
3669 assert!(result.is_err());
3670 match result {
3671 Err(SchemaError::InvalidUserInput { reason }) => {
3672 assert!(reason.contains("quantize field cannot be set to true"));
3673 }
3674 _ => panic!("Expected InvalidUserInput error"),
3675 }
3676
3677 let default_spann_with_quantize = SpannIndexConfig {
3679 search_nprobe: Some(10),
3680 search_rng_factor: Some(1.0),
3681 search_rng_epsilon: Some(7.0),
3682 nreplica_count: Some(3),
3683 write_rng_factor: Some(1.0),
3684 write_rng_epsilon: Some(6.0),
3685 split_threshold: Some(100),
3686 num_samples_kmeans: Some(100),
3687 initial_lambda: Some(100.0),
3688 reassign_neighbor_count: Some(50),
3689 merge_threshold: Some(50),
3690 num_centers_to_merge_to: Some(4),
3691 write_nprobe: Some(5),
3692 ef_construction: Some(100),
3693 ef_search: Some(10),
3694 max_neighbors: Some(16),
3695 center_drift_threshold: None,
3696 quantize: true, };
3698
3699 let result = Schema::merge_spann_configs(Some(&default_spann_with_quantize), None);
3700 assert!(result.is_err());
3701 match result {
3702 Err(SchemaError::InvalidUserInput { reason }) => {
3703 assert!(reason.contains("quantize field cannot be set to true"));
3704 }
3705 _ => panic!("Expected InvalidUserInput error"),
3706 }
3707
3708 let result = Schema::merge_spann_configs(None, Some(&user_spann_with_quantize));
3710 assert!(result.is_err());
3711 match result {
3712 Err(SchemaError::InvalidUserInput { reason }) => {
3713 assert!(reason.contains("quantize field cannot be set to true"));
3714 }
3715 _ => panic!("Expected InvalidUserInput error"),
3716 }
3717 }
3718
3719 #[test]
3720 fn test_spann_index_config_into_internal_configuration() {
3721 let config = SpannIndexConfig {
3722 search_nprobe: Some(33),
3723 search_rng_factor: Some(1.2),
3724 search_rng_epsilon: None,
3725 nreplica_count: None,
3726 write_rng_factor: Some(1.5),
3727 write_rng_epsilon: None,
3728 split_threshold: Some(75),
3729 num_samples_kmeans: None,
3730 initial_lambda: Some(0.9),
3731 reassign_neighbor_count: Some(40),
3732 merge_threshold: None,
3733 num_centers_to_merge_to: Some(4),
3734 write_nprobe: Some(60),
3735 ef_construction: Some(180),
3736 ef_search: Some(170),
3737 max_neighbors: Some(32),
3738 center_drift_threshold: None,
3739 quantize: false,
3740 };
3741
3742 let with_space: InternalSpannConfiguration = (Some(&Space::Cosine), &config).into();
3743 assert_eq!(with_space.space, Space::Cosine);
3744 assert_eq!(with_space.search_nprobe, 33);
3745 assert_eq!(with_space.search_rng_factor, 1.2);
3746 assert_eq!(with_space.search_rng_epsilon, default_search_rng_epsilon());
3747 assert_eq!(with_space.write_rng_factor, 1.5);
3748 assert_eq!(with_space.write_nprobe, 60);
3749 assert_eq!(with_space.ef_construction, 180);
3750 assert_eq!(with_space.ef_search, 170);
3751 assert_eq!(with_space.max_neighbors, 32);
3752 assert_eq!(with_space.merge_threshold, default_merge_threshold());
3753
3754 let default_space_config: InternalSpannConfiguration = (None, &config).into();
3755 assert_eq!(default_space_config.space, default_space());
3756 }
3757
3758 #[test]
3759 fn test_merge_string_type_combinations() {
3760 let default = StringValueType {
3764 string_inverted_index: Some(StringInvertedIndexType {
3765 enabled: true,
3766 config: StringInvertedIndexConfig {},
3767 }),
3768 fts_index: Some(FtsIndexType {
3769 enabled: false,
3770 config: FtsIndexConfig {},
3771 }),
3772 };
3773
3774 let user = StringValueType {
3775 string_inverted_index: Some(StringInvertedIndexType {
3776 enabled: false, config: StringInvertedIndexConfig {},
3778 }),
3779 fts_index: None, };
3781
3782 let result = Schema::merge_string_type(Some(&default), Some(&user))
3783 .unwrap()
3784 .unwrap();
3785 assert!(!result.string_inverted_index.as_ref().unwrap().enabled); assert!(!result.fts_index.as_ref().unwrap().enabled); let result = Schema::merge_string_type(Some(&default), None)
3790 .unwrap()
3791 .unwrap();
3792 assert!(result.string_inverted_index.as_ref().unwrap().enabled);
3793
3794 let result = Schema::merge_string_type(None, Some(&user))
3796 .unwrap()
3797 .unwrap();
3798 assert!(!result.string_inverted_index.as_ref().unwrap().enabled);
3799
3800 let result = Schema::merge_string_type(None, None).unwrap();
3802 assert!(result.is_none());
3803 }
3804
3805 #[test]
3806 fn test_merge_vector_index_config_comprehensive() {
3807 let default_config = VectorIndexConfig {
3809 space: Some(Space::Cosine),
3810 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3811 source_key: Some("default_key".to_string()),
3812 hnsw: Some(HnswIndexConfig {
3813 ef_construction: Some(200),
3814 max_neighbors: Some(16),
3815 ef_search: Some(10),
3816 num_threads: Some(4),
3817 batch_size: Some(100),
3818 sync_threshold: Some(1000),
3819 resize_factor: Some(1.2),
3820 }),
3821 spann: None,
3822 };
3823
3824 let user_config = VectorIndexConfig {
3825 space: Some(Space::L2), embedding_function: None, source_key: Some("user_key".to_string()), hnsw: Some(HnswIndexConfig {
3829 ef_construction: Some(300), max_neighbors: None, ef_search: None, num_threads: None,
3833 batch_size: None,
3834 sync_threshold: None,
3835 resize_factor: None,
3836 }),
3837 spann: Some(SpannIndexConfig {
3838 search_nprobe: Some(15),
3839 search_rng_factor: None,
3840 search_rng_epsilon: None,
3841 nreplica_count: None,
3842 write_rng_factor: None,
3843 write_rng_epsilon: None,
3844 split_threshold: None,
3845 num_samples_kmeans: None,
3846 initial_lambda: None,
3847 reassign_neighbor_count: None,
3848 merge_threshold: None,
3849 num_centers_to_merge_to: None,
3850 write_nprobe: None,
3851 ef_construction: None,
3852 ef_search: None,
3853 max_neighbors: None,
3854 center_drift_threshold: None,
3855 quantize: false,
3856 }), };
3858
3859 let result =
3860 Schema::merge_vector_index_config(&default_config, &user_config, KnnIndex::Hnsw)
3861 .expect("merge should succeed");
3862
3863 assert_eq!(result.space, Some(Space::L2)); assert_eq!(
3866 result.embedding_function,
3867 Some(EmbeddingFunctionConfiguration::Legacy)
3868 ); assert_eq!(result.source_key, Some("user_key".to_string())); assert_eq!(result.hnsw.as_ref().unwrap().ef_construction, Some(300)); assert_eq!(result.hnsw.as_ref().unwrap().max_neighbors, Some(16)); assert!(result.spann.is_none());
3877 }
3878
3879 #[test]
3880 fn test_merge_sparse_vector_index_config() {
3881 let default_config = SparseVectorIndexConfig {
3883 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3884 source_key: Some("default_sparse_key".to_string()),
3885 bm25: None,
3886 };
3887
3888 let user_config = SparseVectorIndexConfig {
3889 embedding_function: None, source_key: Some("user_sparse_key".to_string()), bm25: None,
3892 };
3893
3894 let result = Schema::merge_sparse_vector_index_config(&default_config, &user_config);
3895
3896 assert_eq!(result.source_key, Some("user_sparse_key".to_string()));
3898 assert_eq!(
3900 result.embedding_function,
3901 Some(EmbeddingFunctionConfiguration::Legacy)
3902 );
3903 }
3904
3905 #[test]
3906 fn test_complex_nested_merging_scenario() {
3907 let mut user_schema = Schema {
3909 defaults: ValueTypes::default(),
3910 keys: HashMap::new(),
3911 cmek: None,
3912 source_attached_function_id: None,
3913 };
3914
3915 user_schema.defaults.string = Some(StringValueType {
3917 string_inverted_index: Some(StringInvertedIndexType {
3918 enabled: false,
3919 config: StringInvertedIndexConfig {},
3920 }),
3921 fts_index: Some(FtsIndexType {
3922 enabled: true,
3923 config: FtsIndexConfig {},
3924 }),
3925 });
3926
3927 user_schema.defaults.float_list = Some(FloatListValueType {
3928 vector_index: Some(VectorIndexType {
3929 enabled: true,
3930 config: VectorIndexConfig {
3931 space: Some(Space::Ip),
3932 embedding_function: None, source_key: Some("custom_vector_key".to_string()),
3934 hnsw: Some(HnswIndexConfig {
3935 ef_construction: Some(400),
3936 max_neighbors: Some(32),
3937 ef_search: None, num_threads: None,
3939 batch_size: None,
3940 sync_threshold: None,
3941 resize_factor: None,
3942 }),
3943 spann: None,
3944 },
3945 }),
3946 });
3947
3948 let custom_key_override = ValueTypes {
3950 string: Some(StringValueType {
3951 fts_index: Some(FtsIndexType {
3952 enabled: true,
3953 config: FtsIndexConfig {},
3954 }),
3955 string_inverted_index: None,
3956 }),
3957 ..Default::default()
3958 };
3959 user_schema
3960 .keys
3961 .insert("custom_field".to_string(), custom_key_override);
3962
3963 let result = {
3965 let default_schema = Schema::new_default(KnnIndex::Hnsw);
3966 let merged_defaults = Schema::merge_value_types(
3967 &default_schema.defaults,
3968 &user_schema.defaults,
3969 KnnIndex::Hnsw,
3970 )
3971 .unwrap();
3972 let mut merged_keys = default_schema.keys.clone();
3973 for (key, user_value_types) in user_schema.keys {
3974 if let Some(default_value_types) = merged_keys.get(&key) {
3975 let merged_value_types = Schema::merge_value_types(
3976 default_value_types,
3977 &user_value_types,
3978 KnnIndex::Hnsw,
3979 )
3980 .unwrap();
3981 merged_keys.insert(key, merged_value_types);
3982 } else {
3983 merged_keys.insert(key, user_value_types);
3984 }
3985 }
3986 Schema {
3987 defaults: merged_defaults,
3988 keys: merged_keys,
3989 cmek: None,
3990 source_attached_function_id: None,
3991 }
3992 };
3993
3994 assert!(
3998 !result
3999 .defaults
4000 .string
4001 .as_ref()
4002 .unwrap()
4003 .string_inverted_index
4004 .as_ref()
4005 .unwrap()
4006 .enabled
4007 );
4008 assert!(
4009 result
4010 .defaults
4011 .string
4012 .as_ref()
4013 .unwrap()
4014 .fts_index
4015 .as_ref()
4016 .unwrap()
4017 .enabled
4018 );
4019
4020 let vector_config = &result
4021 .defaults
4022 .float_list
4023 .as_ref()
4024 .unwrap()
4025 .vector_index
4026 .as_ref()
4027 .unwrap()
4028 .config;
4029 assert_eq!(vector_config.space, Some(Space::Ip));
4030 assert_eq!(vector_config.embedding_function, None); assert_eq!(
4032 vector_config.source_key,
4033 Some("custom_vector_key".to_string())
4034 );
4035 assert_eq!(
4036 vector_config.hnsw.as_ref().unwrap().ef_construction,
4037 Some(400)
4038 );
4039 assert_eq!(vector_config.hnsw.as_ref().unwrap().max_neighbors, Some(32));
4040 assert_eq!(
4041 vector_config.hnsw.as_ref().unwrap().ef_search,
4042 Some(default_search_ef())
4043 ); assert!(result.keys.contains_key(EMBEDDING_KEY)); assert!(result.keys.contains_key(DOCUMENT_KEY)); assert!(result.keys.contains_key("custom_field")); let custom_override = result.keys.get("custom_field").unwrap();
4051 assert!(
4052 custom_override
4053 .string
4054 .as_ref()
4055 .unwrap()
4056 .fts_index
4057 .as_ref()
4058 .unwrap()
4059 .enabled
4060 );
4061 assert!(custom_override
4062 .string
4063 .as_ref()
4064 .unwrap()
4065 .string_inverted_index
4066 .is_none());
4067 }
4068
4069 #[test]
4070 fn test_reconcile_with_collection_config_default_config() {
4071 let collection_config = InternalCollectionConfiguration::default_hnsw();
4073 let schema = Schema::try_from(&collection_config).unwrap();
4074
4075 let result =
4076 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4077 .unwrap();
4078 assert_eq!(result, schema);
4079 }
4080
4081 #[test]
4083 fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_hnsw() {
4084 let collection_config = InternalCollectionConfiguration::default_hnsw();
4085 let schema = Schema::new_default(KnnIndex::Hnsw);
4086 let result =
4087 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4088 .unwrap();
4089
4090 assert!(result.defaults.float_list.is_some());
4092 assert!(result
4093 .defaults
4094 .float_list
4095 .as_ref()
4096 .unwrap()
4097 .vector_index
4098 .as_ref()
4099 .unwrap()
4100 .config
4101 .hnsw
4102 .is_some());
4103 assert!(result
4104 .defaults
4105 .float_list
4106 .as_ref()
4107 .unwrap()
4108 .vector_index
4109 .as_ref()
4110 .unwrap()
4111 .config
4112 .spann
4113 .is_none());
4114 }
4115
4116 #[test]
4117 fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_spann() {
4118 let collection_config = InternalCollectionConfiguration::default_hnsw();
4119 let schema = Schema::new_default(KnnIndex::Hnsw);
4120 let result =
4121 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4122 .unwrap();
4123
4124 assert!(result.defaults.float_list.is_some());
4126 assert!(result
4127 .defaults
4128 .float_list
4129 .as_ref()
4130 .unwrap()
4131 .vector_index
4132 .as_ref()
4133 .unwrap()
4134 .config
4135 .spann
4136 .is_some());
4137 assert!(result
4138 .defaults
4139 .float_list
4140 .as_ref()
4141 .unwrap()
4142 .vector_index
4143 .as_ref()
4144 .unwrap()
4145 .config
4146 .hnsw
4147 .is_none());
4148 }
4149
4150 #[test]
4151 fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_hnsw() {
4152 let collection_config = InternalCollectionConfiguration::default_hnsw();
4153 let schema = Schema::new_default(KnnIndex::Spann);
4154 let result =
4155 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4156 .unwrap();
4157
4158 assert!(result.defaults.float_list.is_some());
4160 assert!(result
4161 .defaults
4162 .float_list
4163 .as_ref()
4164 .unwrap()
4165 .vector_index
4166 .as_ref()
4167 .unwrap()
4168 .config
4169 .hnsw
4170 .is_some());
4171 assert!(result
4172 .defaults
4173 .float_list
4174 .as_ref()
4175 .unwrap()
4176 .vector_index
4177 .as_ref()
4178 .unwrap()
4179 .config
4180 .spann
4181 .is_none());
4182 }
4183
4184 #[test]
4185 fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_spann() {
4186 let collection_config = InternalCollectionConfiguration::default_hnsw();
4187 let schema = Schema::new_default(KnnIndex::Spann);
4188 let result =
4189 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4190 .unwrap();
4191
4192 assert!(result.defaults.float_list.is_some());
4194 assert!(result
4195 .defaults
4196 .float_list
4197 .as_ref()
4198 .unwrap()
4199 .vector_index
4200 .as_ref()
4201 .unwrap()
4202 .config
4203 .spann
4204 .is_some());
4205 assert!(result
4206 .defaults
4207 .float_list
4208 .as_ref()
4209 .unwrap()
4210 .vector_index
4211 .as_ref()
4212 .unwrap()
4213 .config
4214 .hnsw
4215 .is_none());
4216 }
4217
4218 #[test]
4219 fn test_reconcile_double_default_spann_config_spann_schema_default_knn_hnsw() {
4220 let collection_config = InternalCollectionConfiguration::default_spann();
4221 let schema = Schema::new_default(KnnIndex::Spann);
4222 let result =
4223 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4224 .unwrap();
4225
4226 assert!(result.defaults.float_list.is_some());
4228 assert!(result
4229 .defaults
4230 .float_list
4231 .as_ref()
4232 .unwrap()
4233 .vector_index
4234 .as_ref()
4235 .unwrap()
4236 .config
4237 .hnsw
4238 .is_some());
4239 assert!(result
4240 .defaults
4241 .float_list
4242 .as_ref()
4243 .unwrap()
4244 .vector_index
4245 .as_ref()
4246 .unwrap()
4247 .config
4248 .spann
4249 .is_none());
4250 }
4251
4252 #[test]
4253 fn test_reconcile_double_default_spann_config_spann_schema_default_knn_spann() {
4254 let collection_config = InternalCollectionConfiguration::default_spann();
4255 let schema = Schema::new_default(KnnIndex::Spann);
4256 let result =
4257 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4258 .unwrap();
4259
4260 assert!(result.defaults.float_list.is_some());
4262 assert!(result
4263 .defaults
4264 .float_list
4265 .as_ref()
4266 .unwrap()
4267 .vector_index
4268 .as_ref()
4269 .unwrap()
4270 .config
4271 .spann
4272 .is_some());
4273 assert!(result
4274 .defaults
4275 .float_list
4276 .as_ref()
4277 .unwrap()
4278 .vector_index
4279 .as_ref()
4280 .unwrap()
4281 .config
4282 .hnsw
4283 .is_none());
4284 assert_eq!(
4286 result
4287 .defaults
4288 .float_list
4289 .as_ref()
4290 .unwrap()
4291 .vector_index
4292 .as_ref()
4293 .unwrap()
4294 .config
4295 .source_key,
4296 None
4297 );
4298 }
4299
4300 #[test]
4301 fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_hnsw() {
4302 let collection_config = InternalCollectionConfiguration::default_spann();
4303 let schema = Schema::new_default(KnnIndex::Hnsw);
4304 let result =
4305 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4306 .unwrap();
4307
4308 assert!(result.defaults.float_list.is_some());
4310 assert!(result
4311 .defaults
4312 .float_list
4313 .as_ref()
4314 .unwrap()
4315 .vector_index
4316 .as_ref()
4317 .unwrap()
4318 .config
4319 .hnsw
4320 .is_some());
4321 assert!(result
4322 .defaults
4323 .float_list
4324 .as_ref()
4325 .unwrap()
4326 .vector_index
4327 .as_ref()
4328 .unwrap()
4329 .config
4330 .spann
4331 .is_none());
4332 }
4333
4334 #[test]
4335 fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_spann() {
4336 let collection_config = InternalCollectionConfiguration::default_spann();
4337 let schema = Schema::new_default(KnnIndex::Hnsw);
4338 let result =
4339 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4340 .unwrap();
4341
4342 assert!(result.defaults.float_list.is_some());
4344 assert!(result
4345 .defaults
4346 .float_list
4347 .as_ref()
4348 .unwrap()
4349 .vector_index
4350 .as_ref()
4351 .unwrap()
4352 .config
4353 .spann
4354 .is_some());
4355 assert!(result
4356 .defaults
4357 .float_list
4358 .as_ref()
4359 .unwrap()
4360 .vector_index
4361 .as_ref()
4362 .unwrap()
4363 .config
4364 .hnsw
4365 .is_none());
4366 }
4367
4368 #[test]
4369 fn test_defaults_source_key_not_document() {
4370 let schema_hnsw = Schema::new_default(KnnIndex::Hnsw);
4372 let schema_spann = Schema::new_default(KnnIndex::Spann);
4373
4374 let defaults_hnsw = schema_hnsw
4376 .defaults
4377 .float_list
4378 .as_ref()
4379 .unwrap()
4380 .vector_index
4381 .as_ref()
4382 .unwrap();
4383 assert_eq!(defaults_hnsw.config.source_key, None);
4384
4385 let defaults_spann = schema_spann
4387 .defaults
4388 .float_list
4389 .as_ref()
4390 .unwrap()
4391 .vector_index
4392 .as_ref()
4393 .unwrap();
4394 assert_eq!(defaults_spann.config.source_key, None);
4395
4396 let collection_config_hnsw = InternalCollectionConfiguration {
4399 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4400 ef_construction: 300,
4401 max_neighbors: 32,
4402 ef_search: 50,
4403 num_threads: 8,
4404 batch_size: 200,
4405 sync_threshold: 2000,
4406 resize_factor: 1.5,
4407 space: Space::L2,
4408 }),
4409 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4410 };
4411 let result_hnsw = Schema::reconcile_with_collection_config(
4412 &schema_hnsw,
4413 &collection_config_hnsw,
4414 KnnIndex::Hnsw,
4415 )
4416 .unwrap();
4417 let reconciled_defaults_hnsw = result_hnsw
4418 .defaults
4419 .float_list
4420 .as_ref()
4421 .unwrap()
4422 .vector_index
4423 .as_ref()
4424 .unwrap();
4425 assert_eq!(reconciled_defaults_hnsw.config.source_key, None);
4426
4427 let collection_config_spann = InternalCollectionConfiguration {
4428 vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4429 search_nprobe: 20,
4430 search_rng_factor: 3.0,
4431 search_rng_epsilon: 0.2,
4432 nreplica_count: 5,
4433 write_rng_factor: 2.0,
4434 write_rng_epsilon: 0.1,
4435 split_threshold: 2000,
4436 num_samples_kmeans: 200,
4437 initial_lambda: 0.8,
4438 reassign_neighbor_count: 100,
4439 merge_threshold: 800,
4440 num_centers_to_merge_to: 20,
4441 write_nprobe: 10,
4442 ef_construction: 400,
4443 ef_search: 60,
4444 max_neighbors: 24,
4445 space: Space::Cosine,
4446 }),
4447 embedding_function: None,
4448 };
4449 let result_spann = Schema::reconcile_with_collection_config(
4450 &schema_spann,
4451 &collection_config_spann,
4452 KnnIndex::Spann,
4453 )
4454 .unwrap();
4455 let reconciled_defaults_spann = result_spann
4456 .defaults
4457 .float_list
4458 .as_ref()
4459 .unwrap()
4460 .vector_index
4461 .as_ref()
4462 .unwrap();
4463 assert_eq!(reconciled_defaults_spann.config.source_key, None);
4464
4465 let embedding_hnsw = result_hnsw.keys.get(EMBEDDING_KEY).unwrap();
4467 let embedding_vector_index_hnsw = embedding_hnsw
4468 .float_list
4469 .as_ref()
4470 .unwrap()
4471 .vector_index
4472 .as_ref()
4473 .unwrap();
4474 assert_eq!(
4475 embedding_vector_index_hnsw.config.source_key,
4476 Some(DOCUMENT_KEY.to_string())
4477 );
4478
4479 let embedding_spann = result_spann.keys.get(EMBEDDING_KEY).unwrap();
4480 let embedding_vector_index_spann = embedding_spann
4481 .float_list
4482 .as_ref()
4483 .unwrap()
4484 .vector_index
4485 .as_ref()
4486 .unwrap();
4487 assert_eq!(
4488 embedding_vector_index_spann.config.source_key,
4489 Some(DOCUMENT_KEY.to_string())
4490 );
4491 }
4492
4493 #[test]
4494 fn test_try_from_source_key() {
4495 let collection_config_hnsw = InternalCollectionConfiguration {
4500 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4501 ef_construction: 300,
4502 max_neighbors: 32,
4503 ef_search: 50,
4504 num_threads: 8,
4505 batch_size: 200,
4506 sync_threshold: 2000,
4507 resize_factor: 1.5,
4508 space: Space::L2,
4509 }),
4510 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4511 };
4512 let schema_hnsw = Schema::try_from(&collection_config_hnsw).unwrap();
4513
4514 let defaults_hnsw = schema_hnsw
4516 .defaults
4517 .float_list
4518 .as_ref()
4519 .unwrap()
4520 .vector_index
4521 .as_ref()
4522 .unwrap();
4523 assert_eq!(defaults_hnsw.config.source_key, None);
4524
4525 let embedding_hnsw = schema_hnsw.keys.get(EMBEDDING_KEY).unwrap();
4527 let embedding_vector_index_hnsw = embedding_hnsw
4528 .float_list
4529 .as_ref()
4530 .unwrap()
4531 .vector_index
4532 .as_ref()
4533 .unwrap();
4534 assert_eq!(
4535 embedding_vector_index_hnsw.config.source_key,
4536 Some(DOCUMENT_KEY.to_string())
4537 );
4538
4539 let collection_config_spann = InternalCollectionConfiguration {
4541 vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4542 search_nprobe: 20,
4543 search_rng_factor: 3.0,
4544 search_rng_epsilon: 0.2,
4545 nreplica_count: 5,
4546 write_rng_factor: 2.0,
4547 write_rng_epsilon: 0.1,
4548 split_threshold: 2000,
4549 num_samples_kmeans: 200,
4550 initial_lambda: 0.8,
4551 reassign_neighbor_count: 100,
4552 merge_threshold: 800,
4553 num_centers_to_merge_to: 20,
4554 write_nprobe: 10,
4555 ef_construction: 400,
4556 ef_search: 60,
4557 max_neighbors: 24,
4558 space: Space::Cosine,
4559 }),
4560 embedding_function: None,
4561 };
4562 let schema_spann = Schema::try_from(&collection_config_spann).unwrap();
4563
4564 let defaults_spann = schema_spann
4566 .defaults
4567 .float_list
4568 .as_ref()
4569 .unwrap()
4570 .vector_index
4571 .as_ref()
4572 .unwrap();
4573 assert_eq!(defaults_spann.config.source_key, None);
4574
4575 let embedding_spann = schema_spann.keys.get(EMBEDDING_KEY).unwrap();
4577 let embedding_vector_index_spann = embedding_spann
4578 .float_list
4579 .as_ref()
4580 .unwrap()
4581 .vector_index
4582 .as_ref()
4583 .unwrap();
4584 assert_eq!(
4585 embedding_vector_index_spann.config.source_key,
4586 Some(DOCUMENT_KEY.to_string())
4587 );
4588 }
4589
#[test]
/// Reconciling a default HNSW schema with a collection config that names the
/// "default" embedding function, while targeting SPANN, must:
/// - keep `source_key` unset on the defaults and set to `DOCUMENT_KEY` on the
///   embedding key,
/// - produce a SPANN (not HNSW) vector index configuration on the defaults,
/// - carry the embedding function into both the defaults and the embedding key.
fn test_default_hnsw_with_default_embedding_function() {
    use crate::collection_configuration::EmbeddingFunctionNewConfiguration;

    // Single constructor for the expected embedding function so the input and
    // both expectations cannot drift apart.
    let default_embedding_function = || {
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "default".to_string(),
            config: serde_json::json!({}),
        })
    };

    let collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default()),
        embedding_function: Some(default_embedding_function()),
    };

    // The config built above is still reported as a default configuration.
    assert!(collection_config.is_default());

    let schema = Schema::new_default(KnnIndex::Hnsw);
    let result =
        Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
            .unwrap();

    let defaults = result
        .defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert_eq!(defaults.config.source_key, None);

    let embedding = result.keys.get(EMBEDDING_KEY).unwrap();
    let embedding_vector_index = embedding
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert_eq!(
        embedding_vector_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );

    // Borrow the config directly; no copy is needed just to inspect it.
    assert!(defaults.config.spann.is_some());
    assert!(defaults.config.hnsw.is_none());

    assert_eq!(
        embedding_vector_index.config.embedding_function,
        Some(default_embedding_function())
    );
    assert_eq!(
        defaults.config.embedding_function,
        Some(default_embedding_function())
    );
}
4665
#[test]
/// When both the schema and the collection configuration deviate from their
/// defaults, reconciliation must fail with `ConfigAndSchemaConflict`.
fn test_reconcile_with_collection_config_both_non_default() {
    // Non-default schema: string defaults carry only an enabled FTS index.
    let mut custom_schema = Schema::new_default(KnnIndex::Hnsw);
    custom_schema.defaults.string = Some(StringValueType {
        fts_index: Some(FtsIndexType {
            enabled: true,
            config: FtsIndexConfig {},
        }),
        string_inverted_index: None,
    });

    // Non-default config: tweak one HNSW parameter.
    let mut custom_config = InternalCollectionConfiguration::default_hnsw();
    if let VectorIndexConfiguration::Hnsw(hnsw) = &mut custom_config.vector_index {
        hnsw.ef_construction = 500;
    }

    let outcome = Schema::reconcile_schema_and_config(
        Some(&custom_schema),
        Some(&custom_config),
        KnnIndex::Spann,
    );
    assert!(outcome.is_err());

    let error = outcome.unwrap_err();
    assert!(matches!(error, SchemaError::ConfigAndSchemaConflict));
}
4697
#[test]
/// A non-default HNSW collection config reconciled against a default HNSW
/// schema must surface every HNSW parameter on the embedding key's vector
/// index, and leave the SPANN section empty.
fn test_reconcile_with_collection_config_hnsw_override() {
    let base_schema = Schema::new_default(KnnIndex::Hnsw);

    let hnsw_params = InternalHnswConfiguration {
        ef_construction: 300,
        max_neighbors: 32,
        ef_search: 50,
        num_threads: 8,
        batch_size: 200,
        sync_threshold: 2000,
        resize_factor: 1.5,
        space: Space::L2,
    };
    let collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(hnsw_params),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };

    let reconciled = Schema::reconcile_with_collection_config(
        &base_schema,
        &collection_config,
        KnnIndex::Hnsw,
    )
    .unwrap();

    let vector_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();
    let index_config = &vector_index.config;

    assert!(vector_index.enabled);
    assert_eq!(index_config.space, Some(Space::L2));
    assert_eq!(
        index_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(index_config.source_key, Some(DOCUMENT_KEY.to_string()));

    let hnsw = index_config.hnsw.as_ref().unwrap();
    assert_eq!(hnsw.ef_construction, Some(300));
    assert_eq!(hnsw.max_neighbors, Some(32));
    assert_eq!(hnsw.ef_search, Some(50));
    assert_eq!(hnsw.num_threads, Some(8));
    assert_eq!(hnsw.batch_size, Some(200));
    assert_eq!(hnsw.sync_threshold, Some(2000));
    assert_eq!(hnsw.resize_factor, Some(1.5));

    assert!(index_config.spann.is_none());
}
4753
#[test]
/// A non-default SPANN collection config reconciled against a default SPANN
/// schema must surface every SPANN parameter on the embedding key's vector
/// index, and leave the HNSW section empty.
fn test_reconcile_with_collection_config_spann_override() {
    let base_schema = Schema::new_default(KnnIndex::Spann);

    let spann_params = InternalSpannConfiguration {
        search_nprobe: 20,
        search_rng_factor: 3.0,
        search_rng_epsilon: 0.2,
        nreplica_count: 5,
        write_rng_factor: 2.0,
        write_rng_epsilon: 0.1,
        split_threshold: 2000,
        num_samples_kmeans: 200,
        initial_lambda: 0.8,
        reassign_neighbor_count: 100,
        merge_threshold: 800,
        num_centers_to_merge_to: 20,
        write_nprobe: 10,
        ef_construction: 400,
        ef_search: 60,
        max_neighbors: 24,
        space: Space::Cosine,
    };
    let collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(spann_params),
        embedding_function: None,
    };

    let reconciled = Schema::reconcile_with_collection_config(
        &base_schema,
        &collection_config,
        KnnIndex::Spann,
    )
    .unwrap();

    let vector_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();
    let index_config = &vector_index.config;

    assert!(vector_index.enabled);
    assert_eq!(index_config.space, Some(Space::Cosine));
    assert_eq!(index_config.embedding_function, None);
    assert_eq!(index_config.source_key, Some(DOCUMENT_KEY.to_string()));

    assert!(index_config.hnsw.is_none());

    let spann = index_config.spann.as_ref().unwrap();
    // Search-side parameters.
    assert_eq!(spann.search_nprobe, Some(20));
    assert_eq!(spann.search_rng_factor, Some(3.0));
    assert_eq!(spann.search_rng_epsilon, Some(0.2));
    assert_eq!(spann.nreplica_count, Some(5));
    // Write-side RNG parameters.
    assert_eq!(spann.write_rng_factor, Some(2.0));
    assert_eq!(spann.write_rng_epsilon, Some(0.1));
    // Clustering / maintenance parameters.
    assert_eq!(spann.split_threshold, Some(2000));
    assert_eq!(spann.num_samples_kmeans, Some(200));
    assert_eq!(spann.initial_lambda, Some(0.8));
    assert_eq!(spann.reassign_neighbor_count, Some(100));
    assert_eq!(spann.merge_threshold, Some(800));
    assert_eq!(spann.num_centers_to_merge_to, Some(20));
    assert_eq!(spann.write_nprobe, Some(10));
    // Graph parameters.
    assert_eq!(spann.ef_construction, Some(400));
    assert_eq!(spann.ef_search, Some(60));
    assert_eq!(spann.max_neighbors, Some(24));
}
4824
#[test]
/// Reconciliation must write the collection config into both places: the
/// (disabled) `float_list` defaults and the (enabled) embedding-key override.
/// Only the embedding key receives a `source_key`.
fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
    let base_schema = Schema::new_default(KnnIndex::Hnsw);

    let hnsw_params = InternalHnswConfiguration {
        ef_construction: 300,
        max_neighbors: 32,
        ef_search: 50,
        num_threads: 8,
        batch_size: 200,
        sync_threshold: 2000,
        resize_factor: 1.5,
        space: Space::L2,
    };
    let collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(hnsw_params),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };

    let reconciled = Schema::reconcile_with_collection_config(
        &base_schema,
        &collection_config,
        KnnIndex::Hnsw,
    )
    .unwrap();

    // --- defaults ---------------------------------------------------------
    let on_defaults = reconciled
        .defaults
        .float_list
        .as_ref()
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();

    assert!(!on_defaults.enabled);
    assert_eq!(on_defaults.config.space, Some(Space::L2));
    assert_eq!(
        on_defaults.config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(on_defaults.config.source_key, None);
    let hnsw_on_defaults = on_defaults.config.hnsw.as_ref().unwrap();
    assert_eq!(hnsw_on_defaults.ef_construction, Some(300));
    assert_eq!(hnsw_on_defaults.max_neighbors, Some(32));

    // --- embedding key ----------------------------------------------------
    let on_embedding = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();

    assert!(on_embedding.enabled);
    assert_eq!(on_embedding.config.space, Some(Space::L2));
    assert_eq!(
        on_embedding.config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(
        on_embedding.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
    let hnsw_on_embedding = on_embedding.config.hnsw.as_ref().unwrap();
    assert_eq!(hnsw_on_embedding.ef_construction, Some(300));
    assert_eq!(hnsw_on_embedding.max_neighbors, Some(32));
}
4898
#[test]
/// `Schema::is_default` is true for freshly built default schemas (HNSW and
/// SPANN) and false once a default index is toggled or an extra key is added.
fn test_is_schema_default() {
    // Pristine schemas for both index kinds are defaults.
    for knn_index in [KnnIndex::Hnsw, KnnIndex::Spann] {
        assert!(Schema::new_default(knn_index).is_default());
    }

    // Disabling the default string inverted index makes the schema non-default.
    let mut toggled = Schema::new_default(KnnIndex::Hnsw);
    if let Some(inverted_index) = toggled
        .defaults
        .string
        .as_mut()
        .and_then(|string_type| string_type.string_inverted_index.as_mut())
    {
        inverted_index.enabled = false;
    }
    assert!(!toggled.is_default());

    // An additional key override also makes the schema non-default.
    let mut with_extra_key = Schema::new_default(KnnIndex::Hnsw);
    with_extra_key
        .keys
        .insert("custom_key".to_string(), ValueTypes::default());
    assert!(!with_extra_key.is_default());
}
4925
#[test]
/// Setting an explicit `space` on the vector index, either in the defaults or
/// on the embedding key, makes a schema non-default.
fn test_is_schema_default_with_space() {
    assert!(Schema::new_default(KnnIndex::Hnsw).is_default());

    // Space set on the float_list defaults (HNSW schema).
    let mut space_on_defaults = Schema::new_default(KnnIndex::Hnsw);
    if let Some(vector_index) = space_on_defaults
        .defaults
        .float_list
        .as_mut()
        .and_then(|float_list| float_list.vector_index.as_mut())
    {
        vector_index.config.space = Some(Space::Cosine);
    }
    assert!(!space_on_defaults.is_default());

    // Space set on the embedding key override (SPANN schema).
    let mut space_on_embedding = Schema::new_default(KnnIndex::Spann);
    if let Some(vector_index) = space_on_embedding
        .keys
        .get_mut(EMBEDDING_KEY)
        .and_then(|embedding_key| embedding_key.float_list.as_mut())
        .and_then(|float_list| float_list.vector_index.as_mut())
    {
        vector_index.config.space = Some(Space::Cosine);
    }
    assert!(!space_on_embedding.is_default());
}
4952
#[test]
/// Setting an embedding function on the vector index, either in the defaults
/// or on the embedding key, makes a schema non-default.
fn test_is_schema_default_with_embedding_function() {
    assert!(Schema::new_default(KnnIndex::Hnsw).is_default());

    // Embedding function set on the float_list defaults (HNSW schema).
    let mut ef_on_defaults = Schema::new_default(KnnIndex::Hnsw);
    if let Some(vector_index) = ef_on_defaults
        .defaults
        .float_list
        .as_mut()
        .and_then(|float_list| float_list.vector_index.as_mut())
    {
        vector_index.config.embedding_function = Some(EmbeddingFunctionConfiguration::Legacy);
    }
    assert!(!ef_on_defaults.is_default());

    // Embedding function set on the embedding key override (SPANN schema).
    let mut ef_on_embedding = Schema::new_default(KnnIndex::Spann);
    if let Some(vector_index) = ef_on_embedding
        .keys
        .get_mut(EMBEDDING_KEY)
        .and_then(|embedding_key| embedding_key.float_list.as_mut())
        .and_then(|float_list| float_list.vector_index.as_mut())
    {
        vector_index.config.embedding_function = Some(EmbeddingFunctionConfiguration::Legacy);
    }
    assert!(!ef_on_embedding.is_default());
}
4982
#[test]
/// Merging two schemas that override the same key with *different* value
/// types (string vs. float) keeps both overrides on the merged key.
fn test_add_merges_keys_by_value_type() {
    let field = "custom_field";

    // Left-hand schema: string inverted index on the custom field.
    let mut left = Schema::new_default(KnnIndex::Hnsw);
    left.keys.insert(
        field.to_string(),
        ValueTypes {
            string: Some(StringValueType {
                string_inverted_index: Some(StringInvertedIndexType {
                    enabled: true,
                    config: StringInvertedIndexConfig {},
                }),
                fts_index: None,
            }),
            ..Default::default()
        },
    );

    // Right-hand schema: float inverted index on the same field.
    let mut right = Schema::new_default(KnnIndex::Hnsw);
    right.keys.insert(
        field.to_string(),
        ValueTypes {
            float: Some(FloatValueType {
                float_inverted_index: Some(FloatInvertedIndexType {
                    enabled: true,
                    config: FloatInvertedIndexConfig {},
                }),
            }),
            ..Default::default()
        },
    );

    let merged = left.merge(&right).unwrap();
    let merged_field = merged.keys.get(field).unwrap();

    assert!(merged_field.string.is_some());
    assert!(merged_field.float.is_some());

    let string_index = merged_field
        .string
        .as_ref()
        .and_then(|string_type| string_type.string_inverted_index.as_ref())
        .unwrap();
    assert!(string_index.enabled);

    let float_index = merged_field
        .float
        .as_ref()
        .and_then(|float_type| float_type.float_inverted_index.as_ref())
        .unwrap();
    assert!(float_index.enabled);
}
5041
#[test]
/// Merging schemas whose `defaults` differ must fail with `DefaultsMismatch`.
fn test_add_rejects_different_defaults() {
    let left = Schema::new_default(KnnIndex::Hnsw);

    // Make the right-hand defaults differ by disabling the string inverted index.
    let mut right = Schema::new_default(KnnIndex::Hnsw);
    if let Some(inverted_index) = right
        .defaults
        .string
        .as_mut()
        .and_then(|string_type| string_type.string_inverted_index.as_mut())
    {
        inverted_index.enabled = false;
    }

    let merge_error = left.merge(&right).unwrap_err();
    assert!(matches!(merge_error, SchemaError::DefaultsMismatch));
}
5056
#[test]
/// Merging schemas that override the same key and the same value type with
/// contradictory settings must fail with `ConfigurationConflict`.
fn test_add_detects_conflicting_value_type_configuration() {
    // Builds a key override whose string inverted index has the given state.
    let string_override = |enabled: bool| ValueTypes {
        string: Some(StringValueType {
            string_inverted_index: Some(StringInvertedIndexType {
                enabled,
                config: StringInvertedIndexConfig {},
            }),
            fts_index: None,
        }),
        ..Default::default()
    };

    let mut left = Schema::new_default(KnnIndex::Hnsw);
    left.keys
        .insert("custom_field".to_string(), string_override(true));

    let mut right = Schema::new_default(KnnIndex::Hnsw);
    right
        .keys
        .insert("custom_field".to_string(), string_override(false));

    let merge_error = left.merge(&right).unwrap_err();
    assert!(matches!(
        merge_error,
        SchemaError::ConfigurationConflict { .. }
    ));
}
5093
#[test]
/// The legacy JSON spelling (`key_overrides`, `#type` and `$index` names) must
/// deserialize to the same `Schema` as the current spelling, and
/// serialization must always emit the current spelling.
fn test_backward_compatibility_aliases() {
    // Legacy field names.
    let legacy_json = r###"{
        "defaults": {
            "#string": {
                "$fts_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "#int": {
                "$int_inverted_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "#float_list": {
                "$vector_index": {
                    "enabled": true,
                    "config": {
                        "spann": {
                            "search_nprobe": 10
                        }
                    }
                }
            }
        },
        "key_overrides": {
            "#document": {
                "#string": {
                    "$fts_index": {
                        "enabled": false,
                        "config": {}
                    }
                }
            }
        }
    }"###;

    // Current field names, same content.
    let current_json = r###"{
        "defaults": {
            "string": {
                "fts_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "int": {
                "int_inverted_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "float_list": {
                "vector_index": {
                    "enabled": true,
                    "config": {
                        "spann": {
                            "search_nprobe": 10
                        }
                    }
                }
            }
        },
        "keys": {
            "#document": {
                "string": {
                    "fts_index": {
                        "enabled": false,
                        "config": {}
                    }
                }
            }
        }
    }"###;

    let legacy: Schema = serde_json::from_str(legacy_json).unwrap();
    let current: Schema = serde_json::from_str(current_json).unwrap();

    // Both spellings describe the same schema.
    assert_eq!(legacy, current);

    // Spot-check the structure parsed from the legacy spelling.
    let defaults = &legacy.defaults;

    assert!(defaults.string.is_some());
    let default_fts = &defaults.string.as_ref().unwrap().fts_index;
    assert!(default_fts.is_some());
    assert!(default_fts.as_ref().unwrap().enabled);

    assert!(defaults.int.is_some());
    assert!(defaults
        .int
        .as_ref()
        .unwrap()
        .int_inverted_index
        .is_some());

    assert!(defaults.float_list.is_some());
    assert!(defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .is_some());

    assert!(legacy.keys.contains_key(DOCUMENT_KEY));
    let document_override = legacy.keys.get(DOCUMENT_KEY).unwrap();
    assert!(document_override.string.is_some());
    let document_fts = document_override
        .string
        .as_ref()
        .unwrap()
        .fts_index
        .as_ref()
        .unwrap();
    assert!(!document_fts.enabled);

    // Serialization uses only the current names, never the legacy aliases.
    let serialized = serde_json::to_string(&legacy).unwrap();

    let expected_present = [
        r#""keys":"#,
        r#""string":"#,
        r#""fts_index":"#,
        r#""int_inverted_index":"#,
        r#""vector_index":"#,
    ];
    for needle in expected_present {
        assert!(serialized.contains(needle));
    }

    let expected_absent = [
        r#""key_overrides":"#,
        r###""#string":"###,
        r###""$fts_index":"###,
        r###""$int_inverted_index":"###,
        r###""$vector_index":"###,
    ];
    for needle in expected_absent {
        assert!(!serialized.contains(needle));
    }
}
5250
#[test]
/// Exercises `HnswIndexConfig::validate`: `batch_size` and `sync_threshold`
/// are rejected at 1 and accepted at 2; the other fields exercised here are
/// accepted at small values, and an all-default config is valid.
fn test_hnsw_index_config_validation() {
    use validator::Validate;

    let is_valid = |config: HnswIndexConfig| config.validate().is_ok();

    // A typical, fully valid configuration.
    assert!(is_valid(HnswIndexConfig {
        batch_size: Some(10),
        sync_threshold: Some(100),
        ef_construction: Some(100),
        max_neighbors: Some(16),
        ..Default::default()
    }));

    // batch_size of 1 is rejected.
    assert!(!is_valid(HnswIndexConfig {
        batch_size: Some(1),
        ..Default::default()
    }));

    // sync_threshold of 1 is rejected.
    assert!(!is_valid(HnswIndexConfig {
        sync_threshold: Some(1),
        ..Default::default()
    }));

    // 2 is accepted for both fields.
    assert!(is_valid(HnswIndexConfig {
        batch_size: Some(2),
        sync_threshold: Some(2),
        ..Default::default()
    }));

    // Nothing set at all is valid.
    assert!(is_valid(HnswIndexConfig {
        ..Default::default()
    }));

    // The remaining fields accept small values.
    assert!(is_valid(HnswIndexConfig {
        ef_construction: Some(1),
        max_neighbors: Some(1),
        ef_search: Some(1),
        num_threads: Some(1),
        resize_factor: Some(0.1),
        ..Default::default()
    }));
}
5304
#[test]
/// Exercises `SpannIndexConfig::validate` field by field: one fully valid
/// configuration, then for each constrained field one or more rejected values
/// and (where the original test had one) an accepted value. Cases run in the
/// same order as before so a failure points at a single field.
fn test_spann_index_config_validation() {
    use validator::Validate;

    let accepts = |config: SpannIndexConfig| config.validate().is_ok();
    let rejects = |config: SpannIndexConfig| config.validate().is_err();

    // Fully populated, valid configuration.
    assert!(accepts(SpannIndexConfig {
        write_nprobe: Some(32),
        nreplica_count: Some(4),
        split_threshold: Some(100),
        merge_threshold: Some(50),
        reassign_neighbor_count: Some(32),
        num_centers_to_merge_to: Some(4),
        ef_construction: Some(100),
        ef_search: Some(100),
        max_neighbors: Some(32),
        search_rng_factor: Some(1.0),
        write_rng_factor: Some(1.0),
        search_rng_epsilon: Some(7.5),
        write_rng_epsilon: Some(7.5),
        ..Default::default()
    }));

    // write_nprobe
    assert!(rejects(SpannIndexConfig {
        write_nprobe: Some(200),
        ..Default::default()
    }));

    // split_threshold: too low, then too high
    assert!(rejects(SpannIndexConfig {
        split_threshold: Some(10),
        ..Default::default()
    }));
    assert!(rejects(SpannIndexConfig {
        split_threshold: Some(250),
        ..Default::default()
    }));

    // nreplica_count
    assert!(rejects(SpannIndexConfig {
        nreplica_count: Some(10),
        ..Default::default()
    }));

    // reassign_neighbor_count
    assert!(rejects(SpannIndexConfig {
        reassign_neighbor_count: Some(100),
        ..Default::default()
    }));

    // merge_threshold: too low, then too high
    assert!(rejects(SpannIndexConfig {
        merge_threshold: Some(5),
        ..Default::default()
    }));
    assert!(rejects(SpannIndexConfig {
        merge_threshold: Some(150),
        ..Default::default()
    }));

    // num_centers_to_merge_to
    assert!(rejects(SpannIndexConfig {
        num_centers_to_merge_to: Some(10),
        ..Default::default()
    }));

    // ef_construction
    assert!(rejects(SpannIndexConfig {
        ef_construction: Some(300),
        ..Default::default()
    }));

    // ef_search
    assert!(rejects(SpannIndexConfig {
        ef_search: Some(300),
        ..Default::default()
    }));

    // max_neighbors
    assert!(rejects(SpannIndexConfig {
        max_neighbors: Some(100),
        ..Default::default()
    }));

    // search_nprobe
    assert!(rejects(SpannIndexConfig {
        search_nprobe: Some(200),
        ..Default::default()
    }));

    // search_rng_factor: below, above, accepted value
    assert!(rejects(SpannIndexConfig {
        search_rng_factor: Some(0.9),
        ..Default::default()
    }));
    assert!(rejects(SpannIndexConfig {
        search_rng_factor: Some(1.1),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        search_rng_factor: Some(1.0),
        ..Default::default()
    }));

    // search_rng_epsilon: below, above, accepted value
    assert!(rejects(SpannIndexConfig {
        search_rng_epsilon: Some(4.0),
        ..Default::default()
    }));
    assert!(rejects(SpannIndexConfig {
        search_rng_epsilon: Some(11.0),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        search_rng_epsilon: Some(7.5),
        ..Default::default()
    }));

    // write_rng_factor: below, above, accepted value
    assert!(rejects(SpannIndexConfig {
        write_rng_factor: Some(0.9),
        ..Default::default()
    }));
    assert!(rejects(SpannIndexConfig {
        write_rng_factor: Some(1.1),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        write_rng_factor: Some(1.0),
        ..Default::default()
    }));

    // write_rng_epsilon: below, above, accepted value
    assert!(rejects(SpannIndexConfig {
        write_rng_epsilon: Some(4.0),
        ..Default::default()
    }));
    assert!(rejects(SpannIndexConfig {
        write_rng_epsilon: Some(11.0),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        write_rng_epsilon: Some(7.5),
        ..Default::default()
    }));

    // num_samples_kmeans: rejected, then accepted value
    assert!(rejects(SpannIndexConfig {
        num_samples_kmeans: Some(1500),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        num_samples_kmeans: Some(500),
        ..Default::default()
    }));

    // initial_lambda: above, below, accepted value
    assert!(rejects(SpannIndexConfig {
        initial_lambda: Some(150.0),
        ..Default::default()
    }));
    assert!(rejects(SpannIndexConfig {
        initial_lambda: Some(50.0),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        initial_lambda: Some(100.0),
        ..Default::default()
    }));

    // Nothing set at all is valid.
    assert!(accepts(SpannIndexConfig {
        ..Default::default()
    }));
}
5531
#[test]
/// End-to-end builder workflow: create a global vector index plus four
/// per-key inverted indexes, verify they are enabled, delete two of them, and
/// verify that the deleted ones are disabled while the others stay enabled.
fn test_builder_pattern_crud_workflow() {
    // ---- create ----------------------------------------------------------
    let vector_config = VectorIndexConfig {
        space: Some(Space::Cosine),
        embedding_function: None,
        source_key: None,
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(200),
            max_neighbors: Some(32),
            ef_search: Some(50),
            num_threads: None,
            batch_size: None,
            sync_threshold: None,
            resize_factor: None,
        }),
        spann: None,
    };

    let schema = Schema::new_default(KnnIndex::Hnsw);
    let schema = schema
        .create_index(None, IndexConfig::Vector(vector_config))
        .expect("vector config should succeed");
    let schema = schema
        .create_index(
            Some("category"),
            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
        )
        .expect("string inverted on key should succeed");
    let schema = schema
        .create_index(
            Some("year"),
            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
        )
        .expect("int inverted on key should succeed");
    let schema = schema
        .create_index(
            Some("rating"),
            IndexConfig::FloatInverted(FloatInvertedIndexConfig {}),
        )
        .expect("float inverted on key should succeed");
    let schema = schema
        .create_index(
            Some("is_active"),
            IndexConfig::BoolInverted(BoolInvertedIndexConfig {}),
        )
        .expect("bool inverted on key should succeed");

    // ---- read: the global vector config is visible on the embedding key ---
    assert!(schema.keys.contains_key(EMBEDDING_KEY));
    let embedding_key = schema.keys.get(EMBEDDING_KEY).unwrap();
    assert!(embedding_key.float_list.is_some());
    let vector_index = embedding_key
        .float_list
        .as_ref()
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();
    assert!(vector_index.enabled);
    assert_eq!(vector_index.config.space, Some(Space::Cosine));
    assert_eq!(
        vector_index.config.hnsw.as_ref().unwrap().ef_construction,
        Some(200)
    );

    // ---- read: every per-key override exists ------------------------------
    for key in ["category", "year", "rating", "is_active"] {
        assert!(schema.keys.contains_key(key));
    }

    let category_types = schema.keys.get("category").unwrap();
    assert!(category_types.string.is_some());
    let category_index = category_types
        .string
        .as_ref()
        .and_then(|string_type| string_type.string_inverted_index.as_ref())
        .unwrap();
    assert!(category_index.enabled);

    let year_types = schema.keys.get("year").unwrap();
    assert!(year_types.int.is_some());
    let year_index = year_types
        .int
        .as_ref()
        .and_then(|int_type| int_type.int_inverted_index.as_ref())
        .unwrap();
    assert!(year_index.enabled);

    // ---- delete ----------------------------------------------------------
    let schema = schema
        .delete_index(
            Some("category"),
            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
        )
        .expect("delete string inverted should succeed");
    let schema = schema
        .delete_index(
            Some("year"),
            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
        )
        .expect("delete int inverted should succeed");

    // Deleted indexes remain present but are disabled.
    let category_index = schema
        .keys
        .get("category")
        .unwrap()
        .string
        .as_ref()
        .and_then(|string_type| string_type.string_inverted_index.as_ref())
        .unwrap();
    assert!(!category_index.enabled);

    let year_index = schema
        .keys
        .get("year")
        .unwrap()
        .int
        .as_ref()
        .and_then(|int_type| int_type.int_inverted_index.as_ref())
        .unwrap();
    assert!(!year_index.enabled);

    // Untouched indexes are still enabled.
    let rating_index = schema
        .keys
        .get("rating")
        .unwrap()
        .float
        .as_ref()
        .and_then(|float_type| float_type.float_inverted_index.as_ref())
        .unwrap();
    assert!(rating_index.enabled);

    let is_active_index = schema
        .keys
        .get("is_active")
        .unwrap()
        .boolean
        .as_ref()
        .and_then(|bool_type| bool_type.bool_inverted_index.as_ref())
        .unwrap();
    assert!(is_active_index.enabled);
}
5682
#[test]
/// `Schema::create_index` must reject: a vector index on a specific key, an
/// FTS index on a specific key, any index on the document/embedding special
/// keys, a sparse vector index without a key, and a second sparse vector
/// index on another key.
fn test_builder_create_index_validation_errors() {
    // Fresh sparse-vector config for each call that needs one.
    let sparse_config = || {
        IndexConfig::SparseVector(SparseVectorIndexConfig {
            embedding_function: None,
            source_key: None,
            bm25: None,
        })
    };

    // Vector index on a specific key.
    let outcome = Schema::new_default(KnnIndex::Hnsw).create_index(
        Some("my_vectors"),
        IndexConfig::Vector(VectorIndexConfig {
            space: Some(Space::L2),
            embedding_function: None,
            source_key: None,
            hnsw: None,
            spann: None,
        }),
    );
    assert!(outcome.is_err());
    assert!(matches!(
        outcome.unwrap_err(),
        SchemaBuilderError::VectorIndexMustBeGlobal { key } if key == "my_vectors"
    ));

    // FTS index on a specific key.
    let outcome = Schema::new_default(KnnIndex::Hnsw)
        .create_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
    assert!(outcome.is_err());
    assert!(matches!(
        outcome.unwrap_err(),
        SchemaBuilderError::FtsIndexMustBeGlobal { key } if key == "my_text"
    ));

    // Any index on the document special key.
    let outcome = Schema::new_default(KnnIndex::Hnsw).create_index(
        Some(DOCUMENT_KEY),
        IndexConfig::StringInverted(StringInvertedIndexConfig {}),
    );
    assert!(outcome.is_err());
    assert!(matches!(
        outcome.unwrap_err(),
        SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
    ));

    // Any index on the embedding special key.
    let outcome = Schema::new_default(KnnIndex::Hnsw).create_index(
        Some(EMBEDDING_KEY),
        IndexConfig::IntInverted(IntInvertedIndexConfig {}),
    );
    assert!(outcome.is_err());
    assert!(matches!(
        outcome.unwrap_err(),
        SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
    ));

    // Sparse vector index without a key.
    let outcome = Schema::new_default(KnnIndex::Hnsw).create_index(None, sparse_config());
    assert!(outcome.is_err());
    assert!(matches!(
        outcome.unwrap_err(),
        SchemaBuilderError::SparseVectorRequiresKey
    ));

    // Second sparse vector index on a different key.
    let with_first_sparse = Schema::new_default(KnnIndex::Hnsw)
        .create_index(Some("sparse1"), sparse_config())
        .expect("first sparse should succeed");
    let outcome = with_first_sparse.create_index(Some("sparse2"), sparse_config());
    assert!(outcome.is_err());
    assert!(matches!(
        outcome.unwrap_err(),
        SchemaBuilderError::MultipleSparseVectorIndexes { existing_key } if existing_key == "sparse1"
    ));
}
5778
#[test]
fn test_builder_delete_index_validation_errors() {
    // Every scenario starts from a fresh default HNSW schema and expects
    // `delete_index` to fail with one specific `SchemaBuilderError` variant.
    let fresh_schema = || Schema::new_default(KnnIndex::Hnsw);
    let empty_sparse_config = || {
        IndexConfig::SparseVector(SparseVectorIndexConfig {
            embedding_function: None,
            source_key: None,
            bm25: None,
        })
    };

    // Deleting an index on the embedding key.
    let embedding_key_outcome = fresh_schema().delete_index(
        Some(EMBEDDING_KEY),
        IndexConfig::StringInverted(StringInvertedIndexConfig {}),
    );
    assert!(embedding_key_outcome.is_err());
    assert!(matches!(
        embedding_key_outcome.unwrap_err(),
        SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
    ));

    // Deleting an index on the document key.
    let document_key_outcome = fresh_schema().delete_index(
        Some(DOCUMENT_KEY),
        IndexConfig::IntInverted(IntInvertedIndexConfig {}),
    );
    assert!(document_key_outcome.is_err());
    assert!(matches!(
        document_key_outcome.unwrap_err(),
        SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
    ));

    // Deleting the vector index without a key.
    let unset_vector_config = IndexConfig::Vector(VectorIndexConfig {
        space: None,
        embedding_function: None,
        source_key: None,
        hnsw: None,
        spann: None,
    });
    let vector_outcome = fresh_schema().delete_index(None, unset_vector_config);
    assert!(vector_outcome.is_err());
    assert!(matches!(
        vector_outcome.unwrap_err(),
        SchemaBuilderError::VectorIndexDeletionNotSupported
    ));

    // Deleting the FTS index without a key.
    let fts_outcome = fresh_schema().delete_index(None, IndexConfig::Fts(FtsIndexConfig {}));
    assert!(fts_outcome.is_err());
    assert!(matches!(
        fts_outcome.unwrap_err(),
        SchemaBuilderError::FtsIndexDeletionNotSupported
    ));

    // Creating a sparse vector index succeeds, but deleting it afterwards does not.
    let sparse_outcome = fresh_schema()
        .create_index(Some("sparse"), empty_sparse_config())
        .expect("create should succeed")
        .delete_index(Some("sparse"), empty_sparse_config());
    assert!(sparse_outcome.is_err());
    assert!(matches!(
        sparse_outcome.unwrap_err(),
        SchemaBuilderError::SparseVectorIndexDeletionNotSupported
    ));
}
5858
#[test]
fn test_builder_pattern_chaining() {
    // Apply the builder calls one at a time, shadowing the schema after each
    // step; every step is expected to succeed.
    let schema = Schema::new_default(KnnIndex::Hnsw);
    let schema = schema
        .create_index(Some("tag1"), StringInvertedIndexConfig {}.into())
        .unwrap();
    let schema = schema
        .create_index(Some("tag2"), StringInvertedIndexConfig {}.into())
        .unwrap();
    let schema = schema
        .create_index(Some("tag3"), StringInvertedIndexConfig {}.into())
        .unwrap();
    let schema = schema
        .create_index(Some("count"), IntInvertedIndexConfig {}.into())
        .unwrap();
    let schema = schema
        .delete_index(Some("tag2"), StringInvertedIndexConfig {}.into())
        .unwrap();
    let schema = schema
        .create_index(Some("score"), FloatInvertedIndexConfig {}.into())
        .unwrap();

    // Reads the `enabled` flag of the string inverted index stored under `key`.
    let string_index_enabled = |key: &str| {
        let value_types = schema.keys.get(key).unwrap();
        let string_type = value_types.string.as_ref().unwrap();
        string_type.string_inverted_index.as_ref().unwrap().enabled
    };

    // `tag1` and `tag3` were only created; `tag2` was created and then deleted.
    assert!(string_index_enabled("tag1"));
    assert!(!string_index_enabled("tag2"));
    assert!(string_index_enabled("tag3"));

    let count_types = schema.keys.get("count").unwrap();
    let count_index = count_types
        .int
        .as_ref()
        .unwrap()
        .int_inverted_index
        .as_ref()
        .unwrap();
    assert!(count_index.enabled);

    let score_types = schema.keys.get("score").unwrap();
    let score_index = score_types
        .float
        .as_ref()
        .unwrap()
        .float_inverted_index
        .as_ref()
        .unwrap();
    assert!(score_index.enabled);
}
5951
#[test]
fn test_schema_default_matches_python() {
    // Pins the exact shape of `Schema::default()`: per-type defaults first,
    // then the two key overrides.
    let schema = Schema::default();
    let defaults = &schema.defaults;

    // String defaults: FTS disabled, string inverted index enabled.
    assert!(defaults.string.is_some());
    let string_defaults = defaults.string.as_ref().unwrap();
    let default_fts = string_defaults.fts_index.as_ref().unwrap();
    assert!(!default_fts.enabled);
    let default_string_inverted = string_defaults.string_inverted_index.as_ref().unwrap();
    assert!(default_string_inverted.enabled);

    // Float-list defaults: vector index disabled with an entirely unset config.
    assert!(defaults.float_list.is_some());
    let float_list_defaults = defaults.float_list.as_ref().unwrap();
    let default_vector_index = float_list_defaults.vector_index.as_ref().unwrap();
    assert!(!default_vector_index.enabled);
    let default_vector_config = &default_vector_index.config;
    assert_eq!(default_vector_config.space, None);
    assert_eq!(default_vector_config.hnsw, None);
    assert_eq!(default_vector_config.spann, None);
    assert_eq!(default_vector_config.source_key, None);

    // Sparse-vector defaults: index disabled.
    assert!(defaults.sparse_vector.is_some());
    let sparse_defaults = defaults.sparse_vector.as_ref().unwrap();
    let default_sparse_index = sparse_defaults.sparse_vector_index.as_ref().unwrap();
    assert!(!default_sparse_index.enabled);

    // Int, float and boolean defaults: inverted index enabled.
    assert!(defaults.int.is_some());
    let int_defaults = defaults.int.as_ref().unwrap();
    assert!(int_defaults.int_inverted_index.as_ref().unwrap().enabled);

    assert!(defaults.float.is_some());
    let float_defaults = defaults.float.as_ref().unwrap();
    assert!(float_defaults.float_inverted_index.as_ref().unwrap().enabled);

    assert!(defaults.boolean.is_some());
    let bool_defaults = defaults.boolean.as_ref().unwrap();
    assert!(bool_defaults.bool_inverted_index.as_ref().unwrap().enabled);

    // Document key override: FTS enabled, string inverted index disabled.
    assert!(schema.keys.contains_key(DOCUMENT_KEY));
    let document_types = schema.keys.get(DOCUMENT_KEY).unwrap();
    assert!(document_types.string.is_some());
    let document_string = document_types.string.as_ref().unwrap();
    assert!(document_string.fts_index.as_ref().unwrap().enabled);
    let document_string = document_types.string.as_ref().unwrap();
    assert!(!document_string.string_inverted_index.as_ref().unwrap().enabled);

    // Embedding key override: vector index enabled and sourced from the
    // document key, with space/hnsw/spann left unset.
    assert!(schema.keys.contains_key(EMBEDDING_KEY));
    let embedding_types = schema.keys.get(EMBEDDING_KEY).unwrap();
    assert!(embedding_types.float_list.is_some());
    let embedding_float_list = embedding_types.float_list.as_ref().unwrap();
    let embedding_vector_index = embedding_float_list.vector_index.as_ref().unwrap();
    assert!(embedding_vector_index.enabled);
    let embedding_vector_config = &embedding_vector_index.config;
    assert_eq!(
        embedding_vector_config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
    assert_eq!(embedding_vector_config.space, None);
    assert_eq!(embedding_vector_config.hnsw, None);
    assert_eq!(embedding_vector_config.spann, None);

    // Only the two key overrides checked above are present.
    assert_eq!(schema.keys.len(), 2);
}
6071
#[test]
fn test_schema_default_works_with_builder() {
    // A builder call on top of `Schema::default()` adds the new key while
    // keeping the document and embedding keys.
    let built = Schema::default()
        .create_index(Some("category"), StringInvertedIndexConfig {}.into())
        .expect("should succeed");

    let keys = &built.keys;
    for expected_key in ["category", DOCUMENT_KEY, EMBEDDING_KEY] {
        assert!(keys.contains_key(expected_key));
    }
    assert_eq!(keys.len(), 3);
}
6085
6086 #[cfg(feature = "testing")]
6087 mod proptests {
6088 use super::*;
6089 use crate::strategies::{
6090 embedding_function_strategy, internal_collection_configuration_strategy,
6091 internal_hnsw_configuration_strategy, internal_spann_configuration_strategy,
6092 knn_index_strategy, space_strategy, TEST_NAME_PATTERN,
6093 };
6094 use crate::{
6095 HnswIndexConfig, SpannIndexConfig, VectorIndexConfig, DOCUMENT_KEY, EMBEDDING_KEY,
6096 };
6097 use proptest::prelude::*;
6098 use proptest::strategy::BoxedStrategy;
6099 use proptest::string::string_regex;
6100 use serde_json::json;
6101
6102 fn default_embedding_function_strategy(
6103 ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
6104 proptest::option::of(prop_oneof![
6105 Just(EmbeddingFunctionConfiguration::Unknown),
6106 Just(EmbeddingFunctionConfiguration::Known(
6107 EmbeddingFunctionNewConfiguration {
6108 name: "default".to_string(),
6109 config: json!({ "alpha": 1 }),
6110 }
6111 )),
6112 ])
6113 }
6114
6115 fn sparse_embedding_function_strategy(
6116 ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
6117 let known_strategy = string_regex(TEST_NAME_PATTERN).unwrap().prop_map(|name| {
6118 EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
6119 name,
6120 config: json!({ "alpha": 1 }),
6121 })
6122 });
6123
6124 proptest::option::of(prop_oneof![
6125 Just(EmbeddingFunctionConfiguration::Unknown),
6126 known_strategy,
6127 ])
6128 }
6129
6130 fn non_default_internal_collection_configuration_strategy(
6131 ) -> impl Strategy<Value = InternalCollectionConfiguration> {
6132 internal_collection_configuration_strategy()
6133 .prop_filter("non-default configuration", |config| !config.is_default())
6134 }
6135
6136 fn partial_hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
6137 (
6138 proptest::option::of(1usize..=512),
6139 proptest::option::of(1usize..=128),
6140 proptest::option::of(1usize..=512),
6141 proptest::option::of(1usize..=64),
6142 proptest::option::of(2usize..=4096),
6143 proptest::option::of(2usize..=4096),
6144 proptest::option::of(prop_oneof![
6145 Just(0.5f64),
6146 Just(1.0f64),
6147 Just(1.5f64),
6148 Just(2.0f64)
6149 ]),
6150 )
6151 .prop_map(
6152 |(
6153 ef_construction,
6154 max_neighbors,
6155 ef_search,
6156 num_threads,
6157 batch_size,
6158 sync_threshold,
6159 resize_factor,
6160 )| HnswIndexConfig {
6161 ef_construction,
6162 max_neighbors,
6163 ef_search,
6164 num_threads,
6165 batch_size,
6166 sync_threshold,
6167 resize_factor,
6168 },
6169 )
6170 }
6171
6172 fn partial_spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
6173 let epsilon_strategy = prop_oneof![Just(5.0f32), Just(7.5f32), Just(10.0f32)];
6174 (
6175 (
6176 proptest::option::of(1u32..=128), proptest::option::of(Just(1.0f32)), proptest::option::of(epsilon_strategy.clone()), proptest::option::of(1u32..=8), proptest::option::of(Just(1.0f32)), proptest::option::of(epsilon_strategy), proptest::option::of(50u32..=200), proptest::option::of(1usize..=1000), ),
6185 (
6186 proptest::option::of(Just(100.0f32)), proptest::option::of(1u32..=64), proptest::option::of(25u32..=100), proptest::option::of(1u32..=8), proptest::option::of(1u32..=64), proptest::option::of(1usize..=200), proptest::option::of(1usize..=200), proptest::option::of(1usize..=64), ),
6195 )
6196 .prop_map(
6197 |(
6198 (
6199 search_nprobe,
6200 search_rng_factor,
6201 search_rng_epsilon,
6202 nreplica_count,
6203 write_rng_factor,
6204 write_rng_epsilon,
6205 split_threshold,
6206 num_samples_kmeans,
6207 ),
6208 (
6209 initial_lambda,
6210 reassign_neighbor_count,
6211 merge_threshold,
6212 num_centers_to_merge_to,
6213 write_nprobe,
6214 ef_construction,
6215 ef_search,
6216 max_neighbors,
6217 ),
6218 )| SpannIndexConfig {
6219 search_nprobe,
6220 search_rng_factor,
6221 search_rng_epsilon,
6222 nreplica_count,
6223 write_rng_factor,
6224 write_rng_epsilon,
6225 split_threshold,
6226 num_samples_kmeans,
6227 initial_lambda,
6228 reassign_neighbor_count,
6229 merge_threshold,
6230 num_centers_to_merge_to,
6231 write_nprobe,
6232 ef_construction,
6233 ef_search,
6234 max_neighbors,
6235 center_drift_threshold: None,
6236 quantize: false,
6237 },
6238 )
6239 }
6240
6241 proptest! {
6242 #[test]
6243 fn merge_hnsw_configs_preserves_user_overrides(
6244 base in partial_hnsw_index_config_strategy(),
6245 user in partial_hnsw_index_config_strategy(),
6246 ) {
6247 let merged = Schema::merge_hnsw_configs(Some(&base), Some(&user))
6248 .expect("merge should return Some when both are Some");
6249
6250 if user.ef_construction.is_some() {
6252 prop_assert_eq!(merged.ef_construction, user.ef_construction);
6253 }
6254 if user.max_neighbors.is_some() {
6255 prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
6256 }
6257 if user.ef_search.is_some() {
6258 prop_assert_eq!(merged.ef_search, user.ef_search);
6259 }
6260 if user.num_threads.is_some() {
6261 prop_assert_eq!(merged.num_threads, user.num_threads);
6262 }
6263 if user.batch_size.is_some() {
6264 prop_assert_eq!(merged.batch_size, user.batch_size);
6265 }
6266 if user.sync_threshold.is_some() {
6267 prop_assert_eq!(merged.sync_threshold, user.sync_threshold);
6268 }
6269 if user.resize_factor.is_some() {
6270 prop_assert_eq!(merged.resize_factor, user.resize_factor);
6271 }
6272 }
6273
6274 #[test]
6275 fn merge_hnsw_configs_falls_back_to_base_when_user_is_none(
6276 base in partial_hnsw_index_config_strategy(),
6277 ) {
6278 let merged = Schema::merge_hnsw_configs(Some(&base), None)
6279 .expect("merge should return Some when base is Some");
6280
6281 prop_assert_eq!(merged, base);
6283 }
6284
6285 #[test]
6286 fn merge_hnsw_configs_returns_user_when_base_is_none(
6287 user in partial_hnsw_index_config_strategy(),
6288 ) {
6289 let merged = Schema::merge_hnsw_configs(None, Some(&user))
6290 .expect("merge should return Some when user is Some");
6291
6292 prop_assert_eq!(merged, user);
6294 }
6295
6296 #[test]
6297 fn merge_spann_configs_preserves_user_overrides(
6298 base in partial_spann_index_config_strategy(),
6299 user in partial_spann_index_config_strategy(),
6300 ) {
6301 let merged = Schema::merge_spann_configs(Some(&base), Some(&user))
6302 .expect("merge should return Ok")
6303 .expect("merge should return Some when both are Some");
6304
6305 if user.search_nprobe.is_some() {
6307 prop_assert_eq!(merged.search_nprobe, user.search_nprobe);
6308 }
6309 if user.search_rng_epsilon.is_some() {
6310 prop_assert_eq!(merged.search_rng_epsilon, user.search_rng_epsilon);
6311 }
6312 if user.split_threshold.is_some() {
6313 prop_assert_eq!(merged.split_threshold, user.split_threshold);
6314 }
6315 if user.ef_construction.is_some() {
6316 prop_assert_eq!(merged.ef_construction, user.ef_construction);
6317 }
6318 if user.ef_search.is_some() {
6319 prop_assert_eq!(merged.ef_search, user.ef_search);
6320 }
6321 if user.max_neighbors.is_some() {
6322 prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
6323 }
6324 }
6325
6326 #[test]
6327 fn merge_spann_configs_falls_back_to_base_when_user_is_none(
6328 base in partial_spann_index_config_strategy(),
6329 ) {
6330 let merged = Schema::merge_spann_configs(Some(&base), None)
6331 .expect("merge should return Ok")
6332 .expect("merge should return Some when base is Some");
6333
6334 prop_assert_eq!(merged, base);
6336 }
6337
6338 #[test]
6339 fn merge_vector_index_config_preserves_user_overrides(
6340 base in vector_index_config_strategy(),
6341 user in vector_index_config_strategy(),
6342 knn in knn_index_strategy(),
6343 ) {
6344 let merged = Schema::merge_vector_index_config(&base, &user, knn)
6345 .expect("merge should succeed");
6346
6347 if user.space.is_some() {
6349 prop_assert_eq!(merged.space, user.space);
6350 }
6351 if user.embedding_function.is_some() {
6352 prop_assert_eq!(merged.embedding_function, user.embedding_function);
6353 }
6354 if user.source_key.is_some() {
6355 prop_assert_eq!(merged.source_key, user.source_key);
6356 }
6357
6358 match knn {
6360 KnnIndex::Hnsw => {
6361 if let (Some(_base_hnsw), Some(user_hnsw)) = (&base.hnsw, &user.hnsw) {
6362 let merged_hnsw = merged.hnsw.as_ref().expect("hnsw should be Some");
6363 if user_hnsw.ef_construction.is_some() {
6364 prop_assert_eq!(merged_hnsw.ef_construction, user_hnsw.ef_construction);
6365 }
6366 }
6367 }
6368 KnnIndex::Spann => {
6369 if let (Some(_base_spann), Some(user_spann)) = (&base.spann, &user.spann) {
6370 let merged_spann = merged.spann.as_ref().expect("spann should be Some");
6371 if user_spann.search_nprobe.is_some() {
6372 prop_assert_eq!(merged_spann.search_nprobe, user_spann.search_nprobe);
6373 }
6374 }
6375 }
6376 }
6377 }
6378 }
6379
6380 fn expected_vector_index_config(
6381 config: &InternalCollectionConfiguration,
6382 ) -> VectorIndexConfig {
6383 match &config.vector_index {
6384 VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
6385 space: Some(hnsw_config.space.clone()),
6386 embedding_function: config.embedding_function.clone(),
6387 source_key: None,
6388 hnsw: Some(HnswIndexConfig {
6389 ef_construction: Some(hnsw_config.ef_construction),
6390 max_neighbors: Some(hnsw_config.max_neighbors),
6391 ef_search: Some(hnsw_config.ef_search),
6392 num_threads: Some(hnsw_config.num_threads),
6393 batch_size: Some(hnsw_config.batch_size),
6394 sync_threshold: Some(hnsw_config.sync_threshold),
6395 resize_factor: Some(hnsw_config.resize_factor),
6396 }),
6397 spann: None,
6398 },
6399 VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
6400 space: Some(spann_config.space.clone()),
6401 embedding_function: config.embedding_function.clone(),
6402 source_key: None,
6403 hnsw: None,
6404 spann: Some(SpannIndexConfig {
6405 search_nprobe: Some(spann_config.search_nprobe),
6406 search_rng_factor: Some(spann_config.search_rng_factor),
6407 search_rng_epsilon: Some(spann_config.search_rng_epsilon),
6408 nreplica_count: Some(spann_config.nreplica_count),
6409 write_rng_factor: Some(spann_config.write_rng_factor),
6410 write_rng_epsilon: Some(spann_config.write_rng_epsilon),
6411 split_threshold: Some(spann_config.split_threshold),
6412 num_samples_kmeans: Some(spann_config.num_samples_kmeans),
6413 initial_lambda: Some(spann_config.initial_lambda),
6414 reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
6415 merge_threshold: Some(spann_config.merge_threshold),
6416 num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
6417 write_nprobe: Some(spann_config.write_nprobe),
6418 ef_construction: Some(spann_config.ef_construction),
6419 ef_search: Some(spann_config.ef_search),
6420 max_neighbors: Some(spann_config.max_neighbors),
6421 center_drift_threshold: None,
6422 quantize: false,
6423 }),
6424 },
6425 }
6426 }
6427
6428 fn non_special_key_strategy() -> BoxedStrategy<String> {
6429 string_regex(TEST_NAME_PATTERN)
6430 .unwrap()
6431 .prop_filter("exclude special keys", |key| {
6432 key != DOCUMENT_KEY && key != EMBEDDING_KEY
6433 })
6434 .boxed()
6435 }
6436
6437 fn source_key_strategy() -> BoxedStrategy<Option<String>> {
6438 proptest::option::of(prop_oneof![
6439 Just(DOCUMENT_KEY.to_string()),
6440 string_regex(TEST_NAME_PATTERN).unwrap(),
6441 ])
6442 .boxed()
6443 }
6444
6445 fn fts_index_type_strategy() -> impl Strategy<Value = FtsIndexType> {
6446 any::<bool>().prop_map(|enabled| FtsIndexType {
6447 enabled,
6448 config: FtsIndexConfig {},
6449 })
6450 }
6451
6452 fn string_inverted_index_type_strategy() -> impl Strategy<Value = StringInvertedIndexType> {
6453 any::<bool>().prop_map(|enabled| StringInvertedIndexType {
6454 enabled,
6455 config: StringInvertedIndexConfig {},
6456 })
6457 }
6458
6459 fn string_value_type_strategy() -> BoxedStrategy<Option<StringValueType>> {
6460 proptest::option::of(
6461 (
6462 proptest::option::of(string_inverted_index_type_strategy()),
6463 proptest::option::of(fts_index_type_strategy()),
6464 )
6465 .prop_map(|(string_inverted_index, fts_index)| {
6466 StringValueType {
6467 string_inverted_index,
6468 fts_index,
6469 }
6470 }),
6471 )
6472 .boxed()
6473 }
6474
6475 fn float_inverted_index_type_strategy() -> impl Strategy<Value = FloatInvertedIndexType> {
6476 any::<bool>().prop_map(|enabled| FloatInvertedIndexType {
6477 enabled,
6478 config: FloatInvertedIndexConfig {},
6479 })
6480 }
6481
6482 fn float_value_type_strategy() -> BoxedStrategy<Option<FloatValueType>> {
6483 proptest::option::of(
6484 proptest::option::of(float_inverted_index_type_strategy()).prop_map(
6485 |float_inverted_index| FloatValueType {
6486 float_inverted_index,
6487 },
6488 ),
6489 )
6490 .boxed()
6491 }
6492
6493 fn int_inverted_index_type_strategy() -> impl Strategy<Value = IntInvertedIndexType> {
6494 any::<bool>().prop_map(|enabled| IntInvertedIndexType {
6495 enabled,
6496 config: IntInvertedIndexConfig {},
6497 })
6498 }
6499
6500 fn int_value_type_strategy() -> BoxedStrategy<Option<IntValueType>> {
6501 proptest::option::of(
6502 proptest::option::of(int_inverted_index_type_strategy())
6503 .prop_map(|int_inverted_index| IntValueType { int_inverted_index }),
6504 )
6505 .boxed()
6506 }
6507
6508 fn bool_inverted_index_type_strategy() -> impl Strategy<Value = BoolInvertedIndexType> {
6509 any::<bool>().prop_map(|enabled| BoolInvertedIndexType {
6510 enabled,
6511 config: BoolInvertedIndexConfig {},
6512 })
6513 }
6514
6515 fn bool_value_type_strategy() -> BoxedStrategy<Option<BoolValueType>> {
6516 proptest::option::of(
6517 proptest::option::of(bool_inverted_index_type_strategy()).prop_map(
6518 |bool_inverted_index| BoolValueType {
6519 bool_inverted_index,
6520 },
6521 ),
6522 )
6523 .boxed()
6524 }
6525
6526 fn sparse_vector_index_config_strategy() -> impl Strategy<Value = SparseVectorIndexConfig> {
6527 (
6528 sparse_embedding_function_strategy(),
6529 source_key_strategy(),
6530 proptest::option::of(any::<bool>()),
6531 )
6532 .prop_map(|(embedding_function, source_key, bm25)| {
6533 SparseVectorIndexConfig {
6534 embedding_function,
6535 source_key,
6536 bm25,
6537 }
6538 })
6539 }
6540
6541 fn sparse_vector_value_type_strategy() -> BoxedStrategy<Option<SparseVectorValueType>> {
6542 proptest::option::of(
6543 (
6544 any::<bool>(),
6545 proptest::option::of(sparse_vector_index_config_strategy()),
6546 )
6547 .prop_map(|(enabled, config)| SparseVectorValueType {
6548 sparse_vector_index: config.map(|cfg| SparseVectorIndexType {
6549 enabled,
6550 config: cfg,
6551 }),
6552 }),
6553 )
6554 .boxed()
6555 }
6556
6557 fn hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
6558 internal_hnsw_configuration_strategy().prop_map(|config| HnswIndexConfig {
6559 ef_construction: Some(config.ef_construction),
6560 max_neighbors: Some(config.max_neighbors),
6561 ef_search: Some(config.ef_search),
6562 num_threads: Some(config.num_threads),
6563 batch_size: Some(config.batch_size),
6564 sync_threshold: Some(config.sync_threshold),
6565 resize_factor: Some(config.resize_factor),
6566 })
6567 }
6568
6569 fn spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
6570 internal_spann_configuration_strategy().prop_map(|config| SpannIndexConfig {
6571 search_nprobe: Some(config.search_nprobe),
6572 search_rng_factor: Some(config.search_rng_factor),
6573 search_rng_epsilon: Some(config.search_rng_epsilon),
6574 nreplica_count: Some(config.nreplica_count),
6575 write_rng_factor: Some(config.write_rng_factor),
6576 write_rng_epsilon: Some(config.write_rng_epsilon),
6577 split_threshold: Some(config.split_threshold),
6578 num_samples_kmeans: Some(config.num_samples_kmeans),
6579 initial_lambda: Some(config.initial_lambda),
6580 reassign_neighbor_count: Some(config.reassign_neighbor_count),
6581 merge_threshold: Some(config.merge_threshold),
6582 num_centers_to_merge_to: Some(config.num_centers_to_merge_to),
6583 write_nprobe: Some(config.write_nprobe),
6584 ef_construction: Some(config.ef_construction),
6585 ef_search: Some(config.ef_search),
6586 max_neighbors: Some(config.max_neighbors),
6587 center_drift_threshold: None,
6588 quantize: false,
6589 })
6590 }
6591
6592 fn vector_index_config_strategy() -> impl Strategy<Value = VectorIndexConfig> {
6593 (
6594 proptest::option::of(space_strategy()),
6595 embedding_function_strategy(),
6596 source_key_strategy(),
6597 proptest::option::of(hnsw_index_config_strategy()),
6598 proptest::option::of(spann_index_config_strategy()),
6599 )
6600 .prop_map(|(space, embedding_function, source_key, hnsw, spann)| {
6601 VectorIndexConfig {
6602 space,
6603 embedding_function,
6604 source_key,
6605 hnsw,
6606 spann,
6607 }
6608 })
6609 }
6610
6611 fn vector_index_type_strategy() -> impl Strategy<Value = VectorIndexType> {
6612 (any::<bool>(), vector_index_config_strategy())
6613 .prop_map(|(enabled, config)| VectorIndexType { enabled, config })
6614 }
6615
6616 fn float_list_value_type_strategy() -> BoxedStrategy<Option<FloatListValueType>> {
6617 proptest::option::of(
6618 proptest::option::of(vector_index_type_strategy())
6619 .prop_map(|vector_index| FloatListValueType { vector_index }),
6620 )
6621 .boxed()
6622 }
6623
6624 fn value_types_strategy() -> BoxedStrategy<ValueTypes> {
6625 (
6626 string_value_type_strategy(),
6627 float_list_value_type_strategy(),
6628 sparse_vector_value_type_strategy(),
6629 int_value_type_strategy(),
6630 float_value_type_strategy(),
6631 bool_value_type_strategy(),
6632 )
6633 .prop_map(
6634 |(string, float_list, sparse_vector, int, float, boolean)| ValueTypes {
6635 string,
6636 float_list,
6637 sparse_vector,
6638 int,
6639 float,
6640 boolean,
6641 },
6642 )
6643 .boxed()
6644 }
6645
6646 fn schema_strategy() -> BoxedStrategy<Schema> {
6647 (
6648 value_types_strategy(),
6649 proptest::collection::hash_map(
6650 non_special_key_strategy(),
6651 value_types_strategy(),
6652 0..=3,
6653 ),
6654 proptest::option::of(value_types_strategy()),
6655 proptest::option::of(value_types_strategy()),
6656 )
6657 .prop_map(
6658 |(defaults, mut extra_keys, document_override, embedding_override)| {
6659 if let Some(doc) = document_override {
6660 extra_keys.insert(DOCUMENT_KEY.to_string(), doc);
6661 }
6662 if let Some(embed) = embedding_override {
6663 extra_keys.insert(EMBEDDING_KEY.to_string(), embed);
6664 }
6665 Schema {
6666 defaults,
6667 keys: extra_keys,
6668 cmek: None,
6669 source_attached_function_id: None,
6670 }
6671 },
6672 )
6673 .boxed()
6674 }
6675
6676 fn force_non_default_schema(mut schema: Schema) -> Schema {
6677 if schema.is_default() {
6678 if let Some(string_value) = schema
6679 .defaults
6680 .string
6681 .as_mut()
6682 .and_then(|string_value| string_value.string_inverted_index.as_mut())
6683 {
6684 string_value.enabled = !string_value.enabled;
6685 } else {
6686 schema.defaults.string = Some(StringValueType {
6687 string_inverted_index: Some(StringInvertedIndexType {
6688 enabled: false,
6689 config: StringInvertedIndexConfig {},
6690 }),
6691 fts_index: None,
6692 });
6693 }
6694 }
6695 schema
6696 }
6697
6698 fn non_default_schema_strategy() -> BoxedStrategy<Schema> {
6699 schema_strategy().prop_map(force_non_default_schema).boxed()
6700 }
6701
6702 fn extract_vector_configs(schema: &Schema) -> (VectorIndexConfig, VectorIndexConfig) {
6703 let defaults = schema
6704 .defaults
6705 .float_list
6706 .as_ref()
6707 .and_then(|fl| fl.vector_index.as_ref())
6708 .map(|vi| vi.config.clone())
6709 .expect("defaults vector index missing");
6710
6711 let embedding = schema
6712 .keys
6713 .get(EMBEDDING_KEY)
6714 .and_then(|value_types| value_types.float_list.as_ref())
6715 .and_then(|fl| fl.vector_index.as_ref())
6716 .map(|vi| vi.config.clone())
6717 .expect("#embedding vector index missing");
6718
6719 (defaults, embedding)
6720 }
6721
6722 proptest! {
6723 #[test]
6724 fn reconcile_schema_and_config_matches_convert_for_config_only(
6725 config in internal_collection_configuration_strategy(),
6726 knn in knn_index_strategy(),
6727 ) {
6728 let result = Schema::reconcile_schema_and_config(None, Some(&config), knn)
6729 .expect("reconciliation should succeed");
6730
6731 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6732 let expected_config = expected_vector_index_config(&config);
6733
6734 prop_assert_eq!(defaults_vi, expected_config.clone());
6735
6736 let mut expected_embedding_config = expected_config;
6737 expected_embedding_config.source_key = Some(DOCUMENT_KEY.to_string());
6738 prop_assert_eq!(embedding_vi, expected_embedding_config);
6739
6740 prop_assert_eq!(result.keys.len(), 2);
6741 }
6742 }
6743
6744 proptest! {
6745 #[test]
6746 fn reconcile_schema_and_config_errors_when_both_non_default(
6747 config in non_default_internal_collection_configuration_strategy(),
6748 knn in knn_index_strategy(),
6749 ) {
6750 let schema = Schema::try_from(&config)
6751 .expect("conversion should succeed");
6752 prop_assume!(!schema.is_default());
6753
6754 let result = Schema::reconcile_schema_and_config(Some(&schema), Some(&config), knn);
6755
6756 prop_assert!(matches!(result, Err(SchemaError::ConfigAndSchemaConflict)));
6757 }
6758 }
6759
6760 proptest! {
6761 #[test]
6762 fn reconcile_schema_and_config_matches_schema_only_path(
6763 schema in schema_strategy(),
6764 knn in knn_index_strategy(),
6765 ) {
6766 let result = Schema::reconcile_schema_and_config(Some(&schema), None, knn)
6767 .expect("reconciliation should succeed");
6768
6769 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6770
6771 if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
6773 if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
6774 if let Some(schema_space) = &schema_vi.config.space {
6776 prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
6777 }
6778 if let Some(schema_ef) = &schema_vi.config.embedding_function {
6779 prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
6780 }
6781 match knn {
6783 KnnIndex::Hnsw => {
6784 if let Some(schema_hnsw) = &schema_vi.config.hnsw {
6785 if let Some(merged_hnsw) = &defaults_vi.hnsw {
6786 if let Some(schema_ef_construction) = schema_hnsw.ef_construction {
6787 prop_assert_eq!(merged_hnsw.ef_construction, Some(schema_ef_construction));
6788 }
6789 }
6790 }
6791 }
6792 KnnIndex::Spann => {
6793 if let Some(schema_spann) = &schema_vi.config.spann {
6794 if let Some(merged_spann) = &defaults_vi.spann {
6795 if let Some(schema_search_nprobe) = schema_spann.search_nprobe {
6796 prop_assert_eq!(merged_spann.search_nprobe, Some(schema_search_nprobe));
6797 }
6798 }
6799 }
6800 }
6801 }
6802 }
6803 }
6804
6805 if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
6807 if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
6808 if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
6809 if let Some(schema_space) = &embedding_vi_type.config.space {
6810 prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
6811 }
6812 }
6813 }
6814 }
6815 }
6816 }
6817
6818 proptest! {
6819 #[test]
6820 fn reconcile_schema_and_config_with_default_schema_and_default_config_applies_embedding_function(
6821 embedding_function in default_embedding_function_strategy(),
6822 knn in knn_index_strategy(),
6823 ) {
6824 let schema = Schema::new_default(knn);
6825 let mut config = match knn {
6826 KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
6827 KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
6828 };
6829 config.embedding_function = embedding_function.clone();
6830
6831 let result = Schema::reconcile_schema_and_config(
6832 Some(&schema),
6833 Some(&config),
6834 knn,
6835 )
6836 .expect("reconciliation should succeed");
6837
6838 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6839
6840 if let Some(ef) = embedding_function {
6842 prop_assert_eq!(defaults_vi.embedding_function, Some(ef.clone()));
6843 prop_assert_eq!(embedding_vi.embedding_function, Some(ef));
6844 } else {
6845 prop_assert_eq!(defaults_vi.embedding_function, None);
6847 prop_assert_eq!(embedding_vi.embedding_function, None);
6848 }
6849 }
6850 }
6851
6852 proptest! {
6853 #[test]
6854 fn reconcile_schema_and_config_with_default_config_keeps_non_default_schema(
6855 schema in non_default_schema_strategy(),
6856 knn in knn_index_strategy(),
6857 ) {
6858 let default_config = match knn {
6859 KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
6860 KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
6861 };
6862
6863 let result = Schema::reconcile_schema_and_config(
6864 Some(&schema),
6865 Some(&default_config),
6866 knn,
6867 )
6868 .expect("reconciliation should succeed");
6869
6870 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6871
6872 if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
6875 if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
6876 if let Some(schema_space) = &schema_vi.config.space {
6877 prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
6878 }
6879 if let Some(schema_ef) = &schema_vi.config.embedding_function {
6880 prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
6881 }
6882 }
6883 }
6884
6885 if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
6887 if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
6888 if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
6889 if let Some(schema_space) = &embedding_vi_type.config.space {
6890 prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
6891 }
6892 }
6893 }
6894 }
6895 }
6896 }
6897 }
6898}