1use chroma_error::{ChromaError, ErrorCodes};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use thiserror::Error;
5use validator::Validate;
6
7use crate::collection_configuration::{
8 EmbeddingFunctionConfiguration, InternalCollectionConfiguration, VectorIndexConfiguration,
9};
10use crate::hnsw_configuration::Space;
11use crate::metadata::{MetadataComparison, MetadataValueType, Where};
12use crate::operator::QueryVector;
13use crate::{
14 default_batch_size, default_construction_ef, default_construction_ef_spann,
15 default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
16 default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
17 default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
18 default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
19 default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
20 default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor,
21 InternalSpannConfiguration, KnnIndex,
22};
23
24impl ChromaError for SchemaError {
25 fn code(&self) -> ErrorCodes {
26 ErrorCodes::Internal
27 }
28}
29
30#[derive(Debug, Error)]
31pub enum SchemaError {
32 #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
33 MissingIndexConfiguration { key: String, value_type: String },
34 #[error("Schema reconciliation failed: {reason}")]
35 InvalidSchema { reason: String },
36 #[error("Cannot set both collection config and schema simultaneously")]
37 ConfigAndSchemaConflict,
38 #[error("Cannot merge schemas with differing defaults")]
39 DefaultsMismatch,
40 #[error("Conflicting configuration for {context}")]
41 ConfigurationConflict { context: String },
42 #[error("Invalid HNSW configuration: {0}")]
43 InvalidHnswConfig(validator::ValidationErrors),
44 #[error("Invalid SPANN configuration: {0}")]
45 InvalidSpannConfig(validator::ValidationErrors),
46 #[error(transparent)]
47 Builder(#[from] SchemaBuilderError),
48}
49
50#[derive(Debug, Error)]
51pub enum SchemaBuilderError {
52 #[error("Vector index must be configured globally using create_index(None, config), not on specific key '{key}'")]
53 VectorIndexMustBeGlobal { key: String },
54 #[error("FTS index must be configured globally using create_index(None, config), not on specific key '{key}'")]
55 FtsIndexMustBeGlobal { key: String },
56 #[error("Cannot modify special key '{key}' - it is managed automatically by the system. To customize vector search, modify the global vector config instead.")]
57 SpecialKeyModificationNotAllowed { key: String },
58 #[error("Sparse vector index requires a specific key. Use create_index(Some(\"key_name\"), config) instead of create_index(None, config)")]
59 SparseVectorRequiresKey,
60 #[error("Only one sparse vector index allowed per collection. Key '{existing_key}' already has a sparse vector index. Remove it first or use that key.")]
61 MultipleSparseVectorIndexes { existing_key: String },
62 #[error("Vector index deletion not supported. The vector index is always enabled on #embedding. To disable vector search, disable the collection instead.")]
63 VectorIndexDeletionNotSupported,
64 #[error("FTS index deletion not supported. The FTS index is always enabled on #document. To disable full-text search, use a different collection without FTS.")]
65 FtsIndexDeletionNotSupported,
66 #[error("Sparse vector index deletion not supported yet. Sparse vector indexes cannot be removed once created.")]
67 SparseVectorIndexDeletionNotSupported,
68}
69
70#[derive(Debug, Error)]
71pub enum FilterValidationError {
72 #[error(
73 "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
74 )]
75 IndexingDisabled {
76 key: String,
77 value_type: MetadataValueType,
78 },
79 #[error(transparent)]
80 Schema(#[from] SchemaError),
81}
82
83impl ChromaError for SchemaBuilderError {
84 fn code(&self) -> ErrorCodes {
85 ErrorCodes::InvalidArgument
86 }
87}
88
89impl ChromaError for FilterValidationError {
90 fn code(&self) -> ErrorCodes {
91 match self {
92 FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
93 FilterValidationError::Schema(_) => ErrorCodes::Internal,
94 }
95 }
96}
97
/// Name of the string value type.
pub const STRING_VALUE_NAME: &str = "string";
/// Name of the integer value type.
pub const INT_VALUE_NAME: &str = "int";
/// Name of the boolean value type.
pub const BOOL_VALUE_NAME: &str = "bool";
/// Name of the float value type.
pub const FLOAT_VALUE_NAME: &str = "float";
/// Name of the float-list value type.
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
/// Name of the sparse-vector value type.
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";
110
/// Name of the full-text-search index.
pub const FTS_INDEX_NAME: &str = "fts_index";
/// Name of the dense vector index.
pub const VECTOR_INDEX_NAME: &str = "vector_index";
/// Name of the sparse vector index.
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
/// Name of the inverted index over string values.
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
/// Name of the inverted index over integer values.
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
/// Name of the inverted index over float values.
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
/// Name of the inverted index over boolean values.
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";
119
/// Special schema key under which the document's string indexes are configured.
pub const DOCUMENT_KEY: &str = "#document";
/// Special schema key under which the embedding's vector index is configured.
pub const EMBEDDING_KEY: &str = "#embedding";
123
124#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
133#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
134pub struct Schema {
135 pub defaults: ValueTypes,
137 #[serde(rename = "keys", alias = "key_overrides")]
140 pub keys: HashMap<String, ValueTypes>,
141}
142
143impl Default for Schema {
144 fn default() -> Self {
161 let defaults = ValueTypes {
163 string: Some(StringValueType {
164 fts_index: Some(FtsIndexType {
165 enabled: false,
166 config: FtsIndexConfig {},
167 }),
168 string_inverted_index: Some(StringInvertedIndexType {
169 enabled: true,
170 config: StringInvertedIndexConfig {},
171 }),
172 }),
173 float_list: Some(FloatListValueType {
174 vector_index: Some(VectorIndexType {
175 enabled: false,
176 config: VectorIndexConfig {
177 space: None, embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
179 source_key: None,
180 hnsw: None, spann: None, },
183 }),
184 }),
185 sparse_vector: Some(SparseVectorValueType {
186 sparse_vector_index: Some(SparseVectorIndexType {
187 enabled: false,
188 config: SparseVectorIndexConfig {
189 embedding_function: None,
190 source_key: None,
191 bm25: None,
192 },
193 }),
194 }),
195 int: Some(IntValueType {
196 int_inverted_index: Some(IntInvertedIndexType {
197 enabled: true,
198 config: IntInvertedIndexConfig {},
199 }),
200 }),
201 float: Some(FloatValueType {
202 float_inverted_index: Some(FloatInvertedIndexType {
203 enabled: true,
204 config: FloatInvertedIndexConfig {},
205 }),
206 }),
207 boolean: Some(BoolValueType {
208 bool_inverted_index: Some(BoolInvertedIndexType {
209 enabled: true,
210 config: BoolInvertedIndexConfig {},
211 }),
212 }),
213 };
214
215 let mut keys = HashMap::new();
217
218 keys.insert(
220 DOCUMENT_KEY.to_string(),
221 ValueTypes {
222 string: Some(StringValueType {
223 fts_index: Some(FtsIndexType {
224 enabled: true,
225 config: FtsIndexConfig {},
226 }),
227 string_inverted_index: Some(StringInvertedIndexType {
228 enabled: false,
229 config: StringInvertedIndexConfig {},
230 }),
231 }),
232 ..Default::default()
233 },
234 );
235
236 keys.insert(
238 EMBEDDING_KEY.to_string(),
239 ValueTypes {
240 float_list: Some(FloatListValueType {
241 vector_index: Some(VectorIndexType {
242 enabled: true,
243 config: VectorIndexConfig {
244 space: None, embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
246 source_key: Some(DOCUMENT_KEY.to_string()),
247 hnsw: None, spann: None, },
250 }),
251 }),
252 ..Default::default()
253 },
254 );
255
256 Schema { defaults, keys }
257 }
258}
259
260pub fn is_embedding_function_default(
261 embedding_function: &Option<EmbeddingFunctionConfiguration>,
262) -> bool {
263 match embedding_function {
264 None => true,
265 Some(embedding_function) => embedding_function.is_default(),
266 }
267}
268
269pub fn is_space_default(space: &Option<Space>) -> bool {
271 match space {
272 None => true, Some(s) => *s == default_space(), }
275}
276
277pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
279 hnsw_config.ef_construction == Some(default_construction_ef())
280 && hnsw_config.ef_search == Some(default_search_ef())
281 && hnsw_config.max_neighbors == Some(default_m())
282 && hnsw_config.num_threads == Some(default_num_threads())
283 && hnsw_config.batch_size == Some(default_batch_size())
284 && hnsw_config.sync_threshold == Some(default_sync_threshold())
285 && hnsw_config.resize_factor == Some(default_resize_factor())
286}
287
288#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
295#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
296pub struct ValueTypes {
297 #[serde(
298 rename = "string",
299 alias = "#string",
300 skip_serializing_if = "Option::is_none"
301 )] pub string: Option<StringValueType>,
303
304 #[serde(
305 rename = "float_list",
306 alias = "#float_list",
307 skip_serializing_if = "Option::is_none"
308 )]
309 pub float_list: Option<FloatListValueType>,
311
312 #[serde(
313 rename = "sparse_vector",
314 alias = "#sparse_vector",
315 skip_serializing_if = "Option::is_none"
316 )]
317 pub sparse_vector: Option<SparseVectorValueType>,
319
320 #[serde(
321 rename = "int",
322 alias = "#int",
323 skip_serializing_if = "Option::is_none"
324 )] pub int: Option<IntValueType>,
326
327 #[serde(
328 rename = "float",
329 alias = "#float",
330 skip_serializing_if = "Option::is_none"
331 )] pub float: Option<FloatValueType>,
333
334 #[serde(
335 rename = "bool",
336 alias = "#bool",
337 skip_serializing_if = "Option::is_none"
338 )] pub boolean: Option<BoolValueType>,
340}
341
342#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
344#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
345pub struct StringValueType {
346 #[serde(
347 rename = "fts_index",
348 alias = "$fts_index",
349 skip_serializing_if = "Option::is_none"
350 )] pub fts_index: Option<FtsIndexType>,
352
353 #[serde(
354 rename = "string_inverted_index", alias = "$string_inverted_index",
356 skip_serializing_if = "Option::is_none"
357 )]
358 pub string_inverted_index: Option<StringInvertedIndexType>,
359}
360
361#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
363#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
364pub struct FloatListValueType {
365 #[serde(
366 rename = "vector_index",
367 alias = "$vector_index",
368 skip_serializing_if = "Option::is_none"
369 )] pub vector_index: Option<VectorIndexType>,
371}
372
373#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
375#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
376pub struct SparseVectorValueType {
377 #[serde(
378 rename = "sparse_vector_index", alias = "$sparse_vector_index",
380 skip_serializing_if = "Option::is_none"
381 )]
382 pub sparse_vector_index: Option<SparseVectorIndexType>,
383}
384
385#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
387#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
388pub struct IntValueType {
389 #[serde(
390 rename = "int_inverted_index",
391 alias = "$int_inverted_index",
392 skip_serializing_if = "Option::is_none"
393 )]
394 pub int_inverted_index: Option<IntInvertedIndexType>,
396}
397
398#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
400#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
401pub struct FloatValueType {
402 #[serde(
403 rename = "float_inverted_index", alias = "$float_inverted_index",
405 skip_serializing_if = "Option::is_none"
406 )]
407 pub float_inverted_index: Option<FloatInvertedIndexType>,
408}
409
410#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
412#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
413pub struct BoolValueType {
414 #[serde(
415 rename = "bool_inverted_index", alias = "$bool_inverted_index",
417 skip_serializing_if = "Option::is_none"
418 )]
419 pub bool_inverted_index: Option<BoolInvertedIndexType>,
420}
421
422#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
424#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
425pub struct FtsIndexType {
426 pub enabled: bool,
427 pub config: FtsIndexConfig,
428}
429
430#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
431#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
432pub struct VectorIndexType {
433 pub enabled: bool,
434 pub config: VectorIndexConfig,
435}
436
437#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
438#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
439pub struct SparseVectorIndexType {
440 pub enabled: bool,
441 pub config: SparseVectorIndexConfig,
442}
443
444#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
445#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
446pub struct StringInvertedIndexType {
447 pub enabled: bool,
448 pub config: StringInvertedIndexConfig,
449}
450
451#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
452#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
453pub struct IntInvertedIndexType {
454 pub enabled: bool,
455 pub config: IntInvertedIndexConfig,
456}
457
458#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
459#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
460pub struct FloatInvertedIndexType {
461 pub enabled: bool,
462 pub config: FloatInvertedIndexConfig,
463}
464
465#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
466#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
467pub struct BoolInvertedIndexType {
468 pub enabled: bool,
469 pub config: BoolInvertedIndexConfig,
470}
471
472impl Schema {
473 pub fn new_default(default_knn_index: KnnIndex) -> Self {
475 let vector_config = VectorIndexType {
477 enabled: false,
478 config: VectorIndexConfig {
479 space: Some(default_space()),
480 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
481 source_key: None,
482 hnsw: match default_knn_index {
483 KnnIndex::Hnsw => Some(HnswIndexConfig {
484 ef_construction: Some(default_construction_ef()),
485 max_neighbors: Some(default_m()),
486 ef_search: Some(default_search_ef()),
487 num_threads: Some(default_num_threads()),
488 batch_size: Some(default_batch_size()),
489 sync_threshold: Some(default_sync_threshold()),
490 resize_factor: Some(default_resize_factor()),
491 }),
492 KnnIndex::Spann => None,
493 },
494 spann: match default_knn_index {
495 KnnIndex::Hnsw => None,
496 KnnIndex::Spann => Some(SpannIndexConfig {
497 search_nprobe: Some(default_search_nprobe()),
498 search_rng_factor: Some(default_search_rng_factor()),
499 search_rng_epsilon: Some(default_search_rng_epsilon()),
500 nreplica_count: Some(default_nreplica_count()),
501 write_rng_factor: Some(default_write_rng_factor()),
502 write_rng_epsilon: Some(default_write_rng_epsilon()),
503 split_threshold: Some(default_split_threshold()),
504 num_samples_kmeans: Some(default_num_samples_kmeans()),
505 initial_lambda: Some(default_initial_lambda()),
506 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
507 merge_threshold: Some(default_merge_threshold()),
508 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
509 write_nprobe: Some(default_write_nprobe()),
510 ef_construction: Some(default_construction_ef_spann()),
511 ef_search: Some(default_search_ef_spann()),
512 max_neighbors: Some(default_m_spann()),
513 }),
514 },
515 },
516 };
517
518 let defaults = ValueTypes {
520 string: Some(StringValueType {
521 string_inverted_index: Some(StringInvertedIndexType {
522 enabled: true,
523 config: StringInvertedIndexConfig {},
524 }),
525 fts_index: Some(FtsIndexType {
526 enabled: false,
527 config: FtsIndexConfig {},
528 }),
529 }),
530 float: Some(FloatValueType {
531 float_inverted_index: Some(FloatInvertedIndexType {
532 enabled: true,
533 config: FloatInvertedIndexConfig {},
534 }),
535 }),
536 int: Some(IntValueType {
537 int_inverted_index: Some(IntInvertedIndexType {
538 enabled: true,
539 config: IntInvertedIndexConfig {},
540 }),
541 }),
542 boolean: Some(BoolValueType {
543 bool_inverted_index: Some(BoolInvertedIndexType {
544 enabled: true,
545 config: BoolInvertedIndexConfig {},
546 }),
547 }),
548 float_list: Some(FloatListValueType {
549 vector_index: Some(vector_config),
550 }),
551 sparse_vector: Some(SparseVectorValueType {
552 sparse_vector_index: Some(SparseVectorIndexType {
553 enabled: false,
554 config: SparseVectorIndexConfig {
555 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
556 source_key: None,
557 bm25: Some(false),
558 },
559 }),
560 }),
561 };
562
563 let mut keys = HashMap::new();
565
566 let embedding_defaults = ValueTypes {
568 float_list: Some(FloatListValueType {
569 vector_index: Some(VectorIndexType {
570 enabled: true,
571 config: VectorIndexConfig {
572 space: Some(default_space()),
573 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
574 source_key: Some(DOCUMENT_KEY.to_string()),
575 hnsw: match default_knn_index {
576 KnnIndex::Hnsw => Some(HnswIndexConfig {
577 ef_construction: Some(default_construction_ef()),
578 max_neighbors: Some(default_m()),
579 ef_search: Some(default_search_ef()),
580 num_threads: Some(default_num_threads()),
581 batch_size: Some(default_batch_size()),
582 sync_threshold: Some(default_sync_threshold()),
583 resize_factor: Some(default_resize_factor()),
584 }),
585 KnnIndex::Spann => None,
586 },
587 spann: match default_knn_index {
588 KnnIndex::Hnsw => None,
589 KnnIndex::Spann => Some(SpannIndexConfig {
590 search_nprobe: Some(default_search_nprobe()),
591 search_rng_factor: Some(default_search_rng_factor()),
592 search_rng_epsilon: Some(default_search_rng_epsilon()),
593 nreplica_count: Some(default_nreplica_count()),
594 write_rng_factor: Some(default_write_rng_factor()),
595 write_rng_epsilon: Some(default_write_rng_epsilon()),
596 split_threshold: Some(default_split_threshold()),
597 num_samples_kmeans: Some(default_num_samples_kmeans()),
598 initial_lambda: Some(default_initial_lambda()),
599 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
600 merge_threshold: Some(default_merge_threshold()),
601 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
602 write_nprobe: Some(default_write_nprobe()),
603 ef_construction: Some(default_construction_ef_spann()),
604 ef_search: Some(default_search_ef_spann()),
605 max_neighbors: Some(default_m_spann()),
606 }),
607 },
608 },
609 }),
610 }),
611 ..Default::default()
612 };
613 keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
614
615 let document_defaults = ValueTypes {
617 string: Some(StringValueType {
618 fts_index: Some(FtsIndexType {
619 enabled: true,
620 config: FtsIndexConfig {},
621 }),
622 string_inverted_index: Some(StringInvertedIndexType {
623 enabled: false,
624 config: StringInvertedIndexConfig {},
625 }),
626 }),
627 ..Default::default()
628 };
629 keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
630
631 Schema { defaults, keys }
632 }
633
634 pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
635 let to_internal = |vector_index: &VectorIndexType| {
636 let space = vector_index.config.space.clone();
637 vector_index
638 .config
639 .spann
640 .clone()
641 .map(|config| (space.as_ref(), &config).into())
642 };
643
644 self.keys
645 .get(EMBEDDING_KEY)
646 .and_then(|value_types| value_types.float_list.as_ref())
647 .and_then(|float_list| float_list.vector_index.as_ref())
648 .and_then(to_internal)
649 .or_else(|| {
650 self.defaults
651 .float_list
652 .as_ref()
653 .and_then(|float_list| float_list.vector_index.as_ref())
654 .and_then(to_internal)
655 })
656 }
657
658 pub fn reconcile_with_defaults(user_schema: Option<Schema>) -> Result<Self, SchemaError> {
665 let default_schema = Schema::new_default(KnnIndex::Spann);
666
667 match user_schema {
668 Some(user) => {
669 let merged_defaults =
671 Self::merge_value_types(&default_schema.defaults, &user.defaults)?;
672
673 let mut merged_keys = default_schema.keys.clone();
675 for (key, user_value_types) in user.keys {
676 if let Some(default_value_types) = merged_keys.get(&key) {
677 let merged_value_types =
679 Self::merge_value_types(default_value_types, &user_value_types)?;
680 merged_keys.insert(key, merged_value_types);
681 } else {
682 merged_keys.insert(key, user_value_types);
684 }
685 }
686
687 Ok(Schema {
688 defaults: merged_defaults,
689 keys: merged_keys,
690 })
691 }
692 None => Ok(default_schema),
693 }
694 }
695
696 pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
698 if self.defaults != other.defaults {
699 return Err(SchemaError::DefaultsMismatch);
700 }
701
702 let mut keys = self.keys.clone();
703
704 for (key, other_value_types) in &other.keys {
705 if let Some(existing) = keys.get(key).cloned() {
706 let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
707 keys.insert(key.clone(), merged);
708 } else {
709 keys.insert(key.clone(), other_value_types.clone());
710 }
711 }
712
713 Ok(Schema {
714 defaults: self.defaults.clone(),
715 keys,
716 })
717 }
718
719 fn merge_override_value_types(
720 key: &str,
721 left: &ValueTypes,
722 right: &ValueTypes,
723 ) -> Result<ValueTypes, SchemaError> {
724 Ok(ValueTypes {
725 string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
726 float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
727 int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
728 boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
729 float_list: Self::merge_float_list_override(
730 key,
731 left.float_list.as_ref(),
732 right.float_list.as_ref(),
733 )?,
734 sparse_vector: Self::merge_sparse_vector_override(
735 key,
736 left.sparse_vector.as_ref(),
737 right.sparse_vector.as_ref(),
738 )?,
739 })
740 }
741
742 fn merge_string_override(
743 key: &str,
744 left: Option<&StringValueType>,
745 right: Option<&StringValueType>,
746 ) -> Result<Option<StringValueType>, SchemaError> {
747 match (left, right) {
748 (Some(l), Some(r)) => Ok(Some(StringValueType {
749 string_inverted_index: Self::merge_index_or_error(
750 l.string_inverted_index.as_ref(),
751 r.string_inverted_index.as_ref(),
752 &format!("key '{key}' string.string_inverted_index"),
753 )?,
754 fts_index: Self::merge_index_or_error(
755 l.fts_index.as_ref(),
756 r.fts_index.as_ref(),
757 &format!("key '{key}' string.fts_index"),
758 )?,
759 })),
760 (Some(l), None) => Ok(Some(l.clone())),
761 (None, Some(r)) => Ok(Some(r.clone())),
762 (None, None) => Ok(None),
763 }
764 }
765
766 fn merge_float_override(
767 key: &str,
768 left: Option<&FloatValueType>,
769 right: Option<&FloatValueType>,
770 ) -> Result<Option<FloatValueType>, SchemaError> {
771 match (left, right) {
772 (Some(l), Some(r)) => Ok(Some(FloatValueType {
773 float_inverted_index: Self::merge_index_or_error(
774 l.float_inverted_index.as_ref(),
775 r.float_inverted_index.as_ref(),
776 &format!("key '{key}' float.float_inverted_index"),
777 )?,
778 })),
779 (Some(l), None) => Ok(Some(l.clone())),
780 (None, Some(r)) => Ok(Some(r.clone())),
781 (None, None) => Ok(None),
782 }
783 }
784
785 fn merge_int_override(
786 key: &str,
787 left: Option<&IntValueType>,
788 right: Option<&IntValueType>,
789 ) -> Result<Option<IntValueType>, SchemaError> {
790 match (left, right) {
791 (Some(l), Some(r)) => Ok(Some(IntValueType {
792 int_inverted_index: Self::merge_index_or_error(
793 l.int_inverted_index.as_ref(),
794 r.int_inverted_index.as_ref(),
795 &format!("key '{key}' int.int_inverted_index"),
796 )?,
797 })),
798 (Some(l), None) => Ok(Some(l.clone())),
799 (None, Some(r)) => Ok(Some(r.clone())),
800 (None, None) => Ok(None),
801 }
802 }
803
804 fn merge_bool_override(
805 key: &str,
806 left: Option<&BoolValueType>,
807 right: Option<&BoolValueType>,
808 ) -> Result<Option<BoolValueType>, SchemaError> {
809 match (left, right) {
810 (Some(l), Some(r)) => Ok(Some(BoolValueType {
811 bool_inverted_index: Self::merge_index_or_error(
812 l.bool_inverted_index.as_ref(),
813 r.bool_inverted_index.as_ref(),
814 &format!("key '{key}' bool.bool_inverted_index"),
815 )?,
816 })),
817 (Some(l), None) => Ok(Some(l.clone())),
818 (None, Some(r)) => Ok(Some(r.clone())),
819 (None, None) => Ok(None),
820 }
821 }
822
823 fn merge_float_list_override(
824 key: &str,
825 left: Option<&FloatListValueType>,
826 right: Option<&FloatListValueType>,
827 ) -> Result<Option<FloatListValueType>, SchemaError> {
828 match (left, right) {
829 (Some(l), Some(r)) => Ok(Some(FloatListValueType {
830 vector_index: Self::merge_index_or_error(
831 l.vector_index.as_ref(),
832 r.vector_index.as_ref(),
833 &format!("key '{key}' float_list.vector_index"),
834 )?,
835 })),
836 (Some(l), None) => Ok(Some(l.clone())),
837 (None, Some(r)) => Ok(Some(r.clone())),
838 (None, None) => Ok(None),
839 }
840 }
841
842 fn merge_sparse_vector_override(
843 key: &str,
844 left: Option<&SparseVectorValueType>,
845 right: Option<&SparseVectorValueType>,
846 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
847 match (left, right) {
848 (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
849 sparse_vector_index: Self::merge_index_or_error(
850 l.sparse_vector_index.as_ref(),
851 r.sparse_vector_index.as_ref(),
852 &format!("key '{key}' sparse_vector.sparse_vector_index"),
853 )?,
854 })),
855 (Some(l), None) => Ok(Some(l.clone())),
856 (None, Some(r)) => Ok(Some(r.clone())),
857 (None, None) => Ok(None),
858 }
859 }
860
861 fn merge_index_or_error<T: Clone + PartialEq>(
862 left: Option<&T>,
863 right: Option<&T>,
864 context: &str,
865 ) -> Result<Option<T>, SchemaError> {
866 match (left, right) {
867 (Some(l), Some(r)) => {
868 if l == r {
869 Ok(Some(l.clone()))
870 } else {
871 Err(SchemaError::ConfigurationConflict {
872 context: context.to_string(),
873 })
874 }
875 }
876 (Some(l), None) => Ok(Some(l.clone())),
877 (None, Some(r)) => Ok(Some(r.clone())),
878 (None, None) => Ok(None),
879 }
880 }
881
882 fn merge_value_types(
885 default: &ValueTypes,
886 user: &ValueTypes,
887 ) -> Result<ValueTypes, SchemaError> {
888 let float_list =
890 Self::merge_float_list_type(default.float_list.as_ref(), user.float_list.as_ref());
891
892 if let Some(ref fl) = float_list {
894 Self::validate_float_list_value_type(fl)?;
895 }
896
897 Ok(ValueTypes {
898 string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
899 float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
900 int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
901 boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
902 float_list,
903 sparse_vector: Self::merge_sparse_vector_type(
904 default.sparse_vector.as_ref(),
905 user.sparse_vector.as_ref(),
906 )?,
907 })
908 }
909
910 fn merge_string_type(
912 default: Option<&StringValueType>,
913 user: Option<&StringValueType>,
914 ) -> Result<Option<StringValueType>, SchemaError> {
915 match (default, user) {
916 (Some(default), Some(user)) => Ok(Some(StringValueType {
917 string_inverted_index: Self::merge_string_inverted_index_type(
918 default.string_inverted_index.as_ref(),
919 user.string_inverted_index.as_ref(),
920 )?,
921 fts_index: Self::merge_fts_index_type(
922 default.fts_index.as_ref(),
923 user.fts_index.as_ref(),
924 )?,
925 })),
926 (Some(default), None) => Ok(Some(default.clone())),
927 (None, Some(user)) => Ok(Some(user.clone())),
928 (None, None) => Ok(None),
929 }
930 }
931
932 fn merge_float_type(
934 default: Option<&FloatValueType>,
935 user: Option<&FloatValueType>,
936 ) -> Result<Option<FloatValueType>, SchemaError> {
937 match (default, user) {
938 (Some(default), Some(user)) => Ok(Some(FloatValueType {
939 float_inverted_index: Self::merge_float_inverted_index_type(
940 default.float_inverted_index.as_ref(),
941 user.float_inverted_index.as_ref(),
942 )?,
943 })),
944 (Some(default), None) => Ok(Some(default.clone())),
945 (None, Some(user)) => Ok(Some(user.clone())),
946 (None, None) => Ok(None),
947 }
948 }
949
950 fn merge_int_type(
952 default: Option<&IntValueType>,
953 user: Option<&IntValueType>,
954 ) -> Result<Option<IntValueType>, SchemaError> {
955 match (default, user) {
956 (Some(default), Some(user)) => Ok(Some(IntValueType {
957 int_inverted_index: Self::merge_int_inverted_index_type(
958 default.int_inverted_index.as_ref(),
959 user.int_inverted_index.as_ref(),
960 )?,
961 })),
962 (Some(default), None) => Ok(Some(default.clone())),
963 (None, Some(user)) => Ok(Some(user.clone())),
964 (None, None) => Ok(None),
965 }
966 }
967
968 fn merge_bool_type(
970 default: Option<&BoolValueType>,
971 user: Option<&BoolValueType>,
972 ) -> Result<Option<BoolValueType>, SchemaError> {
973 match (default, user) {
974 (Some(default), Some(user)) => Ok(Some(BoolValueType {
975 bool_inverted_index: Self::merge_bool_inverted_index_type(
976 default.bool_inverted_index.as_ref(),
977 user.bool_inverted_index.as_ref(),
978 )?,
979 })),
980 (Some(default), None) => Ok(Some(default.clone())),
981 (None, Some(user)) => Ok(Some(user.clone())),
982 (None, None) => Ok(None),
983 }
984 }
985
986 fn merge_float_list_type(
988 default: Option<&FloatListValueType>,
989 user: Option<&FloatListValueType>,
990 ) -> Option<FloatListValueType> {
991 match (default, user) {
992 (Some(default), Some(user)) => Some(FloatListValueType {
993 vector_index: Self::merge_vector_index_type(
994 default.vector_index.as_ref(),
995 user.vector_index.as_ref(),
996 ),
997 }),
998 (Some(default), None) => Some(default.clone()),
999 (None, Some(user)) => Some(user.clone()),
1000 (None, None) => None,
1001 }
1002 }
1003
1004 fn merge_sparse_vector_type(
1006 default: Option<&SparseVectorValueType>,
1007 user: Option<&SparseVectorValueType>,
1008 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1009 match (default, user) {
1010 (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
1011 sparse_vector_index: Self::merge_sparse_vector_index_type(
1012 default.sparse_vector_index.as_ref(),
1013 user.sparse_vector_index.as_ref(),
1014 )?,
1015 })),
1016 (Some(default), None) => Ok(Some(default.clone())),
1017 (None, Some(user)) => Ok(Some(user.clone())),
1018 (None, None) => Ok(None),
1019 }
1020 }
1021
1022 fn merge_string_inverted_index_type(
1024 default: Option<&StringInvertedIndexType>,
1025 user: Option<&StringInvertedIndexType>,
1026 ) -> Result<Option<StringInvertedIndexType>, SchemaError> {
1027 match (default, user) {
1028 (Some(_default), Some(user)) => {
1029 Ok(Some(StringInvertedIndexType {
1030 enabled: user.enabled, config: user.config.clone(), }))
1033 }
1034 (Some(default), None) => Ok(Some(default.clone())),
1035 (None, Some(user)) => Ok(Some(user.clone())),
1036 (None, None) => Ok(None),
1037 }
1038 }
1039
1040 fn merge_fts_index_type(
1041 default: Option<&FtsIndexType>,
1042 user: Option<&FtsIndexType>,
1043 ) -> Result<Option<FtsIndexType>, SchemaError> {
1044 match (default, user) {
1045 (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
1046 enabled: user.enabled,
1047 config: user.config.clone(),
1048 })),
1049 (Some(default), None) => Ok(Some(default.clone())),
1050 (None, Some(user)) => Ok(Some(user.clone())),
1051 (None, None) => Ok(None),
1052 }
1053 }
1054
1055 fn merge_float_inverted_index_type(
1056 default: Option<&FloatInvertedIndexType>,
1057 user: Option<&FloatInvertedIndexType>,
1058 ) -> Result<Option<FloatInvertedIndexType>, SchemaError> {
1059 match (default, user) {
1060 (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
1061 enabled: user.enabled,
1062 config: user.config.clone(),
1063 })),
1064 (Some(default), None) => Ok(Some(default.clone())),
1065 (None, Some(user)) => Ok(Some(user.clone())),
1066 (None, None) => Ok(None),
1067 }
1068 }
1069
1070 fn merge_int_inverted_index_type(
1071 default: Option<&IntInvertedIndexType>,
1072 user: Option<&IntInvertedIndexType>,
1073 ) -> Result<Option<IntInvertedIndexType>, SchemaError> {
1074 match (default, user) {
1075 (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
1076 enabled: user.enabled,
1077 config: user.config.clone(),
1078 })),
1079 (Some(default), None) => Ok(Some(default.clone())),
1080 (None, Some(user)) => Ok(Some(user.clone())),
1081 (None, None) => Ok(None),
1082 }
1083 }
1084
1085 fn merge_bool_inverted_index_type(
1086 default: Option<&BoolInvertedIndexType>,
1087 user: Option<&BoolInvertedIndexType>,
1088 ) -> Result<Option<BoolInvertedIndexType>, SchemaError> {
1089 match (default, user) {
1090 (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
1091 enabled: user.enabled,
1092 config: user.config.clone(),
1093 })),
1094 (Some(default), None) => Ok(Some(default.clone())),
1095 (None, Some(user)) => Ok(Some(user.clone())),
1096 (None, None) => Ok(None),
1097 }
1098 }
1099
1100 fn merge_vector_index_type(
1101 default: Option<&VectorIndexType>,
1102 user: Option<&VectorIndexType>,
1103 ) -> Option<VectorIndexType> {
1104 match (default, user) {
1105 (Some(default), Some(user)) => Some(VectorIndexType {
1106 enabled: user.enabled,
1107 config: Self::merge_vector_index_config(&default.config, &user.config),
1108 }),
1109 (Some(default), None) => Some(default.clone()),
1110 (None, Some(user)) => Some(user.clone()),
1111 (None, None) => None,
1112 }
1113 }
1114
1115 fn merge_sparse_vector_index_type(
1116 default: Option<&SparseVectorIndexType>,
1117 user: Option<&SparseVectorIndexType>,
1118 ) -> Result<Option<SparseVectorIndexType>, SchemaError> {
1119 match (default, user) {
1120 (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
1121 enabled: user.enabled,
1122 config: Self::merge_sparse_vector_index_config(&default.config, &user.config),
1123 })),
1124 (Some(default), None) => Ok(Some(default.clone())),
1125 (None, Some(user)) => Ok(Some(user.clone())),
1126 (None, None) => Ok(None),
1127 }
1128 }
1129
1130 fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), SchemaError> {
1133 if let Some(vector_index) = &float_list.vector_index {
1134 if let Some(hnsw) = &vector_index.config.hnsw {
1135 hnsw.validate().map_err(SchemaError::InvalidHnswConfig)?;
1136 }
1137 if let Some(spann) = &vector_index.config.spann {
1138 spann.validate().map_err(SchemaError::InvalidSpannConfig)?;
1139 }
1140 }
1141 Ok(())
1142 }
1143
1144 fn merge_vector_index_config(
1146 default: &VectorIndexConfig,
1147 user: &VectorIndexConfig,
1148 ) -> VectorIndexConfig {
1149 VectorIndexConfig {
1150 space: user.space.clone().or(default.space.clone()),
1151 embedding_function: user
1152 .embedding_function
1153 .clone()
1154 .or(default.embedding_function.clone()),
1155 source_key: user.source_key.clone().or(default.source_key.clone()),
1156 hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1157 spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
1158 }
1159 }
1160
1161 fn merge_sparse_vector_index_config(
1163 default: &SparseVectorIndexConfig,
1164 user: &SparseVectorIndexConfig,
1165 ) -> SparseVectorIndexConfig {
1166 SparseVectorIndexConfig {
1167 embedding_function: user
1168 .embedding_function
1169 .clone()
1170 .or(default.embedding_function.clone()),
1171 source_key: user.source_key.clone().or(default.source_key.clone()),
1172 bm25: user.bm25.or(default.bm25),
1173 }
1174 }
1175
1176 fn merge_hnsw_configs(
1178 default_hnsw: Option<&HnswIndexConfig>,
1179 user_hnsw: Option<&HnswIndexConfig>,
1180 ) -> Option<HnswIndexConfig> {
1181 match (default_hnsw, user_hnsw) {
1182 (Some(default), Some(user)) => Some(HnswIndexConfig {
1183 ef_construction: user.ef_construction.or(default.ef_construction),
1184 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1185 ef_search: user.ef_search.or(default.ef_search),
1186 num_threads: user.num_threads.or(default.num_threads),
1187 batch_size: user.batch_size.or(default.batch_size),
1188 sync_threshold: user.sync_threshold.or(default.sync_threshold),
1189 resize_factor: user.resize_factor.or(default.resize_factor),
1190 }),
1191 (Some(default), None) => Some(default.clone()),
1192 (None, Some(user)) => Some(user.clone()),
1193 (None, None) => None,
1194 }
1195 }
1196
1197 fn merge_spann_configs(
1199 default_spann: Option<&SpannIndexConfig>,
1200 user_spann: Option<&SpannIndexConfig>,
1201 ) -> Option<SpannIndexConfig> {
1202 match (default_spann, user_spann) {
1203 (Some(default), Some(user)) => Some(SpannIndexConfig {
1204 search_nprobe: user.search_nprobe.or(default.search_nprobe),
1205 search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1206 search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1207 nreplica_count: user.nreplica_count.or(default.nreplica_count),
1208 write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1209 write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1210 split_threshold: user.split_threshold.or(default.split_threshold),
1211 num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1212 initial_lambda: user.initial_lambda.or(default.initial_lambda),
1213 reassign_neighbor_count: user
1214 .reassign_neighbor_count
1215 .or(default.reassign_neighbor_count),
1216 merge_threshold: user.merge_threshold.or(default.merge_threshold),
1217 num_centers_to_merge_to: user
1218 .num_centers_to_merge_to
1219 .or(default.num_centers_to_merge_to),
1220 write_nprobe: user.write_nprobe.or(default.write_nprobe),
1221 ef_construction: user.ef_construction.or(default.ef_construction),
1222 ef_search: user.ef_search.or(default.ef_search),
1223 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1224 }),
1225 (Some(default), None) => Some(default.clone()),
1226 (None, Some(user)) => Some(user.clone()),
1227 (None, None) => None,
1228 }
1229 }
1230
1231 pub fn reconcile_with_collection_config(
1238 schema: Schema,
1239 collection_config: InternalCollectionConfiguration,
1240 ) -> Result<Schema, SchemaError> {
1241 if collection_config.is_default() {
1243 return Ok(schema);
1245 }
1246
1247 if !Self::is_schema_default(&schema) {
1249 return Err(SchemaError::ConfigAndSchemaConflict);
1251 }
1252
1253 Self::convert_collection_config_to_schema(collection_config)
1255 }
1256
1257 pub fn reconcile_schema_and_config(
1258 schema: Option<Schema>,
1259 configuration: Option<InternalCollectionConfiguration>,
1260 ) -> Result<Schema, SchemaError> {
1261 let reconciled_schema = Self::reconcile_with_defaults(schema)?;
1262 if let Some(config) = configuration {
1263 Self::reconcile_with_collection_config(reconciled_schema, config)
1264 } else {
1265 Ok(reconciled_schema)
1266 }
1267 }
1268
1269 pub fn default_with_embedding_function(
1270 embedding_function: EmbeddingFunctionConfiguration,
1271 ) -> Schema {
1272 let mut schema = Schema::new_default(KnnIndex::Spann);
1273 if let Some(float_list) = &mut schema.defaults.float_list {
1274 if let Some(vector_index) = &mut float_list.vector_index {
1275 vector_index.config.embedding_function = Some(embedding_function.clone());
1276 }
1277 }
1278 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1279 if let Some(float_list) = &mut embedding_types.float_list {
1280 if let Some(vector_index) = &mut float_list.vector_index {
1281 vector_index.config.embedding_function = Some(embedding_function);
1282 }
1283 }
1284 }
1285 schema
1286 }
1287
1288 fn is_schema_default(schema: &Schema) -> bool {
1290 let default_hnsw = Schema::new_default(KnnIndex::Hnsw);
1292 let default_spann = Schema::new_default(KnnIndex::Spann);
1293
1294 schema == &default_hnsw || schema == &default_spann
1295 }
1296
1297 fn convert_collection_config_to_schema(
1299 collection_config: InternalCollectionConfiguration,
1300 ) -> Result<Schema, SchemaError> {
1301 let mut schema = Schema::new_default(KnnIndex::Spann); let vector_config = match collection_config.vector_index {
1306 VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
1307 space: Some(hnsw_config.space),
1308 embedding_function: collection_config.embedding_function,
1309 source_key: Some(DOCUMENT_KEY.to_string()), hnsw: Some(HnswIndexConfig {
1311 ef_construction: Some(hnsw_config.ef_construction),
1312 max_neighbors: Some(hnsw_config.max_neighbors),
1313 ef_search: Some(hnsw_config.ef_search),
1314 num_threads: Some(hnsw_config.num_threads),
1315 batch_size: Some(hnsw_config.batch_size),
1316 sync_threshold: Some(hnsw_config.sync_threshold),
1317 resize_factor: Some(hnsw_config.resize_factor),
1318 }),
1319 spann: None,
1320 },
1321 VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
1322 space: Some(spann_config.space),
1323 embedding_function: collection_config.embedding_function,
1324 source_key: Some(DOCUMENT_KEY.to_string()), hnsw: None,
1326 spann: Some(SpannIndexConfig {
1327 search_nprobe: Some(spann_config.search_nprobe),
1328 search_rng_factor: Some(spann_config.search_rng_factor),
1329 search_rng_epsilon: Some(spann_config.search_rng_epsilon),
1330 nreplica_count: Some(spann_config.nreplica_count),
1331 write_rng_factor: Some(spann_config.write_rng_factor),
1332 write_rng_epsilon: Some(spann_config.write_rng_epsilon),
1333 split_threshold: Some(spann_config.split_threshold),
1334 num_samples_kmeans: Some(spann_config.num_samples_kmeans),
1335 initial_lambda: Some(spann_config.initial_lambda),
1336 reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
1337 merge_threshold: Some(spann_config.merge_threshold),
1338 num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
1339 write_nprobe: Some(spann_config.write_nprobe),
1340 ef_construction: Some(spann_config.ef_construction),
1341 ef_search: Some(spann_config.ef_search),
1342 max_neighbors: Some(spann_config.max_neighbors),
1343 }),
1344 },
1345 };
1346
1347 if let Some(float_list) = &mut schema.defaults.float_list {
1350 if let Some(vector_index) = &mut float_list.vector_index {
1351 vector_index.config = vector_config.clone();
1352 }
1353 }
1354
1355 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1358 if let Some(float_list) = &mut embedding_types.float_list {
1359 if let Some(vector_index) = &mut float_list.vector_index {
1360 vector_index.config = vector_config;
1361 }
1362 }
1363 }
1364
1365 Ok(schema)
1366 }
1367
1368 pub fn is_metadata_type_index_enabled(
1370 &self,
1371 key: &str,
1372 value_type: MetadataValueType,
1373 ) -> Result<bool, SchemaError> {
1374 let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1375
1376 match value_type {
1377 MetadataValueType::Bool => match &v_type.boolean {
1378 Some(bool_type) => match &bool_type.bool_inverted_index {
1379 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1380 None => Err(SchemaError::MissingIndexConfiguration {
1381 key: key.to_string(),
1382 value_type: "bool".to_string(),
1383 }),
1384 },
1385 None => match &self.defaults.boolean {
1386 Some(bool_type) => match &bool_type.bool_inverted_index {
1387 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1388 None => Err(SchemaError::MissingIndexConfiguration {
1389 key: key.to_string(),
1390 value_type: "bool".to_string(),
1391 }),
1392 },
1393 None => Err(SchemaError::MissingIndexConfiguration {
1394 key: key.to_string(),
1395 value_type: "bool".to_string(),
1396 }),
1397 },
1398 },
1399 MetadataValueType::Int => match &v_type.int {
1400 Some(int_type) => match &int_type.int_inverted_index {
1401 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1402 None => Err(SchemaError::MissingIndexConfiguration {
1403 key: key.to_string(),
1404 value_type: "int".to_string(),
1405 }),
1406 },
1407 None => match &self.defaults.int {
1408 Some(int_type) => match &int_type.int_inverted_index {
1409 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1410 None => Err(SchemaError::MissingIndexConfiguration {
1411 key: key.to_string(),
1412 value_type: "int".to_string(),
1413 }),
1414 },
1415 None => Err(SchemaError::MissingIndexConfiguration {
1416 key: key.to_string(),
1417 value_type: "int".to_string(),
1418 }),
1419 },
1420 },
1421 MetadataValueType::Float => match &v_type.float {
1422 Some(float_type) => match &float_type.float_inverted_index {
1423 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1424 None => Err(SchemaError::MissingIndexConfiguration {
1425 key: key.to_string(),
1426 value_type: "float".to_string(),
1427 }),
1428 },
1429 None => match &self.defaults.float {
1430 Some(float_type) => match &float_type.float_inverted_index {
1431 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1432 None => Err(SchemaError::MissingIndexConfiguration {
1433 key: key.to_string(),
1434 value_type: "float".to_string(),
1435 }),
1436 },
1437 None => Err(SchemaError::MissingIndexConfiguration {
1438 key: key.to_string(),
1439 value_type: "float".to_string(),
1440 }),
1441 },
1442 },
1443 MetadataValueType::Str => match &v_type.string {
1444 Some(string_type) => match &string_type.string_inverted_index {
1445 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1446 None => Err(SchemaError::MissingIndexConfiguration {
1447 key: key.to_string(),
1448 value_type: "string".to_string(),
1449 }),
1450 },
1451 None => match &self.defaults.string {
1452 Some(string_type) => match &string_type.string_inverted_index {
1453 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1454 None => Err(SchemaError::MissingIndexConfiguration {
1455 key: key.to_string(),
1456 value_type: "string".to_string(),
1457 }),
1458 },
1459 None => Err(SchemaError::MissingIndexConfiguration {
1460 key: key.to_string(),
1461 value_type: "string".to_string(),
1462 }),
1463 },
1464 },
1465 MetadataValueType::SparseVector => match &v_type.sparse_vector {
1466 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1467 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1468 None => Err(SchemaError::MissingIndexConfiguration {
1469 key: key.to_string(),
1470 value_type: "sparse_vector".to_string(),
1471 }),
1472 },
1473 None => match &self.defaults.sparse_vector {
1474 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1475 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1476 None => Err(SchemaError::MissingIndexConfiguration {
1477 key: key.to_string(),
1478 value_type: "sparse_vector".to_string(),
1479 }),
1480 },
1481 None => Err(SchemaError::MissingIndexConfiguration {
1482 key: key.to_string(),
1483 value_type: "sparse_vector".to_string(),
1484 }),
1485 },
1486 },
1487 }
1488 }
1489
1490 pub fn is_metadata_where_indexing_enabled(
1491 &self,
1492 where_clause: &Where,
1493 ) -> Result<(), FilterValidationError> {
1494 match where_clause {
1495 Where::Composite(composite) => {
1496 for child in &composite.children {
1497 self.is_metadata_where_indexing_enabled(child)?;
1498 }
1499 Ok(())
1500 }
1501 Where::Document(_) => Ok(()),
1502 Where::Metadata(expression) => {
1503 let value_type = match &expression.comparison {
1504 MetadataComparison::Primitive(_, value) => value.value_type(),
1505 MetadataComparison::Set(_, set_value) => set_value.value_type(),
1506 };
1507 let is_enabled = self
1508 .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1509 .map_err(FilterValidationError::Schema)?;
1510 if !is_enabled {
1511 return Err(FilterValidationError::IndexingDisabled {
1512 key: expression.key.clone(),
1513 value_type,
1514 });
1515 }
1516 Ok(())
1517 }
1518 }
1519 }
1520
1521 pub fn is_knn_key_indexing_enabled(
1522 &self,
1523 key: &str,
1524 query: &QueryVector,
1525 ) -> Result<(), FilterValidationError> {
1526 match query {
1527 QueryVector::Sparse(_) => {
1528 let is_enabled = self
1529 .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1530 .map_err(FilterValidationError::Schema)?;
1531 if !is_enabled {
1532 return Err(FilterValidationError::IndexingDisabled {
1533 key: key.to_string(),
1534 value_type: MetadataValueType::SparseVector,
1535 });
1536 }
1537 Ok(())
1538 }
1539 QueryVector::Dense(_) => {
1540 Ok(())
1543 }
1544 }
1545 }
1546
1547 pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
1548 let value_types = self.keys.entry(key.to_string()).or_default();
1549 match value_type {
1550 MetadataValueType::Bool => {
1551 if value_types.boolean.is_none() {
1552 value_types.boolean = self.defaults.boolean.clone();
1553 return true;
1554 }
1555 }
1556 MetadataValueType::Int => {
1557 if value_types.int.is_none() {
1558 value_types.int = self.defaults.int.clone();
1559 return true;
1560 }
1561 }
1562 MetadataValueType::Float => {
1563 if value_types.float.is_none() {
1564 value_types.float = self.defaults.float.clone();
1565 return true;
1566 }
1567 }
1568 MetadataValueType::Str => {
1569 if value_types.string.is_none() {
1570 value_types.string = self.defaults.string.clone();
1571 return true;
1572 }
1573 }
1574 MetadataValueType::SparseVector => {
1575 if value_types.sparse_vector.is_none() {
1576 value_types.sparse_vector = self.defaults.sparse_vector.clone();
1577 return true;
1578 }
1579 }
1580 }
1581 false
1582 }
1583
1584 pub fn create_index(
1624 mut self,
1625 key: Option<&str>,
1626 config: IndexConfig,
1627 ) -> Result<Self, SchemaBuilderError> {
1628 match (&key, &config) {
1630 (None, IndexConfig::Vector(cfg)) => {
1631 self._set_vector_index_config_builder(cfg.clone());
1632 return Ok(self);
1633 }
1634 (None, IndexConfig::Fts(cfg)) => {
1635 self._set_fts_index_config_builder(cfg.clone());
1636 return Ok(self);
1637 }
1638 (Some(k), IndexConfig::Vector(_)) => {
1639 return Err(SchemaBuilderError::VectorIndexMustBeGlobal { key: k.to_string() });
1640 }
1641 (Some(k), IndexConfig::Fts(_)) => {
1642 return Err(SchemaBuilderError::FtsIndexMustBeGlobal { key: k.to_string() });
1643 }
1644 _ => {}
1645 }
1646
1647 if let Some(k) = key {
1649 if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
1650 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
1651 key: k.to_string(),
1652 });
1653 }
1654 }
1655
1656 if key.is_none() && matches!(config, IndexConfig::SparseVector(_)) {
1658 return Err(SchemaBuilderError::SparseVectorRequiresKey);
1659 }
1660
1661 match key {
1663 Some(k) => self._set_index_for_key_builder(k, config, true)?,
1664 None => self._set_index_in_defaults_builder(config, true)?,
1665 }
1666
1667 Ok(self)
1668 }
1669
1670 pub fn delete_index(
1698 mut self,
1699 key: Option<&str>,
1700 config: IndexConfig,
1701 ) -> Result<Self, SchemaBuilderError> {
1702 if let Some(k) = key {
1704 if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
1705 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
1706 key: k.to_string(),
1707 });
1708 }
1709 }
1710
1711 match &config {
1713 IndexConfig::Vector(_) => {
1714 return Err(SchemaBuilderError::VectorIndexDeletionNotSupported);
1715 }
1716 IndexConfig::Fts(_) => {
1717 return Err(SchemaBuilderError::FtsIndexDeletionNotSupported);
1718 }
1719 IndexConfig::SparseVector(_) => {
1720 return Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported);
1721 }
1722 _ => {}
1723 }
1724
1725 match key {
1727 Some(k) => self._set_index_for_key_builder(k, config, false)?,
1728 None => self._set_index_in_defaults_builder(config, false)?,
1729 }
1730
1731 Ok(self)
1732 }
1733
1734 fn _set_vector_index_config_builder(&mut self, config: VectorIndexConfig) {
1736 if let Some(float_list) = &mut self.defaults.float_list {
1738 if let Some(vector_index) = &mut float_list.vector_index {
1739 vector_index.config = config.clone();
1740 }
1741 }
1742
1743 if let Some(embedding_types) = self.keys.get_mut(EMBEDDING_KEY) {
1745 if let Some(float_list) = &mut embedding_types.float_list {
1746 if let Some(vector_index) = &mut float_list.vector_index {
1747 let mut updated_config = config;
1748 updated_config.source_key = Some(DOCUMENT_KEY.to_string());
1750 vector_index.config = updated_config;
1751 }
1752 }
1753 }
1754 }
1755
1756 fn _set_fts_index_config_builder(&mut self, config: FtsIndexConfig) {
1758 if let Some(string) = &mut self.defaults.string {
1760 if let Some(fts_index) = &mut string.fts_index {
1761 fts_index.config = config.clone();
1762 }
1763 }
1764
1765 if let Some(document_types) = self.keys.get_mut(DOCUMENT_KEY) {
1767 if let Some(string) = &mut document_types.string {
1768 if let Some(fts_index) = &mut string.fts_index {
1769 fts_index.config = config;
1770 }
1771 }
1772 }
1773 }
1774
1775 fn _set_index_for_key_builder(
1777 &mut self,
1778 key: &str,
1779 config: IndexConfig,
1780 enabled: bool,
1781 ) -> Result<(), SchemaBuilderError> {
1782 if enabled && matches!(config, IndexConfig::SparseVector(_)) {
1784 let existing_key = self
1786 .keys
1787 .iter()
1788 .find(|(k, v)| {
1789 k.as_str() != key
1790 && v.sparse_vector
1791 .as_ref()
1792 .and_then(|sv| sv.sparse_vector_index.as_ref())
1793 .map(|idx| idx.enabled)
1794 .unwrap_or(false)
1795 })
1796 .map(|(k, _)| k.clone());
1797
1798 if let Some(existing_key) = existing_key {
1799 return Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key });
1800 }
1801 }
1802
1803 let value_types = self.keys.entry(key.to_string()).or_default();
1805
1806 match config {
1808 IndexConfig::Vector(_) => {
1809 return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
1810 key: key.to_string(),
1811 });
1812 }
1813 IndexConfig::Fts(_) => {
1814 return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
1815 key: key.to_string(),
1816 });
1817 }
1818 IndexConfig::SparseVector(cfg) => {
1819 value_types.sparse_vector = Some(SparseVectorValueType {
1820 sparse_vector_index: Some(SparseVectorIndexType {
1821 enabled,
1822 config: cfg,
1823 }),
1824 });
1825 }
1826 IndexConfig::StringInverted(cfg) => {
1827 if value_types.string.is_none() {
1828 value_types.string = Some(StringValueType {
1829 fts_index: None,
1830 string_inverted_index: None,
1831 });
1832 }
1833 if let Some(string) = &mut value_types.string {
1834 string.string_inverted_index = Some(StringInvertedIndexType {
1835 enabled,
1836 config: cfg,
1837 });
1838 }
1839 }
1840 IndexConfig::IntInverted(cfg) => {
1841 value_types.int = Some(IntValueType {
1842 int_inverted_index: Some(IntInvertedIndexType {
1843 enabled,
1844 config: cfg,
1845 }),
1846 });
1847 }
1848 IndexConfig::FloatInverted(cfg) => {
1849 value_types.float = Some(FloatValueType {
1850 float_inverted_index: Some(FloatInvertedIndexType {
1851 enabled,
1852 config: cfg,
1853 }),
1854 });
1855 }
1856 IndexConfig::BoolInverted(cfg) => {
1857 value_types.boolean = Some(BoolValueType {
1858 bool_inverted_index: Some(BoolInvertedIndexType {
1859 enabled,
1860 config: cfg,
1861 }),
1862 });
1863 }
1864 }
1865
1866 Ok(())
1867 }
1868
1869 fn _set_index_in_defaults_builder(
1871 &mut self,
1872 config: IndexConfig,
1873 enabled: bool,
1874 ) -> Result<(), SchemaBuilderError> {
1875 match config {
1876 IndexConfig::Vector(_) => {
1877 return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
1878 key: "defaults".to_string(),
1879 });
1880 }
1881 IndexConfig::Fts(_) => {
1882 return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
1883 key: "defaults".to_string(),
1884 });
1885 }
1886 IndexConfig::SparseVector(cfg) => {
1887 self.defaults.sparse_vector = Some(SparseVectorValueType {
1888 sparse_vector_index: Some(SparseVectorIndexType {
1889 enabled,
1890 config: cfg,
1891 }),
1892 });
1893 }
1894 IndexConfig::StringInverted(cfg) => {
1895 if self.defaults.string.is_none() {
1896 self.defaults.string = Some(StringValueType {
1897 fts_index: None,
1898 string_inverted_index: None,
1899 });
1900 }
1901 if let Some(string) = &mut self.defaults.string {
1902 string.string_inverted_index = Some(StringInvertedIndexType {
1903 enabled,
1904 config: cfg,
1905 });
1906 }
1907 }
1908 IndexConfig::IntInverted(cfg) => {
1909 self.defaults.int = Some(IntValueType {
1910 int_inverted_index: Some(IntInvertedIndexType {
1911 enabled,
1912 config: cfg,
1913 }),
1914 });
1915 }
1916 IndexConfig::FloatInverted(cfg) => {
1917 self.defaults.float = Some(FloatValueType {
1918 float_inverted_index: Some(FloatInvertedIndexType {
1919 enabled,
1920 config: cfg,
1921 }),
1922 });
1923 }
1924 IndexConfig::BoolInverted(cfg) => {
1925 self.defaults.boolean = Some(BoolValueType {
1926 bool_inverted_index: Some(BoolInvertedIndexType {
1927 enabled,
1928 config: cfg,
1929 }),
1930 });
1931 }
1932 }
1933
1934 Ok(())
1935 }
1936}
1937
/// Configuration of a dense vector index.
///
/// Every field is optional. Fields that are `None` are omitted when
/// serializing, and unknown fields are rejected when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct VectorIndexConfig {
    /// Vector space of the index (see [`Space`]).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub space: Option<Space>,
    /// Embedding function configuration attached to this index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key naming the source of the indexed data. The schema builder sets
    /// this to `DOCUMENT_KEY` on the embedding key's index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// HNSW-specific parameters, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw: Option<HnswIndexConfig>,
    /// SPANN-specific parameters, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub spann: Option<SpannIndexConfig>,
}
1962
/// Optional HNSW index parameters.
///
/// Every field is optional and omitted from serialized output when `None`;
/// unknown fields are rejected when deserializing. `batch_size` and
/// `sync_threshold` carry a minimum-value validation rule.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct HnswIndexConfig {
    /// `ef_construction` parameter (not validated here).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_construction: Option<usize>,
    /// `max_neighbors` parameter (not validated here).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_neighbors: Option<usize>,
    /// `ef_search` parameter (not validated here).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_search: Option<usize>,
    /// `num_threads` parameter (not validated here).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_threads: Option<usize>,
    /// Batch size; validated to be at least 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub batch_size: Option<usize>,
    /// Sync threshold; validated to be at least 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub sync_threshold: Option<usize>,
    /// `resize_factor` parameter (not validated here).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resize_factor: Option<f64>,
}
1985
/// Optional SPANN index parameters.
///
/// Every field is optional and omitted from serialized output when `None`;
/// unknown fields are rejected when deserializing. Each field carries a
/// range validation rule, noted on the field. Several rules have equal
/// `min` and `max`, which pins the field to a single accepted value.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SpannIndexConfig {
    /// Validated range: at most 128.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 128))]
    pub search_nprobe: Option<u32>,
    /// Validated range: exactly 1.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub search_rng_factor: Option<f32>,
    /// Validated range: 5.0 to 10.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub search_rng_epsilon: Option<f32>,
    /// Validated range: at most 8.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub nreplica_count: Option<u32>,
    /// Validated range: exactly 1.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub write_rng_factor: Option<f32>,
    /// Validated range: 5.0 to 10.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub write_rng_epsilon: Option<f32>,
    /// Validated range: 50 to 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 50, max = 200))]
    pub split_threshold: Option<u32>,
    /// Validated range: at most 1000.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 1000))]
    pub num_samples_kmeans: Option<usize>,
    /// Validated range: exactly 100.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 100.0, max = 100.0))]
    pub initial_lambda: Option<f32>,
    /// Validated range: at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub reassign_neighbor_count: Option<u32>,
    /// Validated range: 25 to 100.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 25, max = 100))]
    pub merge_threshold: Option<u32>,
    /// Validated range: at most 8.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub num_centers_to_merge_to: Option<u32>,
    /// Validated range: at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub write_nprobe: Option<u32>,
    /// Validated range: at most 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_construction: Option<usize>,
    /// Validated range: at most 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_search: Option<usize>,
    /// Validated range: at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub max_neighbors: Option<usize>,
}
2040
/// Configuration of a sparse vector index.
///
/// Every field is optional and omitted from serialized output when `None`;
/// unknown fields are rejected when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SparseVectorIndexConfig {
    /// Embedding function configuration attached to this index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key naming the source of the indexed data.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// Optional BM25 flag. NOTE(review): its exact effect is not visible in
    /// this part of the file; confirm against the index implementation.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bm25: Option<bool>,
}
2055
/// Configuration of a full-text-search index.
///
/// Currently has no fields; because unknown fields are denied, only an empty
/// object deserializes into it.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FtsIndexConfig {}
2062
/// Configuration of a string inverted index.
///
/// Currently has no fields; because unknown fields are denied, only an empty
/// object deserializes into it.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct StringInvertedIndexConfig {}
2069
/// Configuration of an int inverted index.
///
/// Currently has no fields; because unknown fields are denied, only an empty
/// object deserializes into it.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct IntInvertedIndexConfig {}
2076
/// Configuration of a float inverted index.
///
/// Currently has no fields; because unknown fields are denied, only an empty
/// object deserializes into it.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FloatInvertedIndexConfig {}
2083
/// Configuration of a bool inverted index.
///
/// Currently has no fields; because unknown fields are denied, only an empty
/// object deserializes into it.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct BoolInvertedIndexConfig {}
2090
/// One index configuration of any supported kind.
///
/// Taken by `create_index` and `delete_index` to select which index to
/// act on. Each concrete config type converts into this enum via `From`.
#[derive(Clone, Debug)]
pub enum IndexConfig {
    /// Dense vector index configuration.
    Vector(VectorIndexConfig),
    /// Sparse vector index configuration.
    SparseVector(SparseVectorIndexConfig),
    /// Full-text-search index configuration.
    Fts(FtsIndexConfig),
    /// String inverted index configuration.
    StringInverted(StringInvertedIndexConfig),
    /// Int inverted index configuration.
    IntInverted(IntInvertedIndexConfig),
    /// Float inverted index configuration.
    FloatInverted(FloatInvertedIndexConfig),
    /// Bool inverted index configuration.
    BoolInverted(BoolInvertedIndexConfig),
}
2106
2107impl From<VectorIndexConfig> for IndexConfig {
2109 fn from(config: VectorIndexConfig) -> Self {
2110 IndexConfig::Vector(config)
2111 }
2112}
2113
2114impl From<SparseVectorIndexConfig> for IndexConfig {
2115 fn from(config: SparseVectorIndexConfig) -> Self {
2116 IndexConfig::SparseVector(config)
2117 }
2118}
2119
2120impl From<FtsIndexConfig> for IndexConfig {
2121 fn from(config: FtsIndexConfig) -> Self {
2122 IndexConfig::Fts(config)
2123 }
2124}
2125
2126impl From<StringInvertedIndexConfig> for IndexConfig {
2127 fn from(config: StringInvertedIndexConfig) -> Self {
2128 IndexConfig::StringInverted(config)
2129 }
2130}
2131
2132impl From<IntInvertedIndexConfig> for IndexConfig {
2133 fn from(config: IntInvertedIndexConfig) -> Self {
2134 IndexConfig::IntInverted(config)
2135 }
2136}
2137
2138impl From<FloatInvertedIndexConfig> for IndexConfig {
2139 fn from(config: FloatInvertedIndexConfig) -> Self {
2140 IndexConfig::FloatInverted(config)
2141 }
2142}
2143
2144impl From<BoolInvertedIndexConfig> for IndexConfig {
2145 fn from(config: BoolInvertedIndexConfig) -> Self {
2146 IndexConfig::BoolInverted(config)
2147 }
2148}
2149
2150#[cfg(test)]
2151mod tests {
2152 use super::*;
2153 use crate::hnsw_configuration::Space;
2154 use crate::metadata::SparseVector;
2155 use crate::{
2156 EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
2157 };
2158 use serde_json::json;
2159
#[test]
fn test_reconcile_with_defaults_none_user_schema() {
    // Without a user schema, reconciliation must yield the default SPANN schema.
    let reconciled = Schema::reconcile_with_defaults(None).unwrap();
    assert_eq!(reconciled, Schema::new_default(KnnIndex::Spann));
}
2167
/// A user schema with no defaults and no keys must reconcile to the default
/// SPANN schema.
#[test]
fn test_reconcile_with_defaults_empty_user_schema() {
    let blank = Schema {
        keys: HashMap::new(),
        defaults: ValueTypes::default(),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(blank)).unwrap();

    assert_eq!(reconciled, Schema::new_default(KnnIndex::Spann));
}
2180
/// A user default that disables the string inverted index must survive
/// reconciliation, while the float and int defaults are still populated.
#[test]
fn test_reconcile_with_defaults_user_overrides_string_enabled() {
    let string_override = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: None,
    };
    let user_schema = Schema {
        defaults: ValueTypes {
            string: Some(string_override),
            ..Default::default()
        },
        keys: HashMap::new(),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();
    let defaults = &reconciled.defaults;

    // The user's `enabled: false` wins over the built-in default.
    let string_type = defaults.string.as_ref().unwrap();
    let inverted_index = string_type.string_inverted_index.as_ref().unwrap();
    assert!(!inverted_index.enabled);

    // Value types the user did not mention are filled in.
    assert!(defaults.float.is_some());
    assert!(defaults.int.is_some());
}
2215
/// Merges a user vector-index default into the default HNSW schema by hand and
/// checks that user-set fields win while unset fields keep the default values.
#[test]
fn test_reconcile_with_defaults_user_overrides_vector_config() {
    let user_hnsw = HnswIndexConfig {
        ef_construction: Some(500),
        max_neighbors: None,
        ef_search: None,
        num_threads: None,
        batch_size: None,
        sync_threshold: None,
        resize_factor: None,
    };
    let user_vector_index = VectorIndexType {
        enabled: true,
        config: VectorIndexConfig {
            space: Some(Space::L2),
            embedding_function: None,
            source_key: Some("custom_key".to_string()),
            hnsw: Some(user_hnsw),
            spann: None,
        },
    };
    let user_schema = Schema {
        defaults: ValueTypes {
            float_list: Some(FloatListValueType {
                vector_index: Some(user_vector_index),
            }),
            ..Default::default()
        },
        keys: HashMap::new(),
    };

    // Merge against the HNSW defaults: defaults first, then key by key.
    let baseline = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults =
        Schema::merge_value_types(&baseline.defaults, &user_schema.defaults).unwrap();
    let mut merged_keys = baseline.keys.clone();
    for (key, overrides) in user_schema.keys {
        let resolved = match merged_keys.get(&key) {
            Some(existing) => Schema::merge_value_types(existing, &overrides).unwrap(),
            None => overrides,
        };
        merged_keys.insert(key, resolved);
    }
    let merged = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
    };

    let float_list = merged.defaults.float_list.as_ref().unwrap();
    let vector_config = &float_list.vector_index.as_ref().unwrap().config;

    // Fields the user set explicitly.
    assert_eq!(vector_config.space, Some(Space::L2));
    assert_eq!(vector_config.source_key, Some(String::from("custom_key")));
    let merged_hnsw = vector_config.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(500));

    // Fields the user left unset fall back to the defaults.
    assert_eq!(
        vector_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(merged_hnsw.max_neighbors, Some(default_m()));
}
2295
/// A user key override must be kept alongside the built-in embedding and
/// document keys after reconciliation.
#[test]
fn test_reconcile_with_defaults_keys() {
    let custom_key_types = ValueTypes {
        string: Some(StringValueType {
            fts_index: Some(FtsIndexType {
                enabled: true,
                config: FtsIndexConfig {},
            }),
            string_inverted_index: Some(StringInvertedIndexType {
                enabled: false,
                config: StringInvertedIndexConfig {},
            }),
        }),
        ..Default::default()
    };
    let user_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::from([("custom_key".to_string(), custom_key_types)]),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();
    let keys = &reconciled.keys;

    // Built-in keys are still present.
    assert!(keys.contains_key(EMBEDDING_KEY));
    assert!(keys.contains_key(DOCUMENT_KEY));

    // The user's key is present with its FTS index enabled.
    assert!(keys.contains_key("custom_key"));
    let custom_override = keys.get("custom_key").unwrap();
    let fts_index = custom_override
        .string
        .as_ref()
        .unwrap()
        .fts_index
        .as_ref()
        .unwrap();
    assert!(fts_index.enabled);
}
2342
/// A user override for the built-in embedding key must replace that key's
/// `enabled`, `space` and `source_key` values.
#[test]
fn test_reconcile_with_defaults_override_existing_key() {
    let overriding_vector_index = VectorIndexType {
        enabled: false,
        config: VectorIndexConfig {
            space: Some(Space::Ip),
            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
            source_key: Some("custom_embedding_key".to_string()),
            hnsw: None,
            spann: None,
        },
    };
    let embedding_override = ValueTypes {
        float_list: Some(FloatListValueType {
            vector_index: Some(overriding_vector_index),
        }),
        ..Default::default()
    };
    let user_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::from([(EMBEDDING_KEY.to_string(), embedding_override)]),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();

    let embedding_types = reconciled.keys.get(EMBEDDING_KEY).unwrap();
    let float_list = embedding_types.float_list.as_ref().unwrap();
    let vector_index = float_list.vector_index.as_ref().unwrap();

    assert!(!vector_index.enabled);
    assert_eq!(vector_index.config.space, Some(Space::Ip));
    assert_eq!(
        vector_index.config.source_key,
        Some(String::from("custom_embedding_key"))
    );
}
2390
/// An HNSW collection configuration converted to a schema and back must equal
/// the configuration it started from.
#[test]
fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
    let hnsw = InternalHnswConfiguration {
        space: Space::Cosine,
        ef_construction: 128,
        ef_search: 96,
        max_neighbors: 42,
        num_threads: 8,
        resize_factor: 1.5,
        sync_threshold: 2_000,
        batch_size: 256,
    };
    let embedding_function =
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "custom".to_string(),
            config: json!({"alpha": 1}),
        });
    let original = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(hnsw),
        embedding_function: Some(embedding_function),
    };

    let schema = Schema::convert_collection_config_to_schema(original.clone()).unwrap();
    let round_tripped = InternalCollectionConfiguration::try_from(&schema).unwrap();

    assert_eq!(round_tripped, original);
}
2418
/// A SPANN collection configuration converted to a schema and back must equal
/// the configuration it started from.
#[test]
fn test_convert_schema_to_collection_config_spann_roundtrip() {
    let spann = InternalSpannConfiguration {
        space: Space::Cosine,
        search_nprobe: 11,
        search_rng_factor: 1.7,
        write_nprobe: 5,
        nreplica_count: 3,
        split_threshold: 150,
        merge_threshold: 80,
        ef_construction: 120,
        ef_search: 90,
        max_neighbors: 40,
        ..Default::default()
    };
    let embedding_function =
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "custom".to_string(),
            config: json!({"beta": true}),
        });
    let original = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(spann),
        embedding_function: Some(embedding_function),
    };

    let schema = Schema::convert_collection_config_to_schema(original.clone()).unwrap();
    let round_tripped = InternalCollectionConfiguration::try_from(&schema).unwrap();

    assert_eq!(round_tripped, original);
}
2451
/// A default HNSW schema whose embedding key additionally carries a SPANN
/// config must fail conversion to a collection configuration.
#[test]
fn test_convert_schema_to_collection_config_rejects_mixed_index() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);

    // Reach the embedding key's vector index; if any level is absent the
    // schema is left untouched.
    let embedding_vector_index = schema
        .keys
        .get_mut(EMBEDDING_KEY)
        .and_then(|value_types| value_types.float_list.as_mut())
        .and_then(|float_list| float_list.vector_index.as_mut());
    if let Some(vector_index) = embedding_vector_index {
        vector_index.config.spann = Some(SpannIndexConfig {
            search_nprobe: Some(1),
            search_rng_factor: Some(1.0),
            search_rng_epsilon: Some(0.1),
            nreplica_count: Some(1),
            write_rng_factor: Some(1.0),
            write_rng_epsilon: Some(0.1),
            split_threshold: Some(100),
            num_samples_kmeans: Some(10),
            initial_lambda: Some(0.5),
            reassign_neighbor_count: Some(10),
            merge_threshold: Some(50),
            num_centers_to_merge_to: Some(3),
            write_nprobe: Some(1),
            ef_construction: Some(50),
            ef_search: Some(40),
            max_neighbors: Some(20),
        });
    }

    let conversion = InternalCollectionConfiguration::try_from(&schema);
    assert!(conversion.is_err());
}
2483
/// Ensuring the document key with a string value type on a default schema must
/// report no modification and leave the schema unchanged.
#[test]
fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    let snapshot = schema.clone();

    let changed = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);

    assert!(!changed);
    assert_eq!(schema, snapshot);
}
2492
/// Ensuring an unknown key with a bool value type must insert that key with
/// only the boolean value type set, copied from the schema defaults.
#[test]
fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    assert!(!schema.keys.contains_key("custom_field"));

    let changed = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
    assert!(changed);

    let inserted = schema
        .keys
        .get("custom_field")
        .expect("expected new key override to be inserted");

    // Only the boolean slot is populated, and it mirrors the defaults.
    assert_eq!(inserted.boolean, schema.defaults.boolean);
    assert!(inserted.string.is_none());
    assert!(inserted.int.is_none());
    assert!(inserted.float.is_none());
    assert!(inserted.float_list.is_none());
    assert!(inserted.sparse_vector.is_none());
}
2512
/// Ensuring a bool value type on a key that only has a string value type must
/// add the boolean slot without touching the string slot or adding keys.
#[test]
fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    let initial_len = schema.keys.len();

    // Seed the key with a string-only override.
    let string_only = ValueTypes {
        string: schema.defaults.string.clone(),
        ..Default::default()
    };
    schema.keys.insert("custom_field".to_string(), string_only);

    let changed = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
    assert!(changed);

    // Exactly one key was added overall (the seeded one).
    assert_eq!(schema.keys.len(), initial_len + 1);

    let updated = schema
        .keys
        .get("custom_field")
        .expect("expected key override to exist after ensure call");
    assert!(updated.string.is_some());
    assert_eq!(updated.boolean, schema.defaults.boolean);
}
2536
/// A sparse KNN query against a key the default SPANN schema does not define
/// must fail with `IndexingDisabled` naming that key and the sparse-vector
/// value type.
#[test]
fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
    let schema = Schema::new_default(KnnIndex::Spann);
    let query = QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]));

    let outcome = schema.is_knn_key_indexing_enabled("custom_sparse", &query);

    match outcome.expect_err("expected indexing disabled error") {
        FilterValidationError::IndexingDisabled { key, value_type } => {
            assert_eq!(key, "custom_sparse");
            assert_eq!(value_type, MetadataValueType::SparseVector);
        }
        other => panic!("unexpected error variant: {other:?}"),
    }
}
2554
/// A sparse KNN query against a key with an enabled sparse vector index must
/// be accepted.
#[test]
fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
    let enabled_sparse_index = SparseVectorIndexType {
        enabled: true,
        config: SparseVectorIndexConfig {
            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
            source_key: None,
            bm25: None,
        },
    };
    let sparse_only = ValueTypes {
        sparse_vector: Some(SparseVectorValueType {
            sparse_vector_index: Some(enabled_sparse_index),
        }),
        ..Default::default()
    };

    let mut schema = Schema::new_default(KnnIndex::Spann);
    schema.keys.insert("sparse_enabled".to_string(), sparse_only);

    let query = QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]));
    let outcome = schema.is_knn_key_indexing_enabled("sparse_enabled", &query);

    assert!(outcome.is_ok());
}
2582
/// A dense KNN query against the embedding key of the default SPANN schema
/// must be accepted.
#[test]
fn test_is_knn_key_indexing_enabled_dense_succeeds() {
    let schema = Schema::new_default(KnnIndex::Spann);
    let query = QueryVector::Dense(vec![0.1_f32, 0.2_f32]);

    let outcome = schema.is_knn_key_indexing_enabled(EMBEDDING_KEY, &query);

    assert!(outcome.is_ok());
}
2593
/// Merging HNSW configs works per field: values set by the user win, and
/// fields the user left as `None` keep the default's value.
#[test]
fn test_merge_hnsw_configs_field_level() {
    let defaults = HnswIndexConfig {
        ef_construction: Some(200),
        max_neighbors: Some(16),
        ef_search: Some(10),
        num_threads: Some(4),
        batch_size: Some(100),
        sync_threshold: Some(1000),
        resize_factor: Some(1.2),
    };
    let overrides = HnswIndexConfig {
        ef_construction: Some(300),
        max_neighbors: None,
        ef_search: Some(20),
        num_threads: None,
        batch_size: None,
        sync_threshold: Some(2000),
        resize_factor: None,
    };

    let merged = Schema::merge_hnsw_configs(Some(&defaults), Some(&overrides)).unwrap();

    // Taken from the user config.
    assert_eq!(merged.ef_construction, Some(300));
    assert_eq!(merged.ef_search, Some(20));
    assert_eq!(merged.sync_threshold, Some(2000));

    // Inherited from the default config.
    assert_eq!(merged.max_neighbors, Some(16));
    assert_eq!(merged.num_threads, Some(4));
    assert_eq!(merged.batch_size, Some(100));
    assert_eq!(merged.resize_factor, Some(1.2));
}
2630
/// Merging SPANN configs works per field: values set by the user win, and
/// fields the user left as `None` keep the default's value.
#[test]
fn test_merge_spann_configs_field_level() {
    let defaults = SpannIndexConfig {
        search_nprobe: Some(10),
        search_rng_factor: Some(1.0),
        search_rng_epsilon: Some(7.0),
        nreplica_count: Some(3),
        write_rng_factor: Some(1.0),
        write_rng_epsilon: Some(6.0),
        split_threshold: Some(100),
        num_samples_kmeans: Some(100),
        initial_lambda: Some(100.0),
        reassign_neighbor_count: Some(50),
        merge_threshold: Some(50),
        num_centers_to_merge_to: Some(4),
        write_nprobe: Some(5),
        ef_construction: Some(100),
        ef_search: Some(10),
        max_neighbors: Some(16),
    };
    let overrides = SpannIndexConfig {
        search_nprobe: Some(20),
        search_rng_factor: None,
        search_rng_epsilon: Some(8.0),
        nreplica_count: None,
        write_rng_factor: None,
        write_rng_epsilon: None,
        split_threshold: Some(150),
        num_samples_kmeans: None,
        initial_lambda: None,
        reassign_neighbor_count: None,
        merge_threshold: None,
        num_centers_to_merge_to: None,
        write_nprobe: None,
        ef_construction: None,
        ef_search: None,
        max_neighbors: None,
    };

    let merged = Schema::merge_spann_configs(Some(&defaults), Some(&overrides)).unwrap();

    // Taken from the user config.
    assert_eq!(merged.search_nprobe, Some(20));
    assert_eq!(merged.search_rng_epsilon, Some(8.0));
    assert_eq!(merged.split_threshold, Some(150));

    // Inherited from the default config.
    assert_eq!(merged.search_rng_factor, Some(1.0));
    assert_eq!(merged.nreplica_count, Some(3));
    assert_eq!(merged.initial_lambda, Some(100.0));
}
2684
/// Converting `(Option<&Space>, &SpannIndexConfig)` into an
/// `InternalSpannConfiguration` must copy the set fields, use the `default_*`
/// helpers for unset ones, and use `default_space()` when no space is given.
#[test]
fn test_spann_index_config_into_internal_configuration() {
    let partial = SpannIndexConfig {
        search_nprobe: Some(33),
        search_rng_factor: Some(1.2),
        search_rng_epsilon: None,
        nreplica_count: None,
        write_rng_factor: Some(1.5),
        write_rng_epsilon: None,
        split_threshold: Some(75),
        num_samples_kmeans: None,
        initial_lambda: Some(0.9),
        reassign_neighbor_count: Some(40),
        merge_threshold: None,
        num_centers_to_merge_to: Some(4),
        write_nprobe: Some(60),
        ef_construction: Some(180),
        ef_search: Some(170),
        max_neighbors: Some(32),
    };

    // Explicit space.
    let cosine: InternalSpannConfiguration = (Some(&Space::Cosine), &partial).into();
    assert_eq!(cosine.space, Space::Cosine);
    assert_eq!(cosine.search_nprobe, 33);
    assert_eq!(cosine.search_rng_factor, 1.2);
    assert_eq!(cosine.search_rng_epsilon, default_search_rng_epsilon());
    assert_eq!(cosine.write_rng_factor, 1.5);
    assert_eq!(cosine.write_nprobe, 60);
    assert_eq!(cosine.ef_construction, 180);
    assert_eq!(cosine.ef_search, 170);
    assert_eq!(cosine.max_neighbors, 32);
    assert_eq!(cosine.merge_threshold, default_merge_threshold());

    // No space supplied.
    let spaceless: InternalSpannConfiguration = (None, &partial).into();
    assert_eq!(spaceless.space, default_space());
}
2721
/// Exercises `Schema::merge_string_type` with every presence combination of
/// the default and user arguments.
#[test]
fn test_merge_string_type_combinations() {
    let base = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: true,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: Some(FtsIndexType {
            enabled: false,
            config: FtsIndexConfig {},
        }),
    };
    let overrides = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: None,
    };

    // Both present: the user's inverted index wins, the FTS index comes from
    // the default.
    let both = Schema::merge_string_type(Some(&base), Some(&overrides))
        .unwrap()
        .unwrap();
    assert!(!both.string_inverted_index.as_ref().unwrap().enabled);
    assert!(!both.fts_index.as_ref().unwrap().enabled);

    // Default only.
    let default_only = Schema::merge_string_type(Some(&base), None)
        .unwrap()
        .unwrap();
    assert!(default_only.string_inverted_index.as_ref().unwrap().enabled);

    // User only.
    let user_only = Schema::merge_string_type(None, Some(&overrides))
        .unwrap()
        .unwrap();
    assert!(!user_only.string_inverted_index.as_ref().unwrap().enabled);

    // Neither.
    let neither = Schema::merge_string_type(None, None).unwrap();
    assert!(neither.is_none());
}
2768
/// Merging vector index configs: user-set top-level and HNSW fields win, unset
/// ones come from the default, and a user-only SPANN section is carried over.
#[test]
fn test_merge_vector_index_config_comprehensive() {
    let base = VectorIndexConfig {
        space: Some(Space::Cosine),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(200),
            max_neighbors: Some(16),
            ef_search: Some(10),
            num_threads: Some(4),
            batch_size: Some(100),
            sync_threshold: Some(1000),
            resize_factor: Some(1.2),
        }),
        spann: None,
    };

    let user_hnsw = HnswIndexConfig {
        ef_construction: Some(300),
        max_neighbors: None,
        ef_search: None,
        num_threads: None,
        batch_size: None,
        sync_threshold: None,
        resize_factor: None,
    };
    let user_spann = SpannIndexConfig {
        search_nprobe: Some(15),
        search_rng_factor: None,
        search_rng_epsilon: None,
        nreplica_count: None,
        write_rng_factor: None,
        write_rng_epsilon: None,
        split_threshold: None,
        num_samples_kmeans: None,
        initial_lambda: None,
        reassign_neighbor_count: None,
        merge_threshold: None,
        num_centers_to_merge_to: None,
        write_nprobe: None,
        ef_construction: None,
        ef_search: None,
        max_neighbors: None,
    };
    let overrides = VectorIndexConfig {
        space: Some(Space::L2),
        embedding_function: None,
        source_key: Some("user_key".to_string()),
        hnsw: Some(user_hnsw),
        spann: Some(user_spann),
    };

    let merged = Schema::merge_vector_index_config(&base, &overrides);

    assert_eq!(merged.space, Some(Space::L2));
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(merged.source_key, Some(String::from("user_key")));

    let merged_hnsw = merged.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(300));
    assert_eq!(merged_hnsw.max_neighbors, Some(16));

    assert!(merged.spann.is_some());
    let merged_spann = merged.spann.as_ref().unwrap();
    assert_eq!(merged_spann.search_nprobe, Some(15));
}
2839
/// Merging sparse vector index configs: the user's `source_key` wins and the
/// unset `embedding_function` comes from the default.
#[test]
fn test_merge_sparse_vector_index_config() {
    let base = SparseVectorIndexConfig {
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_sparse_key".to_string()),
        bm25: None,
    };
    let overrides = SparseVectorIndexConfig {
        embedding_function: None,
        source_key: Some("user_sparse_key".to_string()),
        bm25: None,
    };

    let merged = Schema::merge_sparse_vector_index_config(&base, &overrides);

    assert_eq!(merged.source_key, Some(String::from("user_sparse_key")));
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
}
2865
/// End-to-end merge of a user schema (string defaults, vector defaults and one
/// custom key) into the default HNSW schema, performed by hand with
/// `Schema::merge_value_types`, followed by checks on defaults and keys.
#[test]
fn test_complex_nested_merging_scenario() {
    // User defaults for strings: inverted index off, FTS on.
    let user_string_defaults = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: Some(FtsIndexType {
            enabled: true,
            config: FtsIndexConfig {},
        }),
    };

    // User defaults for float lists: a partially specified HNSW vector index.
    let user_vector_defaults = FloatListValueType {
        vector_index: Some(VectorIndexType {
            enabled: true,
            config: VectorIndexConfig {
                space: Some(Space::Ip),
                embedding_function: None,
                source_key: Some("custom_vector_key".to_string()),
                hnsw: Some(HnswIndexConfig {
                    ef_construction: Some(400),
                    max_neighbors: Some(32),
                    ef_search: None,
                    num_threads: None,
                    batch_size: None,
                    sync_threshold: None,
                    resize_factor: None,
                }),
                spann: None,
            },
        }),
    };

    // Per-key override carrying only an FTS index.
    let custom_key_override = ValueTypes {
        string: Some(StringValueType {
            fts_index: Some(FtsIndexType {
                enabled: true,
                config: FtsIndexConfig {},
            }),
            string_inverted_index: None,
        }),
        ..Default::default()
    };

    let user_schema = Schema {
        defaults: ValueTypes {
            string: Some(user_string_defaults),
            float_list: Some(user_vector_defaults),
            ..Default::default()
        },
        keys: HashMap::from([("custom_field".to_string(), custom_key_override)]),
    };

    // Merge against the HNSW defaults: defaults first, then key by key.
    let baseline = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults =
        Schema::merge_value_types(&baseline.defaults, &user_schema.defaults).unwrap();
    let mut merged_keys = baseline.keys.clone();
    for (key, overrides) in user_schema.keys {
        let resolved = match merged_keys.get(&key) {
            Some(existing) => Schema::merge_value_types(existing, &overrides).unwrap(),
            None => overrides,
        };
        merged_keys.insert(key, resolved);
    }
    let merged = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
    };

    // String defaults follow the user.
    let string_defaults = merged.defaults.string.as_ref().unwrap();
    assert!(
        !string_defaults
            .string_inverted_index
            .as_ref()
            .unwrap()
            .enabled
    );
    assert!(string_defaults.fts_index.as_ref().unwrap().enabled);

    // Vector defaults: user values where set, defaults elsewhere.
    let vector_config = &merged
        .defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    assert_eq!(vector_config.space, Some(Space::Ip));
    assert_eq!(
        vector_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(
        vector_config.source_key,
        Some(String::from("custom_vector_key"))
    );
    let merged_hnsw = vector_config.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(400));
    assert_eq!(merged_hnsw.max_neighbors, Some(32));
    assert_eq!(merged_hnsw.ef_search, Some(default_search_ef()));

    // Built-in keys plus the user's key are all present.
    assert!(merged.keys.contains_key(EMBEDDING_KEY));
    assert!(merged.keys.contains_key(DOCUMENT_KEY));
    assert!(merged.keys.contains_key("custom_field"));

    // The custom key is stored exactly as the user wrote it.
    let custom_override = merged.keys.get("custom_field").unwrap();
    let custom_string = custom_override.string.as_ref().unwrap();
    assert!(custom_string.fts_index.as_ref().unwrap().enabled);
    assert!(custom_string.string_inverted_index.is_none());
}
3020
/// Reconciling the default HNSW schema with the default HNSW collection
/// configuration must return the schema unchanged.
#[test]
fn test_reconcile_with_collection_config_default_config() {
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let default_config = InternalCollectionConfiguration::default_hnsw();

    let reconciled =
        Schema::reconcile_with_collection_config(default_schema.clone(), default_config)
            .unwrap();

    assert_eq!(reconciled, default_schema);
}
3031
/// When both the schema and the collection configuration deviate from their
/// defaults, reconciliation must fail with `ConfigAndSchemaConflict`.
#[test]
fn test_reconcile_with_collection_config_both_non_default() {
    // Non-default schema: FTS-only string defaults.
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    schema.defaults.string = Some(StringValueType {
        fts_index: Some(FtsIndexType {
            enabled: true,
            config: FtsIndexConfig {},
        }),
        string_inverted_index: None,
    });

    // Non-default collection configuration: custom `ef_construction`.
    let mut collection_config = InternalCollectionConfiguration::default_hnsw();
    if let VectorIndexConfiguration::Hnsw(hnsw) = &mut collection_config.vector_index {
        hnsw.ef_construction = 500;
    }

    let outcome = Schema::reconcile_with_collection_config(schema, collection_config);

    assert!(outcome.is_err());
    let error = outcome.unwrap_err();
    assert!(matches!(error, SchemaError::ConfigAndSchemaConflict));
}
3058
/// Reconciling the default HNSW schema with a custom HNSW collection
/// configuration must copy every HNSW setting onto the embedding key and leave
/// its SPANN section empty.
#[test]
fn test_reconcile_with_collection_config_hnsw_override() {
    let custom_hnsw = InternalHnswConfiguration {
        ef_construction: 300,
        max_neighbors: 32,
        ef_search: 50,
        num_threads: 8,
        batch_size: 200,
        sync_threshold: 2000,
        resize_factor: 1.5,
        space: Space::L2,
    };
    let collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(custom_hnsw),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };

    let reconciled = Schema::reconcile_with_collection_config(
        Schema::new_default(KnnIndex::Hnsw),
        collection_config,
    )
    .unwrap();

    let embedding_types = reconciled.keys.get(EMBEDDING_KEY).unwrap();
    let float_list = embedding_types.float_list.as_ref().unwrap();
    let vector_index = float_list.vector_index.as_ref().unwrap();
    let config = &vector_index.config;

    assert!(vector_index.enabled);
    assert_eq!(config.space, Some(Space::L2));
    assert_eq!(
        config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(config.source_key, Some(DOCUMENT_KEY.to_string()));

    let hnsw = config.hnsw.as_ref().unwrap();
    assert_eq!(hnsw.ef_construction, Some(300));
    assert_eq!(hnsw.max_neighbors, Some(32));
    assert_eq!(hnsw.ef_search, Some(50));
    assert_eq!(hnsw.num_threads, Some(8));
    assert_eq!(hnsw.batch_size, Some(200));
    assert_eq!(hnsw.sync_threshold, Some(2000));
    assert_eq!(hnsw.resize_factor, Some(1.5));

    assert!(config.spann.is_none());
}
3112
/// Reconciling the default SPANN schema with a custom SPANN collection
/// configuration must copy every SPANN setting onto the embedding key and
/// leave its HNSW section empty.
#[test]
fn test_reconcile_with_collection_config_spann_override() {
    let custom_spann = InternalSpannConfiguration {
        search_nprobe: 20,
        search_rng_factor: 3.0,
        search_rng_epsilon: 0.2,
        nreplica_count: 5,
        write_rng_factor: 2.0,
        write_rng_epsilon: 0.1,
        split_threshold: 2000,
        num_samples_kmeans: 200,
        initial_lambda: 0.8,
        reassign_neighbor_count: 100,
        merge_threshold: 800,
        num_centers_to_merge_to: 20,
        write_nprobe: 10,
        ef_construction: 400,
        ef_search: 60,
        max_neighbors: 24,
        space: Space::Cosine,
    };
    let collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(custom_spann),
        embedding_function: None,
    };

    let reconciled = Schema::reconcile_with_collection_config(
        Schema::new_default(KnnIndex::Spann),
        collection_config,
    )
    .unwrap();

    let embedding_types = reconciled.keys.get(EMBEDDING_KEY).unwrap();
    let float_list = embedding_types.float_list.as_ref().unwrap();
    let vector_index = float_list.vector_index.as_ref().unwrap();
    let config = &vector_index.config;

    assert!(vector_index.enabled);
    assert_eq!(config.space, Some(Space::Cosine));
    assert_eq!(config.embedding_function, None);
    assert_eq!(config.source_key, Some(DOCUMENT_KEY.to_string()));

    assert!(config.hnsw.is_none());

    let spann = config.spann.as_ref().unwrap();
    assert_eq!(spann.search_nprobe, Some(20));
    assert_eq!(spann.search_rng_factor, Some(3.0));
    assert_eq!(spann.search_rng_epsilon, Some(0.2));
    assert_eq!(spann.nreplica_count, Some(5));
    assert_eq!(spann.write_rng_factor, Some(2.0));
    assert_eq!(spann.write_rng_epsilon, Some(0.1));
    assert_eq!(spann.split_threshold, Some(2000));
    assert_eq!(spann.num_samples_kmeans, Some(200));
    assert_eq!(spann.initial_lambda, Some(0.8));
    assert_eq!(spann.reassign_neighbor_count, Some(100));
    assert_eq!(spann.merge_threshold, Some(800));
    assert_eq!(spann.num_centers_to_merge_to, Some(20));
    assert_eq!(spann.write_nprobe, Some(10));
    assert_eq!(spann.ef_construction, Some(400));
    assert_eq!(spann.ef_search, Some(60));
    assert_eq!(spann.max_neighbors, Some(24));
}
3181
/// Reconciling with a custom HNSW collection configuration must apply the
/// settings both to the float-list defaults (vector index stays disabled) and
/// to the embedding key (vector index enabled).
#[test]
fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
    let custom_hnsw = InternalHnswConfiguration {
        ef_construction: 300,
        max_neighbors: 32,
        ef_search: 50,
        num_threads: 8,
        batch_size: 200,
        sync_threshold: 2000,
        resize_factor: 1.5,
        space: Space::L2,
    };
    let collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(custom_hnsw),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };

    let reconciled = Schema::reconcile_with_collection_config(
        Schema::new_default(KnnIndex::Hnsw),
        collection_config,
    )
    .unwrap();

    // Schema-wide float-list defaults.
    let defaults_float_list = reconciled.defaults.float_list.as_ref().unwrap();
    let defaults_vector_index = defaults_float_list.vector_index.as_ref().unwrap();
    let defaults_config = &defaults_vector_index.config;

    assert!(!defaults_vector_index.enabled);
    assert_eq!(defaults_config.space, Some(Space::L2));
    assert_eq!(
        defaults_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(defaults_config.source_key, Some(DOCUMENT_KEY.to_string()));
    let defaults_hnsw = defaults_config.hnsw.as_ref().unwrap();
    assert_eq!(defaults_hnsw.ef_construction, Some(300));
    assert_eq!(defaults_hnsw.max_neighbors, Some(32));

    // Embedding key override.
    let embedding_types = reconciled.keys.get(EMBEDDING_KEY).unwrap();
    let embedding_float_list = embedding_types.float_list.as_ref().unwrap();
    let embedding_vector_index = embedding_float_list.vector_index.as_ref().unwrap();
    let embedding_config = &embedding_vector_index.config;

    assert!(embedding_vector_index.enabled);
    assert_eq!(embedding_config.space, Some(Space::L2));
    assert_eq!(
        embedding_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(embedding_config.source_key, Some(DOCUMENT_KEY.to_string()));
    let embedding_hnsw = embedding_config.hnsw.as_ref().unwrap();
    assert_eq!(embedding_hnsw.ef_construction, Some(300));
    assert_eq!(embedding_hnsw.max_neighbors, Some(32));
}
3256
/// `Schema::is_schema_default` must accept both built-in default schemas and
/// reject an empty schema, a schema with a changed default, and a schema with
/// an extra key.
#[test]
fn test_is_schema_default() {
    // Both built-in defaults qualify.
    let hnsw_default = Schema::new_default(KnnIndex::Hnsw);
    assert!(Schema::is_schema_default(&hnsw_default));

    let spann_default = Schema::new_default(KnnIndex::Spann);
    assert!(Schema::is_schema_default(&spann_default));

    // A completely empty schema does not.
    let empty = Schema {
        keys: HashMap::new(),
        defaults: ValueTypes::default(),
    };
    assert!(!Schema::is_schema_default(&empty));

    // Flipping one default flag disqualifies the schema; if the index is
    // absent the schema is left untouched.
    let mut tweaked = Schema::new_default(KnnIndex::Hnsw);
    let inverted_index = tweaked
        .defaults
        .string
        .as_mut()
        .and_then(|string_type| string_type.string_inverted_index.as_mut());
    if let Some(string_inverted) = inverted_index {
        string_inverted.enabled = false;
    }
    assert!(!Schema::is_schema_default(&tweaked));

    // An additional key, even an empty one, disqualifies the schema.
    let mut with_extra_key = Schema::new_default(KnnIndex::Hnsw);
    with_extra_key
        .keys
        .insert("custom_key".to_string(), ValueTypes::default());
    assert!(!Schema::is_schema_default(&with_extra_key));
}
3290
3291 #[test]
3292 fn test_add_merges_keys_by_value_type() {
3293 let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
3294 let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
3295
3296 let string_override = ValueTypes {
3297 string: Some(StringValueType {
3298 string_inverted_index: Some(StringInvertedIndexType {
3299 enabled: true,
3300 config: StringInvertedIndexConfig {},
3301 }),
3302 fts_index: None,
3303 }),
3304 ..Default::default()
3305 };
3306 schema_a
3307 .keys
3308 .insert("custom_field".to_string(), string_override);
3309
3310 let float_override = ValueTypes {
3311 float: Some(FloatValueType {
3312 float_inverted_index: Some(FloatInvertedIndexType {
3313 enabled: true,
3314 config: FloatInvertedIndexConfig {},
3315 }),
3316 }),
3317 ..Default::default()
3318 };
3319 schema_b
3320 .keys
3321 .insert("custom_field".to_string(), float_override);
3322
3323 let merged = schema_a.merge(&schema_b).unwrap();
3324 let merged_override = merged.keys.get("custom_field").unwrap();
3325
3326 assert!(merged_override.string.is_some());
3327 assert!(merged_override.float.is_some());
3328 assert!(
3329 merged_override
3330 .string
3331 .as_ref()
3332 .unwrap()
3333 .string_inverted_index
3334 .as_ref()
3335 .unwrap()
3336 .enabled
3337 );
3338 assert!(
3339 merged_override
3340 .float
3341 .as_ref()
3342 .unwrap()
3343 .float_inverted_index
3344 .as_ref()
3345 .unwrap()
3346 .enabled
3347 );
3348 }
3349
3350 #[test]
3351 fn test_add_rejects_different_defaults() {
3352 let schema_a = Schema::new_default(KnnIndex::Hnsw);
3353 let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
3354
3355 if let Some(string_type) = schema_b.defaults.string.as_mut() {
3356 if let Some(string_index) = string_type.string_inverted_index.as_mut() {
3357 string_index.enabled = false;
3358 }
3359 }
3360
3361 let err = schema_a.merge(&schema_b).unwrap_err();
3362 assert!(matches!(err, SchemaError::DefaultsMismatch));
3363 }
3364
3365 #[test]
3366 fn test_add_detects_conflicting_value_type_configuration() {
3367 let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
3368 let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
3369
3370 let string_override_enabled = ValueTypes {
3371 string: Some(StringValueType {
3372 string_inverted_index: Some(StringInvertedIndexType {
3373 enabled: true,
3374 config: StringInvertedIndexConfig {},
3375 }),
3376 fts_index: None,
3377 }),
3378 ..Default::default()
3379 };
3380 schema_a
3381 .keys
3382 .insert("custom_field".to_string(), string_override_enabled);
3383
3384 let string_override_disabled = ValueTypes {
3385 string: Some(StringValueType {
3386 string_inverted_index: Some(StringInvertedIndexType {
3387 enabled: false,
3388 config: StringInvertedIndexConfig {},
3389 }),
3390 fts_index: None,
3391 }),
3392 ..Default::default()
3393 };
3394 schema_b
3395 .keys
3396 .insert("custom_field".to_string(), string_override_disabled);
3397
3398 let err = schema_a.merge(&schema_b).unwrap_err();
3399 assert!(matches!(err, SchemaError::ConfigurationConflict { .. }));
3400 }
3401
3402 #[test]
3404 fn test_backward_compatibility_aliases() {
3405 let old_format_json = r###"{
3407 "defaults": {
3408 "#string": {
3409 "$fts_index": {
3410 "enabled": true,
3411 "config": {}
3412 }
3413 },
3414 "#int": {
3415 "$int_inverted_index": {
3416 "enabled": true,
3417 "config": {}
3418 }
3419 },
3420 "#float_list": {
3421 "$vector_index": {
3422 "enabled": true,
3423 "config": {
3424 "spann": {
3425 "search_nprobe": 10
3426 }
3427 }
3428 }
3429 }
3430 },
3431 "key_overrides": {
3432 "#document": {
3433 "#string": {
3434 "$fts_index": {
3435 "enabled": false,
3436 "config": {}
3437 }
3438 }
3439 }
3440 }
3441 }"###;
3442
3443 let schema_from_old: Schema = serde_json::from_str(old_format_json).unwrap();
3444
3445 let new_format_json = r###"{
3447 "defaults": {
3448 "string": {
3449 "fts_index": {
3450 "enabled": true,
3451 "config": {}
3452 }
3453 },
3454 "int": {
3455 "int_inverted_index": {
3456 "enabled": true,
3457 "config": {}
3458 }
3459 },
3460 "float_list": {
3461 "vector_index": {
3462 "enabled": true,
3463 "config": {
3464 "spann": {
3465 "search_nprobe": 10
3466 }
3467 }
3468 }
3469 }
3470 },
3471 "keys": {
3472 "#document": {
3473 "string": {
3474 "fts_index": {
3475 "enabled": false,
3476 "config": {}
3477 }
3478 }
3479 }
3480 }
3481 }"###;
3482
3483 let schema_from_new: Schema = serde_json::from_str(new_format_json).unwrap();
3484
3485 assert_eq!(schema_from_old, schema_from_new);
3487
3488 assert!(schema_from_old.defaults.string.is_some());
3490 assert!(schema_from_old
3491 .defaults
3492 .string
3493 .as_ref()
3494 .unwrap()
3495 .fts_index
3496 .is_some());
3497 assert!(
3498 schema_from_old
3499 .defaults
3500 .string
3501 .as_ref()
3502 .unwrap()
3503 .fts_index
3504 .as_ref()
3505 .unwrap()
3506 .enabled
3507 );
3508
3509 assert!(schema_from_old.defaults.int.is_some());
3510 assert!(schema_from_old
3511 .defaults
3512 .int
3513 .as_ref()
3514 .unwrap()
3515 .int_inverted_index
3516 .is_some());
3517
3518 assert!(schema_from_old.defaults.float_list.is_some());
3519 assert!(schema_from_old
3520 .defaults
3521 .float_list
3522 .as_ref()
3523 .unwrap()
3524 .vector_index
3525 .is_some());
3526
3527 assert!(schema_from_old.keys.contains_key(DOCUMENT_KEY));
3528 let doc_override = schema_from_old.keys.get(DOCUMENT_KEY).unwrap();
3529 assert!(doc_override.string.is_some());
3530 assert!(
3531 !doc_override
3532 .string
3533 .as_ref()
3534 .unwrap()
3535 .fts_index
3536 .as_ref()
3537 .unwrap()
3538 .enabled
3539 );
3540
3541 let serialized = serde_json::to_string(&schema_from_old).unwrap();
3543
3544 assert!(serialized.contains(r#""keys":"#));
3546 assert!(serialized.contains(r#""string":"#));
3547 assert!(serialized.contains(r#""fts_index":"#));
3548 assert!(serialized.contains(r#""int_inverted_index":"#));
3549 assert!(serialized.contains(r#""vector_index":"#));
3550
3551 assert!(!serialized.contains(r#""key_overrides":"#));
3553 assert!(!serialized.contains(r###""#string":"###));
3554 assert!(!serialized.contains(r###""$fts_index":"###));
3555 assert!(!serialized.contains(r###""$int_inverted_index":"###));
3556 assert!(!serialized.contains(r###""$vector_index":"###));
3557 }
3558
3559 #[test]
3560 fn test_hnsw_index_config_validation() {
3561 use validator::Validate;
3562
3563 let valid_config = HnswIndexConfig {
3565 batch_size: Some(10),
3566 sync_threshold: Some(100),
3567 ef_construction: Some(100),
3568 max_neighbors: Some(16),
3569 ..Default::default()
3570 };
3571 assert!(valid_config.validate().is_ok());
3572
3573 let invalid_batch_size = HnswIndexConfig {
3575 batch_size: Some(1),
3576 ..Default::default()
3577 };
3578 assert!(invalid_batch_size.validate().is_err());
3579
3580 let invalid_sync_threshold = HnswIndexConfig {
3582 sync_threshold: Some(1),
3583 ..Default::default()
3584 };
3585 assert!(invalid_sync_threshold.validate().is_err());
3586
3587 let boundary_config = HnswIndexConfig {
3589 batch_size: Some(2),
3590 sync_threshold: Some(2),
3591 ..Default::default()
3592 };
3593 assert!(boundary_config.validate().is_ok());
3594
3595 let all_none_config = HnswIndexConfig {
3597 ..Default::default()
3598 };
3599 assert!(all_none_config.validate().is_ok());
3600
3601 let other_fields_config = HnswIndexConfig {
3603 ef_construction: Some(1),
3604 max_neighbors: Some(1),
3605 ef_search: Some(1),
3606 num_threads: Some(1),
3607 resize_factor: Some(0.1),
3608 ..Default::default()
3609 };
3610 assert!(other_fields_config.validate().is_ok());
3611 }
3612
3613 #[test]
3614 fn test_spann_index_config_validation() {
3615 use validator::Validate;
3616
3617 let valid_config = SpannIndexConfig {
3619 write_nprobe: Some(32),
3620 nreplica_count: Some(4),
3621 split_threshold: Some(100),
3622 merge_threshold: Some(50),
3623 reassign_neighbor_count: Some(32),
3624 num_centers_to_merge_to: Some(4),
3625 ef_construction: Some(100),
3626 ef_search: Some(100),
3627 max_neighbors: Some(32),
3628 search_rng_factor: Some(1.0),
3629 write_rng_factor: Some(1.0),
3630 search_rng_epsilon: Some(7.5),
3631 write_rng_epsilon: Some(7.5),
3632 ..Default::default()
3633 };
3634 assert!(valid_config.validate().is_ok());
3635
3636 let invalid_write_nprobe = SpannIndexConfig {
3638 write_nprobe: Some(200),
3639 ..Default::default()
3640 };
3641 assert!(invalid_write_nprobe.validate().is_err());
3642
3643 let invalid_split_threshold = SpannIndexConfig {
3645 split_threshold: Some(10),
3646 ..Default::default()
3647 };
3648 assert!(invalid_split_threshold.validate().is_err());
3649
3650 let invalid_split_threshold_high = SpannIndexConfig {
3652 split_threshold: Some(250),
3653 ..Default::default()
3654 };
3655 assert!(invalid_split_threshold_high.validate().is_err());
3656
3657 let invalid_nreplica = SpannIndexConfig {
3659 nreplica_count: Some(10),
3660 ..Default::default()
3661 };
3662 assert!(invalid_nreplica.validate().is_err());
3663
3664 let invalid_reassign = SpannIndexConfig {
3666 reassign_neighbor_count: Some(100),
3667 ..Default::default()
3668 };
3669 assert!(invalid_reassign.validate().is_err());
3670
3671 let invalid_merge_threshold_low = SpannIndexConfig {
3673 merge_threshold: Some(5),
3674 ..Default::default()
3675 };
3676 assert!(invalid_merge_threshold_low.validate().is_err());
3677
3678 let invalid_merge_threshold_high = SpannIndexConfig {
3679 merge_threshold: Some(150),
3680 ..Default::default()
3681 };
3682 assert!(invalid_merge_threshold_high.validate().is_err());
3683
3684 let invalid_num_centers = SpannIndexConfig {
3686 num_centers_to_merge_to: Some(10),
3687 ..Default::default()
3688 };
3689 assert!(invalid_num_centers.validate().is_err());
3690
3691 let invalid_ef_construction = SpannIndexConfig {
3693 ef_construction: Some(300),
3694 ..Default::default()
3695 };
3696 assert!(invalid_ef_construction.validate().is_err());
3697
3698 let invalid_ef_search = SpannIndexConfig {
3700 ef_search: Some(300),
3701 ..Default::default()
3702 };
3703 assert!(invalid_ef_search.validate().is_err());
3704
3705 let invalid_max_neighbors = SpannIndexConfig {
3707 max_neighbors: Some(100),
3708 ..Default::default()
3709 };
3710 assert!(invalid_max_neighbors.validate().is_err());
3711
3712 let invalid_search_nprobe = SpannIndexConfig {
3714 search_nprobe: Some(200),
3715 ..Default::default()
3716 };
3717 assert!(invalid_search_nprobe.validate().is_err());
3718
3719 let invalid_search_rng_factor_low = SpannIndexConfig {
3721 search_rng_factor: Some(0.9),
3722 ..Default::default()
3723 };
3724 assert!(invalid_search_rng_factor_low.validate().is_err());
3725
3726 let invalid_search_rng_factor_high = SpannIndexConfig {
3727 search_rng_factor: Some(1.1),
3728 ..Default::default()
3729 };
3730 assert!(invalid_search_rng_factor_high.validate().is_err());
3731
3732 let valid_search_rng_factor = SpannIndexConfig {
3734 search_rng_factor: Some(1.0),
3735 ..Default::default()
3736 };
3737 assert!(valid_search_rng_factor.validate().is_ok());
3738
3739 let invalid_search_rng_epsilon_low = SpannIndexConfig {
3741 search_rng_epsilon: Some(4.0),
3742 ..Default::default()
3743 };
3744 assert!(invalid_search_rng_epsilon_low.validate().is_err());
3745
3746 let invalid_search_rng_epsilon_high = SpannIndexConfig {
3747 search_rng_epsilon: Some(11.0),
3748 ..Default::default()
3749 };
3750 assert!(invalid_search_rng_epsilon_high.validate().is_err());
3751
3752 let valid_search_rng_epsilon = SpannIndexConfig {
3754 search_rng_epsilon: Some(7.5),
3755 ..Default::default()
3756 };
3757 assert!(valid_search_rng_epsilon.validate().is_ok());
3758
3759 let invalid_write_rng_factor_low = SpannIndexConfig {
3761 write_rng_factor: Some(0.9),
3762 ..Default::default()
3763 };
3764 assert!(invalid_write_rng_factor_low.validate().is_err());
3765
3766 let invalid_write_rng_factor_high = SpannIndexConfig {
3767 write_rng_factor: Some(1.1),
3768 ..Default::default()
3769 };
3770 assert!(invalid_write_rng_factor_high.validate().is_err());
3771
3772 let valid_write_rng_factor = SpannIndexConfig {
3774 write_rng_factor: Some(1.0),
3775 ..Default::default()
3776 };
3777 assert!(valid_write_rng_factor.validate().is_ok());
3778
3779 let invalid_write_rng_epsilon_low = SpannIndexConfig {
3781 write_rng_epsilon: Some(4.0),
3782 ..Default::default()
3783 };
3784 assert!(invalid_write_rng_epsilon_low.validate().is_err());
3785
3786 let invalid_write_rng_epsilon_high = SpannIndexConfig {
3787 write_rng_epsilon: Some(11.0),
3788 ..Default::default()
3789 };
3790 assert!(invalid_write_rng_epsilon_high.validate().is_err());
3791
3792 let valid_write_rng_epsilon = SpannIndexConfig {
3794 write_rng_epsilon: Some(7.5),
3795 ..Default::default()
3796 };
3797 assert!(valid_write_rng_epsilon.validate().is_ok());
3798
3799 let invalid_num_samples_kmeans = SpannIndexConfig {
3801 num_samples_kmeans: Some(1500),
3802 ..Default::default()
3803 };
3804 assert!(invalid_num_samples_kmeans.validate().is_err());
3805
3806 let valid_num_samples_kmeans = SpannIndexConfig {
3808 num_samples_kmeans: Some(500),
3809 ..Default::default()
3810 };
3811 assert!(valid_num_samples_kmeans.validate().is_ok());
3812
3813 let invalid_initial_lambda_high = SpannIndexConfig {
3815 initial_lambda: Some(150.0),
3816 ..Default::default()
3817 };
3818 assert!(invalid_initial_lambda_high.validate().is_err());
3819
3820 let invalid_initial_lambda_low = SpannIndexConfig {
3821 initial_lambda: Some(50.0),
3822 ..Default::default()
3823 };
3824 assert!(invalid_initial_lambda_low.validate().is_err());
3825
3826 let valid_initial_lambda = SpannIndexConfig {
3828 initial_lambda: Some(100.0),
3829 ..Default::default()
3830 };
3831 assert!(valid_initial_lambda.validate().is_ok());
3832
3833 let all_none_config = SpannIndexConfig {
3835 ..Default::default()
3836 };
3837 assert!(all_none_config.validate().is_ok());
3838 }
3839
3840 #[test]
3841 fn test_builder_pattern_crud_workflow() {
3842 let schema = Schema::new_default(KnnIndex::Hnsw)
3846 .create_index(
3847 None,
3848 IndexConfig::Vector(VectorIndexConfig {
3849 space: Some(Space::Cosine),
3850 embedding_function: None,
3851 source_key: None,
3852 hnsw: Some(HnswIndexConfig {
3853 ef_construction: Some(200),
3854 max_neighbors: Some(32),
3855 ef_search: Some(50),
3856 num_threads: None,
3857 batch_size: None,
3858 sync_threshold: None,
3859 resize_factor: None,
3860 }),
3861 spann: None,
3862 }),
3863 )
3864 .expect("vector config should succeed")
3865 .create_index(
3866 Some("category"),
3867 IndexConfig::StringInverted(StringInvertedIndexConfig {}),
3868 )
3869 .expect("string inverted on key should succeed")
3870 .create_index(
3871 Some("year"),
3872 IndexConfig::IntInverted(IntInvertedIndexConfig {}),
3873 )
3874 .expect("int inverted on key should succeed")
3875 .create_index(
3876 Some("rating"),
3877 IndexConfig::FloatInverted(FloatInvertedIndexConfig {}),
3878 )
3879 .expect("float inverted on key should succeed")
3880 .create_index(
3881 Some("is_active"),
3882 IndexConfig::BoolInverted(BoolInvertedIndexConfig {}),
3883 )
3884 .expect("bool inverted on key should succeed");
3885
3886 assert!(schema.keys.contains_key(EMBEDDING_KEY));
3889 let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
3890 assert!(embedding.float_list.is_some());
3891 let vector_index = embedding
3892 .float_list
3893 .as_ref()
3894 .unwrap()
3895 .vector_index
3896 .as_ref()
3897 .unwrap();
3898 assert!(vector_index.enabled);
3899 assert_eq!(vector_index.config.space, Some(Space::Cosine));
3900 assert_eq!(
3901 vector_index.config.hnsw.as_ref().unwrap().ef_construction,
3902 Some(200)
3903 );
3904
3905 assert!(schema.keys.contains_key("category"));
3907 assert!(schema.keys.contains_key("year"));
3908 assert!(schema.keys.contains_key("rating"));
3909 assert!(schema.keys.contains_key("is_active"));
3910
3911 let category = schema.keys.get("category").unwrap();
3913 assert!(category.string.is_some());
3914 let string_idx = category
3915 .string
3916 .as_ref()
3917 .unwrap()
3918 .string_inverted_index
3919 .as_ref()
3920 .unwrap();
3921 assert!(string_idx.enabled);
3922
3923 let year = schema.keys.get("year").unwrap();
3925 assert!(year.int.is_some());
3926 let int_idx = year
3927 .int
3928 .as_ref()
3929 .unwrap()
3930 .int_inverted_index
3931 .as_ref()
3932 .unwrap();
3933 assert!(int_idx.enabled);
3934
3935 let schema = schema
3937 .delete_index(
3938 Some("category"),
3939 IndexConfig::StringInverted(StringInvertedIndexConfig {}),
3940 )
3941 .expect("delete string inverted should succeed")
3942 .delete_index(
3943 Some("year"),
3944 IndexConfig::IntInverted(IntInvertedIndexConfig {}),
3945 )
3946 .expect("delete int inverted should succeed");
3947
3948 let category = schema.keys.get("category").unwrap();
3950 let string_idx = category
3951 .string
3952 .as_ref()
3953 .unwrap()
3954 .string_inverted_index
3955 .as_ref()
3956 .unwrap();
3957 assert!(!string_idx.enabled); let year = schema.keys.get("year").unwrap();
3960 let int_idx = year
3961 .int
3962 .as_ref()
3963 .unwrap()
3964 .int_inverted_index
3965 .as_ref()
3966 .unwrap();
3967 assert!(!int_idx.enabled); let rating = schema.keys.get("rating").unwrap();
3971 let float_idx = rating
3972 .float
3973 .as_ref()
3974 .unwrap()
3975 .float_inverted_index
3976 .as_ref()
3977 .unwrap();
3978 assert!(float_idx.enabled); let is_active = schema.keys.get("is_active").unwrap();
3981 let bool_idx = is_active
3982 .boolean
3983 .as_ref()
3984 .unwrap()
3985 .bool_inverted_index
3986 .as_ref()
3987 .unwrap();
3988 assert!(bool_idx.enabled); }
3990
3991 #[test]
3992 fn test_builder_create_index_validation_errors() {
3993 let result = Schema::new_default(KnnIndex::Hnsw).create_index(
4000 Some("my_vectors"),
4001 IndexConfig::Vector(VectorIndexConfig {
4002 space: Some(Space::L2),
4003 embedding_function: None,
4004 source_key: None,
4005 hnsw: None,
4006 spann: None,
4007 }),
4008 );
4009 assert!(result.is_err());
4010 assert!(matches!(
4011 result.unwrap_err(),
4012 SchemaBuilderError::VectorIndexMustBeGlobal { key } if key == "my_vectors"
4013 ));
4014
4015 let result = Schema::new_default(KnnIndex::Hnsw)
4017 .create_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
4018 assert!(result.is_err());
4019 assert!(matches!(
4020 result.unwrap_err(),
4021 SchemaBuilderError::FtsIndexMustBeGlobal { key } if key == "my_text"
4022 ));
4023
4024 let result = Schema::new_default(KnnIndex::Hnsw).create_index(
4026 Some(DOCUMENT_KEY),
4027 IndexConfig::StringInverted(StringInvertedIndexConfig {}),
4028 );
4029 assert!(result.is_err());
4030 assert!(matches!(
4031 result.unwrap_err(),
4032 SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
4033 ));
4034
4035 let result = Schema::new_default(KnnIndex::Hnsw).create_index(
4037 Some(EMBEDDING_KEY),
4038 IndexConfig::IntInverted(IntInvertedIndexConfig {}),
4039 );
4040 assert!(result.is_err());
4041 assert!(matches!(
4042 result.unwrap_err(),
4043 SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
4044 ));
4045
4046 let result = Schema::new_default(KnnIndex::Hnsw).create_index(
4048 None,
4049 IndexConfig::SparseVector(SparseVectorIndexConfig {
4050 embedding_function: None,
4051 source_key: None,
4052 bm25: None,
4053 }),
4054 );
4055 assert!(result.is_err());
4056 assert!(matches!(
4057 result.unwrap_err(),
4058 SchemaBuilderError::SparseVectorRequiresKey
4059 ));
4060
4061 let result = Schema::new_default(KnnIndex::Hnsw)
4063 .create_index(
4064 Some("sparse1"),
4065 IndexConfig::SparseVector(SparseVectorIndexConfig {
4066 embedding_function: None,
4067 source_key: None,
4068 bm25: None,
4069 }),
4070 )
4071 .expect("first sparse should succeed")
4072 .create_index(
4073 Some("sparse2"),
4074 IndexConfig::SparseVector(SparseVectorIndexConfig {
4075 embedding_function: None,
4076 source_key: None,
4077 bm25: None,
4078 }),
4079 );
4080 assert!(result.is_err());
4081 assert!(matches!(
4082 result.unwrap_err(),
4083 SchemaBuilderError::MultipleSparseVectorIndexes { existing_key } if existing_key == "sparse1"
4084 ));
4085 }
4086
4087 #[test]
4088 fn test_builder_delete_index_validation_errors() {
4089 let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
4095 Some(EMBEDDING_KEY),
4096 IndexConfig::StringInverted(StringInvertedIndexConfig {}),
4097 );
4098 assert!(result.is_err());
4099 assert!(matches!(
4100 result.unwrap_err(),
4101 SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
4102 ));
4103
4104 let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
4106 Some(DOCUMENT_KEY),
4107 IndexConfig::IntInverted(IntInvertedIndexConfig {}),
4108 );
4109 assert!(result.is_err());
4110 assert!(matches!(
4111 result.unwrap_err(),
4112 SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
4113 ));
4114
4115 let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
4117 None,
4118 IndexConfig::Vector(VectorIndexConfig {
4119 space: None,
4120 embedding_function: None,
4121 source_key: None,
4122 hnsw: None,
4123 spann: None,
4124 }),
4125 );
4126 assert!(result.is_err());
4127 assert!(matches!(
4128 result.unwrap_err(),
4129 SchemaBuilderError::VectorIndexDeletionNotSupported
4130 ));
4131
4132 let result = Schema::new_default(KnnIndex::Hnsw)
4134 .delete_index(None, IndexConfig::Fts(FtsIndexConfig {}));
4135 assert!(result.is_err());
4136 assert!(matches!(
4137 result.unwrap_err(),
4138 SchemaBuilderError::FtsIndexDeletionNotSupported
4139 ));
4140
4141 let result = Schema::new_default(KnnIndex::Hnsw)
4143 .create_index(
4144 Some("sparse"),
4145 IndexConfig::SparseVector(SparseVectorIndexConfig {
4146 embedding_function: None,
4147 source_key: None,
4148 bm25: None,
4149 }),
4150 )
4151 .expect("create should succeed")
4152 .delete_index(
4153 Some("sparse"),
4154 IndexConfig::SparseVector(SparseVectorIndexConfig {
4155 embedding_function: None,
4156 source_key: None,
4157 bm25: None,
4158 }),
4159 );
4160 assert!(result.is_err());
4161 assert!(matches!(
4162 result.unwrap_err(),
4163 SchemaBuilderError::SparseVectorIndexDeletionNotSupported
4164 ));
4165 }
4166
4167 #[test]
4168 fn test_builder_pattern_chaining() {
4169 let schema = Schema::new_default(KnnIndex::Hnsw)
4171 .create_index(Some("tag1"), StringInvertedIndexConfig {}.into())
4172 .unwrap()
4173 .create_index(Some("tag2"), StringInvertedIndexConfig {}.into())
4174 .unwrap()
4175 .create_index(Some("tag3"), StringInvertedIndexConfig {}.into())
4176 .unwrap()
4177 .create_index(Some("count"), IntInvertedIndexConfig {}.into())
4178 .unwrap()
4179 .delete_index(Some("tag2"), StringInvertedIndexConfig {}.into())
4180 .unwrap()
4181 .create_index(Some("score"), FloatInvertedIndexConfig {}.into())
4182 .unwrap();
4183
4184 assert!(
4186 schema
4187 .keys
4188 .get("tag1")
4189 .unwrap()
4190 .string
4191 .as_ref()
4192 .unwrap()
4193 .string_inverted_index
4194 .as_ref()
4195 .unwrap()
4196 .enabled
4197 );
4198
4199 assert!(
4201 !schema
4202 .keys
4203 .get("tag2")
4204 .unwrap()
4205 .string
4206 .as_ref()
4207 .unwrap()
4208 .string_inverted_index
4209 .as_ref()
4210 .unwrap()
4211 .enabled
4212 );
4213
4214 assert!(
4216 schema
4217 .keys
4218 .get("tag3")
4219 .unwrap()
4220 .string
4221 .as_ref()
4222 .unwrap()
4223 .string_inverted_index
4224 .as_ref()
4225 .unwrap()
4226 .enabled
4227 );
4228
4229 assert!(
4231 schema
4232 .keys
4233 .get("count")
4234 .unwrap()
4235 .int
4236 .as_ref()
4237 .unwrap()
4238 .int_inverted_index
4239 .as_ref()
4240 .unwrap()
4241 .enabled
4242 );
4243
4244 assert!(
4246 schema
4247 .keys
4248 .get("score")
4249 .unwrap()
4250 .float
4251 .as_ref()
4252 .unwrap()
4253 .float_inverted_index
4254 .as_ref()
4255 .unwrap()
4256 .enabled
4257 );
4258 }
4259
4260 #[test]
4261 fn test_schema_default_matches_python() {
4262 let schema = Schema::default();
4264
4265 assert!(schema.defaults.string.is_some());
4271 let string = schema.defaults.string.as_ref().unwrap();
4272 assert!(!string.fts_index.as_ref().unwrap().enabled);
4273 assert!(string.string_inverted_index.as_ref().unwrap().enabled);
4274
4275 assert!(schema.defaults.float_list.is_some());
4277 let float_list = schema.defaults.float_list.as_ref().unwrap();
4278 assert!(!float_list.vector_index.as_ref().unwrap().enabled);
4279 let vector_config = &float_list.vector_index.as_ref().unwrap().config;
4280 assert_eq!(vector_config.space, None); assert_eq!(vector_config.hnsw, None); assert_eq!(vector_config.spann, None); assert_eq!(vector_config.source_key, None);
4284
4285 assert!(schema.defaults.sparse_vector.is_some());
4287 let sparse = schema.defaults.sparse_vector.as_ref().unwrap();
4288 assert!(!sparse.sparse_vector_index.as_ref().unwrap().enabled);
4289
4290 assert!(schema.defaults.int.is_some());
4292 assert!(
4293 schema
4294 .defaults
4295 .int
4296 .as_ref()
4297 .unwrap()
4298 .int_inverted_index
4299 .as_ref()
4300 .unwrap()
4301 .enabled
4302 );
4303
4304 assert!(schema.defaults.float.is_some());
4306 assert!(
4307 schema
4308 .defaults
4309 .float
4310 .as_ref()
4311 .unwrap()
4312 .float_inverted_index
4313 .as_ref()
4314 .unwrap()
4315 .enabled
4316 );
4317
4318 assert!(schema.defaults.boolean.is_some());
4320 assert!(
4321 schema
4322 .defaults
4323 .boolean
4324 .as_ref()
4325 .unwrap()
4326 .bool_inverted_index
4327 .as_ref()
4328 .unwrap()
4329 .enabled
4330 );
4331
4332 assert!(schema.keys.contains_key(DOCUMENT_KEY));
4338 let doc = schema.keys.get(DOCUMENT_KEY).unwrap();
4339 assert!(doc.string.is_some());
4340 assert!(
4341 doc.string
4342 .as_ref()
4343 .unwrap()
4344 .fts_index
4345 .as_ref()
4346 .unwrap()
4347 .enabled
4348 );
4349 assert!(
4350 !doc.string
4351 .as_ref()
4352 .unwrap()
4353 .string_inverted_index
4354 .as_ref()
4355 .unwrap()
4356 .enabled
4357 );
4358
4359 assert!(schema.keys.contains_key(EMBEDDING_KEY));
4361 let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
4362 assert!(embedding.float_list.is_some());
4363 let vec_idx = embedding
4364 .float_list
4365 .as_ref()
4366 .unwrap()
4367 .vector_index
4368 .as_ref()
4369 .unwrap();
4370 assert!(vec_idx.enabled);
4371 assert_eq!(vec_idx.config.source_key, Some(DOCUMENT_KEY.to_string()));
4372 assert_eq!(vec_idx.config.space, None); assert_eq!(vec_idx.config.hnsw, None); assert_eq!(vec_idx.config.spann, None); assert_eq!(schema.keys.len(), 2);
4378 }
4379
4380 #[test]
4381 fn test_schema_default_works_with_builder() {
4382 let schema = Schema::default()
4384 .create_index(Some("category"), StringInvertedIndexConfig {}.into())
4385 .expect("should succeed");
4386
4387 assert!(schema.keys.contains_key("category"));
4389 assert!(schema.keys.contains_key(DOCUMENT_KEY));
4390 assert!(schema.keys.contains_key(EMBEDDING_KEY));
4391 assert_eq!(schema.keys.len(), 3);
4392 }
4393}