1use chroma_error::{ChromaError, ErrorCodes};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use thiserror::Error;
5use validator::Validate;
6
7use crate::collection_configuration::{
8 EmbeddingFunctionConfiguration, InternalCollectionConfiguration, VectorIndexConfiguration,
9};
10use crate::hnsw_configuration::Space;
11use crate::metadata::{MetadataComparison, MetadataValueType, Where};
12use crate::operator::QueryVector;
13use crate::{
14 default_batch_size, default_construction_ef, default_construction_ef_spann,
15 default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
16 default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
17 default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
18 default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
19 default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
20 default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor,
21 InternalSpannConfiguration, KnnIndex,
22};
23
24impl ChromaError for SchemaError {
25 fn code(&self) -> ErrorCodes {
26 ErrorCodes::Internal
27 }
28}
29
/// Errors raised while validating, merging, or reconciling a `Schema`.
#[derive(Debug, Error)]
pub enum SchemaError {
    /// No index configuration exists for the given metadata key and value type.
    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
    MissingIndexConfiguration { key: String, value_type: String },
    /// Schema reconciliation failed; `reason` carries the details.
    #[error("Schema reconciliation failed: {reason}")]
    InvalidSchema { reason: String },
    /// Both a collection configuration and a schema were supplied.
    #[error("Cannot set both collection config and schema simultaneously")]
    ConfigAndSchemaConflict,
    /// Two schemas with different `defaults` were asked to merge.
    #[error("Cannot merge schemas with differing defaults")]
    DefaultsMismatch,
    /// Two definitions of the same setting disagree; `context` names the setting.
    #[error("Conflicting configuration for {context}")]
    ConfigurationConflict { context: String },
    /// The HNSW configuration failed validation.
    #[error("Invalid HNSW configuration: {0}")]
    InvalidHnswConfig(validator::ValidationErrors),
    /// The SPANN configuration failed validation.
    #[error("Invalid SPANN configuration: {0}")]
    InvalidSpannConfig(validator::ValidationErrors),
    /// A schema builder error, displayed exactly as the wrapped error.
    #[error(transparent)]
    Builder(#[from] SchemaBuilderError),
}
49
/// Errors describing index create/delete requests that the schema builder rejects.
#[derive(Debug, Error)]
pub enum SchemaBuilderError {
    /// A vector index was requested on a specific key instead of globally.
    #[error("Vector index must be configured globally using create_index(None, config), not on specific key '{key}'")]
    VectorIndexMustBeGlobal { key: String },
    /// An FTS index was requested on a specific key instead of globally.
    #[error("FTS index must be configured globally using create_index(None, config), not on specific key '{key}'")]
    FtsIndexMustBeGlobal { key: String },
    /// The request targeted a system-managed special key.
    #[error("Cannot modify special key '{key}' - it is managed automatically by the system. To customize vector search, modify the global vector config instead.")]
    SpecialKeyModificationNotAllowed { key: String },
    /// A sparse vector index was requested without naming a key.
    #[error("Sparse vector index requires a specific key. Use create_index(Some(\"key_name\"), config) instead of create_index(None, config)")]
    SparseVectorRequiresKey,
    /// A second sparse vector index was requested; `existing_key` already has one.
    #[error("Only one sparse vector index allowed per collection. Key '{existing_key}' already has a sparse vector index. Remove it first or use that key.")]
    MultipleSparseVectorIndexes { existing_key: String },
    /// Deleting the vector index was requested, which is not supported.
    #[error("Vector index deletion not supported. The vector index is always enabled on #embedding. To disable vector search, disable the collection instead.")]
    VectorIndexDeletionNotSupported,
    /// Deleting the FTS index was requested, which is not supported.
    #[error("FTS index deletion not supported. The FTS index is always enabled on #document. To disable full-text search, use a different collection without FTS.")]
    FtsIndexDeletionNotSupported,
    /// Deleting a sparse vector index was requested, which is not supported yet.
    #[error("Sparse vector index deletion not supported yet. Sparse vector indexes cannot be removed once created.")]
    SparseVectorIndexDeletionNotSupported,
}
69
/// Errors raised when a metadata filter is validated against a schema.
#[derive(Debug, Error)]
pub enum FilterValidationError {
    /// The filter references a key / value type whose indexing is disabled.
    #[error(
        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
    )]
    IndexingDisabled {
        key: String,
        value_type: MetadataValueType,
    },
    /// An underlying schema error, displayed exactly as the wrapped error.
    #[error(transparent)]
    Schema(#[from] SchemaError),
}
82
// Every schema builder error is reported with the same error code.
impl ChromaError for SchemaBuilderError {
    /// Returns `ErrorCodes::InvalidArgument` for all variants.
    fn code(&self) -> ErrorCodes {
        ErrorCodes::InvalidArgument
    }
}
88
89impl ChromaError for FilterValidationError {
90 fn code(&self) -> ErrorCodes {
91 match self {
92 FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
93 FilterValidationError::Schema(_) => ErrorCodes::Internal,
94 }
95 }
96}
97
// Value type names. These strings equal the serde names of the `ValueTypes` fields.
pub const STRING_VALUE_NAME: &str = "string";
pub const INT_VALUE_NAME: &str = "int";
pub const BOOL_VALUE_NAME: &str = "bool";
pub const FLOAT_VALUE_NAME: &str = "float";
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// Index names. These strings equal the serde names of the index fields on the
// per-value-type structs (`StringValueType`, `FloatListValueType`, ...).
pub const FTS_INDEX_NAME: &str = "fts_index";
pub const VECTOR_INDEX_NAME: &str = "vector_index";
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

// Special keys that the default schemas register explicit overrides for.
pub const DOCUMENT_KEY: &str = "#document";
pub const EMBEDDING_KEY: &str = "#embedding";
123
/// Index configuration: a set of defaults per value type plus per-key entries.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct Schema {
    /// Index settings for each value type that is not overridden per key.
    pub defaults: ValueTypes,
    /// Index settings keyed by name; serialized as `keys`, and `key_overrides`
    /// is also accepted when deserializing.
    #[serde(rename = "keys", alias = "key_overrides")]
    pub keys: HashMap<String, ValueTypes>,
}
142
143impl Default for Schema {
144 fn default() -> Self {
161 let defaults = ValueTypes {
163 string: Some(StringValueType {
164 fts_index: Some(FtsIndexType {
165 enabled: false,
166 config: FtsIndexConfig {},
167 }),
168 string_inverted_index: Some(StringInvertedIndexType {
169 enabled: true,
170 config: StringInvertedIndexConfig {},
171 }),
172 }),
173 float_list: Some(FloatListValueType {
174 vector_index: Some(VectorIndexType {
175 enabled: false,
176 config: VectorIndexConfig {
177 space: None, embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
179 source_key: None,
180 hnsw: None, spann: None, },
183 }),
184 }),
185 sparse_vector: Some(SparseVectorValueType {
186 sparse_vector_index: Some(SparseVectorIndexType {
187 enabled: false,
188 config: SparseVectorIndexConfig {
189 embedding_function: None,
190 source_key: None,
191 bm25: None,
192 },
193 }),
194 }),
195 int: Some(IntValueType {
196 int_inverted_index: Some(IntInvertedIndexType {
197 enabled: true,
198 config: IntInvertedIndexConfig {},
199 }),
200 }),
201 float: Some(FloatValueType {
202 float_inverted_index: Some(FloatInvertedIndexType {
203 enabled: true,
204 config: FloatInvertedIndexConfig {},
205 }),
206 }),
207 boolean: Some(BoolValueType {
208 bool_inverted_index: Some(BoolInvertedIndexType {
209 enabled: true,
210 config: BoolInvertedIndexConfig {},
211 }),
212 }),
213 };
214
215 let mut keys = HashMap::new();
217
218 keys.insert(
220 DOCUMENT_KEY.to_string(),
221 ValueTypes {
222 string: Some(StringValueType {
223 fts_index: Some(FtsIndexType {
224 enabled: true,
225 config: FtsIndexConfig {},
226 }),
227 string_inverted_index: Some(StringInvertedIndexType {
228 enabled: false,
229 config: StringInvertedIndexConfig {},
230 }),
231 }),
232 ..Default::default()
233 },
234 );
235
236 keys.insert(
238 EMBEDDING_KEY.to_string(),
239 ValueTypes {
240 float_list: Some(FloatListValueType {
241 vector_index: Some(VectorIndexType {
242 enabled: true,
243 config: VectorIndexConfig {
244 space: None, embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
246 source_key: Some(DOCUMENT_KEY.to_string()),
247 hnsw: None, spann: None, },
250 }),
251 }),
252 ..Default::default()
253 },
254 );
255
256 Schema { defaults, keys }
257 }
258}
259
260pub fn is_embedding_function_default(
261 embedding_function: &Option<EmbeddingFunctionConfiguration>,
262) -> bool {
263 match embedding_function {
264 None => true,
265 Some(embedding_function) => embedding_function.is_default(),
266 }
267}
268
269pub fn is_space_default(space: &Option<Space>) -> bool {
271 match space {
272 None => true, Some(s) => *s == default_space(), }
275}
276
277pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
279 hnsw_config.ef_construction == Some(default_construction_ef())
280 && hnsw_config.ef_search == Some(default_search_ef())
281 && hnsw_config.max_neighbors == Some(default_m())
282 && hnsw_config.num_threads == Some(default_num_threads())
283 && hnsw_config.batch_size == Some(default_batch_size())
284 && hnsw_config.sync_threshold == Some(default_sync_threshold())
285 && hnsw_config.resize_factor == Some(default_resize_factor())
286}
287
/// Index settings grouped by value type.
///
/// Every field is optional and is omitted from serialized output when `None`.
/// Each field also accepts a `#`-prefixed alias when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct ValueTypes {
    /// Settings for string values.
    #[serde(
        rename = "string",
        alias = "#string",
        skip_serializing_if = "Option::is_none"
    )] pub string: Option<StringValueType>,

    /// Settings for float-list values (holds the vector index).
    #[serde(
        rename = "float_list",
        alias = "#float_list",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_list: Option<FloatListValueType>,

    /// Settings for sparse vector values.
    #[serde(
        rename = "sparse_vector",
        alias = "#sparse_vector",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector: Option<SparseVectorValueType>,

    /// Settings for integer values.
    #[serde(
        rename = "int",
        alias = "#int",
        skip_serializing_if = "Option::is_none"
    )] pub int: Option<IntValueType>,

    /// Settings for float values.
    #[serde(
        rename = "float",
        alias = "#float",
        skip_serializing_if = "Option::is_none"
    )] pub float: Option<FloatValueType>,

    /// Settings for boolean values; serialized under the name `bool`.
    #[serde(
        rename = "bool",
        alias = "#bool",
        skip_serializing_if = "Option::is_none"
    )] pub boolean: Option<BoolValueType>,
}
341
/// Indexes that can be configured for string values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringValueType {
    /// Full-text search index; `$fts_index` is accepted as an alias on input.
    #[serde(
        rename = "fts_index",
        alias = "$fts_index",
        skip_serializing_if = "Option::is_none"
    )] pub fts_index: Option<FtsIndexType>,

    /// String inverted index; `$string_inverted_index` is accepted as an alias on input.
    #[serde(
        rename = "string_inverted_index", alias = "$string_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub string_inverted_index: Option<StringInvertedIndexType>,
}
360
/// Indexes that can be configured for float-list values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatListValueType {
    /// Vector index; `$vector_index` is accepted as an alias on input.
    #[serde(
        rename = "vector_index",
        alias = "$vector_index",
        skip_serializing_if = "Option::is_none"
    )] pub vector_index: Option<VectorIndexType>,
}
372
/// Indexes that can be configured for sparse vector values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorValueType {
    /// Sparse vector index; `$sparse_vector_index` is accepted as an alias on input.
    #[serde(
        rename = "sparse_vector_index", alias = "$sparse_vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector_index: Option<SparseVectorIndexType>,
}
384
/// Indexes that can be configured for integer values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntValueType {
    /// Integer inverted index; `$int_inverted_index` is accepted as an alias on input.
    #[serde(
        rename = "int_inverted_index",
        alias = "$int_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub int_inverted_index: Option<IntInvertedIndexType>,
}
397
/// Indexes that can be configured for float values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatValueType {
    /// Float inverted index; `$float_inverted_index` is accepted as an alias on input.
    #[serde(
        rename = "float_inverted_index", alias = "$float_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_inverted_index: Option<FloatInvertedIndexType>,
}
409
/// Indexes that can be configured for boolean values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolValueType {
    /// Boolean inverted index; `$bool_inverted_index` is accepted as an alias on input.
    #[serde(
        rename = "bool_inverted_index", alias = "$bool_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub bool_inverted_index: Option<BoolInvertedIndexType>,
}
421
/// Full-text search index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FtsIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: FtsIndexConfig,
}
429
/// Vector index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct VectorIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: VectorIndexConfig,
}
436
/// Sparse vector index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: SparseVectorIndexConfig,
}
443
/// String inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: StringInvertedIndexConfig,
}
450
/// Integer inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: IntInvertedIndexConfig,
}
457
/// Float inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: FloatInvertedIndexConfig,
}
464
/// Boolean inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: BoolInvertedIndexConfig,
}
471
472impl Schema {
473 pub fn new_default(default_knn_index: KnnIndex) -> Self {
475 let vector_config = VectorIndexType {
477 enabled: false,
478 config: VectorIndexConfig {
479 space: Some(default_space()),
480 embedding_function: None,
481 source_key: None,
482 hnsw: match default_knn_index {
483 KnnIndex::Hnsw => Some(HnswIndexConfig {
484 ef_construction: Some(default_construction_ef()),
485 max_neighbors: Some(default_m()),
486 ef_search: Some(default_search_ef()),
487 num_threads: Some(default_num_threads()),
488 batch_size: Some(default_batch_size()),
489 sync_threshold: Some(default_sync_threshold()),
490 resize_factor: Some(default_resize_factor()),
491 }),
492 KnnIndex::Spann => None,
493 },
494 spann: match default_knn_index {
495 KnnIndex::Hnsw => None,
496 KnnIndex::Spann => Some(SpannIndexConfig {
497 search_nprobe: Some(default_search_nprobe()),
498 search_rng_factor: Some(default_search_rng_factor()),
499 search_rng_epsilon: Some(default_search_rng_epsilon()),
500 nreplica_count: Some(default_nreplica_count()),
501 write_rng_factor: Some(default_write_rng_factor()),
502 write_rng_epsilon: Some(default_write_rng_epsilon()),
503 split_threshold: Some(default_split_threshold()),
504 num_samples_kmeans: Some(default_num_samples_kmeans()),
505 initial_lambda: Some(default_initial_lambda()),
506 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
507 merge_threshold: Some(default_merge_threshold()),
508 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
509 write_nprobe: Some(default_write_nprobe()),
510 ef_construction: Some(default_construction_ef_spann()),
511 ef_search: Some(default_search_ef_spann()),
512 max_neighbors: Some(default_m_spann()),
513 }),
514 },
515 },
516 };
517
518 let defaults = ValueTypes {
520 string: Some(StringValueType {
521 string_inverted_index: Some(StringInvertedIndexType {
522 enabled: true,
523 config: StringInvertedIndexConfig {},
524 }),
525 fts_index: Some(FtsIndexType {
526 enabled: false,
527 config: FtsIndexConfig {},
528 }),
529 }),
530 float: Some(FloatValueType {
531 float_inverted_index: Some(FloatInvertedIndexType {
532 enabled: true,
533 config: FloatInvertedIndexConfig {},
534 }),
535 }),
536 int: Some(IntValueType {
537 int_inverted_index: Some(IntInvertedIndexType {
538 enabled: true,
539 config: IntInvertedIndexConfig {},
540 }),
541 }),
542 boolean: Some(BoolValueType {
543 bool_inverted_index: Some(BoolInvertedIndexType {
544 enabled: true,
545 config: BoolInvertedIndexConfig {},
546 }),
547 }),
548 float_list: Some(FloatListValueType {
549 vector_index: Some(vector_config),
550 }),
551 sparse_vector: Some(SparseVectorValueType {
552 sparse_vector_index: Some(SparseVectorIndexType {
553 enabled: false,
554 config: SparseVectorIndexConfig {
555 embedding_function: Some(EmbeddingFunctionConfiguration::Unknown),
556 source_key: None,
557 bm25: Some(false),
558 },
559 }),
560 }),
561 };
562
563 let mut keys = HashMap::new();
565
566 let embedding_defaults = ValueTypes {
568 float_list: Some(FloatListValueType {
569 vector_index: Some(VectorIndexType {
570 enabled: true,
571 config: VectorIndexConfig {
572 space: Some(default_space()),
573 embedding_function: None,
574 source_key: Some(DOCUMENT_KEY.to_string()),
575 hnsw: match default_knn_index {
576 KnnIndex::Hnsw => Some(HnswIndexConfig {
577 ef_construction: Some(default_construction_ef()),
578 max_neighbors: Some(default_m()),
579 ef_search: Some(default_search_ef()),
580 num_threads: Some(default_num_threads()),
581 batch_size: Some(default_batch_size()),
582 sync_threshold: Some(default_sync_threshold()),
583 resize_factor: Some(default_resize_factor()),
584 }),
585 KnnIndex::Spann => None,
586 },
587 spann: match default_knn_index {
588 KnnIndex::Hnsw => None,
589 KnnIndex::Spann => Some(SpannIndexConfig {
590 search_nprobe: Some(default_search_nprobe()),
591 search_rng_factor: Some(default_search_rng_factor()),
592 search_rng_epsilon: Some(default_search_rng_epsilon()),
593 nreplica_count: Some(default_nreplica_count()),
594 write_rng_factor: Some(default_write_rng_factor()),
595 write_rng_epsilon: Some(default_write_rng_epsilon()),
596 split_threshold: Some(default_split_threshold()),
597 num_samples_kmeans: Some(default_num_samples_kmeans()),
598 initial_lambda: Some(default_initial_lambda()),
599 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
600 merge_threshold: Some(default_merge_threshold()),
601 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
602 write_nprobe: Some(default_write_nprobe()),
603 ef_construction: Some(default_construction_ef_spann()),
604 ef_search: Some(default_search_ef_spann()),
605 max_neighbors: Some(default_m_spann()),
606 }),
607 },
608 },
609 }),
610 }),
611 ..Default::default()
612 };
613 keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
614
615 let document_defaults = ValueTypes {
617 string: Some(StringValueType {
618 fts_index: Some(FtsIndexType {
619 enabled: true,
620 config: FtsIndexConfig {},
621 }),
622 string_inverted_index: Some(StringInvertedIndexType {
623 enabled: false,
624 config: StringInvertedIndexConfig {},
625 }),
626 }),
627 ..Default::default()
628 };
629 keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
630
631 Schema { defaults, keys }
632 }
633
634 pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
635 let to_internal = |vector_index: &VectorIndexType| {
636 let space = vector_index.config.space.clone();
637 vector_index
638 .config
639 .spann
640 .clone()
641 .map(|config| (space.as_ref(), &config).into())
642 };
643
644 self.keys
645 .get(EMBEDDING_KEY)
646 .and_then(|value_types| value_types.float_list.as_ref())
647 .and_then(|float_list| float_list.vector_index.as_ref())
648 .and_then(to_internal)
649 .or_else(|| {
650 self.defaults
651 .float_list
652 .as_ref()
653 .and_then(|float_list| float_list.vector_index.as_ref())
654 .and_then(to_internal)
655 })
656 }
657
658 pub fn reconcile_with_defaults(user_schema: Option<&Schema>) -> Result<Self, SchemaError> {
665 let default_schema = Schema::new_default(KnnIndex::Spann);
666
667 match user_schema {
668 Some(user) => {
669 let merged_defaults =
671 Self::merge_value_types(&default_schema.defaults, &user.defaults)?;
672
673 let mut merged_keys = default_schema.keys.clone();
675 for (key, user_value_types) in &user.keys {
676 if let Some(default_value_types) = merged_keys.get(key) {
677 let merged_value_types =
679 Self::merge_value_types(default_value_types, user_value_types)?;
680 merged_keys.insert(key.clone(), merged_value_types);
681 } else {
682 merged_keys.insert(key.clone(), user_value_types.clone());
684 }
685 }
686
687 Ok(Schema {
688 defaults: merged_defaults,
689 keys: merged_keys,
690 })
691 }
692 None => Ok(default_schema),
693 }
694 }
695
696 pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
698 if self.defaults != other.defaults {
699 return Err(SchemaError::DefaultsMismatch);
700 }
701
702 let mut keys = self.keys.clone();
703
704 for (key, other_value_types) in &other.keys {
705 if let Some(existing) = keys.get(key).cloned() {
706 let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
707 keys.insert(key.clone(), merged);
708 } else {
709 keys.insert(key.clone(), other_value_types.clone());
710 }
711 }
712
713 Ok(Schema {
714 defaults: self.defaults.clone(),
715 keys,
716 })
717 }
718
719 fn merge_override_value_types(
720 key: &str,
721 left: &ValueTypes,
722 right: &ValueTypes,
723 ) -> Result<ValueTypes, SchemaError> {
724 Ok(ValueTypes {
725 string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
726 float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
727 int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
728 boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
729 float_list: Self::merge_float_list_override(
730 key,
731 left.float_list.as_ref(),
732 right.float_list.as_ref(),
733 )?,
734 sparse_vector: Self::merge_sparse_vector_override(
735 key,
736 left.sparse_vector.as_ref(),
737 right.sparse_vector.as_ref(),
738 )?,
739 })
740 }
741
742 fn merge_string_override(
743 key: &str,
744 left: Option<&StringValueType>,
745 right: Option<&StringValueType>,
746 ) -> Result<Option<StringValueType>, SchemaError> {
747 match (left, right) {
748 (Some(l), Some(r)) => Ok(Some(StringValueType {
749 string_inverted_index: Self::merge_index_or_error(
750 l.string_inverted_index.as_ref(),
751 r.string_inverted_index.as_ref(),
752 &format!("key '{key}' string.string_inverted_index"),
753 )?,
754 fts_index: Self::merge_index_or_error(
755 l.fts_index.as_ref(),
756 r.fts_index.as_ref(),
757 &format!("key '{key}' string.fts_index"),
758 )?,
759 })),
760 (Some(l), None) => Ok(Some(l.clone())),
761 (None, Some(r)) => Ok(Some(r.clone())),
762 (None, None) => Ok(None),
763 }
764 }
765
766 fn merge_float_override(
767 key: &str,
768 left: Option<&FloatValueType>,
769 right: Option<&FloatValueType>,
770 ) -> Result<Option<FloatValueType>, SchemaError> {
771 match (left, right) {
772 (Some(l), Some(r)) => Ok(Some(FloatValueType {
773 float_inverted_index: Self::merge_index_or_error(
774 l.float_inverted_index.as_ref(),
775 r.float_inverted_index.as_ref(),
776 &format!("key '{key}' float.float_inverted_index"),
777 )?,
778 })),
779 (Some(l), None) => Ok(Some(l.clone())),
780 (None, Some(r)) => Ok(Some(r.clone())),
781 (None, None) => Ok(None),
782 }
783 }
784
785 fn merge_int_override(
786 key: &str,
787 left: Option<&IntValueType>,
788 right: Option<&IntValueType>,
789 ) -> Result<Option<IntValueType>, SchemaError> {
790 match (left, right) {
791 (Some(l), Some(r)) => Ok(Some(IntValueType {
792 int_inverted_index: Self::merge_index_or_error(
793 l.int_inverted_index.as_ref(),
794 r.int_inverted_index.as_ref(),
795 &format!("key '{key}' int.int_inverted_index"),
796 )?,
797 })),
798 (Some(l), None) => Ok(Some(l.clone())),
799 (None, Some(r)) => Ok(Some(r.clone())),
800 (None, None) => Ok(None),
801 }
802 }
803
804 fn merge_bool_override(
805 key: &str,
806 left: Option<&BoolValueType>,
807 right: Option<&BoolValueType>,
808 ) -> Result<Option<BoolValueType>, SchemaError> {
809 match (left, right) {
810 (Some(l), Some(r)) => Ok(Some(BoolValueType {
811 bool_inverted_index: Self::merge_index_or_error(
812 l.bool_inverted_index.as_ref(),
813 r.bool_inverted_index.as_ref(),
814 &format!("key '{key}' bool.bool_inverted_index"),
815 )?,
816 })),
817 (Some(l), None) => Ok(Some(l.clone())),
818 (None, Some(r)) => Ok(Some(r.clone())),
819 (None, None) => Ok(None),
820 }
821 }
822
823 fn merge_float_list_override(
824 key: &str,
825 left: Option<&FloatListValueType>,
826 right: Option<&FloatListValueType>,
827 ) -> Result<Option<FloatListValueType>, SchemaError> {
828 match (left, right) {
829 (Some(l), Some(r)) => Ok(Some(FloatListValueType {
830 vector_index: Self::merge_index_or_error(
831 l.vector_index.as_ref(),
832 r.vector_index.as_ref(),
833 &format!("key '{key}' float_list.vector_index"),
834 )?,
835 })),
836 (Some(l), None) => Ok(Some(l.clone())),
837 (None, Some(r)) => Ok(Some(r.clone())),
838 (None, None) => Ok(None),
839 }
840 }
841
842 fn merge_sparse_vector_override(
843 key: &str,
844 left: Option<&SparseVectorValueType>,
845 right: Option<&SparseVectorValueType>,
846 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
847 match (left, right) {
848 (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
849 sparse_vector_index: Self::merge_index_or_error(
850 l.sparse_vector_index.as_ref(),
851 r.sparse_vector_index.as_ref(),
852 &format!("key '{key}' sparse_vector.sparse_vector_index"),
853 )?,
854 })),
855 (Some(l), None) => Ok(Some(l.clone())),
856 (None, Some(r)) => Ok(Some(r.clone())),
857 (None, None) => Ok(None),
858 }
859 }
860
861 fn merge_index_or_error<T: Clone + PartialEq>(
862 left: Option<&T>,
863 right: Option<&T>,
864 context: &str,
865 ) -> Result<Option<T>, SchemaError> {
866 match (left, right) {
867 (Some(l), Some(r)) => {
868 if l == r {
869 Ok(Some(l.clone()))
870 } else {
871 Err(SchemaError::ConfigurationConflict {
872 context: context.to_string(),
873 })
874 }
875 }
876 (Some(l), None) => Ok(Some(l.clone())),
877 (None, Some(r)) => Ok(Some(r.clone())),
878 (None, None) => Ok(None),
879 }
880 }
881
882 fn merge_value_types(
885 default: &ValueTypes,
886 user: &ValueTypes,
887 ) -> Result<ValueTypes, SchemaError> {
888 let float_list =
890 Self::merge_float_list_type(default.float_list.as_ref(), user.float_list.as_ref());
891
892 if let Some(ref fl) = float_list {
894 Self::validate_float_list_value_type(fl)?;
895 }
896
897 Ok(ValueTypes {
898 string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
899 float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
900 int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
901 boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
902 float_list,
903 sparse_vector: Self::merge_sparse_vector_type(
904 default.sparse_vector.as_ref(),
905 user.sparse_vector.as_ref(),
906 )?,
907 })
908 }
909
910 fn merge_string_type(
912 default: Option<&StringValueType>,
913 user: Option<&StringValueType>,
914 ) -> Result<Option<StringValueType>, SchemaError> {
915 match (default, user) {
916 (Some(default), Some(user)) => Ok(Some(StringValueType {
917 string_inverted_index: Self::merge_string_inverted_index_type(
918 default.string_inverted_index.as_ref(),
919 user.string_inverted_index.as_ref(),
920 )?,
921 fts_index: Self::merge_fts_index_type(
922 default.fts_index.as_ref(),
923 user.fts_index.as_ref(),
924 )?,
925 })),
926 (Some(default), None) => Ok(Some(default.clone())),
927 (None, Some(user)) => Ok(Some(user.clone())),
928 (None, None) => Ok(None),
929 }
930 }
931
932 fn merge_float_type(
934 default: Option<&FloatValueType>,
935 user: Option<&FloatValueType>,
936 ) -> Result<Option<FloatValueType>, SchemaError> {
937 match (default, user) {
938 (Some(default), Some(user)) => Ok(Some(FloatValueType {
939 float_inverted_index: Self::merge_float_inverted_index_type(
940 default.float_inverted_index.as_ref(),
941 user.float_inverted_index.as_ref(),
942 )?,
943 })),
944 (Some(default), None) => Ok(Some(default.clone())),
945 (None, Some(user)) => Ok(Some(user.clone())),
946 (None, None) => Ok(None),
947 }
948 }
949
950 fn merge_int_type(
952 default: Option<&IntValueType>,
953 user: Option<&IntValueType>,
954 ) -> Result<Option<IntValueType>, SchemaError> {
955 match (default, user) {
956 (Some(default), Some(user)) => Ok(Some(IntValueType {
957 int_inverted_index: Self::merge_int_inverted_index_type(
958 default.int_inverted_index.as_ref(),
959 user.int_inverted_index.as_ref(),
960 )?,
961 })),
962 (Some(default), None) => Ok(Some(default.clone())),
963 (None, Some(user)) => Ok(Some(user.clone())),
964 (None, None) => Ok(None),
965 }
966 }
967
968 fn merge_bool_type(
970 default: Option<&BoolValueType>,
971 user: Option<&BoolValueType>,
972 ) -> Result<Option<BoolValueType>, SchemaError> {
973 match (default, user) {
974 (Some(default), Some(user)) => Ok(Some(BoolValueType {
975 bool_inverted_index: Self::merge_bool_inverted_index_type(
976 default.bool_inverted_index.as_ref(),
977 user.bool_inverted_index.as_ref(),
978 )?,
979 })),
980 (Some(default), None) => Ok(Some(default.clone())),
981 (None, Some(user)) => Ok(Some(user.clone())),
982 (None, None) => Ok(None),
983 }
984 }
985
986 fn merge_float_list_type(
988 default: Option<&FloatListValueType>,
989 user: Option<&FloatListValueType>,
990 ) -> Option<FloatListValueType> {
991 match (default, user) {
992 (Some(default), Some(user)) => Some(FloatListValueType {
993 vector_index: Self::merge_vector_index_type(
994 default.vector_index.as_ref(),
995 user.vector_index.as_ref(),
996 ),
997 }),
998 (Some(default), None) => Some(default.clone()),
999 (None, Some(user)) => Some(user.clone()),
1000 (None, None) => None,
1001 }
1002 }
1003
1004 fn merge_sparse_vector_type(
1006 default: Option<&SparseVectorValueType>,
1007 user: Option<&SparseVectorValueType>,
1008 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1009 match (default, user) {
1010 (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
1011 sparse_vector_index: Self::merge_sparse_vector_index_type(
1012 default.sparse_vector_index.as_ref(),
1013 user.sparse_vector_index.as_ref(),
1014 )?,
1015 })),
1016 (Some(default), None) => Ok(Some(default.clone())),
1017 (None, Some(user)) => Ok(Some(user.clone())),
1018 (None, None) => Ok(None),
1019 }
1020 }
1021
1022 fn merge_string_inverted_index_type(
1024 default: Option<&StringInvertedIndexType>,
1025 user: Option<&StringInvertedIndexType>,
1026 ) -> Result<Option<StringInvertedIndexType>, SchemaError> {
1027 match (default, user) {
1028 (Some(_default), Some(user)) => {
1029 Ok(Some(StringInvertedIndexType {
1030 enabled: user.enabled, config: user.config.clone(), }))
1033 }
1034 (Some(default), None) => Ok(Some(default.clone())),
1035 (None, Some(user)) => Ok(Some(user.clone())),
1036 (None, None) => Ok(None),
1037 }
1038 }
1039
1040 fn merge_fts_index_type(
1041 default: Option<&FtsIndexType>,
1042 user: Option<&FtsIndexType>,
1043 ) -> Result<Option<FtsIndexType>, SchemaError> {
1044 match (default, user) {
1045 (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
1046 enabled: user.enabled,
1047 config: user.config.clone(),
1048 })),
1049 (Some(default), None) => Ok(Some(default.clone())),
1050 (None, Some(user)) => Ok(Some(user.clone())),
1051 (None, None) => Ok(None),
1052 }
1053 }
1054
1055 fn merge_float_inverted_index_type(
1056 default: Option<&FloatInvertedIndexType>,
1057 user: Option<&FloatInvertedIndexType>,
1058 ) -> Result<Option<FloatInvertedIndexType>, SchemaError> {
1059 match (default, user) {
1060 (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
1061 enabled: user.enabled,
1062 config: user.config.clone(),
1063 })),
1064 (Some(default), None) => Ok(Some(default.clone())),
1065 (None, Some(user)) => Ok(Some(user.clone())),
1066 (None, None) => Ok(None),
1067 }
1068 }
1069
1070 fn merge_int_inverted_index_type(
1071 default: Option<&IntInvertedIndexType>,
1072 user: Option<&IntInvertedIndexType>,
1073 ) -> Result<Option<IntInvertedIndexType>, SchemaError> {
1074 match (default, user) {
1075 (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
1076 enabled: user.enabled,
1077 config: user.config.clone(),
1078 })),
1079 (Some(default), None) => Ok(Some(default.clone())),
1080 (None, Some(user)) => Ok(Some(user.clone())),
1081 (None, None) => Ok(None),
1082 }
1083 }
1084
1085 fn merge_bool_inverted_index_type(
1086 default: Option<&BoolInvertedIndexType>,
1087 user: Option<&BoolInvertedIndexType>,
1088 ) -> Result<Option<BoolInvertedIndexType>, SchemaError> {
1089 match (default, user) {
1090 (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
1091 enabled: user.enabled,
1092 config: user.config.clone(),
1093 })),
1094 (Some(default), None) => Ok(Some(default.clone())),
1095 (None, Some(user)) => Ok(Some(user.clone())),
1096 (None, None) => Ok(None),
1097 }
1098 }
1099
1100 fn merge_vector_index_type(
1101 default: Option<&VectorIndexType>,
1102 user: Option<&VectorIndexType>,
1103 ) -> Option<VectorIndexType> {
1104 match (default, user) {
1105 (Some(default), Some(user)) => Some(VectorIndexType {
1106 enabled: user.enabled,
1107 config: Self::merge_vector_index_config(&default.config, &user.config),
1108 }),
1109 (Some(default), None) => Some(default.clone()),
1110 (None, Some(user)) => Some(user.clone()),
1111 (None, None) => None,
1112 }
1113 }
1114
1115 fn merge_sparse_vector_index_type(
1116 default: Option<&SparseVectorIndexType>,
1117 user: Option<&SparseVectorIndexType>,
1118 ) -> Result<Option<SparseVectorIndexType>, SchemaError> {
1119 match (default, user) {
1120 (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
1121 enabled: user.enabled,
1122 config: Self::merge_sparse_vector_index_config(&default.config, &user.config),
1123 })),
1124 (Some(default), None) => Ok(Some(default.clone())),
1125 (None, Some(user)) => Ok(Some(user.clone())),
1126 (None, None) => Ok(None),
1127 }
1128 }
1129
1130 fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), SchemaError> {
1133 if let Some(vector_index) = &float_list.vector_index {
1134 if let Some(hnsw) = &vector_index.config.hnsw {
1135 hnsw.validate().map_err(SchemaError::InvalidHnswConfig)?;
1136 }
1137 if let Some(spann) = &vector_index.config.spann {
1138 spann.validate().map_err(SchemaError::InvalidSpannConfig)?;
1139 }
1140 }
1141 Ok(())
1142 }
1143
1144 fn merge_vector_index_config(
1146 default: &VectorIndexConfig,
1147 user: &VectorIndexConfig,
1148 ) -> VectorIndexConfig {
1149 VectorIndexConfig {
1150 space: user.space.clone().or(default.space.clone()),
1151 embedding_function: user
1152 .embedding_function
1153 .clone()
1154 .or(default.embedding_function.clone()),
1155 source_key: user.source_key.clone().or(default.source_key.clone()),
1156 hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1157 spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
1158 }
1159 }
1160
1161 fn merge_sparse_vector_index_config(
1163 default: &SparseVectorIndexConfig,
1164 user: &SparseVectorIndexConfig,
1165 ) -> SparseVectorIndexConfig {
1166 SparseVectorIndexConfig {
1167 embedding_function: user
1168 .embedding_function
1169 .clone()
1170 .or(default.embedding_function.clone()),
1171 source_key: user.source_key.clone().or(default.source_key.clone()),
1172 bm25: user.bm25.or(default.bm25),
1173 }
1174 }
1175
1176 fn merge_hnsw_configs(
1178 default_hnsw: Option<&HnswIndexConfig>,
1179 user_hnsw: Option<&HnswIndexConfig>,
1180 ) -> Option<HnswIndexConfig> {
1181 match (default_hnsw, user_hnsw) {
1182 (Some(default), Some(user)) => Some(HnswIndexConfig {
1183 ef_construction: user.ef_construction.or(default.ef_construction),
1184 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1185 ef_search: user.ef_search.or(default.ef_search),
1186 num_threads: user.num_threads.or(default.num_threads),
1187 batch_size: user.batch_size.or(default.batch_size),
1188 sync_threshold: user.sync_threshold.or(default.sync_threshold),
1189 resize_factor: user.resize_factor.or(default.resize_factor),
1190 }),
1191 (Some(default), None) => Some(default.clone()),
1192 (None, Some(user)) => Some(user.clone()),
1193 (None, None) => None,
1194 }
1195 }
1196
1197 fn merge_spann_configs(
1199 default_spann: Option<&SpannIndexConfig>,
1200 user_spann: Option<&SpannIndexConfig>,
1201 ) -> Option<SpannIndexConfig> {
1202 match (default_spann, user_spann) {
1203 (Some(default), Some(user)) => Some(SpannIndexConfig {
1204 search_nprobe: user.search_nprobe.or(default.search_nprobe),
1205 search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1206 search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1207 nreplica_count: user.nreplica_count.or(default.nreplica_count),
1208 write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1209 write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1210 split_threshold: user.split_threshold.or(default.split_threshold),
1211 num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1212 initial_lambda: user.initial_lambda.or(default.initial_lambda),
1213 reassign_neighbor_count: user
1214 .reassign_neighbor_count
1215 .or(default.reassign_neighbor_count),
1216 merge_threshold: user.merge_threshold.or(default.merge_threshold),
1217 num_centers_to_merge_to: user
1218 .num_centers_to_merge_to
1219 .or(default.num_centers_to_merge_to),
1220 write_nprobe: user.write_nprobe.or(default.write_nprobe),
1221 ef_construction: user.ef_construction.or(default.ef_construction),
1222 ef_search: user.ef_search.or(default.ef_search),
1223 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1224 }),
1225 (Some(default), None) => Some(default.clone()),
1226 (None, Some(user)) => Some(user.clone()),
1227 (None, None) => None,
1228 }
1229 }
1230
1231 pub fn reconcile_with_collection_config(
1239 schema: &Schema,
1240 collection_config: &InternalCollectionConfiguration,
1241 ) -> Result<Schema, SchemaError> {
1242 if collection_config.is_default() {
1244 return Ok(schema.clone());
1246 }
1247
1248 Self::convert_collection_config_to_schema(collection_config)
1251 }
1252
1253 pub fn reconcile_schema_and_config(
1254 schema: Option<&Schema>,
1255 configuration: Option<&InternalCollectionConfiguration>,
1256 ) -> Result<Schema, SchemaError> {
1257 if let (Some(user_schema), Some(config)) = (schema, configuration) {
1259 if !user_schema.is_default() && !config.is_default() {
1260 return Err(SchemaError::ConfigAndSchemaConflict);
1261 }
1262 }
1263
1264 let reconciled_schema = Self::reconcile_with_defaults(schema)?;
1265 if let Some(config) = configuration {
1266 Self::reconcile_with_collection_config(&reconciled_schema, config)
1267 } else {
1268 Ok(reconciled_schema)
1269 }
1270 }
1271
1272 pub fn default_with_embedding_function(
1273 embedding_function: EmbeddingFunctionConfiguration,
1274 ) -> Schema {
1275 let mut schema = Schema::new_default(KnnIndex::Spann);
1276 if let Some(float_list) = &mut schema.defaults.float_list {
1277 if let Some(vector_index) = &mut float_list.vector_index {
1278 vector_index.config.embedding_function = Some(embedding_function.clone());
1279 }
1280 }
1281 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1282 if let Some(float_list) = &mut embedding_types.float_list {
1283 if let Some(vector_index) = &mut float_list.vector_index {
1284 vector_index.config.embedding_function = Some(embedding_function);
1285 }
1286 }
1287 }
1288 schema
1289 }
1290
1291 pub fn is_default(&self) -> bool {
1293 if !Self::is_value_types_default(&self.defaults) {
1295 return false;
1296 }
1297
1298 for key in self.keys.keys() {
1299 if key != EMBEDDING_KEY && key != DOCUMENT_KEY {
1300 return false;
1301 }
1302 }
1303
1304 if let Some(embedding_value) = self.keys.get(EMBEDDING_KEY) {
1306 if !Self::is_embedding_value_types_default(embedding_value) {
1307 return false;
1308 }
1309 }
1310
1311 if let Some(document_value) = self.keys.get(DOCUMENT_KEY) {
1313 if !Self::is_document_value_types_default(document_value) {
1314 return false;
1315 }
1316 }
1317
1318 true
1319 }
1320
1321 fn is_value_types_default(value_types: &ValueTypes) -> bool {
1323 if let Some(string) = &value_types.string {
1325 if let Some(string_inverted) = &string.string_inverted_index {
1326 if !string_inverted.enabled {
1327 return false;
1328 }
1329 }
1331 if let Some(fts) = &string.fts_index {
1332 if fts.enabled {
1333 return false;
1334 }
1335 }
1337 }
1338
1339 if let Some(float) = &value_types.float {
1341 if let Some(float_inverted) = &float.float_inverted_index {
1342 if !float_inverted.enabled {
1343 return false;
1344 }
1345 }
1347 }
1348
1349 if let Some(int) = &value_types.int {
1351 if let Some(int_inverted) = &int.int_inverted_index {
1352 if !int_inverted.enabled {
1353 return false;
1354 }
1355 }
1357 }
1358
1359 if let Some(boolean) = &value_types.boolean {
1361 if let Some(bool_inverted) = &boolean.bool_inverted_index {
1362 if !bool_inverted.enabled {
1363 return false;
1364 }
1365 }
1367 }
1368
1369 if let Some(float_list) = &value_types.float_list {
1371 if let Some(vector_index) = &float_list.vector_index {
1372 if vector_index.enabled {
1373 return false;
1374 }
1375 if vector_index.config.source_key.is_some() {
1378 return false;
1379 }
1380 match (&vector_index.config.hnsw, &vector_index.config.spann) {
1383 (Some(hnsw_config), None) => {
1384 if !hnsw_config.is_default() {
1385 return false;
1386 }
1387 }
1388 (None, Some(spann_config)) => {
1389 if !spann_config.is_default() {
1390 return false;
1391 }
1392 }
1393 (Some(_), Some(_)) => return false, (None, None) => {}
1395 }
1396 }
1397 }
1398
1399 if let Some(sparse_vector) = &value_types.sparse_vector {
1401 if let Some(sparse_index) = &sparse_vector.sparse_vector_index {
1402 if sparse_index.enabled {
1403 return false;
1404 }
1405 if !is_embedding_function_default(&sparse_index.config.embedding_function) {
1407 return false;
1408 }
1409 if sparse_index.config.source_key.is_some() {
1410 return false;
1411 }
1412 if let Some(bm25) = &sparse_index.config.bm25 {
1413 if bm25 != &false {
1414 return false;
1415 }
1416 }
1417 }
1418 }
1419
1420 true
1421 }
1422
1423 fn is_embedding_value_types_default(value_types: &ValueTypes) -> bool {
1425 if value_types.string.is_some()
1427 || value_types.float.is_some()
1428 || value_types.int.is_some()
1429 || value_types.boolean.is_some()
1430 || value_types.sparse_vector.is_some()
1431 {
1432 return false;
1433 }
1434
1435 if let Some(float_list) = &value_types.float_list {
1437 if let Some(vector_index) = &float_list.vector_index {
1438 if !vector_index.enabled {
1439 return false;
1440 }
1441 if !is_embedding_function_default(&vector_index.config.embedding_function) {
1443 return false;
1444 }
1445 if vector_index.config.source_key.as_deref() != Some(DOCUMENT_KEY) {
1447 return false;
1448 }
1449 match (&vector_index.config.hnsw, &vector_index.config.spann) {
1452 (Some(hnsw_config), None) => {
1453 if !hnsw_config.is_default() {
1454 return false;
1455 }
1456 }
1457 (None, Some(spann_config)) => {
1458 if !spann_config.is_default() {
1459 return false;
1460 }
1461 }
1462 (Some(_), Some(_)) => return false, (None, None) => {}
1464 }
1465 }
1466 }
1467
1468 true
1469 }
1470
1471 fn is_document_value_types_default(value_types: &ValueTypes) -> bool {
1473 if value_types.float_list.is_some()
1475 || value_types.float.is_some()
1476 || value_types.int.is_some()
1477 || value_types.boolean.is_some()
1478 || value_types.sparse_vector.is_some()
1479 {
1480 return false;
1481 }
1482
1483 if let Some(string) = &value_types.string {
1485 if let Some(fts) = &string.fts_index {
1486 if !fts.enabled {
1487 return false;
1488 }
1489 }
1491 if let Some(string_inverted) = &string.string_inverted_index {
1492 if string_inverted.enabled {
1493 return false;
1494 }
1495 }
1497 }
1498
1499 true
1500 }
1501
1502 fn convert_collection_config_to_schema(
1504 collection_config: &InternalCollectionConfiguration,
1505 ) -> Result<Schema, SchemaError> {
1506 let mut schema = Schema::new_default(KnnIndex::Spann); let vector_config = match &collection_config.vector_index {
1511 VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
1512 space: Some(hnsw_config.space.clone()),
1513 embedding_function: collection_config.embedding_function.clone(),
1514 source_key: Some(DOCUMENT_KEY.to_string()), hnsw: Some(HnswIndexConfig {
1516 ef_construction: Some(hnsw_config.ef_construction),
1517 max_neighbors: Some(hnsw_config.max_neighbors),
1518 ef_search: Some(hnsw_config.ef_search),
1519 num_threads: Some(hnsw_config.num_threads),
1520 batch_size: Some(hnsw_config.batch_size),
1521 sync_threshold: Some(hnsw_config.sync_threshold),
1522 resize_factor: Some(hnsw_config.resize_factor),
1523 }),
1524 spann: None,
1525 },
1526 VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
1527 space: Some(spann_config.space.clone()),
1528 embedding_function: collection_config.embedding_function.clone(),
1529 source_key: Some(DOCUMENT_KEY.to_string()), hnsw: None,
1531 spann: Some(SpannIndexConfig {
1532 search_nprobe: Some(spann_config.search_nprobe),
1533 search_rng_factor: Some(spann_config.search_rng_factor),
1534 search_rng_epsilon: Some(spann_config.search_rng_epsilon),
1535 nreplica_count: Some(spann_config.nreplica_count),
1536 write_rng_factor: Some(spann_config.write_rng_factor),
1537 write_rng_epsilon: Some(spann_config.write_rng_epsilon),
1538 split_threshold: Some(spann_config.split_threshold),
1539 num_samples_kmeans: Some(spann_config.num_samples_kmeans),
1540 initial_lambda: Some(spann_config.initial_lambda),
1541 reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
1542 merge_threshold: Some(spann_config.merge_threshold),
1543 num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
1544 write_nprobe: Some(spann_config.write_nprobe),
1545 ef_construction: Some(spann_config.ef_construction),
1546 ef_search: Some(spann_config.ef_search),
1547 max_neighbors: Some(spann_config.max_neighbors),
1548 }),
1549 },
1550 };
1551
1552 if let Some(float_list) = &mut schema.defaults.float_list {
1555 if let Some(vector_index) = &mut float_list.vector_index {
1556 vector_index.config = vector_config.clone();
1557 }
1558 }
1559
1560 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1563 if let Some(float_list) = &mut embedding_types.float_list {
1564 if let Some(vector_index) = &mut float_list.vector_index {
1565 vector_index.config = vector_config;
1566 }
1567 }
1568 }
1569
1570 Ok(schema)
1571 }
1572
1573 pub fn is_metadata_type_index_enabled(
1575 &self,
1576 key: &str,
1577 value_type: MetadataValueType,
1578 ) -> Result<bool, SchemaError> {
1579 let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1580
1581 match value_type {
1582 MetadataValueType::Bool => match &v_type.boolean {
1583 Some(bool_type) => match &bool_type.bool_inverted_index {
1584 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1585 None => Err(SchemaError::MissingIndexConfiguration {
1586 key: key.to_string(),
1587 value_type: "bool".to_string(),
1588 }),
1589 },
1590 None => match &self.defaults.boolean {
1591 Some(bool_type) => match &bool_type.bool_inverted_index {
1592 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1593 None => Err(SchemaError::MissingIndexConfiguration {
1594 key: key.to_string(),
1595 value_type: "bool".to_string(),
1596 }),
1597 },
1598 None => Err(SchemaError::MissingIndexConfiguration {
1599 key: key.to_string(),
1600 value_type: "bool".to_string(),
1601 }),
1602 },
1603 },
1604 MetadataValueType::Int => match &v_type.int {
1605 Some(int_type) => match &int_type.int_inverted_index {
1606 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1607 None => Err(SchemaError::MissingIndexConfiguration {
1608 key: key.to_string(),
1609 value_type: "int".to_string(),
1610 }),
1611 },
1612 None => match &self.defaults.int {
1613 Some(int_type) => match &int_type.int_inverted_index {
1614 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1615 None => Err(SchemaError::MissingIndexConfiguration {
1616 key: key.to_string(),
1617 value_type: "int".to_string(),
1618 }),
1619 },
1620 None => Err(SchemaError::MissingIndexConfiguration {
1621 key: key.to_string(),
1622 value_type: "int".to_string(),
1623 }),
1624 },
1625 },
1626 MetadataValueType::Float => match &v_type.float {
1627 Some(float_type) => match &float_type.float_inverted_index {
1628 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1629 None => Err(SchemaError::MissingIndexConfiguration {
1630 key: key.to_string(),
1631 value_type: "float".to_string(),
1632 }),
1633 },
1634 None => match &self.defaults.float {
1635 Some(float_type) => match &float_type.float_inverted_index {
1636 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1637 None => Err(SchemaError::MissingIndexConfiguration {
1638 key: key.to_string(),
1639 value_type: "float".to_string(),
1640 }),
1641 },
1642 None => Err(SchemaError::MissingIndexConfiguration {
1643 key: key.to_string(),
1644 value_type: "float".to_string(),
1645 }),
1646 },
1647 },
1648 MetadataValueType::Str => match &v_type.string {
1649 Some(string_type) => match &string_type.string_inverted_index {
1650 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1651 None => Err(SchemaError::MissingIndexConfiguration {
1652 key: key.to_string(),
1653 value_type: "string".to_string(),
1654 }),
1655 },
1656 None => match &self.defaults.string {
1657 Some(string_type) => match &string_type.string_inverted_index {
1658 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1659 None => Err(SchemaError::MissingIndexConfiguration {
1660 key: key.to_string(),
1661 value_type: "string".to_string(),
1662 }),
1663 },
1664 None => Err(SchemaError::MissingIndexConfiguration {
1665 key: key.to_string(),
1666 value_type: "string".to_string(),
1667 }),
1668 },
1669 },
1670 MetadataValueType::SparseVector => match &v_type.sparse_vector {
1671 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1672 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1673 None => Err(SchemaError::MissingIndexConfiguration {
1674 key: key.to_string(),
1675 value_type: "sparse_vector".to_string(),
1676 }),
1677 },
1678 None => match &self.defaults.sparse_vector {
1679 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1680 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1681 None => Err(SchemaError::MissingIndexConfiguration {
1682 key: key.to_string(),
1683 value_type: "sparse_vector".to_string(),
1684 }),
1685 },
1686 None => Err(SchemaError::MissingIndexConfiguration {
1687 key: key.to_string(),
1688 value_type: "sparse_vector".to_string(),
1689 }),
1690 },
1691 },
1692 }
1693 }
1694
1695 pub fn is_metadata_where_indexing_enabled(
1696 &self,
1697 where_clause: &Where,
1698 ) -> Result<(), FilterValidationError> {
1699 match where_clause {
1700 Where::Composite(composite) => {
1701 for child in &composite.children {
1702 self.is_metadata_where_indexing_enabled(child)?;
1703 }
1704 Ok(())
1705 }
1706 Where::Document(_) => Ok(()),
1707 Where::Metadata(expression) => {
1708 let value_type = match &expression.comparison {
1709 MetadataComparison::Primitive(_, value) => value.value_type(),
1710 MetadataComparison::Set(_, set_value) => set_value.value_type(),
1711 };
1712 let is_enabled = self
1713 .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1714 .map_err(FilterValidationError::Schema)?;
1715 if !is_enabled {
1716 return Err(FilterValidationError::IndexingDisabled {
1717 key: expression.key.clone(),
1718 value_type,
1719 });
1720 }
1721 Ok(())
1722 }
1723 }
1724 }
1725
1726 pub fn is_knn_key_indexing_enabled(
1727 &self,
1728 key: &str,
1729 query: &QueryVector,
1730 ) -> Result<(), FilterValidationError> {
1731 match query {
1732 QueryVector::Sparse(_) => {
1733 let is_enabled = self
1734 .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1735 .map_err(FilterValidationError::Schema)?;
1736 if !is_enabled {
1737 return Err(FilterValidationError::IndexingDisabled {
1738 key: key.to_string(),
1739 value_type: MetadataValueType::SparseVector,
1740 });
1741 }
1742 Ok(())
1743 }
1744 QueryVector::Dense(_) => {
1745 Ok(())
1748 }
1749 }
1750 }
1751
1752 pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
1753 let value_types = self.keys.entry(key.to_string()).or_default();
1754 match value_type {
1755 MetadataValueType::Bool => {
1756 if value_types.boolean.is_none() {
1757 value_types.boolean = self.defaults.boolean.clone();
1758 return true;
1759 }
1760 }
1761 MetadataValueType::Int => {
1762 if value_types.int.is_none() {
1763 value_types.int = self.defaults.int.clone();
1764 return true;
1765 }
1766 }
1767 MetadataValueType::Float => {
1768 if value_types.float.is_none() {
1769 value_types.float = self.defaults.float.clone();
1770 return true;
1771 }
1772 }
1773 MetadataValueType::Str => {
1774 if value_types.string.is_none() {
1775 value_types.string = self.defaults.string.clone();
1776 return true;
1777 }
1778 }
1779 MetadataValueType::SparseVector => {
1780 if value_types.sparse_vector.is_none() {
1781 value_types.sparse_vector = self.defaults.sparse_vector.clone();
1782 return true;
1783 }
1784 }
1785 }
1786 false
1787 }
1788
1789 pub fn create_index(
1829 mut self,
1830 key: Option<&str>,
1831 config: IndexConfig,
1832 ) -> Result<Self, SchemaBuilderError> {
1833 match (&key, &config) {
1835 (None, IndexConfig::Vector(cfg)) => {
1836 self._set_vector_index_config_builder(cfg.clone());
1837 return Ok(self);
1838 }
1839 (None, IndexConfig::Fts(cfg)) => {
1840 self._set_fts_index_config_builder(cfg.clone());
1841 return Ok(self);
1842 }
1843 (Some(k), IndexConfig::Vector(_)) => {
1844 return Err(SchemaBuilderError::VectorIndexMustBeGlobal { key: k.to_string() });
1845 }
1846 (Some(k), IndexConfig::Fts(_)) => {
1847 return Err(SchemaBuilderError::FtsIndexMustBeGlobal { key: k.to_string() });
1848 }
1849 _ => {}
1850 }
1851
1852 if let Some(k) = key {
1854 if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
1855 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
1856 key: k.to_string(),
1857 });
1858 }
1859 }
1860
1861 if key.is_none() && matches!(config, IndexConfig::SparseVector(_)) {
1863 return Err(SchemaBuilderError::SparseVectorRequiresKey);
1864 }
1865
1866 match key {
1868 Some(k) => self._set_index_for_key_builder(k, config, true)?,
1869 None => self._set_index_in_defaults_builder(config, true)?,
1870 }
1871
1872 Ok(self)
1873 }
1874
1875 pub fn delete_index(
1903 mut self,
1904 key: Option<&str>,
1905 config: IndexConfig,
1906 ) -> Result<Self, SchemaBuilderError> {
1907 if let Some(k) = key {
1909 if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
1910 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
1911 key: k.to_string(),
1912 });
1913 }
1914 }
1915
1916 match &config {
1918 IndexConfig::Vector(_) => {
1919 return Err(SchemaBuilderError::VectorIndexDeletionNotSupported);
1920 }
1921 IndexConfig::Fts(_) => {
1922 return Err(SchemaBuilderError::FtsIndexDeletionNotSupported);
1923 }
1924 IndexConfig::SparseVector(_) => {
1925 return Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported);
1926 }
1927 _ => {}
1928 }
1929
1930 match key {
1932 Some(k) => self._set_index_for_key_builder(k, config, false)?,
1933 None => self._set_index_in_defaults_builder(config, false)?,
1934 }
1935
1936 Ok(self)
1937 }
1938
1939 fn _set_vector_index_config_builder(&mut self, config: VectorIndexConfig) {
1941 if let Some(float_list) = &mut self.defaults.float_list {
1943 if let Some(vector_index) = &mut float_list.vector_index {
1944 vector_index.config = config.clone();
1945 }
1946 }
1947
1948 if let Some(embedding_types) = self.keys.get_mut(EMBEDDING_KEY) {
1950 if let Some(float_list) = &mut embedding_types.float_list {
1951 if let Some(vector_index) = &mut float_list.vector_index {
1952 let mut updated_config = config;
1953 updated_config.source_key = Some(DOCUMENT_KEY.to_string());
1955 vector_index.config = updated_config;
1956 }
1957 }
1958 }
1959 }
1960
1961 fn _set_fts_index_config_builder(&mut self, config: FtsIndexConfig) {
1963 if let Some(string) = &mut self.defaults.string {
1965 if let Some(fts_index) = &mut string.fts_index {
1966 fts_index.config = config.clone();
1967 }
1968 }
1969
1970 if let Some(document_types) = self.keys.get_mut(DOCUMENT_KEY) {
1972 if let Some(string) = &mut document_types.string {
1973 if let Some(fts_index) = &mut string.fts_index {
1974 fts_index.config = config;
1975 }
1976 }
1977 }
1978 }
1979
1980 fn _set_index_for_key_builder(
1982 &mut self,
1983 key: &str,
1984 config: IndexConfig,
1985 enabled: bool,
1986 ) -> Result<(), SchemaBuilderError> {
1987 if enabled && matches!(config, IndexConfig::SparseVector(_)) {
1989 let existing_key = self
1991 .keys
1992 .iter()
1993 .find(|(k, v)| {
1994 k.as_str() != key
1995 && v.sparse_vector
1996 .as_ref()
1997 .and_then(|sv| sv.sparse_vector_index.as_ref())
1998 .map(|idx| idx.enabled)
1999 .unwrap_or(false)
2000 })
2001 .map(|(k, _)| k.clone());
2002
2003 if let Some(existing_key) = existing_key {
2004 return Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key });
2005 }
2006 }
2007
2008 let value_types = self.keys.entry(key.to_string()).or_default();
2010
2011 match config {
2013 IndexConfig::Vector(_) => {
2014 return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2015 key: key.to_string(),
2016 });
2017 }
2018 IndexConfig::Fts(_) => {
2019 return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2020 key: key.to_string(),
2021 });
2022 }
2023 IndexConfig::SparseVector(cfg) => {
2024 value_types.sparse_vector = Some(SparseVectorValueType {
2025 sparse_vector_index: Some(SparseVectorIndexType {
2026 enabled,
2027 config: cfg,
2028 }),
2029 });
2030 }
2031 IndexConfig::StringInverted(cfg) => {
2032 if value_types.string.is_none() {
2033 value_types.string = Some(StringValueType {
2034 fts_index: None,
2035 string_inverted_index: None,
2036 });
2037 }
2038 if let Some(string) = &mut value_types.string {
2039 string.string_inverted_index = Some(StringInvertedIndexType {
2040 enabled,
2041 config: cfg,
2042 });
2043 }
2044 }
2045 IndexConfig::IntInverted(cfg) => {
2046 value_types.int = Some(IntValueType {
2047 int_inverted_index: Some(IntInvertedIndexType {
2048 enabled,
2049 config: cfg,
2050 }),
2051 });
2052 }
2053 IndexConfig::FloatInverted(cfg) => {
2054 value_types.float = Some(FloatValueType {
2055 float_inverted_index: Some(FloatInvertedIndexType {
2056 enabled,
2057 config: cfg,
2058 }),
2059 });
2060 }
2061 IndexConfig::BoolInverted(cfg) => {
2062 value_types.boolean = Some(BoolValueType {
2063 bool_inverted_index: Some(BoolInvertedIndexType {
2064 enabled,
2065 config: cfg,
2066 }),
2067 });
2068 }
2069 }
2070
2071 Ok(())
2072 }
2073
2074 fn _set_index_in_defaults_builder(
2076 &mut self,
2077 config: IndexConfig,
2078 enabled: bool,
2079 ) -> Result<(), SchemaBuilderError> {
2080 match config {
2081 IndexConfig::Vector(_) => {
2082 return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2083 key: "defaults".to_string(),
2084 });
2085 }
2086 IndexConfig::Fts(_) => {
2087 return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2088 key: "defaults".to_string(),
2089 });
2090 }
2091 IndexConfig::SparseVector(cfg) => {
2092 self.defaults.sparse_vector = Some(SparseVectorValueType {
2093 sparse_vector_index: Some(SparseVectorIndexType {
2094 enabled,
2095 config: cfg,
2096 }),
2097 });
2098 }
2099 IndexConfig::StringInverted(cfg) => {
2100 if self.defaults.string.is_none() {
2101 self.defaults.string = Some(StringValueType {
2102 fts_index: None,
2103 string_inverted_index: None,
2104 });
2105 }
2106 if let Some(string) = &mut self.defaults.string {
2107 string.string_inverted_index = Some(StringInvertedIndexType {
2108 enabled,
2109 config: cfg,
2110 });
2111 }
2112 }
2113 IndexConfig::IntInverted(cfg) => {
2114 self.defaults.int = Some(IntValueType {
2115 int_inverted_index: Some(IntInvertedIndexType {
2116 enabled,
2117 config: cfg,
2118 }),
2119 });
2120 }
2121 IndexConfig::FloatInverted(cfg) => {
2122 self.defaults.float = Some(FloatValueType {
2123 float_inverted_index: Some(FloatInvertedIndexType {
2124 enabled,
2125 config: cfg,
2126 }),
2127 });
2128 }
2129 IndexConfig::BoolInverted(cfg) => {
2130 self.defaults.boolean = Some(BoolValueType {
2131 bool_inverted_index: Some(BoolInvertedIndexType {
2132 enabled,
2133 config: cfg,
2134 }),
2135 });
2136 }
2137 }
2138
2139 Ok(())
2140 }
2141}
2142
/// Configuration of a dense vector index as stored in a schema.
///
/// Every field is optional. Unset fields are omitted from serialized output,
/// and unknown fields are rejected on deserialization.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct VectorIndexConfig {
    /// Optional [`Space`] setting for the index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub space: Option<Space>,
    /// Optional embedding function configuration attached to the index.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Optional source key. The schema builder sets this to `DOCUMENT_KEY`
    /// on the `EMBEDDING_KEY` entry.
    // NOTE(review): presumably names the field the vectors are derived from; confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// Optional HNSW parameters. The schema default-ness checks treat having
    /// both `hnsw` and `spann` set as non-default.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw: Option<HnswIndexConfig>,
    /// Optional SPANN parameters. See `hnsw` for the both-set rule.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub spann: Option<SpannIndexConfig>,
}
2167
/// HNSW index parameters as stored in a schema.
///
/// Every parameter is optional; `None` means "not set". Unset parameters are
/// omitted from serialized output, and unknown fields are rejected on
/// deserialization. Range constraints declared with `#[validate]` are
/// checked when `validate()` is called.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct HnswIndexConfig {
    /// Optional `ef_construction` parameter.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_construction: Option<usize>,
    /// Optional `max_neighbors` parameter.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_neighbors: Option<usize>,
    /// Optional `ef_search` parameter.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_search: Option<usize>,
    /// Optional thread count. Not compared by [`HnswIndexConfig::is_default`].
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_threads: Option<usize>,
    /// Optional batch size; validation requires a value of at least 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub batch_size: Option<usize>,
    /// Optional sync threshold; validation requires a value of at least 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub sync_threshold: Option<usize>,
    /// Optional resize factor.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resize_factor: Option<f64>,
}
2190
2191impl HnswIndexConfig {
2192 pub fn is_default(&self) -> bool {
2196 if let Some(ef_construction) = self.ef_construction {
2197 if ef_construction != default_construction_ef() {
2198 return false;
2199 }
2200 }
2201 if let Some(max_neighbors) = self.max_neighbors {
2202 if max_neighbors != default_m() {
2203 return false;
2204 }
2205 }
2206 if let Some(ef_search) = self.ef_search {
2207 if ef_search != default_search_ef() {
2208 return false;
2209 }
2210 }
2211 if let Some(batch_size) = self.batch_size {
2212 if batch_size != default_batch_size() {
2213 return false;
2214 }
2215 }
2216 if let Some(sync_threshold) = self.sync_threshold {
2217 if sync_threshold != default_sync_threshold() {
2218 return false;
2219 }
2220 }
2221 if let Some(resize_factor) = self.resize_factor {
2222 if resize_factor != default_resize_factor() {
2223 return false;
2224 }
2225 }
2226 true
2228 }
2229}
2230
/// SPANN index parameters as stored in a schema.
///
/// Every parameter is optional; `None` means "not set". Unset parameters are
/// omitted from serialized output, and unknown fields are rejected on
/// deserialization. The ranges below come from the `#[validate]` attributes
/// and are checked when `validate()` is called.
// NOTE(review): ranges are described as inclusive, per the `validator`
// crate's `range(min, max)` semantics; confirm against the crate version in use.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SpannIndexConfig {
    /// Optional `search_nprobe`; at most 128.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 128))]
    pub search_nprobe: Option<u32>,
    /// Optional `search_rng_factor`; min and max are both 1.0, so only 1.0 passes.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub search_rng_factor: Option<f32>,
    /// Optional `search_rng_epsilon`; between 5.0 and 10.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub search_rng_epsilon: Option<f32>,
    /// Optional `nreplica_count`; at most 8.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub nreplica_count: Option<u32>,
    /// Optional `write_rng_factor`; min and max are both 1.0, so only 1.0 passes.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub write_rng_factor: Option<f32>,
    /// Optional `write_rng_epsilon`; between 5.0 and 10.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub write_rng_epsilon: Option<f32>,
    /// Optional `split_threshold`; between 50 and 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 50, max = 200))]
    pub split_threshold: Option<u32>,
    /// Optional `num_samples_kmeans`; at most 1000.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 1000))]
    pub num_samples_kmeans: Option<usize>,
    /// Optional `initial_lambda`; min and max are both 100.0, so only 100.0 passes.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 100.0, max = 100.0))]
    pub initial_lambda: Option<f32>,
    /// Optional `reassign_neighbor_count`; at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub reassign_neighbor_count: Option<u32>,
    /// Optional `merge_threshold`; between 25 and 100.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 25, max = 100))]
    pub merge_threshold: Option<u32>,
    /// Optional `num_centers_to_merge_to`; at most 8.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub num_centers_to_merge_to: Option<u32>,
    /// Optional `write_nprobe`; at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub write_nprobe: Option<u32>,
    /// Optional `ef_construction`; at most 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_construction: Option<usize>,
    /// Optional `ef_search`; at most 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_search: Option<usize>,
    /// Optional `max_neighbors`; at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub max_neighbors: Option<usize>,
}
2285
2286impl SpannIndexConfig {
2287 pub fn is_default(&self) -> bool {
2290 if let Some(search_nprobe) = self.search_nprobe {
2291 if search_nprobe != default_search_nprobe() {
2292 return false;
2293 }
2294 }
2295 if let Some(search_rng_factor) = self.search_rng_factor {
2296 if search_rng_factor != default_search_rng_factor() {
2297 return false;
2298 }
2299 }
2300 if let Some(search_rng_epsilon) = self.search_rng_epsilon {
2301 if search_rng_epsilon != default_search_rng_epsilon() {
2302 return false;
2303 }
2304 }
2305 if let Some(nreplica_count) = self.nreplica_count {
2306 if nreplica_count != default_nreplica_count() {
2307 return false;
2308 }
2309 }
2310 if let Some(write_rng_factor) = self.write_rng_factor {
2311 if write_rng_factor != default_write_rng_factor() {
2312 return false;
2313 }
2314 }
2315 if let Some(write_rng_epsilon) = self.write_rng_epsilon {
2316 if write_rng_epsilon != default_write_rng_epsilon() {
2317 return false;
2318 }
2319 }
2320 if let Some(split_threshold) = self.split_threshold {
2321 if split_threshold != default_split_threshold() {
2322 return false;
2323 }
2324 }
2325 if let Some(num_samples_kmeans) = self.num_samples_kmeans {
2326 if num_samples_kmeans != default_num_samples_kmeans() {
2327 return false;
2328 }
2329 }
2330 if let Some(initial_lambda) = self.initial_lambda {
2331 if initial_lambda != default_initial_lambda() {
2332 return false;
2333 }
2334 }
2335 if let Some(reassign_neighbor_count) = self.reassign_neighbor_count {
2336 if reassign_neighbor_count != default_reassign_neighbor_count() {
2337 return false;
2338 }
2339 }
2340 if let Some(merge_threshold) = self.merge_threshold {
2341 if merge_threshold != default_merge_threshold() {
2342 return false;
2343 }
2344 }
2345 if let Some(num_centers_to_merge_to) = self.num_centers_to_merge_to {
2346 if num_centers_to_merge_to != default_num_centers_to_merge_to() {
2347 return false;
2348 }
2349 }
2350 if let Some(write_nprobe) = self.write_nprobe {
2351 if write_nprobe != default_write_nprobe() {
2352 return false;
2353 }
2354 }
2355 if let Some(ef_construction) = self.ef_construction {
2356 if ef_construction != default_construction_ef_spann() {
2357 return false;
2358 }
2359 }
2360 if let Some(ef_search) = self.ef_search {
2361 if ef_search != default_search_ef_spann() {
2362 return false;
2363 }
2364 }
2365 if let Some(max_neighbors) = self.max_neighbors {
2366 if max_neighbors != default_m_spann() {
2367 return false;
2368 }
2369 }
2370 true
2371 }
2372}
2373
2374#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2375#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2376#[serde(deny_unknown_fields)]
2377pub struct SparseVectorIndexConfig {
2378 #[serde(skip_serializing_if = "Option::is_none")]
2380 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
2381 #[serde(skip_serializing_if = "Option::is_none")]
2383 pub source_key: Option<String>,
2384 #[serde(skip_serializing_if = "Option::is_none")]
2386 pub bm25: Option<bool>,
2387}
2388
2389#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2390#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2391#[serde(deny_unknown_fields)]
2392pub struct FtsIndexConfig {
2393 }
2395
2396#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2397#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2398#[serde(deny_unknown_fields)]
2399pub struct StringInvertedIndexConfig {
2400 }
2402
2403#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2404#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2405#[serde(deny_unknown_fields)]
2406pub struct IntInvertedIndexConfig {
2407 }
2409
2410#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2411#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2412#[serde(deny_unknown_fields)]
2413pub struct FloatInvertedIndexConfig {
2414 }
2416
2417#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2418#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2419#[serde(deny_unknown_fields)]
2420pub struct BoolInvertedIndexConfig {
2421 }
2423
2424#[derive(Clone, Debug)]
2430pub enum IndexConfig {
2431 Vector(VectorIndexConfig),
2432 SparseVector(SparseVectorIndexConfig),
2433 Fts(FtsIndexConfig),
2434 StringInverted(StringInvertedIndexConfig),
2435 IntInverted(IntInvertedIndexConfig),
2436 FloatInverted(FloatInvertedIndexConfig),
2437 BoolInverted(BoolInvertedIndexConfig),
2438}
2439
2440impl From<VectorIndexConfig> for IndexConfig {
2442 fn from(config: VectorIndexConfig) -> Self {
2443 IndexConfig::Vector(config)
2444 }
2445}
2446
2447impl From<SparseVectorIndexConfig> for IndexConfig {
2448 fn from(config: SparseVectorIndexConfig) -> Self {
2449 IndexConfig::SparseVector(config)
2450 }
2451}
2452
2453impl From<FtsIndexConfig> for IndexConfig {
2454 fn from(config: FtsIndexConfig) -> Self {
2455 IndexConfig::Fts(config)
2456 }
2457}
2458
2459impl From<StringInvertedIndexConfig> for IndexConfig {
2460 fn from(config: StringInvertedIndexConfig) -> Self {
2461 IndexConfig::StringInverted(config)
2462 }
2463}
2464
2465impl From<IntInvertedIndexConfig> for IndexConfig {
2466 fn from(config: IntInvertedIndexConfig) -> Self {
2467 IndexConfig::IntInverted(config)
2468 }
2469}
2470
2471impl From<FloatInvertedIndexConfig> for IndexConfig {
2472 fn from(config: FloatInvertedIndexConfig) -> Self {
2473 IndexConfig::FloatInverted(config)
2474 }
2475}
2476
2477impl From<BoolInvertedIndexConfig> for IndexConfig {
2478 fn from(config: BoolInvertedIndexConfig) -> Self {
2479 IndexConfig::BoolInverted(config)
2480 }
2481}
2482
2483#[cfg(test)]
2484mod tests {
2485 use super::*;
2486 use crate::hnsw_configuration::Space;
2487 use crate::metadata::SparseVector;
2488 use crate::{
2489 EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
2490 };
2491 use serde_json::json;
2492
#[test]
fn test_reconcile_with_defaults_none_user_schema() {
    // Without a user schema, reconciliation must equal the default SPANN schema.
    let reconciled = Schema::reconcile_with_defaults(None).unwrap();
    let spann_default = Schema::new_default(KnnIndex::Spann);
    assert_eq!(reconciled, spann_default);
}
2500
#[test]
fn test_reconcile_with_defaults_empty_user_schema() {
    // A user schema with no overrides must reconcile to the default SPANN schema.
    let empty_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::new(),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(&empty_schema)).unwrap();
    let spann_default = Schema::new_default(KnnIndex::Spann);
    assert_eq!(reconciled, spann_default);
}
2513
#[test]
fn test_reconcile_with_defaults_user_overrides_string_enabled() {
    // The user disables the string inverted index in the defaults and sets nothing else.
    let user_schema = Schema {
        defaults: ValueTypes {
            string: Some(StringValueType {
                string_inverted_index: Some(StringInvertedIndexType {
                    enabled: false,
                    config: StringInvertedIndexConfig {},
                }),
                fts_index: None,
            }),
            ..Default::default()
        },
        keys: HashMap::new(),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(&user_schema)).unwrap();

    // The user's `enabled: false` must survive reconciliation.
    let string_defaults = reconciled.defaults.string.as_ref().unwrap();
    let inverted_index = string_defaults.string_inverted_index.as_ref().unwrap();
    assert!(!inverted_index.enabled);

    // Value types the user did not mention are still populated.
    assert!(reconciled.defaults.float.is_some());
    assert!(reconciled.defaults.int.is_some());
}
2548
#[test]
fn test_reconcile_with_defaults_user_overrides_vector_config() {
    // User defaults: an L2 vector index with a custom source key and one HNSW override.
    let user_hnsw = HnswIndexConfig {
        ef_construction: Some(500),
        max_neighbors: None,
        ef_search: None,
        num_threads: None,
        batch_size: None,
        sync_threshold: None,
        resize_factor: None,
    };
    let user_schema = Schema {
        defaults: ValueTypes {
            float_list: Some(FloatListValueType {
                vector_index: Some(VectorIndexType {
                    enabled: true,
                    config: VectorIndexConfig {
                        space: Some(Space::L2),
                        embedding_function: None,
                        source_key: Some("custom_key".to_string()),
                        hnsw: Some(user_hnsw),
                        spann: None,
                    },
                }),
            }),
            ..Default::default()
        },
        keys: HashMap::new(),
    };

    // Merge by hand against the HNSW default schema: defaults first, then per-key overrides.
    let base_schema = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults =
        Schema::merge_value_types(&base_schema.defaults, &user_schema.defaults).unwrap();
    let mut merged_keys = base_schema.keys.clone();
    for (key, user_types) in user_schema.keys {
        let resolved = match merged_keys.get(&key) {
            Some(base_types) => Schema::merge_value_types(base_types, &user_types).unwrap(),
            None => user_types,
        };
        merged_keys.insert(key, resolved);
    }
    let result = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
    };

    let float_list = result.defaults.float_list.as_ref().unwrap();
    let vector_config = &float_list.vector_index.as_ref().unwrap().config;
    let merged_hnsw = vector_config.hnsw.as_ref().unwrap();

    // Values the user supplied win.
    assert_eq!(vector_config.space, Some(Space::L2));
    assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
    assert_eq!(merged_hnsw.ef_construction, Some(500));

    // Values the user left unset: no embedding function, default max_neighbors.
    assert_eq!(vector_config.embedding_function, None);
    assert_eq!(merged_hnsw.max_neighbors, Some(default_m()));
}
2625
#[test]
fn test_reconcile_with_defaults_keys() {
    // One user-defined key: FTS enabled, string inverted index disabled.
    let custom_key_types = ValueTypes {
        string: Some(StringValueType {
            fts_index: Some(FtsIndexType {
                enabled: true,
                config: FtsIndexConfig {},
            }),
            string_inverted_index: Some(StringInvertedIndexType {
                enabled: false,
                config: StringInvertedIndexConfig {},
            }),
        }),
        ..Default::default()
    };
    let user_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::from([("custom_key".to_string(), custom_key_types)]),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(&user_schema)).unwrap();

    // The built-in keys are present after reconciliation.
    assert!(reconciled.keys.contains_key(EMBEDDING_KEY));
    assert!(reconciled.keys.contains_key(DOCUMENT_KEY));

    // The user's key is kept and its FTS index remains enabled.
    assert!(reconciled.keys.contains_key("custom_key"));
    let custom_entry = reconciled.keys.get("custom_key").unwrap();
    let custom_string = custom_entry.string.as_ref().unwrap();
    assert!(custom_string.fts_index.as_ref().unwrap().enabled);
}
2672
#[test]
fn test_reconcile_with_defaults_override_existing_key() {
    // The user overrides the built-in embedding key with a disabled inner-product index.
    let embedding_override = ValueTypes {
        float_list: Some(FloatListValueType {
            vector_index: Some(VectorIndexType {
                enabled: false,
                config: VectorIndexConfig {
                    space: Some(Space::Ip),
                    embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
                    source_key: Some("custom_embedding_key".to_string()),
                    hnsw: None,
                    spann: None,
                },
            }),
        }),
        ..Default::default()
    };
    let user_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::from([(EMBEDDING_KEY.to_string(), embedding_override)]),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(&user_schema)).unwrap();

    let embedding_entry = reconciled.keys.get(EMBEDDING_KEY).unwrap();
    let float_list = embedding_entry.float_list.as_ref().unwrap();
    let vector_index = float_list.vector_index.as_ref().unwrap();

    // The user's override values are the ones that end up on the embedding key.
    assert!(!vector_index.enabled);
    assert_eq!(vector_index.config.space, Some(Space::Ip));
    assert_eq!(
        vector_index.config.source_key,
        Some("custom_embedding_key".to_string())
    );
}
2720
#[test]
fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
    // Start from an HNSW collection configuration with explicit values for every field.
    let hnsw = InternalHnswConfiguration {
        space: Space::Cosine,
        ef_construction: 128,
        ef_search: 96,
        max_neighbors: 42,
        num_threads: 8,
        resize_factor: 1.5,
        sync_threshold: 2_000,
        batch_size: 256,
    };
    let embedding_function =
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "custom".to_string(),
            config: json!({"alpha": 1}),
        });
    let original = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(hnsw),
        embedding_function: Some(embedding_function),
    };

    // Config -> schema -> config must give back the same configuration.
    let as_schema = Schema::convert_collection_config_to_schema(&original).unwrap();
    let round_tripped = InternalCollectionConfiguration::try_from(&as_schema).unwrap();

    assert_eq!(round_tripped, original);
}
2747
#[test]
fn test_convert_schema_to_collection_config_spann_roundtrip() {
    // SPANN configuration with a handful of explicit values; the rest come from `Default`.
    let spann = InternalSpannConfiguration {
        space: Space::Cosine,
        search_nprobe: 11,
        search_rng_factor: 1.7,
        write_nprobe: 5,
        nreplica_count: 3,
        split_threshold: 150,
        merge_threshold: 80,
        ef_construction: 120,
        ef_search: 90,
        max_neighbors: 40,
        ..Default::default()
    };
    let embedding_function =
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "custom".to_string(),
            config: json!({"beta": true}),
        });
    let original = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(spann),
        embedding_function: Some(embedding_function),
    };

    // Config -> schema -> config must give back the same configuration.
    let as_schema = Schema::convert_collection_config_to_schema(&original).unwrap();
    let round_tripped = InternalCollectionConfiguration::try_from(&as_schema).unwrap();

    assert_eq!(round_tripped, original);
}
2779
#[test]
fn test_convert_schema_to_collection_config_rejects_mixed_index() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);

    // Locate the embedding key's vector index, if every level of the path is present.
    let embedding_vector_index = schema
        .keys
        .get_mut(EMBEDDING_KEY)
        .and_then(|embedding| embedding.float_list.as_mut())
        .and_then(|float_list| float_list.vector_index.as_mut());

    // Attach a SPANN section to the HNSW default schema.
    if let Some(vector_index) = embedding_vector_index {
        vector_index.config.spann = Some(SpannIndexConfig {
            search_nprobe: Some(1),
            search_rng_factor: Some(1.0),
            search_rng_epsilon: Some(0.1),
            nreplica_count: Some(1),
            write_rng_factor: Some(1.0),
            write_rng_epsilon: Some(0.1),
            split_threshold: Some(100),
            num_samples_kmeans: Some(10),
            initial_lambda: Some(0.5),
            reassign_neighbor_count: Some(10),
            merge_threshold: Some(50),
            num_centers_to_merge_to: Some(3),
            write_nprobe: Some(1),
            ef_construction: Some(50),
            ef_search: Some(40),
            max_neighbors: Some(20),
        });
    }

    // Converting such a schema to a collection configuration must fail.
    let conversion = InternalCollectionConfiguration::try_from(&schema);
    assert!(conversion.is_err());
}
2811
#[test]
fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
    // Ensuring the document key with a string value must report no change and leave the schema as is.
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    let snapshot = schema.clone();

    let changed = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);

    assert!(!changed);
    assert_eq!(schema, snapshot);
}
2820
#[test]
fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    assert!(!schema.keys.contains_key("custom_field"));

    let changed = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
    assert!(changed);

    let new_entry = schema
        .keys
        .get("custom_field")
        .expect("expected new key override to be inserted");

    // Only the boolean value type is filled in, copied from the schema defaults.
    assert_eq!(new_entry.boolean, schema.defaults.boolean);
    assert!(new_entry.string.is_none());
    assert!(new_entry.int.is_none());
    assert!(new_entry.float.is_none());
    assert!(new_entry.float_list.is_none());
    assert!(new_entry.sparse_vector.is_none());
}
2840
#[test]
fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    let initial_len = schema.keys.len();

    // Pre-register the key with only a string value type.
    let string_only = ValueTypes {
        string: schema.defaults.string.clone(),
        ..Default::default()
    };
    schema.keys.insert("custom_field".to_string(), string_only);

    let changed = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
    assert!(changed);

    // No extra key is created; the existing entry gains the boolean value type.
    assert_eq!(schema.keys.len(), initial_len + 1);
    let updated_entry = schema
        .keys
        .get("custom_field")
        .expect("expected key override to exist after ensure call");
    assert!(updated_entry.string.is_some());
    assert_eq!(updated_entry.boolean, schema.defaults.boolean);
}
2864
#[test]
fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
    // A sparse query against a key the default schema does not define must be rejected.
    let schema = Schema::new_default(KnnIndex::Spann);
    let sparse_query = QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]));

    let outcome = schema.is_knn_key_indexing_enabled("custom_sparse", &sparse_query);

    match outcome.expect_err("expected indexing disabled error") {
        FilterValidationError::IndexingDisabled { key, value_type } => {
            assert_eq!(key, "custom_sparse");
            assert_eq!(value_type, crate::metadata::MetadataValueType::SparseVector);
        }
        other => panic!("unexpected error variant: {other:?}"),
    }
}
2882
#[test]
fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
    // Register a key whose sparse vector index is explicitly enabled.
    let enabled_sparse_types = ValueTypes {
        sparse_vector: Some(SparseVectorValueType {
            sparse_vector_index: Some(SparseVectorIndexType {
                enabled: true,
                config: SparseVectorIndexConfig {
                    embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
                    source_key: None,
                    bm25: None,
                },
            }),
        }),
        ..Default::default()
    };
    let mut schema = Schema::new_default(KnnIndex::Spann);
    schema
        .keys
        .insert("sparse_enabled".to_string(), enabled_sparse_types);

    let sparse_query = QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]));
    let outcome = schema.is_knn_key_indexing_enabled("sparse_enabled", &sparse_query);

    assert!(outcome.is_ok());
}
2910
#[test]
fn test_is_knn_key_indexing_enabled_dense_succeeds() {
    // A dense query against the embedding key of the default SPANN schema is accepted.
    let schema = Schema::new_default(KnnIndex::Spann);
    let dense_query = QueryVector::Dense(vec![0.1_f32, 0.2_f32]);

    let outcome = schema.is_knn_key_indexing_enabled(EMBEDDING_KEY, &dense_query);

    assert!(outcome.is_ok());
}
2921
#[test]
fn test_merge_hnsw_configs_field_level() {
    // Base configuration with every field populated.
    let base = HnswIndexConfig {
        ef_construction: Some(200),
        max_neighbors: Some(16),
        ef_search: Some(10),
        num_threads: Some(4),
        batch_size: Some(100),
        sync_threshold: Some(1000),
        resize_factor: Some(1.2),
    };

    // User configuration that sets only three fields.
    let overrides = HnswIndexConfig {
        ef_construction: Some(300),
        max_neighbors: None,
        ef_search: Some(20),
        num_threads: None,
        batch_size: None,
        sync_threshold: Some(2000),
        resize_factor: None,
    };

    let merged = Schema::merge_hnsw_configs(Some(&base), Some(&overrides)).unwrap();

    // Fields the user set take the user's value.
    assert_eq!(merged.ef_construction, Some(300));
    assert_eq!(merged.ef_search, Some(20));
    assert_eq!(merged.sync_threshold, Some(2000));

    // Fields the user left as `None` keep the base value.
    assert_eq!(merged.max_neighbors, Some(16));
    assert_eq!(merged.num_threads, Some(4));
    assert_eq!(merged.batch_size, Some(100));
    assert_eq!(merged.resize_factor, Some(1.2));
}
2958
#[test]
fn test_merge_spann_configs_field_level() {
    // Base configuration with every field populated.
    let base = SpannIndexConfig {
        search_nprobe: Some(10),
        search_rng_factor: Some(1.0),
        search_rng_epsilon: Some(7.0),
        nreplica_count: Some(3),
        write_rng_factor: Some(1.0),
        write_rng_epsilon: Some(6.0),
        split_threshold: Some(100),
        num_samples_kmeans: Some(100),
        initial_lambda: Some(100.0),
        reassign_neighbor_count: Some(50),
        merge_threshold: Some(50),
        num_centers_to_merge_to: Some(4),
        write_nprobe: Some(5),
        ef_construction: Some(100),
        ef_search: Some(10),
        max_neighbors: Some(16),
    };

    // User configuration that sets three fields; every other field stays `None`.
    let overrides = SpannIndexConfig {
        search_nprobe: Some(20),
        search_rng_epsilon: Some(8.0),
        split_threshold: Some(150),
        ..Default::default()
    };

    let merged = Schema::merge_spann_configs(Some(&base), Some(&overrides)).unwrap();

    // Fields the user set take the user's value.
    assert_eq!(merged.search_nprobe, Some(20));
    assert_eq!(merged.search_rng_epsilon, Some(8.0));
    assert_eq!(merged.split_threshold, Some(150));

    // Fields the user left as `None` keep the base value.
    assert_eq!(merged.search_rng_factor, Some(1.0));
    assert_eq!(merged.nreplica_count, Some(3));
    assert_eq!(merged.initial_lambda, Some(100.0));
}
3012
#[test]
fn test_spann_index_config_into_internal_configuration() {
    // Partially populated SPANN config; fields not listed here stay `None`.
    let partial = SpannIndexConfig {
        search_nprobe: Some(33),
        search_rng_factor: Some(1.2),
        write_rng_factor: Some(1.5),
        split_threshold: Some(75),
        initial_lambda: Some(0.9),
        reassign_neighbor_count: Some(40),
        num_centers_to_merge_to: Some(4),
        write_nprobe: Some(60),
        ef_construction: Some(180),
        ef_search: Some(170),
        max_neighbors: Some(32),
        ..Default::default()
    };

    // With an explicit space: set fields carry over, unset fields fall back to the defaults.
    let cosine: InternalSpannConfiguration = (Some(&Space::Cosine), &partial).into();
    assert_eq!(cosine.space, Space::Cosine);
    assert_eq!(cosine.search_nprobe, 33);
    assert_eq!(cosine.search_rng_factor, 1.2);
    assert_eq!(cosine.search_rng_epsilon, default_search_rng_epsilon());
    assert_eq!(cosine.write_rng_factor, 1.5);
    assert_eq!(cosine.write_nprobe, 60);
    assert_eq!(cosine.ef_construction, 180);
    assert_eq!(cosine.ef_search, 170);
    assert_eq!(cosine.max_neighbors, 32);
    assert_eq!(cosine.merge_threshold, default_merge_threshold());

    // Without a space the default space is used.
    let no_space: InternalSpannConfiguration = (None, &partial).into();
    assert_eq!(no_space.space, default_space());
}
3049
#[test]
fn test_merge_string_type_combinations() {
    // Base side: inverted index on, FTS off.
    let base = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: true,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: Some(FtsIndexType {
            enabled: false,
            config: FtsIndexConfig {},
        }),
    };

    // User side: inverted index off, FTS not specified.
    let overrides = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: None,
    };

    // Both present: the user's inverted-index flag wins, the FTS flag comes from the base.
    let both = Schema::merge_string_type(Some(&base), Some(&overrides))
        .unwrap()
        .unwrap();
    assert!(!both.string_inverted_index.as_ref().unwrap().enabled);
    assert!(!both.fts_index.as_ref().unwrap().enabled);

    // Only the base present.
    let base_only = Schema::merge_string_type(Some(&base), None)
        .unwrap()
        .unwrap();
    assert!(base_only.string_inverted_index.as_ref().unwrap().enabled);

    // Only the user side present.
    let user_only = Schema::merge_string_type(None, Some(&overrides))
        .unwrap()
        .unwrap();
    assert!(!user_only.string_inverted_index.as_ref().unwrap().enabled);

    // Neither present: the merge yields `None`.
    let neither = Schema::merge_string_type(None, None).unwrap();
    assert!(neither.is_none());
}
3096
#[test]
fn test_merge_vector_index_config_comprehensive() {
    // Base: cosine space, legacy embedding function, fully populated HNSW, no SPANN.
    let base = VectorIndexConfig {
        space: Some(Space::Cosine),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(200),
            max_neighbors: Some(16),
            ef_search: Some(10),
            num_threads: Some(4),
            batch_size: Some(100),
            sync_threshold: Some(1000),
            resize_factor: Some(1.2),
        }),
        spann: None,
    };

    // User: different space and source key, one HNSW field, one SPANN field.
    let overrides = VectorIndexConfig {
        space: Some(Space::L2),
        embedding_function: None,
        source_key: Some("user_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(300),
            max_neighbors: None,
            ef_search: None,
            num_threads: None,
            batch_size: None,
            sync_threshold: None,
            resize_factor: None,
        }),
        spann: Some(SpannIndexConfig {
            search_nprobe: Some(15),
            ..Default::default()
        }),
    };

    let merged = Schema::merge_vector_index_config(&base, &overrides);

    // Top-level fields: user values win where set, base values fill the gaps.
    assert_eq!(merged.space, Some(Space::L2));
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(merged.source_key, Some("user_key".to_string()));

    // HNSW is merged field by field.
    let merged_hnsw = merged.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(300));
    assert_eq!(merged_hnsw.max_neighbors, Some(16));

    // The user's SPANN section is carried into the result.
    assert!(merged.spann.is_some());
    assert_eq!(merged.spann.as_ref().unwrap().search_nprobe, Some(15));
}
3167
#[test]
fn test_merge_sparse_vector_index_config() {
    let base = SparseVectorIndexConfig {
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_sparse_key".to_string()),
        bm25: None,
    };

    // The user only changes the source key.
    let overrides = SparseVectorIndexConfig {
        embedding_function: None,
        source_key: Some("user_sparse_key".to_string()),
        bm25: None,
    };

    let merged = Schema::merge_sparse_vector_index_config(&base, &overrides);

    // The user's source key wins; the embedding function comes from the base.
    assert_eq!(merged.source_key, Some("user_sparse_key".to_string()));
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
}
3193
#[test]
fn test_complex_nested_merging_scenario() {
    // User defaults for strings: inverted index off, FTS on.
    let user_string_defaults = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: Some(FtsIndexType {
            enabled: true,
            config: FtsIndexConfig {},
        }),
    };

    // User defaults for float lists: inner-product space with two HNSW overrides.
    let user_float_list_defaults = FloatListValueType {
        vector_index: Some(VectorIndexType {
            enabled: true,
            config: VectorIndexConfig {
                space: Some(Space::Ip),
                embedding_function: None,
                source_key: Some("custom_vector_key".to_string()),
                hnsw: Some(HnswIndexConfig {
                    ef_construction: Some(400),
                    max_neighbors: Some(32),
                    ef_search: None,
                    num_threads: None,
                    batch_size: None,
                    sync_threshold: None,
                    resize_factor: None,
                }),
                spann: None,
            },
        }),
    };

    // Per-key override: FTS on, no inverted index entry at all.
    let custom_field_types = ValueTypes {
        string: Some(StringValueType {
            fts_index: Some(FtsIndexType {
                enabled: true,
                config: FtsIndexConfig {},
            }),
            string_inverted_index: None,
        }),
        ..Default::default()
    };

    let user_schema = Schema {
        defaults: ValueTypes {
            string: Some(user_string_defaults),
            float_list: Some(user_float_list_defaults),
            ..Default::default()
        },
        keys: HashMap::from([("custom_field".to_string(), custom_field_types)]),
    };

    // Merge by hand against the HNSW default schema: defaults first, then per-key overrides.
    let base_schema = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults =
        Schema::merge_value_types(&base_schema.defaults, &user_schema.defaults).unwrap();
    let mut merged_keys = base_schema.keys.clone();
    for (key, user_types) in user_schema.keys {
        let resolved = match merged_keys.get(&key) {
            Some(base_types) => Schema::merge_value_types(base_types, &user_types).unwrap(),
            None => user_types,
        };
        merged_keys.insert(key, resolved);
    }
    let result = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
    };

    // String defaults follow the user's flags.
    let merged_string = result.defaults.string.as_ref().unwrap();
    assert!(!merged_string.string_inverted_index.as_ref().unwrap().enabled);
    assert!(merged_string.fts_index.as_ref().unwrap().enabled);

    // Vector defaults: user values where set, default ef_search otherwise.
    let merged_float_list = result.defaults.float_list.as_ref().unwrap();
    let vector_config = &merged_float_list.vector_index.as_ref().unwrap().config;
    let merged_hnsw = vector_config.hnsw.as_ref().unwrap();
    assert_eq!(vector_config.space, Some(Space::Ip));
    assert_eq!(vector_config.embedding_function, None);
    assert_eq!(
        vector_config.source_key,
        Some("custom_vector_key".to_string())
    );
    assert_eq!(merged_hnsw.ef_construction, Some(400));
    assert_eq!(merged_hnsw.max_neighbors, Some(32));
    assert_eq!(merged_hnsw.ef_search, Some(default_search_ef()));

    // Built-in keys and the user's key are all present.
    assert!(result.keys.contains_key(EMBEDDING_KEY));
    assert!(result.keys.contains_key(DOCUMENT_KEY));
    assert!(result.keys.contains_key("custom_field"));

    // The user's key is stored exactly as given: FTS on, inverted index absent.
    let custom_entry = result.keys.get("custom_field").unwrap();
    let custom_string = custom_entry.string.as_ref().unwrap();
    assert!(custom_string.fts_index.as_ref().unwrap().enabled);
    assert!(custom_string.string_inverted_index.is_none());
}
3345
#[test]
fn test_reconcile_with_collection_config_default_config() {
    // Reconciling the default HNSW schema with the default HNSW config returns the schema unchanged.
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let default_config = InternalCollectionConfiguration::default_hnsw();

    let reconciled =
        Schema::reconcile_with_collection_config(&default_schema, &default_config).unwrap();

    assert_eq!(reconciled, default_schema);
}
3355
#[test]
fn test_reconcile_with_collection_config_both_non_default() {
    // Schema whose string defaults differ from the defaults produced by `new_default`.
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    schema.defaults.string = Some(StringValueType {
        fts_index: Some(FtsIndexType {
            enabled: true,
            config: FtsIndexConfig {},
        }),
        string_inverted_index: None,
    });

    // Collection config with a changed `ef_construction`.
    let mut collection_config = InternalCollectionConfiguration::default_hnsw();
    if let VectorIndexConfiguration::Hnsw(hnsw) = &mut collection_config.vector_index {
        hnsw.ef_construction = 500;
    }

    // Supplying both must be reported as a conflict.
    let outcome = Schema::reconcile_schema_and_config(Some(&schema), Some(&collection_config));
    assert!(outcome.is_err());
    assert!(matches!(
        outcome.unwrap_err(),
        SchemaError::ConfigAndSchemaConflict
    ));
}
3383
/// Reconciles a default HNSW schema with an explicit HNSW collection
/// configuration and inspects the `EMBEDDING_KEY` vector index: it must be
/// enabled, carry the configured space, embedding function and HNSW
/// parameters, use `DOCUMENT_KEY` as source key, and have no SPANN section.
#[test]
fn test_reconcile_with_collection_config_hnsw_override() {
    let hnsw_settings = InternalHnswConfiguration {
        space: Space::L2,
        ef_construction: 300,
        ef_search: 50,
        max_neighbors: 32,
        num_threads: 8,
        batch_size: 200,
        sync_threshold: 2000,
        resize_factor: 1.5,
    };
    let collection_config = InternalCollectionConfiguration {
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        vector_index: VectorIndexConfiguration::Hnsw(hnsw_settings),
    };
    let schema = Schema::new_default(KnnIndex::Hnsw);

    let reconciled =
        Schema::reconcile_with_collection_config(&schema, &collection_config).unwrap();

    let embedding_types = reconciled.keys.get(EMBEDDING_KEY).unwrap();
    let float_list = embedding_types.float_list.as_ref().unwrap();
    let vector_index = float_list.vector_index.as_ref().unwrap();
    let index_config = &vector_index.config;

    assert!(vector_index.enabled);
    assert_eq!(index_config.space, Some(Space::L2));
    assert_eq!(
        index_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(index_config.source_key, Some(DOCUMENT_KEY.to_string()));

    // Every HNSW field is pinned, so compare the whole struct at once.
    let hnsw = index_config.hnsw.as_ref().unwrap();
    assert_eq!(
        *hnsw,
        HnswIndexConfig {
            ef_construction: Some(300),
            max_neighbors: Some(32),
            ef_search: Some(50),
            num_threads: Some(8),
            batch_size: Some(200),
            sync_threshold: Some(2000),
            resize_factor: Some(1.5),
        }
    );

    assert!(index_config.spann.is_none());
}
3437
/// Reconciles a default SPANN schema with an explicit SPANN collection
/// configuration and inspects the `EMBEDDING_KEY` vector index: enabled,
/// cosine space, no embedding function, `DOCUMENT_KEY` as source key, no HNSW
/// section, and each checked SPANN parameter equal to the configured value.
#[test]
fn test_reconcile_with_collection_config_spann_override() {
    let spann_settings = InternalSpannConfiguration {
        space: Space::Cosine,
        search_nprobe: 20,
        search_rng_factor: 3.0,
        search_rng_epsilon: 0.2,
        write_nprobe: 10,
        write_rng_factor: 2.0,
        write_rng_epsilon: 0.1,
        nreplica_count: 5,
        split_threshold: 2000,
        merge_threshold: 800,
        num_centers_to_merge_to: 20,
        reassign_neighbor_count: 100,
        num_samples_kmeans: 200,
        initial_lambda: 0.8,
        ef_construction: 400,
        ef_search: 60,
        max_neighbors: 24,
    };
    let collection_config = InternalCollectionConfiguration {
        embedding_function: None,
        vector_index: VectorIndexConfiguration::Spann(spann_settings),
    };
    let schema = Schema::new_default(KnnIndex::Spann);

    let reconciled =
        Schema::reconcile_with_collection_config(&schema, &collection_config).unwrap();

    let embedding_types = reconciled.keys.get(EMBEDDING_KEY).unwrap();
    let float_list = embedding_types.float_list.as_ref().unwrap();
    let vector_index = float_list.vector_index.as_ref().unwrap();
    let index_config = &vector_index.config;

    assert!(vector_index.enabled);
    assert_eq!(index_config.space, Some(Space::Cosine));
    assert_eq!(index_config.embedding_function, None);
    assert_eq!(index_config.source_key, Some(DOCUMENT_KEY.to_string()));

    assert!(index_config.hnsw.is_none());

    let spann = index_config.spann.as_ref().unwrap();
    assert_eq!(spann.search_nprobe, Some(20));
    assert_eq!(spann.search_rng_factor, Some(3.0));
    assert_eq!(spann.search_rng_epsilon, Some(0.2));
    assert_eq!(spann.nreplica_count, Some(5));
    assert_eq!(spann.write_rng_factor, Some(2.0));
    assert_eq!(spann.write_rng_epsilon, Some(0.1));
    assert_eq!(spann.split_threshold, Some(2000));
    assert_eq!(spann.num_samples_kmeans, Some(200));
    assert_eq!(spann.initial_lambda, Some(0.8));
    assert_eq!(spann.reassign_neighbor_count, Some(100));
    assert_eq!(spann.merge_threshold, Some(800));
    assert_eq!(spann.num_centers_to_merge_to, Some(20));
    assert_eq!(spann.write_nprobe, Some(10));
    assert_eq!(spann.ef_construction, Some(400));
    assert_eq!(spann.ef_search, Some(60));
    assert_eq!(spann.max_neighbors, Some(24));
}
3506
/// After reconciling a default HNSW schema with an explicit HNSW collection
/// configuration, both the `defaults` vector index (which stays disabled) and
/// the `EMBEDDING_KEY` vector index (which is enabled) must carry the
/// configured space, embedding function, source key and HNSW parameters.
#[test]
fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
    let collection_config = InternalCollectionConfiguration {
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            space: Space::L2,
            ef_construction: 300,
            ef_search: 50,
            max_neighbors: 32,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
        }),
    };
    let schema = Schema::new_default(KnnIndex::Hnsw);

    let reconciled =
        Schema::reconcile_with_collection_config(&schema, &collection_config).unwrap();

    // --- vector index stored under `defaults` ---
    let default_float_list = reconciled.defaults.float_list.as_ref().unwrap();
    let default_index = default_float_list.vector_index.as_ref().unwrap();
    let default_config = &default_index.config;

    assert!(!default_index.enabled);
    assert_eq!(default_config.space, Some(Space::L2));
    assert_eq!(
        default_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(default_config.source_key, Some(DOCUMENT_KEY.to_string()));
    let default_hnsw = default_config.hnsw.as_ref().unwrap();
    assert_eq!(default_hnsw.ef_construction, Some(300));
    assert_eq!(default_hnsw.max_neighbors, Some(32));

    // --- vector index stored under the embedding key ---
    let embedding_types = reconciled.keys.get(EMBEDDING_KEY).unwrap();
    let embedding_float_list = embedding_types.float_list.as_ref().unwrap();
    let embedding_index = embedding_float_list.vector_index.as_ref().unwrap();
    let embedding_config = &embedding_index.config;

    assert!(embedding_index.enabled);
    assert_eq!(embedding_config.space, Some(Space::L2));
    assert_eq!(
        embedding_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(embedding_config.source_key, Some(DOCUMENT_KEY.to_string()));
    let embedding_hnsw = embedding_config.hnsw.as_ref().unwrap();
    assert_eq!(embedding_hnsw.ef_construction, Some(300));
    assert_eq!(embedding_hnsw.max_neighbors, Some(32));
}
3581
/// `is_default` must be true for freshly built HNSW and SPANN default schemas,
/// and false once a default string inverted index is disabled or an extra key
/// is inserted.
#[test]
fn test_is_schema_default() {
    assert!(Schema::new_default(KnnIndex::Hnsw).is_default());
    assert!(Schema::new_default(KnnIndex::Spann).is_default());

    // Flip the default string inverted index off, if the schema has one.
    let mut toggled = Schema::new_default(KnnIndex::Hnsw);
    let inverted_index = toggled
        .defaults
        .string
        .as_mut()
        .and_then(|string_type| string_type.string_inverted_index.as_mut());
    if let Some(index) = inverted_index {
        index.enabled = false;
    }
    assert!(!toggled.is_default());

    // Add a key the default schema does not contain.
    let mut with_extra_key = Schema::new_default(KnnIndex::Hnsw);
    with_extra_key
        .keys
        .insert(String::from("custom_key"), ValueTypes::default());
    assert!(!with_extra_key.is_default());
}
3608
/// Merging two schemas that override the same key with different value types
/// (string in one, float in the other) must yield a key carrying both
/// overrides, each with its inverted index enabled.
#[test]
fn test_add_merges_keys_by_value_type() {
    let mut schema_with_string = Schema::new_default(KnnIndex::Hnsw);
    schema_with_string.keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            string: Some(StringValueType {
                fts_index: None,
                string_inverted_index: Some(StringInvertedIndexType {
                    config: StringInvertedIndexConfig {},
                    enabled: true,
                }),
            }),
            ..Default::default()
        },
    );

    let mut schema_with_float = Schema::new_default(KnnIndex::Hnsw);
    schema_with_float.keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            float: Some(FloatValueType {
                float_inverted_index: Some(FloatInvertedIndexType {
                    config: FloatInvertedIndexConfig {},
                    enabled: true,
                }),
            }),
            ..Default::default()
        },
    );

    let merged = schema_with_string.merge(&schema_with_float).unwrap();
    let custom_field = merged.keys.get("custom_field").unwrap();

    assert!(custom_field.string.is_some());
    assert!(custom_field.float.is_some());

    let string_types = custom_field.string.as_ref().unwrap();
    let string_index = string_types.string_inverted_index.as_ref().unwrap();
    assert!(string_index.enabled);

    let float_types = custom_field.float.as_ref().unwrap();
    let float_index = float_types.float_inverted_index.as_ref().unwrap();
    assert!(float_index.enabled);
}
3667
/// Merging two schemas whose `defaults` differ (here: the string inverted
/// index is disabled on one side) must fail with
/// `SchemaError::DefaultsMismatch`.
#[test]
fn test_add_rejects_different_defaults() {
    let baseline = Schema::new_default(KnnIndex::Hnsw);

    let mut altered = Schema::new_default(KnnIndex::Hnsw);
    if let Some(StringValueType {
        string_inverted_index: Some(index),
        ..
    }) = &mut altered.defaults.string
    {
        index.enabled = false;
    }

    let merge_error = baseline.merge(&altered).unwrap_err();
    assert!(matches!(merge_error, SchemaError::DefaultsMismatch));
}
3682
/// Merging two schemas that configure the same key and value type
/// differently (string inverted index enabled vs. disabled) must fail with
/// `SchemaError::ConfigurationConflict`.
#[test]
fn test_add_detects_conflicting_value_type_configuration() {
    // Builds a string-only override whose inverted index has the given state.
    let string_override = |enabled: bool| ValueTypes {
        string: Some(StringValueType {
            fts_index: None,
            string_inverted_index: Some(StringInvertedIndexType {
                enabled,
                config: StringInvertedIndexConfig {},
            }),
        }),
        ..Default::default()
    };

    let mut enabled_side = Schema::new_default(KnnIndex::Hnsw);
    enabled_side
        .keys
        .insert("custom_field".to_string(), string_override(true));

    let mut disabled_side = Schema::new_default(KnnIndex::Hnsw);
    disabled_side
        .keys
        .insert("custom_field".to_string(), string_override(false));

    let merge_error = enabled_side.merge(&disabled_side).unwrap_err();
    assert!(matches!(
        merge_error,
        SchemaError::ConfigurationConflict { .. }
    ));
}
3719
/// A schema written with the prefixed field names (`#string`, `$fts_index`,
/// `key_overrides`, ...) must deserialize to the same value as one written
/// with the unprefixed names, and re-serializing it must emit only the
/// unprefixed names.
#[test]
fn test_backward_compatibility_aliases() {
    let prefixed_json = r###"{
        "defaults": {
            "#string": {
                "$fts_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "#int": {
                "$int_inverted_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "#float_list": {
                "$vector_index": {
                    "enabled": true,
                    "config": {
                        "spann": {
                            "search_nprobe": 10
                        }
                    }
                }
            }
        },
        "key_overrides": {
            "#document": {
                "#string": {
                    "$fts_index": {
                        "enabled": false,
                        "config": {}
                    }
                }
            }
        }
    }"###;
    let from_prefixed: Schema = serde_json::from_str(prefixed_json).unwrap();

    let plain_json = r###"{
        "defaults": {
            "string": {
                "fts_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "int": {
                "int_inverted_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "float_list": {
                "vector_index": {
                    "enabled": true,
                    "config": {
                        "spann": {
                            "search_nprobe": 10
                        }
                    }
                }
            }
        },
        "keys": {
            "#document": {
                "string": {
                    "fts_index": {
                        "enabled": false,
                        "config": {}
                    }
                }
            }
        }
    }"###;
    let from_plain: Schema = serde_json::from_str(plain_json).unwrap();

    // Both spellings describe the same schema.
    assert_eq!(from_prefixed, from_plain);

    // Spot-check the defaults parsed from the prefixed spelling.
    let defaults = &from_prefixed.defaults;

    assert!(defaults.string.is_some());
    let default_string = defaults.string.as_ref().unwrap();
    assert!(default_string.fts_index.is_some());
    assert!(default_string.fts_index.as_ref().unwrap().enabled);

    assert!(defaults.int.is_some());
    assert!(defaults.int.as_ref().unwrap().int_inverted_index.is_some());

    assert!(defaults.float_list.is_some());
    assert!(defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .is_some());

    // The document key override must have its FTS index switched off.
    assert!(from_prefixed.keys.contains_key(DOCUMENT_KEY));
    let document_types = from_prefixed.keys.get(DOCUMENT_KEY).unwrap();
    assert!(document_types.string.is_some());
    let document_fts = document_types
        .string
        .as_ref()
        .unwrap()
        .fts_index
        .as_ref()
        .unwrap();
    assert!(!document_fts.enabled);

    let serialized = serde_json::to_string(&from_prefixed).unwrap();

    // Unprefixed names must appear in the output ...
    for expected in [
        r#""keys":"#,
        r#""string":"#,
        r#""fts_index":"#,
        r#""int_inverted_index":"#,
        r#""vector_index":"#,
    ] {
        assert!(serialized.contains(expected));
    }

    // ... and the prefixed names must not.
    for forbidden in [
        r#""key_overrides":"#,
        r###""#string":"###,
        r###""$fts_index":"###,
        r###""$int_inverted_index":"###,
        r###""$vector_index":"###,
    ] {
        assert!(!serialized.contains(forbidden));
    }
}
3876
/// Exercises `HnswIndexConfig::validate`: `batch_size` and `sync_threshold`
/// of 1 are rejected while 2 is accepted; an all-default config and small
/// values for the remaining fields are accepted.
#[test]
fn test_hnsw_index_config_validation() {
    use validator::Validate;

    // True when the given configuration passes validation.
    let accepts = |config: HnswIndexConfig| config.validate().is_ok();

    // A fully reasonable configuration.
    assert!(accepts(HnswIndexConfig {
        batch_size: Some(10),
        sync_threshold: Some(100),
        ef_construction: Some(100),
        max_neighbors: Some(16),
        ..HnswIndexConfig::default()
    }));

    // batch_size of 1 is rejected.
    assert!(!accepts(HnswIndexConfig {
        batch_size: Some(1),
        ..HnswIndexConfig::default()
    }));

    // sync_threshold of 1 is rejected.
    assert!(!accepts(HnswIndexConfig {
        sync_threshold: Some(1),
        ..HnswIndexConfig::default()
    }));

    // A value of 2 is accepted for both fields.
    assert!(accepts(HnswIndexConfig {
        batch_size: Some(2),
        sync_threshold: Some(2),
        ..HnswIndexConfig::default()
    }));

    // Nothing set at all.
    assert!(accepts(HnswIndexConfig::default()));

    // Small values in the remaining fields are accepted.
    assert!(accepts(HnswIndexConfig {
        ef_construction: Some(1),
        max_neighbors: Some(1),
        ef_search: Some(1),
        num_threads: Some(1),
        resize_factor: Some(0.1),
        ..HnswIndexConfig::default()
    }));
}
3930
/// Exercises `SpannIndexConfig::validate` field by field: one fully populated
/// configuration that must pass, followed by single-field configurations that
/// must be rejected or accepted as asserted below.
#[test]
fn test_spann_index_config_validation() {
    use validator::Validate;

    // True when the given configuration passes validation.
    let accepts = |config: SpannIndexConfig| config.validate().is_ok();

    // Fully populated configuration that must validate.
    assert!(accepts(SpannIndexConfig {
        write_nprobe: Some(32),
        nreplica_count: Some(4),
        split_threshold: Some(100),
        merge_threshold: Some(50),
        reassign_neighbor_count: Some(32),
        num_centers_to_merge_to: Some(4),
        ef_construction: Some(100),
        ef_search: Some(100),
        max_neighbors: Some(32),
        search_rng_factor: Some(1.0),
        write_rng_factor: Some(1.0),
        search_rng_epsilon: Some(7.5),
        write_rng_epsilon: Some(7.5),
        ..SpannIndexConfig::default()
    }));

    // write_nprobe
    assert!(!accepts(SpannIndexConfig {
        write_nprobe: Some(200),
        ..SpannIndexConfig::default()
    }));

    // split_threshold: rejected at 10 and at 250
    assert!(!accepts(SpannIndexConfig {
        split_threshold: Some(10),
        ..SpannIndexConfig::default()
    }));
    assert!(!accepts(SpannIndexConfig {
        split_threshold: Some(250),
        ..SpannIndexConfig::default()
    }));

    // nreplica_count
    assert!(!accepts(SpannIndexConfig {
        nreplica_count: Some(10),
        ..SpannIndexConfig::default()
    }));

    // reassign_neighbor_count
    assert!(!accepts(SpannIndexConfig {
        reassign_neighbor_count: Some(100),
        ..SpannIndexConfig::default()
    }));

    // merge_threshold: rejected at 5 and at 150
    assert!(!accepts(SpannIndexConfig {
        merge_threshold: Some(5),
        ..SpannIndexConfig::default()
    }));
    assert!(!accepts(SpannIndexConfig {
        merge_threshold: Some(150),
        ..SpannIndexConfig::default()
    }));

    // num_centers_to_merge_to
    assert!(!accepts(SpannIndexConfig {
        num_centers_to_merge_to: Some(10),
        ..SpannIndexConfig::default()
    }));

    // ef_construction
    assert!(!accepts(SpannIndexConfig {
        ef_construction: Some(300),
        ..SpannIndexConfig::default()
    }));

    // ef_search
    assert!(!accepts(SpannIndexConfig {
        ef_search: Some(300),
        ..SpannIndexConfig::default()
    }));

    // max_neighbors
    assert!(!accepts(SpannIndexConfig {
        max_neighbors: Some(100),
        ..SpannIndexConfig::default()
    }));

    // search_nprobe
    assert!(!accepts(SpannIndexConfig {
        search_nprobe: Some(200),
        ..SpannIndexConfig::default()
    }));

    // search_rng_factor: 0.9 and 1.1 rejected, 1.0 accepted
    assert!(!accepts(SpannIndexConfig {
        search_rng_factor: Some(0.9),
        ..SpannIndexConfig::default()
    }));
    assert!(!accepts(SpannIndexConfig {
        search_rng_factor: Some(1.1),
        ..SpannIndexConfig::default()
    }));
    assert!(accepts(SpannIndexConfig {
        search_rng_factor: Some(1.0),
        ..SpannIndexConfig::default()
    }));

    // search_rng_epsilon: 4.0 and 11.0 rejected, 7.5 accepted
    assert!(!accepts(SpannIndexConfig {
        search_rng_epsilon: Some(4.0),
        ..SpannIndexConfig::default()
    }));
    assert!(!accepts(SpannIndexConfig {
        search_rng_epsilon: Some(11.0),
        ..SpannIndexConfig::default()
    }));
    assert!(accepts(SpannIndexConfig {
        search_rng_epsilon: Some(7.5),
        ..SpannIndexConfig::default()
    }));

    // write_rng_factor: 0.9 and 1.1 rejected, 1.0 accepted
    assert!(!accepts(SpannIndexConfig {
        write_rng_factor: Some(0.9),
        ..SpannIndexConfig::default()
    }));
    assert!(!accepts(SpannIndexConfig {
        write_rng_factor: Some(1.1),
        ..SpannIndexConfig::default()
    }));
    assert!(accepts(SpannIndexConfig {
        write_rng_factor: Some(1.0),
        ..SpannIndexConfig::default()
    }));

    // write_rng_epsilon: 4.0 and 11.0 rejected, 7.5 accepted
    assert!(!accepts(SpannIndexConfig {
        write_rng_epsilon: Some(4.0),
        ..SpannIndexConfig::default()
    }));
    assert!(!accepts(SpannIndexConfig {
        write_rng_epsilon: Some(11.0),
        ..SpannIndexConfig::default()
    }));
    assert!(accepts(SpannIndexConfig {
        write_rng_epsilon: Some(7.5),
        ..SpannIndexConfig::default()
    }));

    // num_samples_kmeans: 1500 rejected, 500 accepted
    assert!(!accepts(SpannIndexConfig {
        num_samples_kmeans: Some(1500),
        ..SpannIndexConfig::default()
    }));
    assert!(accepts(SpannIndexConfig {
        num_samples_kmeans: Some(500),
        ..SpannIndexConfig::default()
    }));

    // initial_lambda: 150.0 and 50.0 rejected, 100.0 accepted
    assert!(!accepts(SpannIndexConfig {
        initial_lambda: Some(150.0),
        ..SpannIndexConfig::default()
    }));
    assert!(!accepts(SpannIndexConfig {
        initial_lambda: Some(50.0),
        ..SpannIndexConfig::default()
    }));
    assert!(accepts(SpannIndexConfig {
        initial_lambda: Some(100.0),
        ..SpannIndexConfig::default()
    }));

    // Nothing set at all.
    assert!(accepts(SpannIndexConfig::default()));
}
4157
/// Walks through a create / inspect / delete cycle with the schema builder:
/// configures the global vector index, creates inverted indexes on four keys,
/// verifies them, deletes two of them, and checks that the deleted ones are
/// disabled while the remaining ones stay enabled.
#[test]
fn test_builder_pattern_crud_workflow() {
    let vector_settings = VectorIndexConfig {
        space: Some(Space::Cosine),
        embedding_function: None,
        source_key: None,
        spann: None,
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(200),
            max_neighbors: Some(32),
            ef_search: Some(50),
            num_threads: None,
            batch_size: None,
            sync_threshold: None,
            resize_factor: None,
        }),
    };

    // Create: one builder step per statement.
    let schema = Schema::new_default(KnnIndex::Hnsw);
    let schema = schema
        .create_index(None, IndexConfig::Vector(vector_settings))
        .expect("vector config should succeed");
    let schema = schema
        .create_index(
            Some("category"),
            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
        )
        .expect("string inverted on key should succeed");
    let schema = schema
        .create_index(
            Some("year"),
            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
        )
        .expect("int inverted on key should succeed");
    let schema = schema
        .create_index(
            Some("rating"),
            IndexConfig::FloatInverted(FloatInvertedIndexConfig {}),
        )
        .expect("float inverted on key should succeed");
    let schema = schema
        .create_index(
            Some("is_active"),
            IndexConfig::BoolInverted(BoolInvertedIndexConfig {}),
        )
        .expect("bool inverted on key should succeed");

    // Inspect: the vector settings are stored under the embedding key.
    assert!(schema.keys.contains_key(EMBEDDING_KEY));
    let embedding_types = schema.keys.get(EMBEDDING_KEY).unwrap();
    assert!(embedding_types.float_list.is_some());
    let float_list = embedding_types.float_list.as_ref().unwrap();
    let vector_index = float_list.vector_index.as_ref().unwrap();
    assert!(vector_index.enabled);
    assert_eq!(vector_index.config.space, Some(Space::Cosine));
    let stored_hnsw = vector_index.config.hnsw.as_ref().unwrap();
    assert_eq!(stored_hnsw.ef_construction, Some(200));

    // Inspect: every metadata key was registered.
    for key in ["category", "year", "rating", "is_active"] {
        assert!(schema.keys.contains_key(key));
    }

    let category_types = schema.keys.get("category").unwrap();
    assert!(category_types.string.is_some());
    let category_string = category_types.string.as_ref().unwrap();
    assert!(category_string.string_inverted_index.as_ref().unwrap().enabled);

    let year_types = schema.keys.get("year").unwrap();
    assert!(year_types.int.is_some());
    let year_int = year_types.int.as_ref().unwrap();
    assert!(year_int.int_inverted_index.as_ref().unwrap().enabled);

    // Delete two of the indexes.
    let schema = schema
        .delete_index(
            Some("category"),
            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
        )
        .expect("delete string inverted should succeed");
    let schema = schema
        .delete_index(
            Some("year"),
            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
        )
        .expect("delete int inverted should succeed");

    // Deleted indexes remain present but are disabled.
    let category_string = schema.keys.get("category").unwrap().string.as_ref().unwrap();
    assert!(!category_string.string_inverted_index.as_ref().unwrap().enabled);

    let year_int = schema.keys.get("year").unwrap().int.as_ref().unwrap();
    assert!(!year_int.int_inverted_index.as_ref().unwrap().enabled);

    // Indexes that were not deleted are still enabled.
    let rating_float = schema.keys.get("rating").unwrap().float.as_ref().unwrap();
    assert!(rating_float.float_inverted_index.as_ref().unwrap().enabled);

    let is_active_bool = schema
        .keys
        .get("is_active")
        .unwrap()
        .boolean
        .as_ref()
        .unwrap();
    assert!(is_active_bool.bool_inverted_index.as_ref().unwrap().enabled);
}
4308
/// Each `create_index` call below must fail with the specific
/// `SchemaBuilderError` variant asserted right after it.
#[test]
fn test_builder_create_index_validation_errors() {
    // Fresh default schema for every scenario.
    let fresh_schema = || Schema::new_default(KnnIndex::Hnsw);
    // Sparse vector index configuration with nothing set.
    let sparse_index = || {
        IndexConfig::SparseVector(SparseVectorIndexConfig {
            embedding_function: None,
            source_key: None,
            bm25: None,
        })
    };

    // Vector index on a specific key.
    let outcome = fresh_schema().create_index(
        Some("my_vectors"),
        IndexConfig::Vector(VectorIndexConfig {
            space: Some(Space::L2),
            embedding_function: None,
            source_key: None,
            hnsw: None,
            spann: None,
        }),
    );
    assert!(outcome.is_err());
    let error = outcome.unwrap_err();
    assert!(matches!(
        error,
        SchemaBuilderError::VectorIndexMustBeGlobal { key } if key == "my_vectors"
    ));

    // FTS index on a specific key.
    let outcome = fresh_schema().create_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
    assert!(outcome.is_err());
    let error = outcome.unwrap_err();
    assert!(matches!(
        error,
        SchemaBuilderError::FtsIndexMustBeGlobal { key } if key == "my_text"
    ));

    // String inverted index on the document key.
    let outcome = fresh_schema().create_index(
        Some(DOCUMENT_KEY),
        IndexConfig::StringInverted(StringInvertedIndexConfig {}),
    );
    assert!(outcome.is_err());
    let error = outcome.unwrap_err();
    assert!(matches!(
        error,
        SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
    ));

    // Int inverted index on the embedding key.
    let outcome = fresh_schema().create_index(
        Some(EMBEDDING_KEY),
        IndexConfig::IntInverted(IntInvertedIndexConfig {}),
    );
    assert!(outcome.is_err());
    let error = outcome.unwrap_err();
    assert!(matches!(
        error,
        SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
    ));

    // Sparse vector index without a key.
    let outcome = fresh_schema().create_index(None, sparse_index());
    assert!(outcome.is_err());
    let error = outcome.unwrap_err();
    assert!(matches!(error, SchemaBuilderError::SparseVectorRequiresKey));

    // Second sparse vector index after a first one succeeded.
    let with_first_sparse = fresh_schema()
        .create_index(Some("sparse1"), sparse_index())
        .expect("first sparse should succeed");
    let outcome = with_first_sparse.create_index(Some("sparse2"), sparse_index());
    assert!(outcome.is_err());
    let error = outcome.unwrap_err();
    assert!(matches!(
        error,
        SchemaBuilderError::MultipleSparseVectorIndexes { existing_key } if existing_key == "sparse1"
    ));
}
4404
/// Each `delete_index` call below must fail with the specific
/// `SchemaBuilderError` variant asserted right after it.
#[test]
fn test_builder_delete_index_validation_errors() {
    // Fresh default schema for every scenario.
    let fresh_schema = || Schema::new_default(KnnIndex::Hnsw);
    // Sparse vector index configuration with nothing set.
    let sparse_index = || {
        IndexConfig::SparseVector(SparseVectorIndexConfig {
            embedding_function: None,
            source_key: None,
            bm25: None,
        })
    };

    // String inverted index on the embedding key.
    let outcome = fresh_schema().delete_index(
        Some(EMBEDDING_KEY),
        IndexConfig::StringInverted(StringInvertedIndexConfig {}),
    );
    assert!(outcome.is_err());
    let error = outcome.unwrap_err();
    assert!(matches!(
        error,
        SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
    ));

    // Int inverted index on the document key.
    let outcome = fresh_schema().delete_index(
        Some(DOCUMENT_KEY),
        IndexConfig::IntInverted(IntInvertedIndexConfig {}),
    );
    assert!(outcome.is_err());
    let error = outcome.unwrap_err();
    assert!(matches!(
        error,
        SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
    ));

    // Global vector index.
    let outcome = fresh_schema().delete_index(
        None,
        IndexConfig::Vector(VectorIndexConfig {
            space: None,
            embedding_function: None,
            source_key: None,
            hnsw: None,
            spann: None,
        }),
    );
    assert!(outcome.is_err());
    let error = outcome.unwrap_err();
    assert!(matches!(
        error,
        SchemaBuilderError::VectorIndexDeletionNotSupported
    ));

    // Global FTS index.
    let outcome = fresh_schema().delete_index(None, IndexConfig::Fts(FtsIndexConfig {}));
    assert!(outcome.is_err());
    let error = outcome.unwrap_err();
    assert!(matches!(
        error,
        SchemaBuilderError::FtsIndexDeletionNotSupported
    ));

    // Sparse vector index that was created just before.
    let with_sparse = fresh_schema()
        .create_index(Some("sparse"), sparse_index())
        .expect("create should succeed");
    let outcome = with_sparse.delete_index(Some("sparse"), sparse_index());
    assert!(outcome.is_err());
    let error = outcome.unwrap_err();
    assert!(matches!(
        error,
        SchemaBuilderError::SparseVectorIndexDeletionNotSupported
    ));
}
4484
/// Applies a sequence of create and delete operations through the builder and
/// checks the final enabled state of every touched index: `tag1`, `tag3`,
/// `count` and `score` are enabled, while the deleted `tag2` is disabled.
#[test]
fn test_builder_pattern_chaining() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    for tag in ["tag1", "tag2", "tag3"] {
        schema = schema
            .create_index(Some(tag), IndexConfig::from(StringInvertedIndexConfig {}))
            .unwrap();
    }
    let schema = schema
        .create_index(Some("count"), IndexConfig::from(IntInvertedIndexConfig {}))
        .unwrap();
    let schema = schema
        .delete_index(Some("tag2"), IndexConfig::from(StringInvertedIndexConfig {}))
        .unwrap();
    let schema = schema
        .create_index(Some("score"), IndexConfig::from(FloatInvertedIndexConfig {}))
        .unwrap();

    // Reads the enabled flag of the string inverted index stored under `key`.
    let string_index_enabled = |key: &str| {
        let value_types = schema.keys.get(key).unwrap();
        let string_types = value_types.string.as_ref().unwrap();
        string_types.string_inverted_index.as_ref().unwrap().enabled
    };

    assert!(string_index_enabled("tag1"));
    assert!(!string_index_enabled("tag2"));
    assert!(string_index_enabled("tag3"));

    let count_int = schema.keys.get("count").unwrap().int.as_ref().unwrap();
    assert!(count_int.int_inverted_index.as_ref().unwrap().enabled);

    let score_float = schema.keys.get("score").unwrap().float.as_ref().unwrap();
    assert!(score_float.float_inverted_index.as_ref().unwrap().enabled);
}
4577
/// Pins the exact shape of `Schema::default()`: which default indexes are
/// enabled or disabled, that the default vector config is empty, and that the
/// only key overrides are `DOCUMENT_KEY` and `EMBEDDING_KEY`.
#[test]
fn test_schema_default_matches_python() {
    let schema = Schema::default();
    let defaults = &schema.defaults;

    // Strings: FTS off, inverted index on.
    assert!(defaults.string.is_some());
    let default_string = defaults.string.as_ref().unwrap();
    assert!(!default_string.fts_index.as_ref().unwrap().enabled);
    assert!(default_string.string_inverted_index.as_ref().unwrap().enabled);

    // Float lists: vector index off and completely unconfigured.
    assert!(defaults.float_list.is_some());
    let default_float_list = defaults.float_list.as_ref().unwrap();
    let default_vector_index = default_float_list.vector_index.as_ref().unwrap();
    assert!(!default_vector_index.enabled);
    let default_vector_config = &default_vector_index.config;
    assert!(default_vector_config.space.is_none());
    assert!(default_vector_config.hnsw.is_none());
    assert!(default_vector_config.spann.is_none());
    assert!(default_vector_config.source_key.is_none());

    // Sparse vectors: index off.
    assert!(defaults.sparse_vector.is_some());
    let default_sparse = defaults.sparse_vector.as_ref().unwrap();
    assert!(!default_sparse.sparse_vector_index.as_ref().unwrap().enabled);

    // Ints, floats and booleans: inverted index on.
    assert!(defaults.int.is_some());
    let default_int = defaults.int.as_ref().unwrap();
    assert!(default_int.int_inverted_index.as_ref().unwrap().enabled);

    assert!(defaults.float.is_some());
    let default_float = defaults.float.as_ref().unwrap();
    assert!(default_float.float_inverted_index.as_ref().unwrap().enabled);

    assert!(defaults.boolean.is_some());
    let default_bool = defaults.boolean.as_ref().unwrap();
    assert!(default_bool.bool_inverted_index.as_ref().unwrap().enabled);

    // Document key override: FTS on, string inverted index off.
    assert!(schema.keys.contains_key(DOCUMENT_KEY));
    let document_types = schema.keys.get(DOCUMENT_KEY).unwrap();
    assert!(document_types.string.is_some());
    let document_string = document_types.string.as_ref().unwrap();
    assert!(document_string.fts_index.as_ref().unwrap().enabled);
    assert!(!document_string.string_inverted_index.as_ref().unwrap().enabled);

    // Embedding key override: vector index on, sourced from the document key,
    // with no space / HNSW / SPANN settings.
    assert!(schema.keys.contains_key(EMBEDDING_KEY));
    let embedding_types = schema.keys.get(EMBEDDING_KEY).unwrap();
    assert!(embedding_types.float_list.is_some());
    let embedding_float_list = embedding_types.float_list.as_ref().unwrap();
    let embedding_index = embedding_float_list.vector_index.as_ref().unwrap();
    assert!(embedding_index.enabled);
    assert_eq!(
        embedding_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
    assert!(embedding_index.config.space.is_none());
    assert!(embedding_index.config.hnsw.is_none());
    assert!(embedding_index.config.spann.is_none());

    // No other key overrides exist.
    assert_eq!(schema.keys.len(), 2);
}
4697
/// `Schema::default()` must be usable as a builder starting point: adding a
/// string inverted index on `category` yields exactly three keys (the new one
/// plus `DOCUMENT_KEY` and `EMBEDDING_KEY`).
#[test]
fn test_schema_default_works_with_builder() {
    let category_index = IndexConfig::from(StringInvertedIndexConfig {});
    let schema = Schema::default()
        .create_index(Some("category"), category_index)
        .expect("should succeed");

    let keys = &schema.keys;
    assert!(keys.contains_key("category"));
    assert!(keys.contains_key(DOCUMENT_KEY));
    assert!(keys.contains_key(EMBEDDING_KEY));
    assert_eq!(keys.len(), 3);
}
4711}