1use chroma_error::{ChromaError, ErrorCodes};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use thiserror::Error;
5use validator::Validate;
6
7use crate::collection_configuration::{
8 EmbeddingFunctionConfiguration, InternalCollectionConfiguration,
9 UpdateVectorIndexConfiguration, VectorIndexConfiguration,
10};
11use crate::hnsw_configuration::Space;
12use crate::metadata::{MetadataComparison, MetadataValueType, Where};
13use crate::operator::QueryVector;
14use crate::{
15 default_batch_size, default_construction_ef, default_construction_ef_spann,
16 default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
17 default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
18 default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
19 default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
20 default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
21 default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor,
22 HnswParametersFromSegmentError, InternalHnswConfiguration, InternalSpannConfiguration,
23 InternalUpdateCollectionConfiguration, KnnIndex, Segment, CHROMA_KEY,
24};
25
26impl ChromaError for SchemaError {
27 fn code(&self) -> ErrorCodes {
28 ErrorCodes::Internal
29 }
30}
31
/// Errors raised by schema operations in this module (for example
/// `Schema::merge` and `Schema::reconcile_with_defaults`).
#[derive(Debug, Error)]
pub enum SchemaError {
    /// No index configuration is present for `key` with the given value type.
    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
    MissingIndexConfiguration { key: String, value_type: String },
    /// Reconciliation failed; `reason` carries the human-readable cause.
    #[error("Schema reconciliation failed: {reason}")]
    InvalidSchema { reason: String },
    /// Both a collection configuration and a schema were supplied.
    #[error("Cannot set both collection config and schema simultaneously")]
    ConfigAndSchemaConflict,
    /// Two schemas being merged have different `defaults`.
    #[error("Cannot merge schemas with differing defaults")]
    DefaultsMismatch,
    /// Two schemas being merged disagree on the index described by `context`.
    #[error("Conflicting configuration for {context}")]
    ConfigurationConflict { context: String },
    /// HNSW configuration failed `validator` validation.
    #[error("Invalid HNSW configuration: {0}")]
    InvalidHnswConfig(validator::ValidationErrors),
    /// SPANN configuration failed `validator` validation.
    #[error("Invalid SPANN configuration: {0}")]
    InvalidSpannConfig(validator::ValidationErrors),
    /// A schema-builder error, displayed transparently.
    #[error(transparent)]
    Builder(#[from] SchemaBuilderError),
}
51
/// Errors describing invalid index create/delete requests against a schema.
///
/// The messages are user-facing and explain how to correct the request.
#[derive(Debug, Error)]
pub enum SchemaBuilderError {
    /// A vector index was requested on a specific key instead of globally.
    #[error("Vector index must be configured globally using create_index(None, config), not on specific key '{key}'")]
    VectorIndexMustBeGlobal { key: String },
    /// An FTS index was requested on a specific key instead of globally.
    #[error("FTS index must be configured globally using create_index(None, config), not on specific key '{key}'")]
    FtsIndexMustBeGlobal { key: String },
    /// The request targeted a system-managed special key.
    #[error("Cannot modify special key '{key}' - it is managed automatically by the system. To customize vector search, modify the global vector config instead.")]
    SpecialKeyModificationNotAllowed { key: String },
    /// A sparse vector index was requested without naming a key.
    #[error("Sparse vector index requires a specific key. Use create_index(Some(\"key_name\"), config) instead of create_index(None, config)")]
    SparseVectorRequiresKey,
    /// A second sparse vector index was requested; `existing_key` already has one.
    #[error("Only one sparse vector index allowed per collection. Key '{existing_key}' already has a sparse vector index. Remove it first or use that key.")]
    MultipleSparseVectorIndexes { existing_key: String },
    /// Deleting the vector index was requested.
    #[error("Vector index deletion not supported. The vector index is always enabled on #embedding. To disable vector search, disable the collection instead.")]
    VectorIndexDeletionNotSupported,
    /// Deleting the FTS index was requested.
    #[error("FTS index deletion not supported. The FTS index is always enabled on #document. To disable full-text search, use a different collection without FTS.")]
    FtsIndexDeletionNotSupported,
    /// Deleting a sparse vector index was requested.
    #[error("Sparse vector index deletion not supported yet. Sparse vector indexes cannot be removed once created.")]
    SparseVectorIndexDeletionNotSupported,
}
71
/// Errors raised when a metadata filter is checked against a schema.
#[derive(Debug, Error)]
pub enum FilterValidationError {
    /// The filter references `key` with a value type whose indexing is disabled.
    #[error(
        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
    )]
    IndexingDisabled {
        key: String,
        value_type: MetadataValueType,
    },
    /// An underlying schema error, displayed transparently.
    #[error(transparent)]
    Schema(#[from] SchemaError),
}
84
/// Every builder error is reported as `ErrorCodes::InvalidArgument`.
impl ChromaError for SchemaBuilderError {
    fn code(&self) -> ErrorCodes {
        ErrorCodes::InvalidArgument
    }
}
90
91impl ChromaError for FilterValidationError {
92 fn code(&self) -> ErrorCodes {
93 match self {
94 FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
95 FilterValidationError::Schema(_) => ErrorCodes::Internal,
96 }
97 }
98}
99
// String names of the value types.
pub const STRING_VALUE_NAME: &str = "string";
pub const INT_VALUE_NAME: &str = "int";
pub const BOOL_VALUE_NAME: &str = "bool";
pub const FLOAT_VALUE_NAME: &str = "float";
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// String names of the index types.
pub const FTS_INDEX_NAME: &str = "fts_index";
pub const VECTOR_INDEX_NAME: &str = "vector_index";
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

// Reserved keys: the default schemas register a per-key entry under each of
// them (FTS index on `#document`, vector index on `#embedding`).
pub const DOCUMENT_KEY: &str = "#document";
pub const EMBEDDING_KEY: &str = "#embedding";
125
/// Index configuration of a collection: per-value-type defaults plus
/// per-key entries.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct Schema {
    /// Index settings per value type, independent of any particular key.
    pub defaults: ValueTypes,
    /// Index settings for individual keys. Serialized as `keys`;
    /// `key_overrides` is also accepted when deserializing.
    #[serde(rename = "keys", alias = "key_overrides")]
    pub keys: HashMap<String, ValueTypes>,
}
144
145impl Schema {
146 pub fn update(&mut self, configuration: &InternalUpdateCollectionConfiguration) {
147 if let Some(vector_update) = &configuration.vector_index {
148 if let Some(default_vector_index) = self.defaults_vector_index_mut() {
149 Self::apply_vector_index_update(default_vector_index, vector_update);
150 }
151 if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
152 Self::apply_vector_index_update(embedding_vector_index, vector_update);
153 }
154 }
155
156 if let Some(embedding_function) = configuration.embedding_function.as_ref() {
157 if let Some(default_vector_index) = self.defaults_vector_index_mut() {
158 default_vector_index.config.embedding_function = Some(embedding_function.clone());
159 }
160 if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
161 embedding_vector_index.config.embedding_function = Some(embedding_function.clone());
162 }
163 }
164 }
165
166 fn defaults_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
167 self.defaults
168 .float_list
169 .as_mut()
170 .and_then(|float_list| float_list.vector_index.as_mut())
171 }
172
173 fn embedding_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
174 self.keys
175 .get_mut(EMBEDDING_KEY)
176 .and_then(|value_types| value_types.float_list.as_mut())
177 .and_then(|float_list| float_list.vector_index.as_mut())
178 }
179
180 fn apply_vector_index_update(
181 vector_index: &mut VectorIndexType,
182 update: &UpdateVectorIndexConfiguration,
183 ) {
184 match update {
185 UpdateVectorIndexConfiguration::Hnsw(Some(hnsw_update)) => {
186 if let Some(hnsw_config) = vector_index.config.hnsw.as_mut() {
187 if let Some(ef_search) = hnsw_update.ef_search {
188 hnsw_config.ef_search = Some(ef_search);
189 }
190 if let Some(max_neighbors) = hnsw_update.max_neighbors {
191 hnsw_config.max_neighbors = Some(max_neighbors);
192 }
193 if let Some(num_threads) = hnsw_update.num_threads {
194 hnsw_config.num_threads = Some(num_threads);
195 }
196 if let Some(resize_factor) = hnsw_update.resize_factor {
197 hnsw_config.resize_factor = Some(resize_factor);
198 }
199 if let Some(sync_threshold) = hnsw_update.sync_threshold {
200 hnsw_config.sync_threshold = Some(sync_threshold);
201 }
202 if let Some(batch_size) = hnsw_update.batch_size {
203 hnsw_config.batch_size = Some(batch_size);
204 }
205 }
206 }
207 UpdateVectorIndexConfiguration::Hnsw(None) => {}
208 UpdateVectorIndexConfiguration::Spann(Some(spann_update)) => {
209 if let Some(spann_config) = vector_index.config.spann.as_mut() {
210 if let Some(search_nprobe) = spann_update.search_nprobe {
211 spann_config.search_nprobe = Some(search_nprobe);
212 }
213 if let Some(ef_search) = spann_update.ef_search {
214 spann_config.ef_search = Some(ef_search);
215 }
216 }
217 }
218 UpdateVectorIndexConfiguration::Spann(None) => {}
219 }
220 }
221
222 pub fn is_sparse_index_enabled(&self) -> bool {
223 let defaults_enabled = self
224 .defaults
225 .sparse_vector
226 .as_ref()
227 .and_then(|sv| sv.sparse_vector_index.as_ref())
228 .is_some_and(|idx| idx.enabled);
229 let key_enabled = self.keys.values().any(|value_types| {
230 value_types
231 .sparse_vector
232 .as_ref()
233 .and_then(|sv| sv.sparse_vector_index.as_ref())
234 .is_some_and(|idx| idx.enabled)
235 });
236 defaults_enabled || key_enabled
237 }
238}
239
240impl Default for Schema {
241 fn default() -> Self {
258 let defaults = ValueTypes {
260 string: Some(StringValueType {
261 fts_index: Some(FtsIndexType {
262 enabled: false,
263 config: FtsIndexConfig {},
264 }),
265 string_inverted_index: Some(StringInvertedIndexType {
266 enabled: true,
267 config: StringInvertedIndexConfig {},
268 }),
269 }),
270 float_list: Some(FloatListValueType {
271 vector_index: Some(VectorIndexType {
272 enabled: false,
273 config: VectorIndexConfig {
274 space: None, embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
276 source_key: None,
277 hnsw: None, spann: None, },
280 }),
281 }),
282 sparse_vector: Some(SparseVectorValueType {
283 sparse_vector_index: Some(SparseVectorIndexType {
284 enabled: false,
285 config: SparseVectorIndexConfig {
286 embedding_function: None,
287 source_key: None,
288 bm25: None,
289 },
290 }),
291 }),
292 int: Some(IntValueType {
293 int_inverted_index: Some(IntInvertedIndexType {
294 enabled: true,
295 config: IntInvertedIndexConfig {},
296 }),
297 }),
298 float: Some(FloatValueType {
299 float_inverted_index: Some(FloatInvertedIndexType {
300 enabled: true,
301 config: FloatInvertedIndexConfig {},
302 }),
303 }),
304 boolean: Some(BoolValueType {
305 bool_inverted_index: Some(BoolInvertedIndexType {
306 enabled: true,
307 config: BoolInvertedIndexConfig {},
308 }),
309 }),
310 };
311
312 let mut keys = HashMap::new();
314
315 keys.insert(
317 DOCUMENT_KEY.to_string(),
318 ValueTypes {
319 string: Some(StringValueType {
320 fts_index: Some(FtsIndexType {
321 enabled: true,
322 config: FtsIndexConfig {},
323 }),
324 string_inverted_index: Some(StringInvertedIndexType {
325 enabled: false,
326 config: StringInvertedIndexConfig {},
327 }),
328 }),
329 ..Default::default()
330 },
331 );
332
333 keys.insert(
335 EMBEDDING_KEY.to_string(),
336 ValueTypes {
337 float_list: Some(FloatListValueType {
338 vector_index: Some(VectorIndexType {
339 enabled: true,
340 config: VectorIndexConfig {
341 space: None, embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
343 source_key: Some(DOCUMENT_KEY.to_string()),
344 hnsw: None, spann: None, },
347 }),
348 }),
349 ..Default::default()
350 },
351 );
352
353 Schema { defaults, keys }
354 }
355}
356
357pub fn is_embedding_function_default(
358 embedding_function: &Option<EmbeddingFunctionConfiguration>,
359) -> bool {
360 match embedding_function {
361 None => true,
362 Some(embedding_function) => embedding_function.is_default(),
363 }
364}
365
366pub fn is_space_default(space: &Option<Space>) -> bool {
368 match space {
369 None => true, Some(s) => *s == default_space(), }
372}
373
374pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
376 hnsw_config.ef_construction == Some(default_construction_ef())
377 && hnsw_config.ef_search == Some(default_search_ef())
378 && hnsw_config.max_neighbors == Some(default_m())
379 && hnsw_config.num_threads == Some(default_num_threads())
380 && hnsw_config.batch_size == Some(default_batch_size())
381 && hnsw_config.sync_threshold == Some(default_sync_threshold())
382 && hnsw_config.resize_factor == Some(default_resize_factor())
383}
384
/// Index settings grouped by value type.
///
/// Every field is optional and omitted from serialized output when `None`.
/// Each field also accepts a `#`-prefixed alias when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct ValueTypes {
    /// Settings for string values (`string`, alias `#string`).
    #[serde(
        rename = "string",
        alias = "#string",
        skip_serializing_if = "Option::is_none"
    )]
    pub string: Option<StringValueType>,

    /// Settings for float-list values (`float_list`, alias `#float_list`).
    #[serde(
        rename = "float_list",
        alias = "#float_list",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_list: Option<FloatListValueType>,

    /// Settings for sparse vector values (`sparse_vector`, alias `#sparse_vector`).
    #[serde(
        rename = "sparse_vector",
        alias = "#sparse_vector",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector: Option<SparseVectorValueType>,

    /// Settings for integer values (`int`, alias `#int`).
    #[serde(
        rename = "int",
        alias = "#int",
        skip_serializing_if = "Option::is_none"
    )]
    pub int: Option<IntValueType>,

    /// Settings for float values (`float`, alias `#float`).
    #[serde(
        rename = "float",
        alias = "#float",
        skip_serializing_if = "Option::is_none"
    )]
    pub float: Option<FloatValueType>,

    /// Settings for boolean values; the serialized name is `bool` (alias `#bool`).
    #[serde(
        rename = "bool",
        alias = "#bool",
        skip_serializing_if = "Option::is_none"
    )]
    pub boolean: Option<BoolValueType>,
}
438
/// Indexes available for string values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringValueType {
    /// Full-text search index (`fts_index`, alias `$fts_index`); omitted when `None`.
    #[serde(
        rename = "fts_index",
        alias = "$fts_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub fts_index: Option<FtsIndexType>,

    /// String inverted index (`string_inverted_index`, alias
    /// `$string_inverted_index`); omitted when `None`.
    #[serde(
        rename = "string_inverted_index",
        alias = "$string_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub string_inverted_index: Option<StringInvertedIndexType>,
}
457
/// Indexes available for float-list values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatListValueType {
    /// Vector index (`vector_index`, alias `$vector_index`); omitted when `None`.
    #[serde(
        rename = "vector_index",
        alias = "$vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub vector_index: Option<VectorIndexType>,
}
469
/// Indexes available for sparse vector values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorValueType {
    /// Sparse vector index (`sparse_vector_index`, alias
    /// `$sparse_vector_index`); omitted when `None`.
    #[serde(
        rename = "sparse_vector_index",
        alias = "$sparse_vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector_index: Option<SparseVectorIndexType>,
}
481
/// Indexes available for integer values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntValueType {
    /// Integer inverted index (`int_inverted_index`, alias
    /// `$int_inverted_index`); omitted when `None`.
    #[serde(
        rename = "int_inverted_index",
        alias = "$int_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub int_inverted_index: Option<IntInvertedIndexType>,
}
494
/// Indexes available for float values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatValueType {
    /// Float inverted index (`float_inverted_index`, alias
    /// `$float_inverted_index`); omitted when `None`.
    #[serde(
        rename = "float_inverted_index",
        alias = "$float_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_inverted_index: Option<FloatInvertedIndexType>,
}
506
/// Indexes available for boolean values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolValueType {
    /// Boolean inverted index (`bool_inverted_index`, alias
    /// `$bool_inverted_index`); omitted when `None`.
    #[serde(
        rename = "bool_inverted_index",
        alias = "$bool_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub bool_inverted_index: Option<BoolInvertedIndexType>,
}
518
/// Enabled flag and configuration of a full-text search index.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FtsIndexType {
    /// Whether the index is switched on.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: FtsIndexConfig,
}
526
/// Enabled flag and configuration of a vector index.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct VectorIndexType {
    /// Whether the index is switched on.
    pub enabled: bool,
    /// Index-specific configuration (space, embedding function, source key,
    /// HNSW/SPANN parameters).
    pub config: VectorIndexConfig,
}
533
/// Enabled flag and configuration of a sparse vector index.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorIndexType {
    /// Whether the index is switched on.
    pub enabled: bool,
    /// Index-specific configuration (embedding function, source key, bm25 flag).
    pub config: SparseVectorIndexConfig,
}
540
/// Enabled flag and configuration of a string inverted index.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringInvertedIndexType {
    /// Whether the index is switched on.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: StringInvertedIndexConfig,
}
547
/// Enabled flag and configuration of an integer inverted index.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntInvertedIndexType {
    /// Whether the index is switched on.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: IntInvertedIndexConfig,
}
554
/// Enabled flag and configuration of a float inverted index.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatInvertedIndexType {
    /// Whether the index is switched on.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: FloatInvertedIndexConfig,
}
561
/// Enabled flag and configuration of a boolean inverted index.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolInvertedIndexType {
    /// Whether the index is switched on.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: BoolInvertedIndexConfig,
}
568
569impl Schema {
570 pub fn new_default(default_knn_index: KnnIndex) -> Self {
572 let vector_config = VectorIndexType {
574 enabled: false,
575 config: VectorIndexConfig {
576 space: Some(default_space()),
577 embedding_function: None,
578 source_key: None,
579 hnsw: match default_knn_index {
580 KnnIndex::Hnsw => Some(HnswIndexConfig {
581 ef_construction: Some(default_construction_ef()),
582 max_neighbors: Some(default_m()),
583 ef_search: Some(default_search_ef()),
584 num_threads: Some(default_num_threads()),
585 batch_size: Some(default_batch_size()),
586 sync_threshold: Some(default_sync_threshold()),
587 resize_factor: Some(default_resize_factor()),
588 }),
589 KnnIndex::Spann => None,
590 },
591 spann: match default_knn_index {
592 KnnIndex::Hnsw => None,
593 KnnIndex::Spann => Some(SpannIndexConfig {
594 search_nprobe: Some(default_search_nprobe()),
595 search_rng_factor: Some(default_search_rng_factor()),
596 search_rng_epsilon: Some(default_search_rng_epsilon()),
597 nreplica_count: Some(default_nreplica_count()),
598 write_rng_factor: Some(default_write_rng_factor()),
599 write_rng_epsilon: Some(default_write_rng_epsilon()),
600 split_threshold: Some(default_split_threshold()),
601 num_samples_kmeans: Some(default_num_samples_kmeans()),
602 initial_lambda: Some(default_initial_lambda()),
603 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
604 merge_threshold: Some(default_merge_threshold()),
605 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
606 write_nprobe: Some(default_write_nprobe()),
607 ef_construction: Some(default_construction_ef_spann()),
608 ef_search: Some(default_search_ef_spann()),
609 max_neighbors: Some(default_m_spann()),
610 }),
611 },
612 },
613 };
614
615 let defaults = ValueTypes {
617 string: Some(StringValueType {
618 string_inverted_index: Some(StringInvertedIndexType {
619 enabled: true,
620 config: StringInvertedIndexConfig {},
621 }),
622 fts_index: Some(FtsIndexType {
623 enabled: false,
624 config: FtsIndexConfig {},
625 }),
626 }),
627 float: Some(FloatValueType {
628 float_inverted_index: Some(FloatInvertedIndexType {
629 enabled: true,
630 config: FloatInvertedIndexConfig {},
631 }),
632 }),
633 int: Some(IntValueType {
634 int_inverted_index: Some(IntInvertedIndexType {
635 enabled: true,
636 config: IntInvertedIndexConfig {},
637 }),
638 }),
639 boolean: Some(BoolValueType {
640 bool_inverted_index: Some(BoolInvertedIndexType {
641 enabled: true,
642 config: BoolInvertedIndexConfig {},
643 }),
644 }),
645 float_list: Some(FloatListValueType {
646 vector_index: Some(vector_config),
647 }),
648 sparse_vector: Some(SparseVectorValueType {
649 sparse_vector_index: Some(SparseVectorIndexType {
650 enabled: false,
651 config: SparseVectorIndexConfig {
652 embedding_function: Some(EmbeddingFunctionConfiguration::Unknown),
653 source_key: None,
654 bm25: Some(false),
655 },
656 }),
657 }),
658 };
659
660 let mut keys = HashMap::new();
662
663 let embedding_defaults = ValueTypes {
665 float_list: Some(FloatListValueType {
666 vector_index: Some(VectorIndexType {
667 enabled: true,
668 config: VectorIndexConfig {
669 space: Some(default_space()),
670 embedding_function: None,
671 source_key: Some(DOCUMENT_KEY.to_string()),
672 hnsw: match default_knn_index {
673 KnnIndex::Hnsw => Some(HnswIndexConfig {
674 ef_construction: Some(default_construction_ef()),
675 max_neighbors: Some(default_m()),
676 ef_search: Some(default_search_ef()),
677 num_threads: Some(default_num_threads()),
678 batch_size: Some(default_batch_size()),
679 sync_threshold: Some(default_sync_threshold()),
680 resize_factor: Some(default_resize_factor()),
681 }),
682 KnnIndex::Spann => None,
683 },
684 spann: match default_knn_index {
685 KnnIndex::Hnsw => None,
686 KnnIndex::Spann => Some(SpannIndexConfig {
687 search_nprobe: Some(default_search_nprobe()),
688 search_rng_factor: Some(default_search_rng_factor()),
689 search_rng_epsilon: Some(default_search_rng_epsilon()),
690 nreplica_count: Some(default_nreplica_count()),
691 write_rng_factor: Some(default_write_rng_factor()),
692 write_rng_epsilon: Some(default_write_rng_epsilon()),
693 split_threshold: Some(default_split_threshold()),
694 num_samples_kmeans: Some(default_num_samples_kmeans()),
695 initial_lambda: Some(default_initial_lambda()),
696 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
697 merge_threshold: Some(default_merge_threshold()),
698 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
699 write_nprobe: Some(default_write_nprobe()),
700 ef_construction: Some(default_construction_ef_spann()),
701 ef_search: Some(default_search_ef_spann()),
702 max_neighbors: Some(default_m_spann()),
703 }),
704 },
705 },
706 }),
707 }),
708 ..Default::default()
709 };
710 keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
711
712 let document_defaults = ValueTypes {
714 string: Some(StringValueType {
715 fts_index: Some(FtsIndexType {
716 enabled: true,
717 config: FtsIndexConfig {},
718 }),
719 string_inverted_index: Some(StringInvertedIndexType {
720 enabled: false,
721 config: StringInvertedIndexConfig {},
722 }),
723 }),
724 ..Default::default()
725 };
726 keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
727
728 Schema { defaults, keys }
729 }
730
731 pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
732 let to_internal = |vector_index: &VectorIndexType| {
733 let space = vector_index.config.space.clone();
734 vector_index
735 .config
736 .spann
737 .clone()
738 .map(|config| (space.as_ref(), &config).into())
739 };
740
741 self.keys
742 .get(EMBEDDING_KEY)
743 .and_then(|value_types| value_types.float_list.as_ref())
744 .and_then(|float_list| float_list.vector_index.as_ref())
745 .and_then(to_internal)
746 .or_else(|| {
747 self.defaults
748 .float_list
749 .as_ref()
750 .and_then(|float_list| float_list.vector_index.as_ref())
751 .and_then(to_internal)
752 })
753 }
754
755 pub fn get_internal_hnsw_config(&self) -> Option<InternalHnswConfiguration> {
756 let to_internal = |vector_index: &VectorIndexType| {
757 if vector_index.config.spann.is_some() {
758 return None;
759 }
760 let space = vector_index.config.space.as_ref();
761 let hnsw_config = vector_index.config.hnsw.as_ref();
762 Some((space, hnsw_config).into())
763 };
764
765 self.keys
766 .get(EMBEDDING_KEY)
767 .and_then(|value_types| value_types.float_list.as_ref())
768 .and_then(|float_list| float_list.vector_index.as_ref())
769 .and_then(to_internal)
770 .or_else(|| {
771 self.defaults
772 .float_list
773 .as_ref()
774 .and_then(|float_list| float_list.vector_index.as_ref())
775 .and_then(to_internal)
776 })
777 }
778
779 pub fn get_internal_hnsw_config_with_legacy_fallback(
780 &self,
781 segment: &Segment,
782 ) -> Result<Option<InternalHnswConfiguration>, HnswParametersFromSegmentError> {
783 if let Some(config) = self.get_internal_hnsw_config() {
784 let config_from_metadata =
785 InternalHnswConfiguration::from_legacy_segment_metadata(&segment.metadata)?;
786
787 if config == InternalHnswConfiguration::default() && config != config_from_metadata {
788 return Ok(Some(config_from_metadata));
789 }
790
791 return Ok(Some(config));
792 }
793
794 Ok(None)
795 }
796
797 pub fn reconcile_with_defaults(
804 user_schema: Option<&Schema>,
805 knn_index: KnnIndex,
806 ) -> Result<Self, SchemaError> {
807 let default_schema = Schema::new_default(knn_index);
808
809 match user_schema {
810 Some(user) => {
811 let merged_defaults =
813 Self::merge_value_types(&default_schema.defaults, &user.defaults, knn_index)?;
814
815 let mut merged_keys = default_schema.keys.clone();
817 for (key, user_value_types) in &user.keys {
818 if let Some(default_value_types) = merged_keys.get(key) {
819 let merged_value_types = Self::merge_value_types(
821 default_value_types,
822 user_value_types,
823 knn_index,
824 )?;
825 merged_keys.insert(key.clone(), merged_value_types);
826 } else {
827 merged_keys.insert(key.clone(), user_value_types.clone());
829 }
830 }
831
832 Ok(Schema {
833 defaults: merged_defaults,
834 keys: merged_keys,
835 })
836 }
837 None => Ok(default_schema),
838 }
839 }
840
841 pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
843 if self.defaults != other.defaults {
844 return Err(SchemaError::DefaultsMismatch);
845 }
846
847 let mut keys = self.keys.clone();
848
849 for (key, other_value_types) in &other.keys {
850 if let Some(existing) = keys.get(key).cloned() {
851 let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
852 keys.insert(key.clone(), merged);
853 } else {
854 keys.insert(key.clone(), other_value_types.clone());
855 }
856 }
857
858 Ok(Schema {
859 defaults: self.defaults.clone(),
860 keys,
861 })
862 }
863
864 fn merge_override_value_types(
865 key: &str,
866 left: &ValueTypes,
867 right: &ValueTypes,
868 ) -> Result<ValueTypes, SchemaError> {
869 Ok(ValueTypes {
870 string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
871 float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
872 int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
873 boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
874 float_list: Self::merge_float_list_override(
875 key,
876 left.float_list.as_ref(),
877 right.float_list.as_ref(),
878 )?,
879 sparse_vector: Self::merge_sparse_vector_override(
880 key,
881 left.sparse_vector.as_ref(),
882 right.sparse_vector.as_ref(),
883 )?,
884 })
885 }
886
887 fn merge_string_override(
888 key: &str,
889 left: Option<&StringValueType>,
890 right: Option<&StringValueType>,
891 ) -> Result<Option<StringValueType>, SchemaError> {
892 match (left, right) {
893 (Some(l), Some(r)) => Ok(Some(StringValueType {
894 string_inverted_index: Self::merge_index_or_error(
895 l.string_inverted_index.as_ref(),
896 r.string_inverted_index.as_ref(),
897 &format!("key '{key}' string.string_inverted_index"),
898 )?,
899 fts_index: Self::merge_index_or_error(
900 l.fts_index.as_ref(),
901 r.fts_index.as_ref(),
902 &format!("key '{key}' string.fts_index"),
903 )?,
904 })),
905 (Some(l), None) => Ok(Some(l.clone())),
906 (None, Some(r)) => Ok(Some(r.clone())),
907 (None, None) => Ok(None),
908 }
909 }
910
911 fn merge_float_override(
912 key: &str,
913 left: Option<&FloatValueType>,
914 right: Option<&FloatValueType>,
915 ) -> Result<Option<FloatValueType>, SchemaError> {
916 match (left, right) {
917 (Some(l), Some(r)) => Ok(Some(FloatValueType {
918 float_inverted_index: Self::merge_index_or_error(
919 l.float_inverted_index.as_ref(),
920 r.float_inverted_index.as_ref(),
921 &format!("key '{key}' float.float_inverted_index"),
922 )?,
923 })),
924 (Some(l), None) => Ok(Some(l.clone())),
925 (None, Some(r)) => Ok(Some(r.clone())),
926 (None, None) => Ok(None),
927 }
928 }
929
930 fn merge_int_override(
931 key: &str,
932 left: Option<&IntValueType>,
933 right: Option<&IntValueType>,
934 ) -> Result<Option<IntValueType>, SchemaError> {
935 match (left, right) {
936 (Some(l), Some(r)) => Ok(Some(IntValueType {
937 int_inverted_index: Self::merge_index_or_error(
938 l.int_inverted_index.as_ref(),
939 r.int_inverted_index.as_ref(),
940 &format!("key '{key}' int.int_inverted_index"),
941 )?,
942 })),
943 (Some(l), None) => Ok(Some(l.clone())),
944 (None, Some(r)) => Ok(Some(r.clone())),
945 (None, None) => Ok(None),
946 }
947 }
948
949 fn merge_bool_override(
950 key: &str,
951 left: Option<&BoolValueType>,
952 right: Option<&BoolValueType>,
953 ) -> Result<Option<BoolValueType>, SchemaError> {
954 match (left, right) {
955 (Some(l), Some(r)) => Ok(Some(BoolValueType {
956 bool_inverted_index: Self::merge_index_or_error(
957 l.bool_inverted_index.as_ref(),
958 r.bool_inverted_index.as_ref(),
959 &format!("key '{key}' bool.bool_inverted_index"),
960 )?,
961 })),
962 (Some(l), None) => Ok(Some(l.clone())),
963 (None, Some(r)) => Ok(Some(r.clone())),
964 (None, None) => Ok(None),
965 }
966 }
967
968 fn merge_float_list_override(
969 key: &str,
970 left: Option<&FloatListValueType>,
971 right: Option<&FloatListValueType>,
972 ) -> Result<Option<FloatListValueType>, SchemaError> {
973 match (left, right) {
974 (Some(l), Some(r)) => Ok(Some(FloatListValueType {
975 vector_index: Self::merge_index_or_error(
976 l.vector_index.as_ref(),
977 r.vector_index.as_ref(),
978 &format!("key '{key}' float_list.vector_index"),
979 )?,
980 })),
981 (Some(l), None) => Ok(Some(l.clone())),
982 (None, Some(r)) => Ok(Some(r.clone())),
983 (None, None) => Ok(None),
984 }
985 }
986
987 fn merge_sparse_vector_override(
988 key: &str,
989 left: Option<&SparseVectorValueType>,
990 right: Option<&SparseVectorValueType>,
991 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
992 match (left, right) {
993 (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
994 sparse_vector_index: Self::merge_index_or_error(
995 l.sparse_vector_index.as_ref(),
996 r.sparse_vector_index.as_ref(),
997 &format!("key '{key}' sparse_vector.sparse_vector_index"),
998 )?,
999 })),
1000 (Some(l), None) => Ok(Some(l.clone())),
1001 (None, Some(r)) => Ok(Some(r.clone())),
1002 (None, None) => Ok(None),
1003 }
1004 }
1005
1006 fn merge_index_or_error<T: Clone + PartialEq>(
1007 left: Option<&T>,
1008 right: Option<&T>,
1009 context: &str,
1010 ) -> Result<Option<T>, SchemaError> {
1011 match (left, right) {
1012 (Some(l), Some(r)) => {
1013 if l == r {
1014 Ok(Some(l.clone()))
1015 } else {
1016 Err(SchemaError::ConfigurationConflict {
1017 context: context.to_string(),
1018 })
1019 }
1020 }
1021 (Some(l), None) => Ok(Some(l.clone())),
1022 (None, Some(r)) => Ok(Some(r.clone())),
1023 (None, None) => Ok(None),
1024 }
1025 }
1026
1027 fn merge_value_types(
1030 default: &ValueTypes,
1031 user: &ValueTypes,
1032 knn_index: KnnIndex,
1033 ) -> Result<ValueTypes, SchemaError> {
1034 let float_list = Self::merge_float_list_type(
1036 default.float_list.as_ref(),
1037 user.float_list.as_ref(),
1038 knn_index,
1039 );
1040
1041 if let Some(ref fl) = float_list {
1043 Self::validate_float_list_value_type(fl)?;
1044 }
1045
1046 Ok(ValueTypes {
1047 string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
1048 float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
1049 int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
1050 boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
1051 float_list,
1052 sparse_vector: Self::merge_sparse_vector_type(
1053 default.sparse_vector.as_ref(),
1054 user.sparse_vector.as_ref(),
1055 )?,
1056 })
1057 }
1058
1059 fn merge_string_type(
1061 default: Option<&StringValueType>,
1062 user: Option<&StringValueType>,
1063 ) -> Result<Option<StringValueType>, SchemaError> {
1064 match (default, user) {
1065 (Some(default), Some(user)) => Ok(Some(StringValueType {
1066 string_inverted_index: Self::merge_string_inverted_index_type(
1067 default.string_inverted_index.as_ref(),
1068 user.string_inverted_index.as_ref(),
1069 )?,
1070 fts_index: Self::merge_fts_index_type(
1071 default.fts_index.as_ref(),
1072 user.fts_index.as_ref(),
1073 )?,
1074 })),
1075 (Some(default), None) => Ok(Some(default.clone())),
1076 (None, Some(user)) => Ok(Some(user.clone())),
1077 (None, None) => Ok(None),
1078 }
1079 }
1080
1081 fn merge_float_type(
1083 default: Option<&FloatValueType>,
1084 user: Option<&FloatValueType>,
1085 ) -> Result<Option<FloatValueType>, SchemaError> {
1086 match (default, user) {
1087 (Some(default), Some(user)) => Ok(Some(FloatValueType {
1088 float_inverted_index: Self::merge_float_inverted_index_type(
1089 default.float_inverted_index.as_ref(),
1090 user.float_inverted_index.as_ref(),
1091 )?,
1092 })),
1093 (Some(default), None) => Ok(Some(default.clone())),
1094 (None, Some(user)) => Ok(Some(user.clone())),
1095 (None, None) => Ok(None),
1096 }
1097 }
1098
1099 fn merge_int_type(
1101 default: Option<&IntValueType>,
1102 user: Option<&IntValueType>,
1103 ) -> Result<Option<IntValueType>, SchemaError> {
1104 match (default, user) {
1105 (Some(default), Some(user)) => Ok(Some(IntValueType {
1106 int_inverted_index: Self::merge_int_inverted_index_type(
1107 default.int_inverted_index.as_ref(),
1108 user.int_inverted_index.as_ref(),
1109 )?,
1110 })),
1111 (Some(default), None) => Ok(Some(default.clone())),
1112 (None, Some(user)) => Ok(Some(user.clone())),
1113 (None, None) => Ok(None),
1114 }
1115 }
1116
1117 fn merge_bool_type(
1119 default: Option<&BoolValueType>,
1120 user: Option<&BoolValueType>,
1121 ) -> Result<Option<BoolValueType>, SchemaError> {
1122 match (default, user) {
1123 (Some(default), Some(user)) => Ok(Some(BoolValueType {
1124 bool_inverted_index: Self::merge_bool_inverted_index_type(
1125 default.bool_inverted_index.as_ref(),
1126 user.bool_inverted_index.as_ref(),
1127 )?,
1128 })),
1129 (Some(default), None) => Ok(Some(default.clone())),
1130 (None, Some(user)) => Ok(Some(user.clone())),
1131 (None, None) => Ok(None),
1132 }
1133 }
1134
1135 fn merge_float_list_type(
1137 default: Option<&FloatListValueType>,
1138 user: Option<&FloatListValueType>,
1139 knn_index: KnnIndex,
1140 ) -> Option<FloatListValueType> {
1141 match (default, user) {
1142 (Some(default), Some(user)) => Some(FloatListValueType {
1143 vector_index: Self::merge_vector_index_type(
1144 default.vector_index.as_ref(),
1145 user.vector_index.as_ref(),
1146 knn_index,
1147 ),
1148 }),
1149 (Some(default), None) => Some(default.clone()),
1150 (None, Some(user)) => Some(user.clone()),
1151 (None, None) => None,
1152 }
1153 }
1154
1155 fn merge_sparse_vector_type(
1157 default: Option<&SparseVectorValueType>,
1158 user: Option<&SparseVectorValueType>,
1159 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1160 match (default, user) {
1161 (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
1162 sparse_vector_index: Self::merge_sparse_vector_index_type(
1163 default.sparse_vector_index.as_ref(),
1164 user.sparse_vector_index.as_ref(),
1165 )?,
1166 })),
1167 (Some(default), None) => Ok(Some(default.clone())),
1168 (None, Some(user)) => Ok(Some(user.clone())),
1169 (None, None) => Ok(None),
1170 }
1171 }
1172
1173 fn merge_string_inverted_index_type(
1175 default: Option<&StringInvertedIndexType>,
1176 user: Option<&StringInvertedIndexType>,
1177 ) -> Result<Option<StringInvertedIndexType>, SchemaError> {
1178 match (default, user) {
1179 (Some(_default), Some(user)) => {
1180 Ok(Some(StringInvertedIndexType {
1181 enabled: user.enabled, config: user.config.clone(), }))
1184 }
1185 (Some(default), None) => Ok(Some(default.clone())),
1186 (None, Some(user)) => Ok(Some(user.clone())),
1187 (None, None) => Ok(None),
1188 }
1189 }
1190
1191 fn merge_fts_index_type(
1192 default: Option<&FtsIndexType>,
1193 user: Option<&FtsIndexType>,
1194 ) -> Result<Option<FtsIndexType>, SchemaError> {
1195 match (default, user) {
1196 (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
1197 enabled: user.enabled,
1198 config: user.config.clone(),
1199 })),
1200 (Some(default), None) => Ok(Some(default.clone())),
1201 (None, Some(user)) => Ok(Some(user.clone())),
1202 (None, None) => Ok(None),
1203 }
1204 }
1205
1206 fn merge_float_inverted_index_type(
1207 default: Option<&FloatInvertedIndexType>,
1208 user: Option<&FloatInvertedIndexType>,
1209 ) -> Result<Option<FloatInvertedIndexType>, SchemaError> {
1210 match (default, user) {
1211 (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
1212 enabled: user.enabled,
1213 config: user.config.clone(),
1214 })),
1215 (Some(default), None) => Ok(Some(default.clone())),
1216 (None, Some(user)) => Ok(Some(user.clone())),
1217 (None, None) => Ok(None),
1218 }
1219 }
1220
1221 fn merge_int_inverted_index_type(
1222 default: Option<&IntInvertedIndexType>,
1223 user: Option<&IntInvertedIndexType>,
1224 ) -> Result<Option<IntInvertedIndexType>, SchemaError> {
1225 match (default, user) {
1226 (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
1227 enabled: user.enabled,
1228 config: user.config.clone(),
1229 })),
1230 (Some(default), None) => Ok(Some(default.clone())),
1231 (None, Some(user)) => Ok(Some(user.clone())),
1232 (None, None) => Ok(None),
1233 }
1234 }
1235
1236 fn merge_bool_inverted_index_type(
1237 default: Option<&BoolInvertedIndexType>,
1238 user: Option<&BoolInvertedIndexType>,
1239 ) -> Result<Option<BoolInvertedIndexType>, SchemaError> {
1240 match (default, user) {
1241 (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
1242 enabled: user.enabled,
1243 config: user.config.clone(),
1244 })),
1245 (Some(default), None) => Ok(Some(default.clone())),
1246 (None, Some(user)) => Ok(Some(user.clone())),
1247 (None, None) => Ok(None),
1248 }
1249 }
1250
1251 fn merge_vector_index_type(
1252 default: Option<&VectorIndexType>,
1253 user: Option<&VectorIndexType>,
1254 knn_index: KnnIndex,
1255 ) -> Option<VectorIndexType> {
1256 match (default, user) {
1257 (Some(default), Some(user)) => Some(VectorIndexType {
1258 enabled: user.enabled,
1259 config: Self::merge_vector_index_config(&default.config, &user.config, knn_index),
1260 }),
1261 (Some(default), None) => Some(default.clone()),
1262 (None, Some(user)) => Some(user.clone()),
1263 (None, None) => None,
1264 }
1265 }
1266
1267 fn merge_sparse_vector_index_type(
1268 default: Option<&SparseVectorIndexType>,
1269 user: Option<&SparseVectorIndexType>,
1270 ) -> Result<Option<SparseVectorIndexType>, SchemaError> {
1271 match (default, user) {
1272 (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
1273 enabled: user.enabled,
1274 config: Self::merge_sparse_vector_index_config(&default.config, &user.config),
1275 })),
1276 (Some(default), None) => Ok(Some(default.clone())),
1277 (None, Some(user)) => Ok(Some(user.clone())),
1278 (None, None) => Ok(None),
1279 }
1280 }
1281
1282 fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), SchemaError> {
1285 if let Some(vector_index) = &float_list.vector_index {
1286 if let Some(hnsw) = &vector_index.config.hnsw {
1287 hnsw.validate().map_err(SchemaError::InvalidHnswConfig)?;
1288 }
1289 if let Some(spann) = &vector_index.config.spann {
1290 spann.validate().map_err(SchemaError::InvalidSpannConfig)?;
1291 }
1292 }
1293 Ok(())
1294 }
1295
1296 fn merge_vector_index_config(
1298 default: &VectorIndexConfig,
1299 user: &VectorIndexConfig,
1300 knn_index: KnnIndex,
1301 ) -> VectorIndexConfig {
1302 match knn_index {
1303 KnnIndex::Hnsw => VectorIndexConfig {
1304 space: user.space.clone().or(default.space.clone()),
1305 embedding_function: user
1306 .embedding_function
1307 .clone()
1308 .or(default.embedding_function.clone()),
1309 source_key: user.source_key.clone().or(default.source_key.clone()),
1310 hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1311 spann: None,
1312 },
1313 KnnIndex::Spann => VectorIndexConfig {
1314 space: user.space.clone().or(default.space.clone()),
1315 embedding_function: user
1316 .embedding_function
1317 .clone()
1318 .or(default.embedding_function.clone()),
1319 source_key: user.source_key.clone().or(default.source_key.clone()),
1320 hnsw: None,
1321 spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
1322 },
1323 }
1324 }
1325
1326 fn merge_sparse_vector_index_config(
1328 default: &SparseVectorIndexConfig,
1329 user: &SparseVectorIndexConfig,
1330 ) -> SparseVectorIndexConfig {
1331 SparseVectorIndexConfig {
1332 embedding_function: user
1333 .embedding_function
1334 .clone()
1335 .or(default.embedding_function.clone()),
1336 source_key: user.source_key.clone().or(default.source_key.clone()),
1337 bm25: user.bm25.or(default.bm25),
1338 }
1339 }
1340
1341 fn merge_hnsw_configs(
1343 default_hnsw: Option<&HnswIndexConfig>,
1344 user_hnsw: Option<&HnswIndexConfig>,
1345 ) -> Option<HnswIndexConfig> {
1346 match (default_hnsw, user_hnsw) {
1347 (Some(default), Some(user)) => Some(HnswIndexConfig {
1348 ef_construction: user.ef_construction.or(default.ef_construction),
1349 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1350 ef_search: user.ef_search.or(default.ef_search),
1351 num_threads: user.num_threads.or(default.num_threads),
1352 batch_size: user.batch_size.or(default.batch_size),
1353 sync_threshold: user.sync_threshold.or(default.sync_threshold),
1354 resize_factor: user.resize_factor.or(default.resize_factor),
1355 }),
1356 (Some(default), None) => Some(default.clone()),
1357 (None, Some(user)) => Some(user.clone()),
1358 (None, None) => None,
1359 }
1360 }
1361
1362 fn merge_spann_configs(
1364 default_spann: Option<&SpannIndexConfig>,
1365 user_spann: Option<&SpannIndexConfig>,
1366 ) -> Option<SpannIndexConfig> {
1367 match (default_spann, user_spann) {
1368 (Some(default), Some(user)) => Some(SpannIndexConfig {
1369 search_nprobe: user.search_nprobe.or(default.search_nprobe),
1370 search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1371 search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1372 nreplica_count: user.nreplica_count.or(default.nreplica_count),
1373 write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1374 write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1375 split_threshold: user.split_threshold.or(default.split_threshold),
1376 num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1377 initial_lambda: user.initial_lambda.or(default.initial_lambda),
1378 reassign_neighbor_count: user
1379 .reassign_neighbor_count
1380 .or(default.reassign_neighbor_count),
1381 merge_threshold: user.merge_threshold.or(default.merge_threshold),
1382 num_centers_to_merge_to: user
1383 .num_centers_to_merge_to
1384 .or(default.num_centers_to_merge_to),
1385 write_nprobe: user.write_nprobe.or(default.write_nprobe),
1386 ef_construction: user.ef_construction.or(default.ef_construction),
1387 ef_search: user.ef_search.or(default.ef_search),
1388 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1389 }),
1390 (Some(default), None) => Some(default.clone()),
1391 (None, Some(user)) => Some(user.clone()),
1392 (None, None) => None,
1393 }
1394 }
1395
1396 pub fn reconcile_with_collection_config(
1404 schema: &Schema,
1405 collection_config: &InternalCollectionConfiguration,
1406 ) -> Result<Schema, SchemaError> {
1407 if collection_config.is_default() {
1409 if schema.is_default() {
1410 let new_schema = Self::convert_collection_config_to_schema(collection_config)?;
1415 return Ok(new_schema);
1416 } else {
1417 return Ok(schema.clone());
1419 }
1420 }
1421
1422 Self::convert_collection_config_to_schema(collection_config)
1425 }
1426
1427 pub fn reconcile_schema_and_config(
1428 schema: Option<&Schema>,
1429 configuration: Option<&InternalCollectionConfiguration>,
1430 knn_index: KnnIndex,
1431 ) -> Result<Schema, SchemaError> {
1432 if let (Some(user_schema), Some(config)) = (schema, configuration) {
1434 if !user_schema.is_default() && !config.is_default() {
1435 return Err(SchemaError::ConfigAndSchemaConflict);
1436 }
1437 }
1438
1439 let reconciled_schema = Self::reconcile_with_defaults(schema, knn_index)?;
1440 if let Some(config) = configuration {
1441 Self::reconcile_with_collection_config(&reconciled_schema, config)
1442 } else {
1443 Ok(reconciled_schema)
1444 }
1445 }
1446
1447 pub fn default_with_embedding_function(
1448 embedding_function: EmbeddingFunctionConfiguration,
1449 ) -> Schema {
1450 let mut schema = Schema::new_default(KnnIndex::Spann);
1451 if let Some(float_list) = &mut schema.defaults.float_list {
1452 if let Some(vector_index) = &mut float_list.vector_index {
1453 vector_index.config.embedding_function = Some(embedding_function.clone());
1454 }
1455 }
1456 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1457 if let Some(float_list) = &mut embedding_types.float_list {
1458 if let Some(vector_index) = &mut float_list.vector_index {
1459 vector_index.config.embedding_function = Some(embedding_function);
1460 }
1461 }
1462 }
1463 schema
1464 }
1465
1466 pub fn is_default(&self) -> bool {
1468 if !Self::is_value_types_default(&self.defaults) {
1470 return false;
1471 }
1472
1473 for key in self.keys.keys() {
1474 if key != EMBEDDING_KEY && key != DOCUMENT_KEY {
1475 return false;
1476 }
1477 }
1478
1479 if let Some(embedding_value) = self.keys.get(EMBEDDING_KEY) {
1481 if !Self::is_embedding_value_types_default(embedding_value) {
1482 return false;
1483 }
1484 }
1485
1486 if let Some(document_value) = self.keys.get(DOCUMENT_KEY) {
1488 if !Self::is_document_value_types_default(document_value) {
1489 return false;
1490 }
1491 }
1492
1493 true
1494 }
1495
1496 fn is_value_types_default(value_types: &ValueTypes) -> bool {
1498 if let Some(string) = &value_types.string {
1500 if let Some(string_inverted) = &string.string_inverted_index {
1501 if !string_inverted.enabled {
1502 return false;
1503 }
1504 }
1506 if let Some(fts) = &string.fts_index {
1507 if fts.enabled {
1508 return false;
1509 }
1510 }
1512 }
1513
1514 if let Some(float) = &value_types.float {
1516 if let Some(float_inverted) = &float.float_inverted_index {
1517 if !float_inverted.enabled {
1518 return false;
1519 }
1520 }
1522 }
1523
1524 if let Some(int) = &value_types.int {
1526 if let Some(int_inverted) = &int.int_inverted_index {
1527 if !int_inverted.enabled {
1528 return false;
1529 }
1530 }
1532 }
1533
1534 if let Some(boolean) = &value_types.boolean {
1536 if let Some(bool_inverted) = &boolean.bool_inverted_index {
1537 if !bool_inverted.enabled {
1538 return false;
1539 }
1540 }
1542 }
1543
1544 if let Some(float_list) = &value_types.float_list {
1546 if let Some(vector_index) = &float_list.vector_index {
1547 if vector_index.enabled {
1548 return false;
1549 }
1550 if vector_index.config.source_key.is_some() {
1553 return false;
1554 }
1555 match (&vector_index.config.hnsw, &vector_index.config.spann) {
1558 (Some(hnsw_config), None) => {
1559 if !hnsw_config.is_default() {
1560 return false;
1561 }
1562 }
1563 (None, Some(spann_config)) => {
1564 if !spann_config.is_default() {
1565 return false;
1566 }
1567 }
1568 (Some(_), Some(_)) => return false, (None, None) => {}
1570 }
1571 }
1572 }
1573
1574 if let Some(sparse_vector) = &value_types.sparse_vector {
1576 if let Some(sparse_index) = &sparse_vector.sparse_vector_index {
1577 if sparse_index.enabled {
1578 return false;
1579 }
1580 if !is_embedding_function_default(&sparse_index.config.embedding_function) {
1582 return false;
1583 }
1584 if sparse_index.config.source_key.is_some() {
1585 return false;
1586 }
1587 if let Some(bm25) = &sparse_index.config.bm25 {
1588 if bm25 != &false {
1589 return false;
1590 }
1591 }
1592 }
1593 }
1594
1595 true
1596 }
1597
1598 fn is_embedding_value_types_default(value_types: &ValueTypes) -> bool {
1600 if value_types.string.is_some()
1602 || value_types.float.is_some()
1603 || value_types.int.is_some()
1604 || value_types.boolean.is_some()
1605 || value_types.sparse_vector.is_some()
1606 {
1607 return false;
1608 }
1609
1610 if let Some(float_list) = &value_types.float_list {
1612 if let Some(vector_index) = &float_list.vector_index {
1613 if !vector_index.enabled {
1614 return false;
1615 }
1616 if !is_embedding_function_default(&vector_index.config.embedding_function) {
1618 return false;
1619 }
1620 if vector_index.config.source_key.as_deref() != Some(DOCUMENT_KEY) {
1622 return false;
1623 }
1624 match (&vector_index.config.hnsw, &vector_index.config.spann) {
1627 (Some(hnsw_config), None) => {
1628 if !hnsw_config.is_default() {
1629 return false;
1630 }
1631 }
1632 (None, Some(spann_config)) => {
1633 if !spann_config.is_default() {
1634 return false;
1635 }
1636 }
1637 (Some(_), Some(_)) => return false, (None, None) => {}
1639 }
1640 }
1641 }
1642
1643 true
1644 }
1645
1646 fn is_document_value_types_default(value_types: &ValueTypes) -> bool {
1648 if value_types.float_list.is_some()
1650 || value_types.float.is_some()
1651 || value_types.int.is_some()
1652 || value_types.boolean.is_some()
1653 || value_types.sparse_vector.is_some()
1654 {
1655 return false;
1656 }
1657
1658 if let Some(string) = &value_types.string {
1660 if let Some(fts) = &string.fts_index {
1661 if !fts.enabled {
1662 return false;
1663 }
1664 }
1666 if let Some(string_inverted) = &string.string_inverted_index {
1667 if string_inverted.enabled {
1668 return false;
1669 }
1670 }
1672 }
1673
1674 true
1675 }
1676
1677 fn convert_collection_config_to_schema(
1679 collection_config: &InternalCollectionConfiguration,
1680 ) -> Result<Schema, SchemaError> {
1681 let mut schema = Schema::new_default(KnnIndex::Spann); let vector_config = match &collection_config.vector_index {
1686 VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
1687 space: Some(hnsw_config.space.clone()),
1688 embedding_function: collection_config.embedding_function.clone(),
1689 source_key: Some(DOCUMENT_KEY.to_string()), hnsw: Some(HnswIndexConfig {
1691 ef_construction: Some(hnsw_config.ef_construction),
1692 max_neighbors: Some(hnsw_config.max_neighbors),
1693 ef_search: Some(hnsw_config.ef_search),
1694 num_threads: Some(hnsw_config.num_threads),
1695 batch_size: Some(hnsw_config.batch_size),
1696 sync_threshold: Some(hnsw_config.sync_threshold),
1697 resize_factor: Some(hnsw_config.resize_factor),
1698 }),
1699 spann: None,
1700 },
1701 VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
1702 space: Some(spann_config.space.clone()),
1703 embedding_function: collection_config.embedding_function.clone(),
1704 source_key: Some(DOCUMENT_KEY.to_string()), hnsw: None,
1706 spann: Some(SpannIndexConfig {
1707 search_nprobe: Some(spann_config.search_nprobe),
1708 search_rng_factor: Some(spann_config.search_rng_factor),
1709 search_rng_epsilon: Some(spann_config.search_rng_epsilon),
1710 nreplica_count: Some(spann_config.nreplica_count),
1711 write_rng_factor: Some(spann_config.write_rng_factor),
1712 write_rng_epsilon: Some(spann_config.write_rng_epsilon),
1713 split_threshold: Some(spann_config.split_threshold),
1714 num_samples_kmeans: Some(spann_config.num_samples_kmeans),
1715 initial_lambda: Some(spann_config.initial_lambda),
1716 reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
1717 merge_threshold: Some(spann_config.merge_threshold),
1718 num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
1719 write_nprobe: Some(spann_config.write_nprobe),
1720 ef_construction: Some(spann_config.ef_construction),
1721 ef_search: Some(spann_config.ef_search),
1722 max_neighbors: Some(spann_config.max_neighbors),
1723 }),
1724 },
1725 };
1726
1727 if let Some(float_list) = &mut schema.defaults.float_list {
1730 if let Some(vector_index) = &mut float_list.vector_index {
1731 vector_index.config = vector_config.clone();
1732 }
1733 }
1734
1735 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1738 if let Some(float_list) = &mut embedding_types.float_list {
1739 if let Some(vector_index) = &mut float_list.vector_index {
1740 vector_index.config = vector_config;
1741 }
1742 }
1743 }
1744
1745 Ok(schema)
1746 }
1747
1748 pub fn is_metadata_type_index_enabled(
1750 &self,
1751 key: &str,
1752 value_type: MetadataValueType,
1753 ) -> Result<bool, SchemaError> {
1754 let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1755
1756 match value_type {
1757 MetadataValueType::Bool => match &v_type.boolean {
1758 Some(bool_type) => match &bool_type.bool_inverted_index {
1759 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1760 None => Err(SchemaError::MissingIndexConfiguration {
1761 key: key.to_string(),
1762 value_type: "bool".to_string(),
1763 }),
1764 },
1765 None => match &self.defaults.boolean {
1766 Some(bool_type) => match &bool_type.bool_inverted_index {
1767 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1768 None => Err(SchemaError::MissingIndexConfiguration {
1769 key: key.to_string(),
1770 value_type: "bool".to_string(),
1771 }),
1772 },
1773 None => Err(SchemaError::MissingIndexConfiguration {
1774 key: key.to_string(),
1775 value_type: "bool".to_string(),
1776 }),
1777 },
1778 },
1779 MetadataValueType::Int => match &v_type.int {
1780 Some(int_type) => match &int_type.int_inverted_index {
1781 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1782 None => Err(SchemaError::MissingIndexConfiguration {
1783 key: key.to_string(),
1784 value_type: "int".to_string(),
1785 }),
1786 },
1787 None => match &self.defaults.int {
1788 Some(int_type) => match &int_type.int_inverted_index {
1789 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1790 None => Err(SchemaError::MissingIndexConfiguration {
1791 key: key.to_string(),
1792 value_type: "int".to_string(),
1793 }),
1794 },
1795 None => Err(SchemaError::MissingIndexConfiguration {
1796 key: key.to_string(),
1797 value_type: "int".to_string(),
1798 }),
1799 },
1800 },
1801 MetadataValueType::Float => match &v_type.float {
1802 Some(float_type) => match &float_type.float_inverted_index {
1803 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1804 None => Err(SchemaError::MissingIndexConfiguration {
1805 key: key.to_string(),
1806 value_type: "float".to_string(),
1807 }),
1808 },
1809 None => match &self.defaults.float {
1810 Some(float_type) => match &float_type.float_inverted_index {
1811 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1812 None => Err(SchemaError::MissingIndexConfiguration {
1813 key: key.to_string(),
1814 value_type: "float".to_string(),
1815 }),
1816 },
1817 None => Err(SchemaError::MissingIndexConfiguration {
1818 key: key.to_string(),
1819 value_type: "float".to_string(),
1820 }),
1821 },
1822 },
1823 MetadataValueType::Str => match &v_type.string {
1824 Some(string_type) => match &string_type.string_inverted_index {
1825 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1826 None => Err(SchemaError::MissingIndexConfiguration {
1827 key: key.to_string(),
1828 value_type: "string".to_string(),
1829 }),
1830 },
1831 None => match &self.defaults.string {
1832 Some(string_type) => match &string_type.string_inverted_index {
1833 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1834 None => Err(SchemaError::MissingIndexConfiguration {
1835 key: key.to_string(),
1836 value_type: "string".to_string(),
1837 }),
1838 },
1839 None => Err(SchemaError::MissingIndexConfiguration {
1840 key: key.to_string(),
1841 value_type: "string".to_string(),
1842 }),
1843 },
1844 },
1845 MetadataValueType::SparseVector => match &v_type.sparse_vector {
1846 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1847 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1848 None => Err(SchemaError::MissingIndexConfiguration {
1849 key: key.to_string(),
1850 value_type: "sparse_vector".to_string(),
1851 }),
1852 },
1853 None => match &self.defaults.sparse_vector {
1854 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1855 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1856 None => Err(SchemaError::MissingIndexConfiguration {
1857 key: key.to_string(),
1858 value_type: "sparse_vector".to_string(),
1859 }),
1860 },
1861 None => Err(SchemaError::MissingIndexConfiguration {
1862 key: key.to_string(),
1863 value_type: "sparse_vector".to_string(),
1864 }),
1865 },
1866 },
1867 }
1868 }
1869
1870 pub fn is_metadata_where_indexing_enabled(
1871 &self,
1872 where_clause: &Where,
1873 ) -> Result<(), FilterValidationError> {
1874 match where_clause {
1875 Where::Composite(composite) => {
1876 for child in &composite.children {
1877 self.is_metadata_where_indexing_enabled(child)?;
1878 }
1879 Ok(())
1880 }
1881 Where::Document(_) => Ok(()),
1882 Where::Metadata(expression) => {
1883 let value_type = match &expression.comparison {
1884 MetadataComparison::Primitive(_, value) => value.value_type(),
1885 MetadataComparison::Set(_, set_value) => set_value.value_type(),
1886 };
1887 let is_enabled = self
1888 .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1889 .map_err(FilterValidationError::Schema)?;
1890 if !is_enabled {
1891 return Err(FilterValidationError::IndexingDisabled {
1892 key: expression.key.clone(),
1893 value_type,
1894 });
1895 }
1896 Ok(())
1897 }
1898 }
1899 }
1900
1901 pub fn is_knn_key_indexing_enabled(
1902 &self,
1903 key: &str,
1904 query: &QueryVector,
1905 ) -> Result<(), FilterValidationError> {
1906 match query {
1907 QueryVector::Sparse(_) => {
1908 let is_enabled = self
1909 .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1910 .map_err(FilterValidationError::Schema)?;
1911 if !is_enabled {
1912 return Err(FilterValidationError::IndexingDisabled {
1913 key: key.to_string(),
1914 value_type: MetadataValueType::SparseVector,
1915 });
1916 }
1917 Ok(())
1918 }
1919 QueryVector::Dense(_) => {
1920 Ok(())
1923 }
1924 }
1925 }
1926
1927 pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
1928 if key.starts_with(CHROMA_KEY) {
1929 return false;
1930 }
1931 let value_types = self.keys.entry(key.to_string()).or_default();
1932 match value_type {
1933 MetadataValueType::Bool => {
1934 if value_types.boolean.is_none() {
1935 value_types.boolean = self.defaults.boolean.clone();
1936 return true;
1937 }
1938 }
1939 MetadataValueType::Int => {
1940 if value_types.int.is_none() {
1941 value_types.int = self.defaults.int.clone();
1942 return true;
1943 }
1944 }
1945 MetadataValueType::Float => {
1946 if value_types.float.is_none() {
1947 value_types.float = self.defaults.float.clone();
1948 return true;
1949 }
1950 }
1951 MetadataValueType::Str => {
1952 if value_types.string.is_none() {
1953 value_types.string = self.defaults.string.clone();
1954 return true;
1955 }
1956 }
1957 MetadataValueType::SparseVector => {
1958 if value_types.sparse_vector.is_none() {
1959 value_types.sparse_vector = self.defaults.sparse_vector.clone();
1960 return true;
1961 }
1962 }
1963 }
1964 false
1965 }
1966
1967 pub fn create_index(
2007 mut self,
2008 key: Option<&str>,
2009 config: IndexConfig,
2010 ) -> Result<Self, SchemaBuilderError> {
2011 match (&key, &config) {
2013 (None, IndexConfig::Vector(cfg)) => {
2014 self._set_vector_index_config_builder(cfg.clone());
2015 return Ok(self);
2016 }
2017 (None, IndexConfig::Fts(cfg)) => {
2018 self._set_fts_index_config_builder(cfg.clone());
2019 return Ok(self);
2020 }
2021 (Some(k), IndexConfig::Vector(_)) => {
2022 return Err(SchemaBuilderError::VectorIndexMustBeGlobal { key: k.to_string() });
2023 }
2024 (Some(k), IndexConfig::Fts(_)) => {
2025 return Err(SchemaBuilderError::FtsIndexMustBeGlobal { key: k.to_string() });
2026 }
2027 _ => {}
2028 }
2029
2030 if let Some(k) = key {
2032 if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2033 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2034 key: k.to_string(),
2035 });
2036 }
2037 }
2038
2039 if key.is_none() && matches!(config, IndexConfig::SparseVector(_)) {
2041 return Err(SchemaBuilderError::SparseVectorRequiresKey);
2042 }
2043
2044 match key {
2046 Some(k) => self._set_index_for_key_builder(k, config, true)?,
2047 None => self._set_index_in_defaults_builder(config, true)?,
2048 }
2049
2050 Ok(self)
2051 }
2052
2053 pub fn delete_index(
2081 mut self,
2082 key: Option<&str>,
2083 config: IndexConfig,
2084 ) -> Result<Self, SchemaBuilderError> {
2085 if let Some(k) = key {
2087 if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2088 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2089 key: k.to_string(),
2090 });
2091 }
2092 }
2093
2094 match &config {
2096 IndexConfig::Vector(_) => {
2097 return Err(SchemaBuilderError::VectorIndexDeletionNotSupported);
2098 }
2099 IndexConfig::Fts(_) => {
2100 return Err(SchemaBuilderError::FtsIndexDeletionNotSupported);
2101 }
2102 IndexConfig::SparseVector(_) => {
2103 return Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported);
2104 }
2105 _ => {}
2106 }
2107
2108 match key {
2110 Some(k) => self._set_index_for_key_builder(k, config, false)?,
2111 None => self._set_index_in_defaults_builder(config, false)?,
2112 }
2113
2114 Ok(self)
2115 }
2116
2117 fn _set_vector_index_config_builder(&mut self, config: VectorIndexConfig) {
2119 if let Some(float_list) = &mut self.defaults.float_list {
2121 if let Some(vector_index) = &mut float_list.vector_index {
2122 vector_index.config = config.clone();
2123 }
2124 }
2125
2126 if let Some(embedding_types) = self.keys.get_mut(EMBEDDING_KEY) {
2128 if let Some(float_list) = &mut embedding_types.float_list {
2129 if let Some(vector_index) = &mut float_list.vector_index {
2130 let mut updated_config = config;
2131 updated_config.source_key = Some(DOCUMENT_KEY.to_string());
2133 vector_index.config = updated_config;
2134 }
2135 }
2136 }
2137 }
2138
2139 fn _set_fts_index_config_builder(&mut self, config: FtsIndexConfig) {
2141 if let Some(string) = &mut self.defaults.string {
2143 if let Some(fts_index) = &mut string.fts_index {
2144 fts_index.config = config.clone();
2145 }
2146 }
2147
2148 if let Some(document_types) = self.keys.get_mut(DOCUMENT_KEY) {
2150 if let Some(string) = &mut document_types.string {
2151 if let Some(fts_index) = &mut string.fts_index {
2152 fts_index.config = config;
2153 }
2154 }
2155 }
2156 }
2157
2158 fn _set_index_for_key_builder(
2160 &mut self,
2161 key: &str,
2162 config: IndexConfig,
2163 enabled: bool,
2164 ) -> Result<(), SchemaBuilderError> {
2165 if enabled && matches!(config, IndexConfig::SparseVector(_)) {
2167 let existing_key = self
2169 .keys
2170 .iter()
2171 .find(|(k, v)| {
2172 k.as_str() != key
2173 && v.sparse_vector
2174 .as_ref()
2175 .and_then(|sv| sv.sparse_vector_index.as_ref())
2176 .map(|idx| idx.enabled)
2177 .unwrap_or(false)
2178 })
2179 .map(|(k, _)| k.clone());
2180
2181 if let Some(existing_key) = existing_key {
2182 return Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key });
2183 }
2184 }
2185
2186 let value_types = self.keys.entry(key.to_string()).or_default();
2188
2189 match config {
2191 IndexConfig::Vector(_) => {
2192 return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2193 key: key.to_string(),
2194 });
2195 }
2196 IndexConfig::Fts(_) => {
2197 return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2198 key: key.to_string(),
2199 });
2200 }
2201 IndexConfig::SparseVector(cfg) => {
2202 value_types.sparse_vector = Some(SparseVectorValueType {
2203 sparse_vector_index: Some(SparseVectorIndexType {
2204 enabled,
2205 config: cfg,
2206 }),
2207 });
2208 }
2209 IndexConfig::StringInverted(cfg) => {
2210 if value_types.string.is_none() {
2211 value_types.string = Some(StringValueType {
2212 fts_index: None,
2213 string_inverted_index: None,
2214 });
2215 }
2216 if let Some(string) = &mut value_types.string {
2217 string.string_inverted_index = Some(StringInvertedIndexType {
2218 enabled,
2219 config: cfg,
2220 });
2221 }
2222 }
2223 IndexConfig::IntInverted(cfg) => {
2224 value_types.int = Some(IntValueType {
2225 int_inverted_index: Some(IntInvertedIndexType {
2226 enabled,
2227 config: cfg,
2228 }),
2229 });
2230 }
2231 IndexConfig::FloatInverted(cfg) => {
2232 value_types.float = Some(FloatValueType {
2233 float_inverted_index: Some(FloatInvertedIndexType {
2234 enabled,
2235 config: cfg,
2236 }),
2237 });
2238 }
2239 IndexConfig::BoolInverted(cfg) => {
2240 value_types.boolean = Some(BoolValueType {
2241 bool_inverted_index: Some(BoolInvertedIndexType {
2242 enabled,
2243 config: cfg,
2244 }),
2245 });
2246 }
2247 }
2248
2249 Ok(())
2250 }
2251
2252 fn _set_index_in_defaults_builder(
2254 &mut self,
2255 config: IndexConfig,
2256 enabled: bool,
2257 ) -> Result<(), SchemaBuilderError> {
2258 match config {
2259 IndexConfig::Vector(_) => {
2260 return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2261 key: "defaults".to_string(),
2262 });
2263 }
2264 IndexConfig::Fts(_) => {
2265 return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2266 key: "defaults".to_string(),
2267 });
2268 }
2269 IndexConfig::SparseVector(cfg) => {
2270 self.defaults.sparse_vector = Some(SparseVectorValueType {
2271 sparse_vector_index: Some(SparseVectorIndexType {
2272 enabled,
2273 config: cfg,
2274 }),
2275 });
2276 }
2277 IndexConfig::StringInverted(cfg) => {
2278 if self.defaults.string.is_none() {
2279 self.defaults.string = Some(StringValueType {
2280 fts_index: None,
2281 string_inverted_index: None,
2282 });
2283 }
2284 if let Some(string) = &mut self.defaults.string {
2285 string.string_inverted_index = Some(StringInvertedIndexType {
2286 enabled,
2287 config: cfg,
2288 });
2289 }
2290 }
2291 IndexConfig::IntInverted(cfg) => {
2292 self.defaults.int = Some(IntValueType {
2293 int_inverted_index: Some(IntInvertedIndexType {
2294 enabled,
2295 config: cfg,
2296 }),
2297 });
2298 }
2299 IndexConfig::FloatInverted(cfg) => {
2300 self.defaults.float = Some(FloatValueType {
2301 float_inverted_index: Some(FloatInvertedIndexType {
2302 enabled,
2303 config: cfg,
2304 }),
2305 });
2306 }
2307 IndexConfig::BoolInverted(cfg) => {
2308 self.defaults.boolean = Some(BoolValueType {
2309 bool_inverted_index: Some(BoolInvertedIndexType {
2310 enabled,
2311 config: cfg,
2312 }),
2313 });
2314 }
2315 }
2316
2317 Ok(())
2318 }
2319}
2320
/// Configuration for a vector index.
///
/// Every field is optional. Fields that are `None` are omitted from serialized output,
/// and deserialization rejects unknown fields.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct VectorIndexConfig {
    /// Vector space of the index (for example `Space::L2`, `Space::Cosine` or `Space::Ip`).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub space: Option<Space>,
    /// Embedding function configuration associated with the index, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Optional source key. NOTE(review): presumably the key whose value is embedded — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// HNSW-specific parameters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw: Option<HnswIndexConfig>,
    /// SPANN-specific parameters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub spann: Option<SpannIndexConfig>,
}
2345
/// Optional HNSW index parameters.
///
/// A `None` field means the value was not set; such fields are omitted from serialized
/// output. Deserialization rejects unknown fields.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct HnswIndexConfig {
    /// `ef_construction` parameter; its default is `default_construction_ef()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_construction: Option<usize>,
    /// `max_neighbors` parameter; its default is `default_m()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_neighbors: Option<usize>,
    /// `ef_search` parameter; its default is `default_search_ef()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_search: Option<usize>,
    /// Number of threads. This field is not inspected by `is_default`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_threads: Option<usize>,
    /// Batch size; validated to be at least 2. Its default is `default_batch_size()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub batch_size: Option<usize>,
    /// Sync threshold; validated to be at least 2. Its default is `default_sync_threshold()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub sync_threshold: Option<usize>,
    /// Resize factor; its default is `default_resize_factor()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resize_factor: Option<f64>,
}
2368
2369impl HnswIndexConfig {
2370 pub fn is_default(&self) -> bool {
2374 if let Some(ef_construction) = self.ef_construction {
2375 if ef_construction != default_construction_ef() {
2376 return false;
2377 }
2378 }
2379 if let Some(max_neighbors) = self.max_neighbors {
2380 if max_neighbors != default_m() {
2381 return false;
2382 }
2383 }
2384 if let Some(ef_search) = self.ef_search {
2385 if ef_search != default_search_ef() {
2386 return false;
2387 }
2388 }
2389 if let Some(batch_size) = self.batch_size {
2390 if batch_size != default_batch_size() {
2391 return false;
2392 }
2393 }
2394 if let Some(sync_threshold) = self.sync_threshold {
2395 if sync_threshold != default_sync_threshold() {
2396 return false;
2397 }
2398 }
2399 if let Some(resize_factor) = self.resize_factor {
2400 if resize_factor != default_resize_factor() {
2401 return false;
2402 }
2403 }
2404 true
2406 }
2407}
2408
/// Optional SPANN index parameters.
///
/// A `None` field means the value was not set; such fields are omitted from serialized
/// output. Deserialization rejects unknown fields. The accepted range of each field is
/// declared through its `#[validate(range(...))]` attribute.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SpannIndexConfig {
    /// Validated to be at most 128; default is `default_search_nprobe()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 128))]
    pub search_nprobe: Option<u32>,
    /// Validation only accepts exactly 1.0; default is `default_search_rng_factor()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub search_rng_factor: Option<f32>,
    /// Validated to lie in 5.0..=10.0; default is `default_search_rng_epsilon()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub search_rng_epsilon: Option<f32>,
    /// Validated to be at most 8; default is `default_nreplica_count()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub nreplica_count: Option<u32>,
    /// Validation only accepts exactly 1.0; default is `default_write_rng_factor()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub write_rng_factor: Option<f32>,
    /// Validated to lie in 5.0..=10.0; default is `default_write_rng_epsilon()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub write_rng_epsilon: Option<f32>,
    /// Validated to lie in 50..=200; default is `default_split_threshold()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 50, max = 200))]
    pub split_threshold: Option<u32>,
    /// Validated to be at most 1000; default is `default_num_samples_kmeans()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 1000))]
    pub num_samples_kmeans: Option<usize>,
    /// Validation only accepts exactly 100.0; default is `default_initial_lambda()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 100.0, max = 100.0))]
    pub initial_lambda: Option<f32>,
    /// Validated to be at most 64; default is `default_reassign_neighbor_count()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub reassign_neighbor_count: Option<u32>,
    /// Validated to lie in 25..=100; default is `default_merge_threshold()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 25, max = 100))]
    pub merge_threshold: Option<u32>,
    /// Validated to be at most 8; default is `default_num_centers_to_merge_to()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub num_centers_to_merge_to: Option<u32>,
    /// Validated to be at most 64; default is `default_write_nprobe()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub write_nprobe: Option<u32>,
    /// Validated to be at most 200; default is `default_construction_ef_spann()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_construction: Option<usize>,
    /// Validated to be at most 200; default is `default_search_ef_spann()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_search: Option<usize>,
    /// Validated to be at most 64; default is `default_m_spann()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub max_neighbors: Option<usize>,
}
2463
2464impl SpannIndexConfig {
2465 pub fn is_default(&self) -> bool {
2468 if let Some(search_nprobe) = self.search_nprobe {
2469 if search_nprobe != default_search_nprobe() {
2470 return false;
2471 }
2472 }
2473 if let Some(search_rng_factor) = self.search_rng_factor {
2474 if search_rng_factor != default_search_rng_factor() {
2475 return false;
2476 }
2477 }
2478 if let Some(search_rng_epsilon) = self.search_rng_epsilon {
2479 if search_rng_epsilon != default_search_rng_epsilon() {
2480 return false;
2481 }
2482 }
2483 if let Some(nreplica_count) = self.nreplica_count {
2484 if nreplica_count != default_nreplica_count() {
2485 return false;
2486 }
2487 }
2488 if let Some(write_rng_factor) = self.write_rng_factor {
2489 if write_rng_factor != default_write_rng_factor() {
2490 return false;
2491 }
2492 }
2493 if let Some(write_rng_epsilon) = self.write_rng_epsilon {
2494 if write_rng_epsilon != default_write_rng_epsilon() {
2495 return false;
2496 }
2497 }
2498 if let Some(split_threshold) = self.split_threshold {
2499 if split_threshold != default_split_threshold() {
2500 return false;
2501 }
2502 }
2503 if let Some(num_samples_kmeans) = self.num_samples_kmeans {
2504 if num_samples_kmeans != default_num_samples_kmeans() {
2505 return false;
2506 }
2507 }
2508 if let Some(initial_lambda) = self.initial_lambda {
2509 if initial_lambda != default_initial_lambda() {
2510 return false;
2511 }
2512 }
2513 if let Some(reassign_neighbor_count) = self.reassign_neighbor_count {
2514 if reassign_neighbor_count != default_reassign_neighbor_count() {
2515 return false;
2516 }
2517 }
2518 if let Some(merge_threshold) = self.merge_threshold {
2519 if merge_threshold != default_merge_threshold() {
2520 return false;
2521 }
2522 }
2523 if let Some(num_centers_to_merge_to) = self.num_centers_to_merge_to {
2524 if num_centers_to_merge_to != default_num_centers_to_merge_to() {
2525 return false;
2526 }
2527 }
2528 if let Some(write_nprobe) = self.write_nprobe {
2529 if write_nprobe != default_write_nprobe() {
2530 return false;
2531 }
2532 }
2533 if let Some(ef_construction) = self.ef_construction {
2534 if ef_construction != default_construction_ef_spann() {
2535 return false;
2536 }
2537 }
2538 if let Some(ef_search) = self.ef_search {
2539 if ef_search != default_search_ef_spann() {
2540 return false;
2541 }
2542 }
2543 if let Some(max_neighbors) = self.max_neighbors {
2544 if max_neighbors != default_m_spann() {
2545 return false;
2546 }
2547 }
2548 true
2549 }
2550}
2551
/// Configuration for a sparse vector index.
///
/// Every field is optional. Fields that are `None` are omitted from serialized output,
/// and deserialization rejects unknown fields.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SparseVectorIndexConfig {
    /// Embedding function configuration associated with the index, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Optional source key. NOTE(review): presumably the key whose value is embedded — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// Optional BM25 flag. NOTE(review): exact semantics are not visible here — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bm25: Option<bool>,
}
2566
/// Configuration for a full-text search (FTS) index.
///
/// The struct currently has no fields; because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FtsIndexConfig {
    // Intentionally empty: no options are defined.
}
2573
/// Configuration for a string inverted index.
///
/// The struct currently has no fields; because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct StringInvertedIndexConfig {
    // Intentionally empty: no options are defined.
}
2580
/// Configuration for an integer inverted index.
///
/// The struct currently has no fields; because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct IntInvertedIndexConfig {
    // Intentionally empty: no options are defined.
}
2587
/// Configuration for a float inverted index.
///
/// The struct currently has no fields; because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FloatInvertedIndexConfig {
    // Intentionally empty: no options are defined.
}
2594
/// Configuration for a boolean inverted index.
///
/// The struct currently has no fields; because of `deny_unknown_fields`, deserialization
/// rejects any field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct BoolInvertedIndexConfig {
    // Intentionally empty: no options are defined.
}
2601
/// One index configuration of any supported kind.
///
/// Lets a single parameter carry whichever concrete config type the caller supplies;
/// each concrete type converts into this enum through a `From` implementation.
#[derive(Clone, Debug)]
pub enum IndexConfig {
    /// Vector index configuration.
    Vector(VectorIndexConfig),
    /// Sparse vector index configuration.
    SparseVector(SparseVectorIndexConfig),
    /// Full-text search index configuration.
    Fts(FtsIndexConfig),
    /// String inverted index configuration.
    StringInverted(StringInvertedIndexConfig),
    /// Integer inverted index configuration.
    IntInverted(IntInvertedIndexConfig),
    /// Float inverted index configuration.
    FloatInverted(FloatInvertedIndexConfig),
    /// Boolean inverted index configuration.
    BoolInverted(BoolInvertedIndexConfig),
}
2617
2618impl From<VectorIndexConfig> for IndexConfig {
2620 fn from(config: VectorIndexConfig) -> Self {
2621 IndexConfig::Vector(config)
2622 }
2623}
2624
2625impl From<SparseVectorIndexConfig> for IndexConfig {
2626 fn from(config: SparseVectorIndexConfig) -> Self {
2627 IndexConfig::SparseVector(config)
2628 }
2629}
2630
2631impl From<FtsIndexConfig> for IndexConfig {
2632 fn from(config: FtsIndexConfig) -> Self {
2633 IndexConfig::Fts(config)
2634 }
2635}
2636
2637impl From<StringInvertedIndexConfig> for IndexConfig {
2638 fn from(config: StringInvertedIndexConfig) -> Self {
2639 IndexConfig::StringInverted(config)
2640 }
2641}
2642
2643impl From<IntInvertedIndexConfig> for IndexConfig {
2644 fn from(config: IntInvertedIndexConfig) -> Self {
2645 IndexConfig::IntInverted(config)
2646 }
2647}
2648
2649impl From<FloatInvertedIndexConfig> for IndexConfig {
2650 fn from(config: FloatInvertedIndexConfig) -> Self {
2651 IndexConfig::FloatInverted(config)
2652 }
2653}
2654
2655impl From<BoolInvertedIndexConfig> for IndexConfig {
2656 fn from(config: BoolInvertedIndexConfig) -> Self {
2657 IndexConfig::BoolInverted(config)
2658 }
2659}
2660
2661#[cfg(test)]
2662mod tests {
2663 use super::*;
2664 use crate::hnsw_configuration::Space;
2665 use crate::metadata::SparseVector;
2666 use crate::{
2667 EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
2668 };
2669 use serde_json::json;
2670
2671 #[test]
2672 fn test_reconcile_with_defaults_none_user_schema() {
2673 let result = Schema::reconcile_with_defaults(None, KnnIndex::Spann).unwrap();
2675 let expected = Schema::new_default(KnnIndex::Spann);
2676 assert_eq!(result, expected);
2677 }
2678
2679 #[test]
2680 fn test_reconcile_with_defaults_empty_user_schema() {
2681 let user_schema = Schema {
2683 defaults: ValueTypes::default(),
2684 keys: HashMap::new(),
2685 };
2686
2687 let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
2688 let expected = Schema::new_default(KnnIndex::Spann);
2689 assert_eq!(result, expected);
2690 }
2691
2692 #[test]
2693 fn test_reconcile_with_defaults_user_overrides_string_enabled() {
2694 let mut user_schema = Schema {
2696 defaults: ValueTypes::default(),
2697 keys: HashMap::new(),
2698 };
2699
2700 user_schema.defaults.string = Some(StringValueType {
2701 string_inverted_index: Some(StringInvertedIndexType {
2702 enabled: false, config: StringInvertedIndexConfig {},
2704 }),
2705 fts_index: None,
2706 });
2707
2708 let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
2709
2710 assert!(
2712 !result
2713 .defaults
2714 .string
2715 .as_ref()
2716 .unwrap()
2717 .string_inverted_index
2718 .as_ref()
2719 .unwrap()
2720 .enabled
2721 );
2722 assert!(result.defaults.float.is_some());
2724 assert!(result.defaults.int.is_some());
2725 }
2726
2727 #[test]
2728 fn test_reconcile_with_defaults_user_overrides_vector_config() {
2729 let mut user_schema = Schema {
2731 defaults: ValueTypes::default(),
2732 keys: HashMap::new(),
2733 };
2734
2735 user_schema.defaults.float_list = Some(FloatListValueType {
2736 vector_index: Some(VectorIndexType {
2737 enabled: true, config: VectorIndexConfig {
2739 space: Some(Space::L2), embedding_function: None, source_key: Some("custom_key".to_string()), hnsw: Some(HnswIndexConfig {
2743 ef_construction: Some(500), max_neighbors: None, ef_search: None, num_threads: None,
2747 batch_size: None,
2748 sync_threshold: None,
2749 resize_factor: None,
2750 }),
2751 spann: None,
2752 },
2753 }),
2754 });
2755
2756 let result = {
2758 let default_schema = Schema::new_default(KnnIndex::Hnsw);
2759 let merged_defaults = Schema::merge_value_types(
2760 &default_schema.defaults,
2761 &user_schema.defaults,
2762 KnnIndex::Hnsw,
2763 )
2764 .unwrap();
2765 let mut merged_keys = default_schema.keys.clone();
2766 for (key, user_value_types) in user_schema.keys {
2767 if let Some(default_value_types) = merged_keys.get(&key) {
2768 let merged_value_types = Schema::merge_value_types(
2769 default_value_types,
2770 &user_value_types,
2771 KnnIndex::Hnsw,
2772 )
2773 .unwrap();
2774 merged_keys.insert(key, merged_value_types);
2775 } else {
2776 merged_keys.insert(key, user_value_types);
2777 }
2778 }
2779 Schema {
2780 defaults: merged_defaults,
2781 keys: merged_keys,
2782 }
2783 };
2784
2785 let vector_config = &result
2786 .defaults
2787 .float_list
2788 .as_ref()
2789 .unwrap()
2790 .vector_index
2791 .as_ref()
2792 .unwrap()
2793 .config;
2794
2795 assert_eq!(vector_config.space, Some(Space::L2));
2797 assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
2798 assert_eq!(
2799 vector_config.hnsw.as_ref().unwrap().ef_construction,
2800 Some(500)
2801 );
2802
2803 assert_eq!(vector_config.embedding_function, None);
2805 assert_eq!(
2807 vector_config.hnsw.as_ref().unwrap().max_neighbors,
2808 Some(default_m())
2809 );
2810 }
2811
2812 #[test]
2813 fn test_reconcile_with_defaults_keys() {
2814 let mut user_schema = Schema {
2816 defaults: ValueTypes::default(),
2817 keys: HashMap::new(),
2818 };
2819
2820 let custom_key_types = ValueTypes {
2822 string: Some(StringValueType {
2823 fts_index: Some(FtsIndexType {
2824 enabled: true,
2825 config: FtsIndexConfig {},
2826 }),
2827 string_inverted_index: Some(StringInvertedIndexType {
2828 enabled: false,
2829 config: StringInvertedIndexConfig {},
2830 }),
2831 }),
2832 ..Default::default()
2833 };
2834 user_schema
2835 .keys
2836 .insert("custom_key".to_string(), custom_key_types);
2837
2838 let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
2839
2840 assert!(result.keys.contains_key(EMBEDDING_KEY));
2842 assert!(result.keys.contains_key(DOCUMENT_KEY));
2843
2844 assert!(result.keys.contains_key("custom_key"));
2846 let custom_override = result.keys.get("custom_key").unwrap();
2847 assert!(
2848 custom_override
2849 .string
2850 .as_ref()
2851 .unwrap()
2852 .fts_index
2853 .as_ref()
2854 .unwrap()
2855 .enabled
2856 );
2857 }
2858
2859 #[test]
2860 fn test_reconcile_with_defaults_override_existing_key() {
2861 let mut user_schema = Schema {
2863 defaults: ValueTypes::default(),
2864 keys: HashMap::new(),
2865 };
2866
2867 let embedding_override = ValueTypes {
2869 float_list: Some(FloatListValueType {
2870 vector_index: Some(VectorIndexType {
2871 enabled: false, config: VectorIndexConfig {
2873 space: Some(Space::Ip), embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
2875 source_key: Some("custom_embedding_key".to_string()),
2876 hnsw: None,
2877 spann: None,
2878 },
2879 }),
2880 }),
2881 ..Default::default()
2882 };
2883 user_schema
2884 .keys
2885 .insert(EMBEDDING_KEY.to_string(), embedding_override);
2886
2887 let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
2888
2889 let embedding_config = result.keys.get(EMBEDDING_KEY).unwrap();
2890 let vector_config = &embedding_config
2891 .float_list
2892 .as_ref()
2893 .unwrap()
2894 .vector_index
2895 .as_ref()
2896 .unwrap();
2897
2898 assert!(!vector_config.enabled);
2900 assert_eq!(vector_config.config.space, Some(Space::Ip));
2901 assert_eq!(
2902 vector_config.config.source_key,
2903 Some("custom_embedding_key".to_string())
2904 );
2905 }
2906
2907 #[test]
2908 fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
2909 let collection_config = InternalCollectionConfiguration {
2910 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
2911 space: Space::Cosine,
2912 ef_construction: 128,
2913 ef_search: 96,
2914 max_neighbors: 42,
2915 num_threads: 8,
2916 resize_factor: 1.5,
2917 sync_threshold: 2_000,
2918 batch_size: 256,
2919 }),
2920 embedding_function: Some(EmbeddingFunctionConfiguration::Known(
2921 EmbeddingFunctionNewConfiguration {
2922 name: "custom".to_string(),
2923 config: json!({"alpha": 1}),
2924 },
2925 )),
2926 };
2927
2928 let schema = Schema::convert_collection_config_to_schema(&collection_config).unwrap();
2929 let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
2930
2931 assert_eq!(reconstructed, collection_config);
2932 }
2933
2934 #[test]
2935 fn test_convert_schema_to_collection_config_spann_roundtrip() {
2936 let spann_config = InternalSpannConfiguration {
2937 space: Space::Cosine,
2938 search_nprobe: 11,
2939 search_rng_factor: 1.7,
2940 write_nprobe: 5,
2941 nreplica_count: 3,
2942 split_threshold: 150,
2943 merge_threshold: 80,
2944 ef_construction: 120,
2945 ef_search: 90,
2946 max_neighbors: 40,
2947 ..Default::default()
2948 };
2949
2950 let collection_config = InternalCollectionConfiguration {
2951 vector_index: VectorIndexConfiguration::Spann(spann_config.clone()),
2952 embedding_function: Some(EmbeddingFunctionConfiguration::Known(
2953 EmbeddingFunctionNewConfiguration {
2954 name: "custom".to_string(),
2955 config: json!({"beta": true}),
2956 },
2957 )),
2958 };
2959
2960 let schema = Schema::convert_collection_config_to_schema(&collection_config).unwrap();
2961 let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
2962
2963 assert_eq!(reconstructed, collection_config);
2964 }
2965
2966 #[test]
2967 fn test_convert_schema_to_collection_config_rejects_mixed_index() {
2968 let mut schema = Schema::new_default(KnnIndex::Hnsw);
2969 if let Some(embedding) = schema.keys.get_mut(EMBEDDING_KEY) {
2970 if let Some(float_list) = &mut embedding.float_list {
2971 if let Some(vector_index) = &mut float_list.vector_index {
2972 vector_index.config.spann = Some(SpannIndexConfig {
2973 search_nprobe: Some(1),
2974 search_rng_factor: Some(1.0),
2975 search_rng_epsilon: Some(0.1),
2976 nreplica_count: Some(1),
2977 write_rng_factor: Some(1.0),
2978 write_rng_epsilon: Some(0.1),
2979 split_threshold: Some(100),
2980 num_samples_kmeans: Some(10),
2981 initial_lambda: Some(0.5),
2982 reassign_neighbor_count: Some(10),
2983 merge_threshold: Some(50),
2984 num_centers_to_merge_to: Some(3),
2985 write_nprobe: Some(1),
2986 ef_construction: Some(50),
2987 ef_search: Some(40),
2988 max_neighbors: Some(20),
2989 });
2990 }
2991 }
2992 }
2993
2994 let result = InternalCollectionConfiguration::try_from(&schema);
2995 assert!(result.is_err());
2996 }
2997
2998 #[test]
2999 fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
3000 let mut schema = Schema::new_default(KnnIndex::Hnsw);
3001 let before = schema.clone();
3002 let modified = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);
3003 assert!(!modified);
3004 assert_eq!(schema, before);
3005 }
3006
3007 #[test]
3008 fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
3009 let mut schema = Schema::new_default(KnnIndex::Hnsw);
3010 assert!(!schema.keys.contains_key("custom_field"));
3011
3012 let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3013
3014 assert!(modified);
3015 let entry = schema
3016 .keys
3017 .get("custom_field")
3018 .expect("expected new key override to be inserted");
3019 assert_eq!(entry.boolean, schema.defaults.boolean);
3020 assert!(entry.string.is_none());
3021 assert!(entry.int.is_none());
3022 assert!(entry.float.is_none());
3023 assert!(entry.float_list.is_none());
3024 assert!(entry.sparse_vector.is_none());
3025 }
3026
3027 #[test]
3028 fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
3029 let mut schema = Schema::new_default(KnnIndex::Hnsw);
3030 let initial_len = schema.keys.len();
3031 schema.keys.insert(
3032 "custom_field".to_string(),
3033 ValueTypes {
3034 string: schema.defaults.string.clone(),
3035 ..Default::default()
3036 },
3037 );
3038
3039 let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3040
3041 assert!(modified);
3042 assert_eq!(schema.keys.len(), initial_len + 1);
3043 let entry = schema
3044 .keys
3045 .get("custom_field")
3046 .expect("expected key override to exist after ensure call");
3047 assert!(entry.string.is_some());
3048 assert_eq!(entry.boolean, schema.defaults.boolean);
3049 }
3050
3051 #[test]
3052 fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
3053 let schema = Schema::new_default(KnnIndex::Spann);
3054 let result = schema.is_knn_key_indexing_enabled(
3055 "custom_sparse",
3056 &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32])),
3057 );
3058
3059 let err = result.expect_err("expected indexing disabled error");
3060 match err {
3061 FilterValidationError::IndexingDisabled { key, value_type } => {
3062 assert_eq!(key, "custom_sparse");
3063 assert_eq!(value_type, crate::metadata::MetadataValueType::SparseVector);
3064 }
3065 other => panic!("unexpected error variant: {other:?}"),
3066 }
3067 }
3068
3069 #[test]
3070 fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
3071 let mut schema = Schema::new_default(KnnIndex::Spann);
3072 schema.keys.insert(
3073 "sparse_enabled".to_string(),
3074 ValueTypes {
3075 sparse_vector: Some(SparseVectorValueType {
3076 sparse_vector_index: Some(SparseVectorIndexType {
3077 enabled: true,
3078 config: SparseVectorIndexConfig {
3079 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3080 source_key: None,
3081 bm25: None,
3082 },
3083 }),
3084 }),
3085 ..Default::default()
3086 },
3087 );
3088
3089 let result = schema.is_knn_key_indexing_enabled(
3090 "sparse_enabled",
3091 &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32])),
3092 );
3093
3094 assert!(result.is_ok());
3095 }
3096
3097 #[test]
3098 fn test_is_knn_key_indexing_enabled_dense_succeeds() {
3099 let schema = Schema::new_default(KnnIndex::Spann);
3100 let result = schema.is_knn_key_indexing_enabled(
3101 EMBEDDING_KEY,
3102 &QueryVector::Dense(vec![0.1_f32, 0.2_f32]),
3103 );
3104
3105 assert!(result.is_ok());
3106 }
3107
3108 #[test]
3109 fn test_merge_hnsw_configs_field_level() {
3110 let default_hnsw = HnswIndexConfig {
3112 ef_construction: Some(200),
3113 max_neighbors: Some(16),
3114 ef_search: Some(10),
3115 num_threads: Some(4),
3116 batch_size: Some(100),
3117 sync_threshold: Some(1000),
3118 resize_factor: Some(1.2),
3119 };
3120
3121 let user_hnsw = HnswIndexConfig {
3122 ef_construction: Some(300), max_neighbors: None, ef_search: Some(20), num_threads: None, batch_size: None, sync_threshold: Some(2000), resize_factor: None, };
3130
3131 let result = Schema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();
3132
3133 assert_eq!(result.ef_construction, Some(300));
3135 assert_eq!(result.ef_search, Some(20));
3136 assert_eq!(result.sync_threshold, Some(2000));
3137
3138 assert_eq!(result.max_neighbors, Some(16));
3140 assert_eq!(result.num_threads, Some(4));
3141 assert_eq!(result.batch_size, Some(100));
3142 assert_eq!(result.resize_factor, Some(1.2));
3143 }
3144
3145 #[test]
3146 fn test_merge_spann_configs_field_level() {
3147 let default_spann = SpannIndexConfig {
3149 search_nprobe: Some(10),
3150 search_rng_factor: Some(1.0), search_rng_epsilon: Some(7.0), nreplica_count: Some(3),
3153 write_rng_factor: Some(1.0), write_rng_epsilon: Some(6.0), split_threshold: Some(100), num_samples_kmeans: Some(100),
3157 initial_lambda: Some(100.0), reassign_neighbor_count: Some(50),
3159 merge_threshold: Some(50), num_centers_to_merge_to: Some(4), write_nprobe: Some(5),
3162 ef_construction: Some(100),
3163 ef_search: Some(10),
3164 max_neighbors: Some(16),
3165 };
3166
3167 let user_spann = SpannIndexConfig {
3168 search_nprobe: Some(20), search_rng_factor: None, search_rng_epsilon: Some(8.0), nreplica_count: None, write_rng_factor: None,
3173 write_rng_epsilon: None,
3174 split_threshold: Some(150), num_samples_kmeans: None,
3176 initial_lambda: None,
3177 reassign_neighbor_count: None,
3178 merge_threshold: None,
3179 num_centers_to_merge_to: None,
3180 write_nprobe: None,
3181 ef_construction: None,
3182 ef_search: None,
3183 max_neighbors: None,
3184 };
3185
3186 let result = Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann)).unwrap();
3187
3188 assert_eq!(result.search_nprobe, Some(20));
3190 assert_eq!(result.search_rng_epsilon, Some(8.0));
3191 assert_eq!(result.split_threshold, Some(150));
3192
3193 assert_eq!(result.search_rng_factor, Some(1.0));
3195 assert_eq!(result.nreplica_count, Some(3));
3196 assert_eq!(result.initial_lambda, Some(100.0));
3197 }
3198
3199 #[test]
3200 fn test_spann_index_config_into_internal_configuration() {
3201 let config = SpannIndexConfig {
3202 search_nprobe: Some(33),
3203 search_rng_factor: Some(1.2),
3204 search_rng_epsilon: None,
3205 nreplica_count: None,
3206 write_rng_factor: Some(1.5),
3207 write_rng_epsilon: None,
3208 split_threshold: Some(75),
3209 num_samples_kmeans: None,
3210 initial_lambda: Some(0.9),
3211 reassign_neighbor_count: Some(40),
3212 merge_threshold: None,
3213 num_centers_to_merge_to: Some(4),
3214 write_nprobe: Some(60),
3215 ef_construction: Some(180),
3216 ef_search: Some(170),
3217 max_neighbors: Some(32),
3218 };
3219
3220 let with_space: InternalSpannConfiguration = (Some(&Space::Cosine), &config).into();
3221 assert_eq!(with_space.space, Space::Cosine);
3222 assert_eq!(with_space.search_nprobe, 33);
3223 assert_eq!(with_space.search_rng_factor, 1.2);
3224 assert_eq!(with_space.search_rng_epsilon, default_search_rng_epsilon());
3225 assert_eq!(with_space.write_rng_factor, 1.5);
3226 assert_eq!(with_space.write_nprobe, 60);
3227 assert_eq!(with_space.ef_construction, 180);
3228 assert_eq!(with_space.ef_search, 170);
3229 assert_eq!(with_space.max_neighbors, 32);
3230 assert_eq!(with_space.merge_threshold, default_merge_threshold());
3231
3232 let default_space_config: InternalSpannConfiguration = (None, &config).into();
3233 assert_eq!(default_space_config.space, default_space());
3234 }
3235
3236 #[test]
3237 fn test_merge_string_type_combinations() {
3238 let default = StringValueType {
3242 string_inverted_index: Some(StringInvertedIndexType {
3243 enabled: true,
3244 config: StringInvertedIndexConfig {},
3245 }),
3246 fts_index: Some(FtsIndexType {
3247 enabled: false,
3248 config: FtsIndexConfig {},
3249 }),
3250 };
3251
3252 let user = StringValueType {
3253 string_inverted_index: Some(StringInvertedIndexType {
3254 enabled: false, config: StringInvertedIndexConfig {},
3256 }),
3257 fts_index: None, };
3259
3260 let result = Schema::merge_string_type(Some(&default), Some(&user))
3261 .unwrap()
3262 .unwrap();
3263 assert!(!result.string_inverted_index.as_ref().unwrap().enabled); assert!(!result.fts_index.as_ref().unwrap().enabled); let result = Schema::merge_string_type(Some(&default), None)
3268 .unwrap()
3269 .unwrap();
3270 assert!(result.string_inverted_index.as_ref().unwrap().enabled);
3271
3272 let result = Schema::merge_string_type(None, Some(&user))
3274 .unwrap()
3275 .unwrap();
3276 assert!(!result.string_inverted_index.as_ref().unwrap().enabled);
3277
3278 let result = Schema::merge_string_type(None, None).unwrap();
3280 assert!(result.is_none());
3281 }
3282
3283 #[test]
3284 fn test_merge_vector_index_config_comprehensive() {
3285 let default_config = VectorIndexConfig {
3287 space: Some(Space::Cosine),
3288 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3289 source_key: Some("default_key".to_string()),
3290 hnsw: Some(HnswIndexConfig {
3291 ef_construction: Some(200),
3292 max_neighbors: Some(16),
3293 ef_search: Some(10),
3294 num_threads: Some(4),
3295 batch_size: Some(100),
3296 sync_threshold: Some(1000),
3297 resize_factor: Some(1.2),
3298 }),
3299 spann: None,
3300 };
3301
3302 let user_config = VectorIndexConfig {
3303 space: Some(Space::L2), embedding_function: None, source_key: Some("user_key".to_string()), hnsw: Some(HnswIndexConfig {
3307 ef_construction: Some(300), max_neighbors: None, ef_search: None, num_threads: None,
3311 batch_size: None,
3312 sync_threshold: None,
3313 resize_factor: None,
3314 }),
3315 spann: Some(SpannIndexConfig {
3316 search_nprobe: Some(15),
3317 search_rng_factor: None,
3318 search_rng_epsilon: None,
3319 nreplica_count: None,
3320 write_rng_factor: None,
3321 write_rng_epsilon: None,
3322 split_threshold: None,
3323 num_samples_kmeans: None,
3324 initial_lambda: None,
3325 reassign_neighbor_count: None,
3326 merge_threshold: None,
3327 num_centers_to_merge_to: None,
3328 write_nprobe: None,
3329 ef_construction: None,
3330 ef_search: None,
3331 max_neighbors: None,
3332 }), };
3334
3335 let result =
3336 Schema::merge_vector_index_config(&default_config, &user_config, KnnIndex::Hnsw);
3337
3338 assert_eq!(result.space, Some(Space::L2)); assert_eq!(
3341 result.embedding_function,
3342 Some(EmbeddingFunctionConfiguration::Legacy)
3343 ); assert_eq!(result.source_key, Some("user_key".to_string())); assert_eq!(result.hnsw.as_ref().unwrap().ef_construction, Some(300)); assert_eq!(result.hnsw.as_ref().unwrap().max_neighbors, Some(16)); assert!(result.spann.is_none());
3352 }
3353
3354 #[test]
3355 fn test_merge_sparse_vector_index_config() {
3356 let default_config = SparseVectorIndexConfig {
3358 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3359 source_key: Some("default_sparse_key".to_string()),
3360 bm25: None,
3361 };
3362
3363 let user_config = SparseVectorIndexConfig {
3364 embedding_function: None, source_key: Some("user_sparse_key".to_string()), bm25: None,
3367 };
3368
3369 let result = Schema::merge_sparse_vector_index_config(&default_config, &user_config);
3370
3371 assert_eq!(result.source_key, Some("user_sparse_key".to_string()));
3373 assert_eq!(
3375 result.embedding_function,
3376 Some(EmbeddingFunctionConfiguration::Legacy)
3377 );
3378 }
3379
3380 #[test]
3381 fn test_complex_nested_merging_scenario() {
3382 let mut user_schema = Schema {
3384 defaults: ValueTypes::default(),
3385 keys: HashMap::new(),
3386 };
3387
3388 user_schema.defaults.string = Some(StringValueType {
3390 string_inverted_index: Some(StringInvertedIndexType {
3391 enabled: false,
3392 config: StringInvertedIndexConfig {},
3393 }),
3394 fts_index: Some(FtsIndexType {
3395 enabled: true,
3396 config: FtsIndexConfig {},
3397 }),
3398 });
3399
3400 user_schema.defaults.float_list = Some(FloatListValueType {
3401 vector_index: Some(VectorIndexType {
3402 enabled: true,
3403 config: VectorIndexConfig {
3404 space: Some(Space::Ip),
3405 embedding_function: None, source_key: Some("custom_vector_key".to_string()),
3407 hnsw: Some(HnswIndexConfig {
3408 ef_construction: Some(400),
3409 max_neighbors: Some(32),
3410 ef_search: None, num_threads: None,
3412 batch_size: None,
3413 sync_threshold: None,
3414 resize_factor: None,
3415 }),
3416 spann: None,
3417 },
3418 }),
3419 });
3420
3421 let custom_key_override = ValueTypes {
3423 string: Some(StringValueType {
3424 fts_index: Some(FtsIndexType {
3425 enabled: true,
3426 config: FtsIndexConfig {},
3427 }),
3428 string_inverted_index: None,
3429 }),
3430 ..Default::default()
3431 };
3432 user_schema
3433 .keys
3434 .insert("custom_field".to_string(), custom_key_override);
3435
3436 let result = {
3438 let default_schema = Schema::new_default(KnnIndex::Hnsw);
3439 let merged_defaults = Schema::merge_value_types(
3440 &default_schema.defaults,
3441 &user_schema.defaults,
3442 KnnIndex::Hnsw,
3443 )
3444 .unwrap();
3445 let mut merged_keys = default_schema.keys.clone();
3446 for (key, user_value_types) in user_schema.keys {
3447 if let Some(default_value_types) = merged_keys.get(&key) {
3448 let merged_value_types = Schema::merge_value_types(
3449 default_value_types,
3450 &user_value_types,
3451 KnnIndex::Hnsw,
3452 )
3453 .unwrap();
3454 merged_keys.insert(key, merged_value_types);
3455 } else {
3456 merged_keys.insert(key, user_value_types);
3457 }
3458 }
3459 Schema {
3460 defaults: merged_defaults,
3461 keys: merged_keys,
3462 }
3463 };
3464
3465 assert!(
3469 !result
3470 .defaults
3471 .string
3472 .as_ref()
3473 .unwrap()
3474 .string_inverted_index
3475 .as_ref()
3476 .unwrap()
3477 .enabled
3478 );
3479 assert!(
3480 result
3481 .defaults
3482 .string
3483 .as_ref()
3484 .unwrap()
3485 .fts_index
3486 .as_ref()
3487 .unwrap()
3488 .enabled
3489 );
3490
3491 let vector_config = &result
3492 .defaults
3493 .float_list
3494 .as_ref()
3495 .unwrap()
3496 .vector_index
3497 .as_ref()
3498 .unwrap()
3499 .config;
3500 assert_eq!(vector_config.space, Some(Space::Ip));
3501 assert_eq!(vector_config.embedding_function, None); assert_eq!(
3503 vector_config.source_key,
3504 Some("custom_vector_key".to_string())
3505 );
3506 assert_eq!(
3507 vector_config.hnsw.as_ref().unwrap().ef_construction,
3508 Some(400)
3509 );
3510 assert_eq!(vector_config.hnsw.as_ref().unwrap().max_neighbors, Some(32));
3511 assert_eq!(
3512 vector_config.hnsw.as_ref().unwrap().ef_search,
3513 Some(default_search_ef())
3514 ); assert!(result.keys.contains_key(EMBEDDING_KEY)); assert!(result.keys.contains_key(DOCUMENT_KEY)); assert!(result.keys.contains_key("custom_field")); let custom_override = result.keys.get("custom_field").unwrap();
3522 assert!(
3523 custom_override
3524 .string
3525 .as_ref()
3526 .unwrap()
3527 .fts_index
3528 .as_ref()
3529 .unwrap()
3530 .enabled
3531 );
3532 assert!(custom_override
3533 .string
3534 .as_ref()
3535 .unwrap()
3536 .string_inverted_index
3537 .is_none());
3538 }
3539
3540 #[test]
3541 fn test_reconcile_with_collection_config_default_config() {
3542 let collection_config = InternalCollectionConfiguration::default_hnsw();
3544 let schema = Schema::convert_collection_config_to_schema(&collection_config).unwrap();
3545
3546 let result = Schema::reconcile_with_collection_config(&schema, &collection_config).unwrap();
3547 assert_eq!(result, schema);
3548 }
3549
3550 #[test]
3551 fn test_reconcile_with_collection_config_both_non_default() {
3552 let mut schema = Schema::new_default(KnnIndex::Hnsw);
3554 schema.defaults.string = Some(StringValueType {
3555 fts_index: Some(FtsIndexType {
3556 enabled: true,
3557 config: FtsIndexConfig {},
3558 }),
3559 string_inverted_index: None,
3560 });
3561
3562 let mut collection_config = InternalCollectionConfiguration::default_hnsw();
3563 if let VectorIndexConfiguration::Hnsw(ref mut hnsw_config) = collection_config.vector_index
3565 {
3566 hnsw_config.ef_construction = 500; }
3568
3569 let result = Schema::reconcile_schema_and_config(
3571 Some(&schema),
3572 Some(&collection_config),
3573 KnnIndex::Spann,
3574 );
3575 assert!(result.is_err());
3576 assert!(matches!(
3577 result.unwrap_err(),
3578 SchemaError::ConfigAndSchemaConflict
3579 ));
3580 }
3581
3582 #[test]
3583 fn test_reconcile_with_collection_config_hnsw_override() {
3584 let schema = Schema::new_default(KnnIndex::Hnsw); let collection_config = InternalCollectionConfiguration {
3588 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
3589 ef_construction: 300,
3590 max_neighbors: 32,
3591 ef_search: 50,
3592 num_threads: 8,
3593 batch_size: 200,
3594 sync_threshold: 2000,
3595 resize_factor: 1.5,
3596 space: Space::L2,
3597 }),
3598 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3599 };
3600
3601 let result = Schema::reconcile_with_collection_config(&schema, &collection_config).unwrap();
3602
3603 let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
3605 let vector_index = embedding_override
3606 .float_list
3607 .as_ref()
3608 .unwrap()
3609 .vector_index
3610 .as_ref()
3611 .unwrap();
3612
3613 assert!(vector_index.enabled);
3614 assert_eq!(vector_index.config.space, Some(Space::L2));
3615 assert_eq!(
3616 vector_index.config.embedding_function,
3617 Some(EmbeddingFunctionConfiguration::Legacy)
3618 );
3619 assert_eq!(
3620 vector_index.config.source_key,
3621 Some(DOCUMENT_KEY.to_string())
3622 );
3623
3624 let hnsw_config = vector_index.config.hnsw.as_ref().unwrap();
3625 assert_eq!(hnsw_config.ef_construction, Some(300));
3626 assert_eq!(hnsw_config.max_neighbors, Some(32));
3627 assert_eq!(hnsw_config.ef_search, Some(50));
3628 assert_eq!(hnsw_config.num_threads, Some(8));
3629 assert_eq!(hnsw_config.batch_size, Some(200));
3630 assert_eq!(hnsw_config.sync_threshold, Some(2000));
3631 assert_eq!(hnsw_config.resize_factor, Some(1.5));
3632
3633 assert!(vector_index.config.spann.is_none());
3634 }
3635
3636 #[test]
3637 fn test_reconcile_with_collection_config_spann_override() {
3638 let schema = Schema::new_default(KnnIndex::Spann); let collection_config = InternalCollectionConfiguration {
3642 vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
3643 search_nprobe: 20,
3644 search_rng_factor: 3.0,
3645 search_rng_epsilon: 0.2,
3646 nreplica_count: 5,
3647 write_rng_factor: 2.0,
3648 write_rng_epsilon: 0.1,
3649 split_threshold: 2000,
3650 num_samples_kmeans: 200,
3651 initial_lambda: 0.8,
3652 reassign_neighbor_count: 100,
3653 merge_threshold: 800,
3654 num_centers_to_merge_to: 20,
3655 write_nprobe: 10,
3656 ef_construction: 400,
3657 ef_search: 60,
3658 max_neighbors: 24,
3659 space: Space::Cosine,
3660 }),
3661 embedding_function: None,
3662 };
3663
3664 let result = Schema::reconcile_with_collection_config(&schema, &collection_config).unwrap();
3665
3666 let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
3668 let vector_index = embedding_override
3669 .float_list
3670 .as_ref()
3671 .unwrap()
3672 .vector_index
3673 .as_ref()
3674 .unwrap();
3675
3676 assert!(vector_index.enabled);
3677 assert_eq!(vector_index.config.space, Some(Space::Cosine));
3678 assert_eq!(vector_index.config.embedding_function, None);
3679 assert_eq!(
3680 vector_index.config.source_key,
3681 Some(DOCUMENT_KEY.to_string())
3682 );
3683
3684 assert!(vector_index.config.hnsw.is_none());
3685
3686 let spann_config = vector_index.config.spann.as_ref().unwrap();
3687 assert_eq!(spann_config.search_nprobe, Some(20));
3688 assert_eq!(spann_config.search_rng_factor, Some(3.0));
3689 assert_eq!(spann_config.search_rng_epsilon, Some(0.2));
3690 assert_eq!(spann_config.nreplica_count, Some(5));
3691 assert_eq!(spann_config.write_rng_factor, Some(2.0));
3692 assert_eq!(spann_config.write_rng_epsilon, Some(0.1));
3693 assert_eq!(spann_config.split_threshold, Some(2000));
3694 assert_eq!(spann_config.num_samples_kmeans, Some(200));
3695 assert_eq!(spann_config.initial_lambda, Some(0.8));
3696 assert_eq!(spann_config.reassign_neighbor_count, Some(100));
3697 assert_eq!(spann_config.merge_threshold, Some(800));
3698 assert_eq!(spann_config.num_centers_to_merge_to, Some(20));
3699 assert_eq!(spann_config.write_nprobe, Some(10));
3700 assert_eq!(spann_config.ef_construction, Some(400));
3701 assert_eq!(spann_config.ef_search, Some(60));
3702 assert_eq!(spann_config.max_neighbors, Some(24));
3703 }
3704
3705 #[test]
3706 fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
3707 let schema = Schema::new_default(KnnIndex::Hnsw);
3710
3711 let collection_config = InternalCollectionConfiguration {
3712 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
3713 ef_construction: 300,
3714 max_neighbors: 32,
3715 ef_search: 50,
3716 num_threads: 8,
3717 batch_size: 200,
3718 sync_threshold: 2000,
3719 resize_factor: 1.5,
3720 space: Space::L2,
3721 }),
3722 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3723 };
3724
3725 let result = Schema::reconcile_with_collection_config(&schema, &collection_config).unwrap();
3726
3727 let defaults_vector_index = result
3729 .defaults
3730 .float_list
3731 .as_ref()
3732 .unwrap()
3733 .vector_index
3734 .as_ref()
3735 .unwrap();
3736
3737 assert!(!defaults_vector_index.enabled);
3739 assert_eq!(defaults_vector_index.config.space, Some(Space::L2));
3741 assert_eq!(
3742 defaults_vector_index.config.embedding_function,
3743 Some(EmbeddingFunctionConfiguration::Legacy)
3744 );
3745 assert_eq!(
3746 defaults_vector_index.config.source_key,
3747 Some(DOCUMENT_KEY.to_string())
3748 );
3749 let defaults_hnsw = defaults_vector_index.config.hnsw.as_ref().unwrap();
3750 assert_eq!(defaults_hnsw.ef_construction, Some(300));
3751 assert_eq!(defaults_hnsw.max_neighbors, Some(32));
3752
3753 let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
3755 let embedding_vector_index = embedding_override
3756 .float_list
3757 .as_ref()
3758 .unwrap()
3759 .vector_index
3760 .as_ref()
3761 .unwrap();
3762
3763 assert!(embedding_vector_index.enabled);
3765 assert_eq!(embedding_vector_index.config.space, Some(Space::L2));
3767 assert_eq!(
3768 embedding_vector_index.config.embedding_function,
3769 Some(EmbeddingFunctionConfiguration::Legacy)
3770 );
3771 assert_eq!(
3772 embedding_vector_index.config.source_key,
3773 Some(DOCUMENT_KEY.to_string())
3774 );
3775 let embedding_hnsw = embedding_vector_index.config.hnsw.as_ref().unwrap();
3776 assert_eq!(embedding_hnsw.ef_construction, Some(300));
3777 assert_eq!(embedding_hnsw.max_neighbors, Some(32));
3778 }
3779
3780 #[test]
3781 fn test_is_schema_default() {
3782 let default_hnsw_schema = Schema::new_default(KnnIndex::Hnsw);
3784 assert!(default_hnsw_schema.is_default());
3785
3786 let default_spann_schema = Schema::new_default(KnnIndex::Spann);
3787 assert!(default_spann_schema.is_default());
3788
3789 let mut modified_schema = Schema::new_default(KnnIndex::Hnsw);
3791 if let Some(ref mut string_type) = modified_schema.defaults.string {
3793 if let Some(ref mut string_inverted) = string_type.string_inverted_index {
3794 string_inverted.enabled = false; }
3796 }
3797 assert!(!modified_schema.is_default());
3798
3799 let mut schema_with_extra_overrides = Schema::new_default(KnnIndex::Hnsw);
3801 schema_with_extra_overrides
3802 .keys
3803 .insert("custom_key".to_string(), ValueTypes::default());
3804 assert!(!schema_with_extra_overrides.is_default());
3805 }
3806
3807 #[test]
3808 fn test_add_merges_keys_by_value_type() {
3809 let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
3810 let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
3811
3812 let string_override = ValueTypes {
3813 string: Some(StringValueType {
3814 string_inverted_index: Some(StringInvertedIndexType {
3815 enabled: true,
3816 config: StringInvertedIndexConfig {},
3817 }),
3818 fts_index: None,
3819 }),
3820 ..Default::default()
3821 };
3822 schema_a
3823 .keys
3824 .insert("custom_field".to_string(), string_override);
3825
3826 let float_override = ValueTypes {
3827 float: Some(FloatValueType {
3828 float_inverted_index: Some(FloatInvertedIndexType {
3829 enabled: true,
3830 config: FloatInvertedIndexConfig {},
3831 }),
3832 }),
3833 ..Default::default()
3834 };
3835 schema_b
3836 .keys
3837 .insert("custom_field".to_string(), float_override);
3838
3839 let merged = schema_a.merge(&schema_b).unwrap();
3840 let merged_override = merged.keys.get("custom_field").unwrap();
3841
3842 assert!(merged_override.string.is_some());
3843 assert!(merged_override.float.is_some());
3844 assert!(
3845 merged_override
3846 .string
3847 .as_ref()
3848 .unwrap()
3849 .string_inverted_index
3850 .as_ref()
3851 .unwrap()
3852 .enabled
3853 );
3854 assert!(
3855 merged_override
3856 .float
3857 .as_ref()
3858 .unwrap()
3859 .float_inverted_index
3860 .as_ref()
3861 .unwrap()
3862 .enabled
3863 );
3864 }
3865
3866 #[test]
3867 fn test_add_rejects_different_defaults() {
3868 let schema_a = Schema::new_default(KnnIndex::Hnsw);
3869 let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
3870
3871 if let Some(string_type) = schema_b.defaults.string.as_mut() {
3872 if let Some(string_index) = string_type.string_inverted_index.as_mut() {
3873 string_index.enabled = false;
3874 }
3875 }
3876
3877 let err = schema_a.merge(&schema_b).unwrap_err();
3878 assert!(matches!(err, SchemaError::DefaultsMismatch));
3879 }
3880
3881 #[test]
3882 fn test_add_detects_conflicting_value_type_configuration() {
3883 let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
3884 let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
3885
3886 let string_override_enabled = ValueTypes {
3887 string: Some(StringValueType {
3888 string_inverted_index: Some(StringInvertedIndexType {
3889 enabled: true,
3890 config: StringInvertedIndexConfig {},
3891 }),
3892 fts_index: None,
3893 }),
3894 ..Default::default()
3895 };
3896 schema_a
3897 .keys
3898 .insert("custom_field".to_string(), string_override_enabled);
3899
3900 let string_override_disabled = ValueTypes {
3901 string: Some(StringValueType {
3902 string_inverted_index: Some(StringInvertedIndexType {
3903 enabled: false,
3904 config: StringInvertedIndexConfig {},
3905 }),
3906 fts_index: None,
3907 }),
3908 ..Default::default()
3909 };
3910 schema_b
3911 .keys
3912 .insert("custom_field".to_string(), string_override_disabled);
3913
3914 let err = schema_a.merge(&schema_b).unwrap_err();
3915 assert!(matches!(err, SchemaError::ConfigurationConflict { .. }));
3916 }
3917
3918 #[test]
3920 fn test_backward_compatibility_aliases() {
3921 let old_format_json = r###"{
3923 "defaults": {
3924 "#string": {
3925 "$fts_index": {
3926 "enabled": true,
3927 "config": {}
3928 }
3929 },
3930 "#int": {
3931 "$int_inverted_index": {
3932 "enabled": true,
3933 "config": {}
3934 }
3935 },
3936 "#float_list": {
3937 "$vector_index": {
3938 "enabled": true,
3939 "config": {
3940 "spann": {
3941 "search_nprobe": 10
3942 }
3943 }
3944 }
3945 }
3946 },
3947 "key_overrides": {
3948 "#document": {
3949 "#string": {
3950 "$fts_index": {
3951 "enabled": false,
3952 "config": {}
3953 }
3954 }
3955 }
3956 }
3957 }"###;
3958
3959 let schema_from_old: Schema = serde_json::from_str(old_format_json).unwrap();
3960
3961 let new_format_json = r###"{
3963 "defaults": {
3964 "string": {
3965 "fts_index": {
3966 "enabled": true,
3967 "config": {}
3968 }
3969 },
3970 "int": {
3971 "int_inverted_index": {
3972 "enabled": true,
3973 "config": {}
3974 }
3975 },
3976 "float_list": {
3977 "vector_index": {
3978 "enabled": true,
3979 "config": {
3980 "spann": {
3981 "search_nprobe": 10
3982 }
3983 }
3984 }
3985 }
3986 },
3987 "keys": {
3988 "#document": {
3989 "string": {
3990 "fts_index": {
3991 "enabled": false,
3992 "config": {}
3993 }
3994 }
3995 }
3996 }
3997 }"###;
3998
3999 let schema_from_new: Schema = serde_json::from_str(new_format_json).unwrap();
4000
4001 assert_eq!(schema_from_old, schema_from_new);
4003
4004 assert!(schema_from_old.defaults.string.is_some());
4006 assert!(schema_from_old
4007 .defaults
4008 .string
4009 .as_ref()
4010 .unwrap()
4011 .fts_index
4012 .is_some());
4013 assert!(
4014 schema_from_old
4015 .defaults
4016 .string
4017 .as_ref()
4018 .unwrap()
4019 .fts_index
4020 .as_ref()
4021 .unwrap()
4022 .enabled
4023 );
4024
4025 assert!(schema_from_old.defaults.int.is_some());
4026 assert!(schema_from_old
4027 .defaults
4028 .int
4029 .as_ref()
4030 .unwrap()
4031 .int_inverted_index
4032 .is_some());
4033
4034 assert!(schema_from_old.defaults.float_list.is_some());
4035 assert!(schema_from_old
4036 .defaults
4037 .float_list
4038 .as_ref()
4039 .unwrap()
4040 .vector_index
4041 .is_some());
4042
4043 assert!(schema_from_old.keys.contains_key(DOCUMENT_KEY));
4044 let doc_override = schema_from_old.keys.get(DOCUMENT_KEY).unwrap();
4045 assert!(doc_override.string.is_some());
4046 assert!(
4047 !doc_override
4048 .string
4049 .as_ref()
4050 .unwrap()
4051 .fts_index
4052 .as_ref()
4053 .unwrap()
4054 .enabled
4055 );
4056
4057 let serialized = serde_json::to_string(&schema_from_old).unwrap();
4059
4060 assert!(serialized.contains(r#""keys":"#));
4062 assert!(serialized.contains(r#""string":"#));
4063 assert!(serialized.contains(r#""fts_index":"#));
4064 assert!(serialized.contains(r#""int_inverted_index":"#));
4065 assert!(serialized.contains(r#""vector_index":"#));
4066
4067 assert!(!serialized.contains(r#""key_overrides":"#));
4069 assert!(!serialized.contains(r###""#string":"###));
4070 assert!(!serialized.contains(r###""$fts_index":"###));
4071 assert!(!serialized.contains(r###""$int_inverted_index":"###));
4072 assert!(!serialized.contains(r###""$vector_index":"###));
4073 }
4074
4075 #[test]
4076 fn test_hnsw_index_config_validation() {
4077 use validator::Validate;
4078
4079 let valid_config = HnswIndexConfig {
4081 batch_size: Some(10),
4082 sync_threshold: Some(100),
4083 ef_construction: Some(100),
4084 max_neighbors: Some(16),
4085 ..Default::default()
4086 };
4087 assert!(valid_config.validate().is_ok());
4088
4089 let invalid_batch_size = HnswIndexConfig {
4091 batch_size: Some(1),
4092 ..Default::default()
4093 };
4094 assert!(invalid_batch_size.validate().is_err());
4095
4096 let invalid_sync_threshold = HnswIndexConfig {
4098 sync_threshold: Some(1),
4099 ..Default::default()
4100 };
4101 assert!(invalid_sync_threshold.validate().is_err());
4102
4103 let boundary_config = HnswIndexConfig {
4105 batch_size: Some(2),
4106 sync_threshold: Some(2),
4107 ..Default::default()
4108 };
4109 assert!(boundary_config.validate().is_ok());
4110
4111 let all_none_config = HnswIndexConfig {
4113 ..Default::default()
4114 };
4115 assert!(all_none_config.validate().is_ok());
4116
4117 let other_fields_config = HnswIndexConfig {
4119 ef_construction: Some(1),
4120 max_neighbors: Some(1),
4121 ef_search: Some(1),
4122 num_threads: Some(1),
4123 resize_factor: Some(0.1),
4124 ..Default::default()
4125 };
4126 assert!(other_fields_config.validate().is_ok());
4127 }
4128
4129 #[test]
4130 fn test_spann_index_config_validation() {
4131 use validator::Validate;
4132
4133 let valid_config = SpannIndexConfig {
4135 write_nprobe: Some(32),
4136 nreplica_count: Some(4),
4137 split_threshold: Some(100),
4138 merge_threshold: Some(50),
4139 reassign_neighbor_count: Some(32),
4140 num_centers_to_merge_to: Some(4),
4141 ef_construction: Some(100),
4142 ef_search: Some(100),
4143 max_neighbors: Some(32),
4144 search_rng_factor: Some(1.0),
4145 write_rng_factor: Some(1.0),
4146 search_rng_epsilon: Some(7.5),
4147 write_rng_epsilon: Some(7.5),
4148 ..Default::default()
4149 };
4150 assert!(valid_config.validate().is_ok());
4151
4152 let invalid_write_nprobe = SpannIndexConfig {
4154 write_nprobe: Some(200),
4155 ..Default::default()
4156 };
4157 assert!(invalid_write_nprobe.validate().is_err());
4158
4159 let invalid_split_threshold = SpannIndexConfig {
4161 split_threshold: Some(10),
4162 ..Default::default()
4163 };
4164 assert!(invalid_split_threshold.validate().is_err());
4165
4166 let invalid_split_threshold_high = SpannIndexConfig {
4168 split_threshold: Some(250),
4169 ..Default::default()
4170 };
4171 assert!(invalid_split_threshold_high.validate().is_err());
4172
4173 let invalid_nreplica = SpannIndexConfig {
4175 nreplica_count: Some(10),
4176 ..Default::default()
4177 };
4178 assert!(invalid_nreplica.validate().is_err());
4179
4180 let invalid_reassign = SpannIndexConfig {
4182 reassign_neighbor_count: Some(100),
4183 ..Default::default()
4184 };
4185 assert!(invalid_reassign.validate().is_err());
4186
4187 let invalid_merge_threshold_low = SpannIndexConfig {
4189 merge_threshold: Some(5),
4190 ..Default::default()
4191 };
4192 assert!(invalid_merge_threshold_low.validate().is_err());
4193
4194 let invalid_merge_threshold_high = SpannIndexConfig {
4195 merge_threshold: Some(150),
4196 ..Default::default()
4197 };
4198 assert!(invalid_merge_threshold_high.validate().is_err());
4199
4200 let invalid_num_centers = SpannIndexConfig {
4202 num_centers_to_merge_to: Some(10),
4203 ..Default::default()
4204 };
4205 assert!(invalid_num_centers.validate().is_err());
4206
4207 let invalid_ef_construction = SpannIndexConfig {
4209 ef_construction: Some(300),
4210 ..Default::default()
4211 };
4212 assert!(invalid_ef_construction.validate().is_err());
4213
4214 let invalid_ef_search = SpannIndexConfig {
4216 ef_search: Some(300),
4217 ..Default::default()
4218 };
4219 assert!(invalid_ef_search.validate().is_err());
4220
4221 let invalid_max_neighbors = SpannIndexConfig {
4223 max_neighbors: Some(100),
4224 ..Default::default()
4225 };
4226 assert!(invalid_max_neighbors.validate().is_err());
4227
4228 let invalid_search_nprobe = SpannIndexConfig {
4230 search_nprobe: Some(200),
4231 ..Default::default()
4232 };
4233 assert!(invalid_search_nprobe.validate().is_err());
4234
4235 let invalid_search_rng_factor_low = SpannIndexConfig {
4237 search_rng_factor: Some(0.9),
4238 ..Default::default()
4239 };
4240 assert!(invalid_search_rng_factor_low.validate().is_err());
4241
4242 let invalid_search_rng_factor_high = SpannIndexConfig {
4243 search_rng_factor: Some(1.1),
4244 ..Default::default()
4245 };
4246 assert!(invalid_search_rng_factor_high.validate().is_err());
4247
4248 let valid_search_rng_factor = SpannIndexConfig {
4250 search_rng_factor: Some(1.0),
4251 ..Default::default()
4252 };
4253 assert!(valid_search_rng_factor.validate().is_ok());
4254
4255 let invalid_search_rng_epsilon_low = SpannIndexConfig {
4257 search_rng_epsilon: Some(4.0),
4258 ..Default::default()
4259 };
4260 assert!(invalid_search_rng_epsilon_low.validate().is_err());
4261
4262 let invalid_search_rng_epsilon_high = SpannIndexConfig {
4263 search_rng_epsilon: Some(11.0),
4264 ..Default::default()
4265 };
4266 assert!(invalid_search_rng_epsilon_high.validate().is_err());
4267
4268 let valid_search_rng_epsilon = SpannIndexConfig {
4270 search_rng_epsilon: Some(7.5),
4271 ..Default::default()
4272 };
4273 assert!(valid_search_rng_epsilon.validate().is_ok());
4274
4275 let invalid_write_rng_factor_low = SpannIndexConfig {
4277 write_rng_factor: Some(0.9),
4278 ..Default::default()
4279 };
4280 assert!(invalid_write_rng_factor_low.validate().is_err());
4281
4282 let invalid_write_rng_factor_high = SpannIndexConfig {
4283 write_rng_factor: Some(1.1),
4284 ..Default::default()
4285 };
4286 assert!(invalid_write_rng_factor_high.validate().is_err());
4287
4288 let valid_write_rng_factor = SpannIndexConfig {
4290 write_rng_factor: Some(1.0),
4291 ..Default::default()
4292 };
4293 assert!(valid_write_rng_factor.validate().is_ok());
4294
4295 let invalid_write_rng_epsilon_low = SpannIndexConfig {
4297 write_rng_epsilon: Some(4.0),
4298 ..Default::default()
4299 };
4300 assert!(invalid_write_rng_epsilon_low.validate().is_err());
4301
4302 let invalid_write_rng_epsilon_high = SpannIndexConfig {
4303 write_rng_epsilon: Some(11.0),
4304 ..Default::default()
4305 };
4306 assert!(invalid_write_rng_epsilon_high.validate().is_err());
4307
4308 let valid_write_rng_epsilon = SpannIndexConfig {
4310 write_rng_epsilon: Some(7.5),
4311 ..Default::default()
4312 };
4313 assert!(valid_write_rng_epsilon.validate().is_ok());
4314
4315 let invalid_num_samples_kmeans = SpannIndexConfig {
4317 num_samples_kmeans: Some(1500),
4318 ..Default::default()
4319 };
4320 assert!(invalid_num_samples_kmeans.validate().is_err());
4321
4322 let valid_num_samples_kmeans = SpannIndexConfig {
4324 num_samples_kmeans: Some(500),
4325 ..Default::default()
4326 };
4327 assert!(valid_num_samples_kmeans.validate().is_ok());
4328
4329 let invalid_initial_lambda_high = SpannIndexConfig {
4331 initial_lambda: Some(150.0),
4332 ..Default::default()
4333 };
4334 assert!(invalid_initial_lambda_high.validate().is_err());
4335
4336 let invalid_initial_lambda_low = SpannIndexConfig {
4337 initial_lambda: Some(50.0),
4338 ..Default::default()
4339 };
4340 assert!(invalid_initial_lambda_low.validate().is_err());
4341
4342 let valid_initial_lambda = SpannIndexConfig {
4344 initial_lambda: Some(100.0),
4345 ..Default::default()
4346 };
4347 assert!(valid_initial_lambda.validate().is_ok());
4348
4349 let all_none_config = SpannIndexConfig {
4351 ..Default::default()
4352 };
4353 assert!(all_none_config.validate().is_ok());
4354 }
4355
4356 #[test]
4357 fn test_builder_pattern_crud_workflow() {
4358 let schema = Schema::new_default(KnnIndex::Hnsw)
4362 .create_index(
4363 None,
4364 IndexConfig::Vector(VectorIndexConfig {
4365 space: Some(Space::Cosine),
4366 embedding_function: None,
4367 source_key: None,
4368 hnsw: Some(HnswIndexConfig {
4369 ef_construction: Some(200),
4370 max_neighbors: Some(32),
4371 ef_search: Some(50),
4372 num_threads: None,
4373 batch_size: None,
4374 sync_threshold: None,
4375 resize_factor: None,
4376 }),
4377 spann: None,
4378 }),
4379 )
4380 .expect("vector config should succeed")
4381 .create_index(
4382 Some("category"),
4383 IndexConfig::StringInverted(StringInvertedIndexConfig {}),
4384 )
4385 .expect("string inverted on key should succeed")
4386 .create_index(
4387 Some("year"),
4388 IndexConfig::IntInverted(IntInvertedIndexConfig {}),
4389 )
4390 .expect("int inverted on key should succeed")
4391 .create_index(
4392 Some("rating"),
4393 IndexConfig::FloatInverted(FloatInvertedIndexConfig {}),
4394 )
4395 .expect("float inverted on key should succeed")
4396 .create_index(
4397 Some("is_active"),
4398 IndexConfig::BoolInverted(BoolInvertedIndexConfig {}),
4399 )
4400 .expect("bool inverted on key should succeed");
4401
4402 assert!(schema.keys.contains_key(EMBEDDING_KEY));
4405 let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
4406 assert!(embedding.float_list.is_some());
4407 let vector_index = embedding
4408 .float_list
4409 .as_ref()
4410 .unwrap()
4411 .vector_index
4412 .as_ref()
4413 .unwrap();
4414 assert!(vector_index.enabled);
4415 assert_eq!(vector_index.config.space, Some(Space::Cosine));
4416 assert_eq!(
4417 vector_index.config.hnsw.as_ref().unwrap().ef_construction,
4418 Some(200)
4419 );
4420
4421 assert!(schema.keys.contains_key("category"));
4423 assert!(schema.keys.contains_key("year"));
4424 assert!(schema.keys.contains_key("rating"));
4425 assert!(schema.keys.contains_key("is_active"));
4426
4427 let category = schema.keys.get("category").unwrap();
4429 assert!(category.string.is_some());
4430 let string_idx = category
4431 .string
4432 .as_ref()
4433 .unwrap()
4434 .string_inverted_index
4435 .as_ref()
4436 .unwrap();
4437 assert!(string_idx.enabled);
4438
4439 let year = schema.keys.get("year").unwrap();
4441 assert!(year.int.is_some());
4442 let int_idx = year
4443 .int
4444 .as_ref()
4445 .unwrap()
4446 .int_inverted_index
4447 .as_ref()
4448 .unwrap();
4449 assert!(int_idx.enabled);
4450
4451 let schema = schema
4453 .delete_index(
4454 Some("category"),
4455 IndexConfig::StringInverted(StringInvertedIndexConfig {}),
4456 )
4457 .expect("delete string inverted should succeed")
4458 .delete_index(
4459 Some("year"),
4460 IndexConfig::IntInverted(IntInvertedIndexConfig {}),
4461 )
4462 .expect("delete int inverted should succeed");
4463
4464 let category = schema.keys.get("category").unwrap();
4466 let string_idx = category
4467 .string
4468 .as_ref()
4469 .unwrap()
4470 .string_inverted_index
4471 .as_ref()
4472 .unwrap();
4473 assert!(!string_idx.enabled); let year = schema.keys.get("year").unwrap();
4476 let int_idx = year
4477 .int
4478 .as_ref()
4479 .unwrap()
4480 .int_inverted_index
4481 .as_ref()
4482 .unwrap();
4483 assert!(!int_idx.enabled); let rating = schema.keys.get("rating").unwrap();
4487 let float_idx = rating
4488 .float
4489 .as_ref()
4490 .unwrap()
4491 .float_inverted_index
4492 .as_ref()
4493 .unwrap();
4494 assert!(float_idx.enabled); let is_active = schema.keys.get("is_active").unwrap();
4497 let bool_idx = is_active
4498 .boolean
4499 .as_ref()
4500 .unwrap()
4501 .bool_inverted_index
4502 .as_ref()
4503 .unwrap();
4504 assert!(bool_idx.enabled); }
4506
/// Walks through the rejection paths of `create_index`. Every scenario starts
/// from a fresh default HNSW schema and checks the exact error variant returned.
#[test]
fn test_builder_create_index_validation_errors() {
    // Each scenario gets its own untouched default schema.
    let fresh_schema = || Schema::new_default(KnnIndex::Hnsw);
    // The sparse-vector scenarios all use the same all-`None` configuration.
    let empty_sparse_config = || {
        IndexConfig::SparseVector(SparseVectorIndexConfig {
            embedding_function: None,
            source_key: None,
            bm25: None,
        })
    };

    // Vector index addressed to a specific key.
    let keyed_vector = fresh_schema().create_index(
        Some("my_vectors"),
        IndexConfig::Vector(VectorIndexConfig {
            space: Some(Space::L2),
            embedding_function: None,
            source_key: None,
            hnsw: None,
            spann: None,
        }),
    );
    assert!(keyed_vector.is_err());
    assert!(matches!(
        keyed_vector.unwrap_err(),
        SchemaBuilderError::VectorIndexMustBeGlobal { key } if key == "my_vectors"
    ));

    // FTS index addressed to a specific key.
    let keyed_fts = fresh_schema().create_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
    assert!(keyed_fts.is_err());
    assert!(matches!(
        keyed_fts.unwrap_err(),
        SchemaBuilderError::FtsIndexMustBeGlobal { key } if key == "my_text"
    ));

    // Creating an index on the document key.
    let on_document_key = fresh_schema().create_index(
        Some(DOCUMENT_KEY),
        IndexConfig::StringInverted(StringInvertedIndexConfig {}),
    );
    assert!(on_document_key.is_err());
    assert!(matches!(
        on_document_key.unwrap_err(),
        SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
    ));

    // Creating an index on the embedding key.
    let on_embedding_key = fresh_schema().create_index(
        Some(EMBEDDING_KEY),
        IndexConfig::IntInverted(IntInvertedIndexConfig {}),
    );
    assert!(on_embedding_key.is_err());
    assert!(matches!(
        on_embedding_key.unwrap_err(),
        SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
    ));

    // Sparse vector index without a key.
    let keyless_sparse = fresh_schema().create_index(None, empty_sparse_config());
    assert!(keyless_sparse.is_err());
    assert!(matches!(
        keyless_sparse.unwrap_err(),
        SchemaBuilderError::SparseVectorRequiresKey
    ));

    // A second sparse vector index after a first one was accepted.
    let with_first_sparse = fresh_schema()
        .create_index(Some("sparse1"), empty_sparse_config())
        .expect("first sparse should succeed");
    let second_sparse = with_first_sparse.create_index(Some("sparse2"), empty_sparse_config());
    assert!(second_sparse.is_err());
    assert!(matches!(
        second_sparse.unwrap_err(),
        SchemaBuilderError::MultipleSparseVectorIndexes { existing_key } if existing_key == "sparse1"
    ));
}
4602
/// Walks through the rejection paths of `delete_index`. Every scenario starts
/// from a fresh default HNSW schema and checks the exact error variant returned.
#[test]
fn test_builder_delete_index_validation_errors() {
    // Each scenario gets its own untouched default schema.
    let fresh_schema = || Schema::new_default(KnnIndex::Hnsw);
    // Shared all-`None` sparse configuration for the create-then-delete scenario.
    let empty_sparse_config = || {
        IndexConfig::SparseVector(SparseVectorIndexConfig {
            embedding_function: None,
            source_key: None,
            bm25: None,
        })
    };

    // Deleting an index on the embedding key.
    let on_embedding_key = fresh_schema().delete_index(
        Some(EMBEDDING_KEY),
        IndexConfig::StringInverted(StringInvertedIndexConfig {}),
    );
    assert!(on_embedding_key.is_err());
    assert!(matches!(
        on_embedding_key.unwrap_err(),
        SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
    ));

    // Deleting an index on the document key.
    let on_document_key = fresh_schema().delete_index(
        Some(DOCUMENT_KEY),
        IndexConfig::IntInverted(IntInvertedIndexConfig {}),
    );
    assert!(on_document_key.is_err());
    assert!(matches!(
        on_document_key.unwrap_err(),
        SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
    ));

    // Deleting the global vector index.
    let global_vector = fresh_schema().delete_index(
        None,
        IndexConfig::Vector(VectorIndexConfig {
            space: None,
            embedding_function: None,
            source_key: None,
            hnsw: None,
            spann: None,
        }),
    );
    assert!(global_vector.is_err());
    assert!(matches!(
        global_vector.unwrap_err(),
        SchemaBuilderError::VectorIndexDeletionNotSupported
    ));

    // Deleting the global FTS index.
    let global_fts = fresh_schema().delete_index(None, IndexConfig::Fts(FtsIndexConfig {}));
    assert!(global_fts.is_err());
    assert!(matches!(
        global_fts.unwrap_err(),
        SchemaBuilderError::FtsIndexDeletionNotSupported
    ));

    // Deleting a sparse vector index that was just created on the same key.
    let with_sparse = fresh_schema()
        .create_index(Some("sparse"), empty_sparse_config())
        .expect("create should succeed");
    let sparse_deletion = with_sparse.delete_index(Some("sparse"), empty_sparse_config());
    assert!(sparse_deletion.is_err());
    assert!(matches!(
        sparse_deletion.unwrap_err(),
        SchemaBuilderError::SparseVectorIndexDeletionNotSupported
    ));
}
4682
/// Applies a sequence of create/delete calls to one schema and then checks the
/// `enabled` flag recorded for each touched key.
#[test]
fn test_builder_pattern_chaining() {
    // Same operations, in the same order, applied one step at a time.
    let schema = Schema::new_default(KnnIndex::Hnsw);
    let schema = schema
        .create_index(Some("tag1"), StringInvertedIndexConfig {}.into())
        .unwrap();
    let schema = schema
        .create_index(Some("tag2"), StringInvertedIndexConfig {}.into())
        .unwrap();
    let schema = schema
        .create_index(Some("tag3"), StringInvertedIndexConfig {}.into())
        .unwrap();
    let schema = schema
        .create_index(Some("count"), IntInvertedIndexConfig {}.into())
        .unwrap();
    let schema = schema
        .delete_index(Some("tag2"), StringInvertedIndexConfig {}.into())
        .unwrap();
    let schema = schema
        .create_index(Some("score"), FloatInvertedIndexConfig {}.into())
        .unwrap();

    // Reads the `enabled` flag of a key's string inverted index, panicking if
    // any level of the override is missing.
    let string_index_enabled = |key: &str| -> bool {
        let entry = schema.keys.get(key).unwrap();
        let string_type = entry.string.as_ref().unwrap();
        string_type.string_inverted_index.as_ref().unwrap().enabled
    };
    // Same lookup for the int inverted index.
    let int_index_enabled = |key: &str| -> bool {
        let entry = schema.keys.get(key).unwrap();
        let int_type = entry.int.as_ref().unwrap();
        int_type.int_inverted_index.as_ref().unwrap().enabled
    };
    // Same lookup for the float inverted index.
    let float_index_enabled = |key: &str| -> bool {
        let entry = schema.keys.get(key).unwrap();
        let float_type = entry.float.as_ref().unwrap();
        float_type.float_inverted_index.as_ref().unwrap().enabled
    };

    assert!(string_index_enabled("tag1"));

    // tag2 was created and then deleted: the entry remains, but disabled.
    assert!(!string_index_enabled("tag2"));

    assert!(string_index_enabled("tag3"));

    assert!(int_index_enabled("count"));

    assert!(float_index_enabled("score"));
}
4775
/// Pins the exact contents of `Schema::default()`: the per-type defaults and the
/// two key overrides for the document and embedding keys.
///
/// NOTE(review): the name implies parity with the Python client's default
/// schema; the Python side is not visible from this file.
#[test]
fn test_schema_default_matches_python() {
    let schema = Schema::default();
    let defaults = &schema.defaults;

    // String defaults: FTS off, string inverted index on.
    assert!(defaults.string.is_some());
    let string_defaults = defaults.string.as_ref().unwrap();
    let default_fts = string_defaults.fts_index.as_ref().unwrap();
    assert!(!default_fts.enabled);
    let default_string_inverted = string_defaults.string_inverted_index.as_ref().unwrap();
    assert!(default_string_inverted.enabled);

    // Float-list defaults: vector index off, with an entirely unset config.
    assert!(defaults.float_list.is_some());
    let float_list_defaults = defaults.float_list.as_ref().unwrap();
    let default_vector_index = float_list_defaults.vector_index.as_ref().unwrap();
    assert!(!default_vector_index.enabled);
    let default_vector_config = &float_list_defaults.vector_index.as_ref().unwrap().config;
    assert_eq!(default_vector_config.space, None);
    assert_eq!(default_vector_config.hnsw, None);
    assert_eq!(default_vector_config.spann, None);
    assert_eq!(default_vector_config.source_key, None);

    // Sparse-vector defaults: index off.
    assert!(defaults.sparse_vector.is_some());
    let sparse_defaults = defaults.sparse_vector.as_ref().unwrap();
    let default_sparse_index = sparse_defaults.sparse_vector_index.as_ref().unwrap();
    assert!(!default_sparse_index.enabled);

    // Int defaults: inverted index on.
    assert!(defaults.int.is_some());
    let int_defaults = defaults.int.as_ref().unwrap();
    assert!(int_defaults.int_inverted_index.as_ref().unwrap().enabled);

    // Float defaults: inverted index on.
    assert!(defaults.float.is_some());
    let float_defaults = defaults.float.as_ref().unwrap();
    assert!(float_defaults.float_inverted_index.as_ref().unwrap().enabled);

    // Boolean defaults: inverted index on.
    assert!(defaults.boolean.is_some());
    let boolean_defaults = defaults.boolean.as_ref().unwrap();
    assert!(boolean_defaults.bool_inverted_index.as_ref().unwrap().enabled);

    // Document key override: FTS on, string inverted index off.
    assert!(schema.keys.contains_key(DOCUMENT_KEY));
    let document_entry = schema.keys.get(DOCUMENT_KEY).unwrap();
    assert!(document_entry.string.is_some());
    let document_fts = document_entry.string.as_ref().unwrap().fts_index.as_ref();
    assert!(document_fts.unwrap().enabled);
    let document_string_inverted = document_entry
        .string
        .as_ref()
        .unwrap()
        .string_inverted_index
        .as_ref();
    assert!(!document_string_inverted.unwrap().enabled);

    // Embedding key override: vector index on, sourced from the document key,
    // with space/hnsw/spann left unset.
    assert!(schema.keys.contains_key(EMBEDDING_KEY));
    let embedding_entry = schema.keys.get(EMBEDDING_KEY).unwrap();
    assert!(embedding_entry.float_list.is_some());
    let embedding_float_list = embedding_entry.float_list.as_ref().unwrap();
    let embedding_vector_index = embedding_float_list.vector_index.as_ref().unwrap();
    assert!(embedding_vector_index.enabled);
    assert_eq!(
        embedding_vector_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
    assert_eq!(embedding_vector_index.config.space, None);
    assert_eq!(embedding_vector_index.config.hnsw, None);
    assert_eq!(embedding_vector_index.config.spann, None);

    // Only the two key overrides checked above are present.
    assert_eq!(schema.keys.len(), 2);
}
4895
/// Confirms a schema from `Schema::default()` can be extended with `create_index`:
/// the new key is added next to the two existing key overrides.
#[test]
fn test_schema_default_works_with_builder() {
    let extended = Schema::default()
        .create_index(Some("category"), StringInvertedIndexConfig {}.into())
        .expect("should succeed");

    // The new key first, then the two keys the default schema already carries.
    for expected_key in ["category", DOCUMENT_KEY, EMBEDDING_KEY] {
        assert!(extended.keys.contains_key(expected_key));
    }
    assert_eq!(extended.keys.len(), 3);
}
4909}