1use chroma_error::{ChromaError, ErrorCodes};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::sync::{Arc, LazyLock};
6use thiserror::Error;
7use validator::Validate;
8
9use crate::chroma_proto;
10use crate::collection_configuration::{
11 EmbeddingFunctionConfiguration, InternalCollectionConfiguration,
12 UpdateVectorIndexConfiguration, VectorIndexConfiguration,
13};
14use crate::hnsw_configuration::Space;
15use crate::metadata::{MetadataComparison, MetadataValueType, Where};
16use crate::operator::QueryVector;
17use crate::{
18 default_batch_size, default_construction_ef, default_construction_ef_spann,
19 default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
20 default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
21 default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
22 default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
23 default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
24 default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor, ConversionError,
25 HnswParametersFromSegmentError, InternalHnswConfiguration, InternalSpannConfiguration,
26 InternalUpdateCollectionConfiguration, KnnIndex, Segment, CHROMA_KEY,
27};
28
29impl ChromaError for SchemaError {
30 fn code(&self) -> ErrorCodes {
31 match self {
32 SchemaError::MissingIndexConfiguration { .. } => ErrorCodes::Internal,
35 SchemaError::InvalidSchema { .. } => ErrorCodes::Internal,
36 SchemaError::DefaultsMismatch => ErrorCodes::Internal,
39 SchemaError::ConfigurationConflict { .. } => ErrorCodes::Internal,
40
41 SchemaError::InvalidUserInput { .. } => ErrorCodes::InvalidArgument,
44 SchemaError::ConfigAndSchemaConflict => ErrorCodes::InvalidArgument,
45 SchemaError::InvalidHnswConfig(_) => ErrorCodes::InvalidArgument,
46 SchemaError::InvalidSpannConfig(_) => ErrorCodes::InvalidArgument,
47 SchemaError::Builder(e) => e.code(),
48 }
49 }
50}
51
/// Errors raised while validating, reconciling, or merging a [`Schema`].
#[derive(Debug, Error)]
pub enum SchemaError {
    /// No index configuration is present for the given metadata key and value type.
    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
    MissingIndexConfiguration { key: String, value_type: String },
    /// Schema reconciliation failed for the stated reason.
    #[error("Schema reconciliation failed: {reason}")]
    InvalidSchema { reason: String },
    /// Both a collection configuration and a schema were supplied at once.
    #[error("Cannot set both collection config and schema simultaneously")]
    ConfigAndSchemaConflict,
    /// Two schemas being merged do not share the same `defaults`.
    #[error("Cannot merge schemas with differing defaults")]
    DefaultsMismatch,
    /// Two configurations for the same item disagree; `context` names the item.
    #[error("Conflicting configuration for {context}")]
    ConfigurationConflict { context: String },
    /// HNSW configuration failed validation.
    #[error("Invalid HNSW configuration: {0}")]
    InvalidHnswConfig(validator::ValidationErrors),
    /// SPANN configuration failed validation.
    #[error("Invalid SPANN configuration: {0}")]
    InvalidSpannConfig(validator::ValidationErrors),
    /// The supplied schema input is invalid for the stated reason.
    #[error("Invalid schema input: {reason}")]
    InvalidUserInput { reason: String },
    /// A wrapped [`SchemaBuilderError`]; its message is displayed unchanged.
    #[error(transparent)]
    Builder(#[from] SchemaBuilderError),
}
73
/// Errors describing index create/delete requests that the schema builder rejects.
///
/// Every variant maps to `ErrorCodes::InvalidArgument` (see the `ChromaError` impl).
#[derive(Debug, Error)]
pub enum SchemaBuilderError {
    /// A vector index was requested on a specific key instead of globally.
    #[error("Vector index must be configured globally using create_index(None, config), not on specific key '{key}'")]
    VectorIndexMustBeGlobal { key: String },
    /// An FTS index was requested on a specific key instead of globally.
    #[error("FTS index must be configured globally using create_index(None, config), not on specific key '{key}'")]
    FtsIndexMustBeGlobal { key: String },
    /// An attempt was made to modify a system-managed special key.
    #[error("Cannot modify special key '{key}' - it is managed automatically by the system. To customize vector search, modify the global vector config instead.")]
    SpecialKeyModificationNotAllowed { key: String },
    /// A sparse vector index was requested without naming a key.
    #[error("Sparse vector index requires a specific key. Use create_index(Some(\"key_name\"), config) instead of create_index(None, config)")]
    SparseVectorRequiresKey,
    /// A second sparse vector index was requested; `existing_key` already has one.
    #[error("Only one sparse vector index allowed per collection. Key '{existing_key}' already has a sparse vector index. Remove it first or use that key.")]
    MultipleSparseVectorIndexes { existing_key: String },
    /// Deleting the vector index was requested, which is not supported.
    #[error("Vector index deletion not supported. The vector index is always enabled on #embedding. To disable vector search, disable the collection instead.")]
    VectorIndexDeletionNotSupported,
    /// Deleting the FTS index was requested, which is not supported.
    #[error("FTS index deletion not supported. The FTS index is always enabled on #document. To disable full-text search, use a different collection without FTS.")]
    FtsIndexDeletionNotSupported,
    /// Deleting a sparse vector index was requested, which is not supported yet.
    #[error("Sparse vector index deletion not supported yet. Sparse vector indexes cannot be removed once created.")]
    SparseVectorIndexDeletionNotSupported,
}
93
/// Errors raised when a metadata filter cannot be used against a schema.
#[derive(Debug, Error)]
pub enum FilterValidationError {
    /// The filter targets a key/value-type pair whose indexing is disabled.
    /// `value_type` is rendered with its `Debug` representation in the message.
    #[error(
        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
    )]
    IndexingDisabled {
        key: String,
        value_type: MetadataValueType,
    },
    /// A wrapped [`SchemaError`]; its message is displayed unchanged.
    #[error(transparent)]
    Schema(#[from] SchemaError),
}
106
107impl ChromaError for SchemaBuilderError {
108 fn code(&self) -> ErrorCodes {
109 ErrorCodes::InvalidArgument
110 }
111}
112
113impl ChromaError for FilterValidationError {
114 fn code(&self) -> ErrorCodes {
115 match self {
116 FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
117 FilterValidationError::Schema(_) => ErrorCodes::Internal,
118 }
119 }
120}
121
// Value type names. These match the serialized field names of `ValueTypes`.
pub const STRING_VALUE_NAME: &str = "string";
pub const INT_VALUE_NAME: &str = "int";
pub const BOOL_VALUE_NAME: &str = "bool";
pub const FLOAT_VALUE_NAME: &str = "float";
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// Index names. These match the serialized field names of the `*ValueType` structs.
pub const FTS_INDEX_NAME: &str = "fts_index";
pub const VECTOR_INDEX_NAME: &str = "vector_index";
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

// Special keys used as entries in `Schema::keys` by the default schemas.
pub const DOCUMENT_KEY: &str = "#document";
pub const EMBEDDING_KEY: &str = "#embedding";
147
/// Lazily compiled pattern used by `Cmek::validate_pattern` for GCP resources.
///
/// Matches strings of the form
/// `projects/<..>/locations/<..>/keyRings/<..>/cryptoKeys/<..>`, anchored at
/// both ends, where each `<..>` segment is one or more characters (`.+`).
static CMEK_GCP_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^projects/.+/locations/.+/keyRings/.+/cryptoKeys/.+$")
        // The pattern is a constant, so a compile failure would be a programming error.
        .expect("The CMEK pattern for GCP should be valid")
});
153
/// A CMEK reference, tagged by provider.
///
/// NOTE(review): "CMEK" presumably stands for customer-managed encryption key — confirm.
/// Variant names are serialized in `snake_case`.
#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case")]
pub enum Cmek {
    /// GCP resource string, shared behind an `Arc`.
    Gcp(Arc<String>),
}
166
167impl Cmek {
168 pub fn gcp(resource: String) -> Self {
178 Cmek::Gcp(Arc::new(resource))
179 }
180
181 pub fn validate_pattern(&self) -> bool {
187 match self {
188 Cmek::Gcp(resource) => CMEK_GCP_RE.is_match(resource),
189 }
190 }
191}
192
193impl TryFrom<chroma_proto::Cmek> for Cmek {
194 type Error = ConversionError;
195
196 fn try_from(proto: chroma_proto::Cmek) -> Result<Self, Self::Error> {
197 match proto.provider {
198 Some(chroma_proto::cmek::Provider::Gcp(resource)) => Ok(Cmek::gcp(resource)),
199 None => Err(ConversionError::DecodeError),
200 }
201 }
202}
203
204impl From<Cmek> for chroma_proto::Cmek {
205 fn from(cmek: Cmek) -> Self {
206 match cmek {
207 Cmek::Gcp(resource) => chroma_proto::Cmek {
208 provider: Some(chroma_proto::cmek::Provider::Gcp((*resource).clone())),
209 },
210 }
211 }
212}
213
/// Index configuration for a collection: per-value-type defaults plus per-key overrides.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct Schema {
    /// Index settings keyed by value type, used as the schema-wide defaults.
    pub defaults: ValueTypes,
    /// Index settings for individual keys (e.g. `#document`, `#embedding`).
    /// Serialized as `keys`; `key_overrides` is also accepted when deserializing.
    #[serde(rename = "keys", alias = "key_overrides")]
    pub keys: HashMap<String, ValueTypes>,
    /// Optional CMEK; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(feature = "utoipa", schema(value_type = Option<Object>))]
    pub cmek: Option<Cmek>,
}
236
237impl Schema {
238 pub fn update(&mut self, configuration: &InternalUpdateCollectionConfiguration) {
239 if let Some(vector_update) = &configuration.vector_index {
240 if let Some(default_vector_index) = self.defaults_vector_index_mut() {
241 Self::apply_vector_index_update(default_vector_index, vector_update);
242 }
243 if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
244 Self::apply_vector_index_update(embedding_vector_index, vector_update);
245 }
246 }
247
248 if let Some(embedding_function) = configuration.embedding_function.as_ref() {
249 if let Some(default_vector_index) = self.defaults_vector_index_mut() {
250 default_vector_index.config.embedding_function = Some(embedding_function.clone());
251 }
252 if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
253 embedding_vector_index.config.embedding_function = Some(embedding_function.clone());
254 }
255 }
256 }
257
258 fn defaults_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
259 self.defaults
260 .float_list
261 .as_mut()
262 .and_then(|float_list| float_list.vector_index.as_mut())
263 }
264
265 fn embedding_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
266 self.keys
267 .get_mut(EMBEDDING_KEY)
268 .and_then(|value_types| value_types.float_list.as_mut())
269 .and_then(|float_list| float_list.vector_index.as_mut())
270 }
271
272 fn apply_vector_index_update(
273 vector_index: &mut VectorIndexType,
274 update: &UpdateVectorIndexConfiguration,
275 ) {
276 match update {
277 UpdateVectorIndexConfiguration::Hnsw(Some(hnsw_update)) => {
278 if let Some(hnsw_config) = vector_index.config.hnsw.as_mut() {
279 if let Some(ef_search) = hnsw_update.ef_search {
280 hnsw_config.ef_search = Some(ef_search);
281 }
282 if let Some(max_neighbors) = hnsw_update.max_neighbors {
283 hnsw_config.max_neighbors = Some(max_neighbors);
284 }
285 if let Some(num_threads) = hnsw_update.num_threads {
286 hnsw_config.num_threads = Some(num_threads);
287 }
288 if let Some(resize_factor) = hnsw_update.resize_factor {
289 hnsw_config.resize_factor = Some(resize_factor);
290 }
291 if let Some(sync_threshold) = hnsw_update.sync_threshold {
292 hnsw_config.sync_threshold = Some(sync_threshold);
293 }
294 if let Some(batch_size) = hnsw_update.batch_size {
295 hnsw_config.batch_size = Some(batch_size);
296 }
297 }
298 }
299 UpdateVectorIndexConfiguration::Hnsw(None) => {}
300 UpdateVectorIndexConfiguration::Spann(Some(spann_update)) => {
301 if let Some(spann_config) = vector_index.config.spann.as_mut() {
302 if let Some(search_nprobe) = spann_update.search_nprobe {
303 spann_config.search_nprobe = Some(search_nprobe);
304 }
305 if let Some(ef_search) = spann_update.ef_search {
306 spann_config.ef_search = Some(ef_search);
307 }
308 }
309 }
310 UpdateVectorIndexConfiguration::Spann(None) => {}
311 }
312 }
313
314 pub fn is_sparse_index_enabled(&self) -> bool {
315 let defaults_enabled = self
316 .defaults
317 .sparse_vector
318 .as_ref()
319 .and_then(|sv| sv.sparse_vector_index.as_ref())
320 .is_some_and(|idx| idx.enabled);
321 let key_enabled = self.keys.values().any(|value_types| {
322 value_types
323 .sparse_vector
324 .as_ref()
325 .and_then(|sv| sv.sparse_vector_index.as_ref())
326 .is_some_and(|idx| idx.enabled)
327 });
328 defaults_enabled || key_enabled
329 }
330}
331
332impl Default for Schema {
333 fn default() -> Self {
350 let defaults = ValueTypes {
352 string: Some(StringValueType {
353 fts_index: Some(FtsIndexType {
354 enabled: false,
355 config: FtsIndexConfig {},
356 }),
357 string_inverted_index: Some(StringInvertedIndexType {
358 enabled: true,
359 config: StringInvertedIndexConfig {},
360 }),
361 }),
362 float_list: Some(FloatListValueType {
363 vector_index: Some(VectorIndexType {
364 enabled: false,
365 config: VectorIndexConfig {
366 space: None, embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
368 source_key: None,
369 hnsw: None, spann: None, },
372 }),
373 }),
374 sparse_vector: Some(SparseVectorValueType {
375 sparse_vector_index: Some(SparseVectorIndexType {
376 enabled: false,
377 config: SparseVectorIndexConfig {
378 embedding_function: None,
379 source_key: None,
380 bm25: None,
381 },
382 }),
383 }),
384 int: Some(IntValueType {
385 int_inverted_index: Some(IntInvertedIndexType {
386 enabled: true,
387 config: IntInvertedIndexConfig {},
388 }),
389 }),
390 float: Some(FloatValueType {
391 float_inverted_index: Some(FloatInvertedIndexType {
392 enabled: true,
393 config: FloatInvertedIndexConfig {},
394 }),
395 }),
396 boolean: Some(BoolValueType {
397 bool_inverted_index: Some(BoolInvertedIndexType {
398 enabled: true,
399 config: BoolInvertedIndexConfig {},
400 }),
401 }),
402 };
403
404 let mut keys = HashMap::new();
406
407 keys.insert(
409 DOCUMENT_KEY.to_string(),
410 ValueTypes {
411 string: Some(StringValueType {
412 fts_index: Some(FtsIndexType {
413 enabled: true,
414 config: FtsIndexConfig {},
415 }),
416 string_inverted_index: Some(StringInvertedIndexType {
417 enabled: false,
418 config: StringInvertedIndexConfig {},
419 }),
420 }),
421 ..Default::default()
422 },
423 );
424
425 keys.insert(
427 EMBEDDING_KEY.to_string(),
428 ValueTypes {
429 float_list: Some(FloatListValueType {
430 vector_index: Some(VectorIndexType {
431 enabled: true,
432 config: VectorIndexConfig {
433 space: None, embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
435 source_key: Some(DOCUMENT_KEY.to_string()),
436 hnsw: None, spann: None, },
439 }),
440 }),
441 ..Default::default()
442 },
443 );
444
445 Schema {
446 defaults,
447 keys,
448 cmek: None,
449 }
450 }
451}
452
453pub fn is_embedding_function_default(
454 embedding_function: &Option<EmbeddingFunctionConfiguration>,
455) -> bool {
456 match embedding_function {
457 None => true,
458 Some(embedding_function) => embedding_function.is_default(),
459 }
460}
461
462pub fn is_space_default(space: &Option<Space>) -> bool {
464 match space {
465 None => true, Some(s) => *s == default_space(), }
468}
469
470pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
472 hnsw_config.ef_construction == Some(default_construction_ef())
473 && hnsw_config.ef_search == Some(default_search_ef())
474 && hnsw_config.max_neighbors == Some(default_m())
475 && hnsw_config.num_threads == Some(default_num_threads())
476 && hnsw_config.batch_size == Some(default_batch_size())
477 && hnsw_config.sync_threshold == Some(default_sync_threshold())
478 && hnsw_config.resize_factor == Some(default_resize_factor())
479}
480
/// Index settings grouped by value type.
///
/// Every field is optional and omitted from serialized output when `None`.
/// On deserialization each field also accepts a `#`-prefixed alias.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct ValueTypes {
    /// Settings for string values (`string`, alias `#string`).
    #[serde(
        rename = "string",
        alias = "#string",
        skip_serializing_if = "Option::is_none"
    )]
    pub string: Option<StringValueType>,

    /// Settings for float-list values (`float_list`, alias `#float_list`).
    #[serde(
        rename = "float_list",
        alias = "#float_list",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_list: Option<FloatListValueType>,

    /// Settings for sparse-vector values (`sparse_vector`, alias `#sparse_vector`).
    #[serde(
        rename = "sparse_vector",
        alias = "#sparse_vector",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector: Option<SparseVectorValueType>,

    /// Settings for integer values (`int`, alias `#int`).
    #[serde(
        rename = "int",
        alias = "#int",
        skip_serializing_if = "Option::is_none"
    )]
    pub int: Option<IntValueType>,

    /// Settings for float values (`float`, alias `#float`).
    #[serde(
        rename = "float",
        alias = "#float",
        skip_serializing_if = "Option::is_none"
    )]
    pub float: Option<FloatValueType>,

    /// Settings for boolean values. Serialized as `bool` (alias `#bool`),
    /// not under the Rust field name.
    #[serde(
        rename = "bool",
        alias = "#bool",
        skip_serializing_if = "Option::is_none"
    )]
    pub boolean: Option<BoolValueType>,
}
534
/// Indexes that can be configured for string values.
///
/// Each field is omitted from serialized output when `None` and accepts a
/// `$`-prefixed alias on deserialization.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringValueType {
    /// FTS index settings (`fts_index`, alias `$fts_index`).
    #[serde(
        rename = "fts_index",
        alias = "$fts_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub fts_index: Option<FtsIndexType>,

    /// String inverted index settings (`string_inverted_index`, alias `$string_inverted_index`).
    #[serde(
        rename = "string_inverted_index",
        alias = "$string_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub string_inverted_index: Option<StringInvertedIndexType>,
}
553
/// Indexes that can be configured for float-list values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatListValueType {
    /// Vector index settings (`vector_index`, alias `$vector_index`);
    /// omitted from serialized output when `None`.
    #[serde(
        rename = "vector_index",
        alias = "$vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub vector_index: Option<VectorIndexType>,
}
565
/// Indexes that can be configured for sparse-vector values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorValueType {
    /// Sparse vector index settings (`sparse_vector_index`, alias
    /// `$sparse_vector_index`); omitted from serialized output when `None`.
    #[serde(
        rename = "sparse_vector_index",
        alias = "$sparse_vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector_index: Option<SparseVectorIndexType>,
}
577
/// Indexes that can be configured for integer values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntValueType {
    /// Integer inverted index settings (`int_inverted_index`, alias
    /// `$int_inverted_index`); omitted from serialized output when `None`.
    #[serde(
        rename = "int_inverted_index",
        alias = "$int_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub int_inverted_index: Option<IntInvertedIndexType>,
}
590
/// Indexes that can be configured for float values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatValueType {
    /// Float inverted index settings (`float_inverted_index`, alias
    /// `$float_inverted_index`); omitted from serialized output when `None`.
    #[serde(
        rename = "float_inverted_index",
        alias = "$float_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_inverted_index: Option<FloatInvertedIndexType>,
}
602
/// Indexes that can be configured for boolean values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolValueType {
    /// Boolean inverted index settings (`bool_inverted_index`, alias
    /// `$bool_inverted_index`); omitted from serialized output when `None`.
    #[serde(
        rename = "bool_inverted_index",
        alias = "$bool_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub bool_inverted_index: Option<BoolInvertedIndexType>,
}
614
/// FTS index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FtsIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: FtsIndexConfig,
}
622
/// Vector index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct VectorIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration (space, embedding function, source key, HNSW/SPANN).
    pub config: VectorIndexConfig,
}
629
/// Sparse vector index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration (embedding function, source key, bm25 flag).
    pub config: SparseVectorIndexConfig,
}
636
/// String inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: StringInvertedIndexConfig,
}
643
/// Integer inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: IntInvertedIndexConfig,
}
650
/// Float inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: FloatInvertedIndexConfig,
}
657
/// Boolean inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: BoolInvertedIndexConfig,
}
664
665impl Schema {
666 pub fn new_default(default_knn_index: KnnIndex) -> Self {
668 let vector_config = VectorIndexType {
670 enabled: false,
671 config: VectorIndexConfig {
672 space: Some(default_space()),
673 embedding_function: None,
674 source_key: None,
675 hnsw: match default_knn_index {
676 KnnIndex::Hnsw => Some(HnswIndexConfig {
677 ef_construction: Some(default_construction_ef()),
678 max_neighbors: Some(default_m()),
679 ef_search: Some(default_search_ef()),
680 num_threads: Some(default_num_threads()),
681 batch_size: Some(default_batch_size()),
682 sync_threshold: Some(default_sync_threshold()),
683 resize_factor: Some(default_resize_factor()),
684 }),
685 KnnIndex::Spann => None,
686 },
687 spann: match default_knn_index {
688 KnnIndex::Hnsw => None,
689 KnnIndex::Spann => Some(SpannIndexConfig {
690 search_nprobe: Some(default_search_nprobe()),
691 search_rng_factor: Some(default_search_rng_factor()),
692 search_rng_epsilon: Some(default_search_rng_epsilon()),
693 nreplica_count: Some(default_nreplica_count()),
694 write_rng_factor: Some(default_write_rng_factor()),
695 write_rng_epsilon: Some(default_write_rng_epsilon()),
696 split_threshold: Some(default_split_threshold()),
697 num_samples_kmeans: Some(default_num_samples_kmeans()),
698 initial_lambda: Some(default_initial_lambda()),
699 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
700 merge_threshold: Some(default_merge_threshold()),
701 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
702 write_nprobe: Some(default_write_nprobe()),
703 ef_construction: Some(default_construction_ef_spann()),
704 ef_search: Some(default_search_ef_spann()),
705 max_neighbors: Some(default_m_spann()),
706 }),
707 },
708 },
709 };
710
711 let defaults = ValueTypes {
713 string: Some(StringValueType {
714 string_inverted_index: Some(StringInvertedIndexType {
715 enabled: true,
716 config: StringInvertedIndexConfig {},
717 }),
718 fts_index: Some(FtsIndexType {
719 enabled: false,
720 config: FtsIndexConfig {},
721 }),
722 }),
723 float: Some(FloatValueType {
724 float_inverted_index: Some(FloatInvertedIndexType {
725 enabled: true,
726 config: FloatInvertedIndexConfig {},
727 }),
728 }),
729 int: Some(IntValueType {
730 int_inverted_index: Some(IntInvertedIndexType {
731 enabled: true,
732 config: IntInvertedIndexConfig {},
733 }),
734 }),
735 boolean: Some(BoolValueType {
736 bool_inverted_index: Some(BoolInvertedIndexType {
737 enabled: true,
738 config: BoolInvertedIndexConfig {},
739 }),
740 }),
741 float_list: Some(FloatListValueType {
742 vector_index: Some(vector_config),
743 }),
744 sparse_vector: Some(SparseVectorValueType {
745 sparse_vector_index: Some(SparseVectorIndexType {
746 enabled: false,
747 config: SparseVectorIndexConfig {
748 embedding_function: Some(EmbeddingFunctionConfiguration::Unknown),
749 source_key: None,
750 bm25: Some(false),
751 },
752 }),
753 }),
754 };
755
756 let mut keys = HashMap::new();
758
759 let embedding_defaults = ValueTypes {
761 float_list: Some(FloatListValueType {
762 vector_index: Some(VectorIndexType {
763 enabled: true,
764 config: VectorIndexConfig {
765 space: Some(default_space()),
766 embedding_function: None,
767 source_key: Some(DOCUMENT_KEY.to_string()),
768 hnsw: match default_knn_index {
769 KnnIndex::Hnsw => Some(HnswIndexConfig {
770 ef_construction: Some(default_construction_ef()),
771 max_neighbors: Some(default_m()),
772 ef_search: Some(default_search_ef()),
773 num_threads: Some(default_num_threads()),
774 batch_size: Some(default_batch_size()),
775 sync_threshold: Some(default_sync_threshold()),
776 resize_factor: Some(default_resize_factor()),
777 }),
778 KnnIndex::Spann => None,
779 },
780 spann: match default_knn_index {
781 KnnIndex::Hnsw => None,
782 KnnIndex::Spann => Some(SpannIndexConfig {
783 search_nprobe: Some(default_search_nprobe()),
784 search_rng_factor: Some(default_search_rng_factor()),
785 search_rng_epsilon: Some(default_search_rng_epsilon()),
786 nreplica_count: Some(default_nreplica_count()),
787 write_rng_factor: Some(default_write_rng_factor()),
788 write_rng_epsilon: Some(default_write_rng_epsilon()),
789 split_threshold: Some(default_split_threshold()),
790 num_samples_kmeans: Some(default_num_samples_kmeans()),
791 initial_lambda: Some(default_initial_lambda()),
792 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
793 merge_threshold: Some(default_merge_threshold()),
794 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
795 write_nprobe: Some(default_write_nprobe()),
796 ef_construction: Some(default_construction_ef_spann()),
797 ef_search: Some(default_search_ef_spann()),
798 max_neighbors: Some(default_m_spann()),
799 }),
800 },
801 },
802 }),
803 }),
804 ..Default::default()
805 };
806 keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
807
808 let document_defaults = ValueTypes {
810 string: Some(StringValueType {
811 fts_index: Some(FtsIndexType {
812 enabled: true,
813 config: FtsIndexConfig {},
814 }),
815 string_inverted_index: Some(StringInvertedIndexType {
816 enabled: false,
817 config: StringInvertedIndexConfig {},
818 }),
819 }),
820 ..Default::default()
821 };
822 keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
823
824 Schema {
825 defaults,
826 keys,
827 cmek: None,
828 }
829 }
830
831 pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
832 let to_internal = |vector_index: &VectorIndexType| {
833 let space = vector_index.config.space.clone();
834 vector_index
835 .config
836 .spann
837 .clone()
838 .map(|config| (space.as_ref(), &config).into())
839 };
840
841 self.keys
842 .get(EMBEDDING_KEY)
843 .and_then(|value_types| value_types.float_list.as_ref())
844 .and_then(|float_list| float_list.vector_index.as_ref())
845 .and_then(to_internal)
846 .or_else(|| {
847 self.defaults
848 .float_list
849 .as_ref()
850 .and_then(|float_list| float_list.vector_index.as_ref())
851 .and_then(to_internal)
852 })
853 }
854
855 pub fn get_internal_hnsw_config(&self) -> Option<InternalHnswConfiguration> {
856 let to_internal = |vector_index: &VectorIndexType| {
857 if vector_index.config.spann.is_some() {
858 return None;
859 }
860 let space = vector_index.config.space.as_ref();
861 let hnsw_config = vector_index.config.hnsw.as_ref();
862 Some((space, hnsw_config).into())
863 };
864
865 self.keys
866 .get(EMBEDDING_KEY)
867 .and_then(|value_types| value_types.float_list.as_ref())
868 .and_then(|float_list| float_list.vector_index.as_ref())
869 .and_then(to_internal)
870 .or_else(|| {
871 self.defaults
872 .float_list
873 .as_ref()
874 .and_then(|float_list| float_list.vector_index.as_ref())
875 .and_then(to_internal)
876 })
877 }
878
879 pub fn get_internal_hnsw_config_with_legacy_fallback(
880 &self,
881 segment: &Segment,
882 ) -> Result<Option<InternalHnswConfiguration>, HnswParametersFromSegmentError> {
883 if let Some(config) = self.get_internal_hnsw_config() {
884 let config_from_metadata =
885 InternalHnswConfiguration::from_legacy_segment_metadata(&segment.metadata)?;
886
887 if config == InternalHnswConfiguration::default() && config != config_from_metadata {
888 return Ok(Some(config_from_metadata));
889 }
890
891 return Ok(Some(config));
892 }
893
894 Ok(None)
895 }
896
897 pub fn reconcile_with_defaults(
904 user_schema: Option<&Schema>,
905 knn_index: KnnIndex,
906 ) -> Result<Self, SchemaError> {
907 let default_schema = Schema::new_default(knn_index);
908
909 match user_schema {
910 Some(user) => {
911 let merged_defaults =
913 Self::merge_value_types(&default_schema.defaults, &user.defaults, knn_index)?;
914
915 let mut merged_keys = default_schema.keys.clone();
917 for (key, user_value_types) in &user.keys {
918 if let Some(default_value_types) = merged_keys.get(key) {
919 let merged_value_types = Self::merge_value_types(
921 default_value_types,
922 user_value_types,
923 knn_index,
924 )?;
925 merged_keys.insert(key.clone(), merged_value_types);
926 } else {
927 merged_keys.insert(key.clone(), user_value_types.clone());
929 }
930 }
931
932 Ok(Schema {
933 defaults: merged_defaults,
934 keys: merged_keys,
935 cmek: user.cmek.clone().or(default_schema.cmek.clone()),
936 })
937 }
938 None => Ok(default_schema),
939 }
940 }
941
942 pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
944 if self.defaults != other.defaults {
945 return Err(SchemaError::DefaultsMismatch);
946 }
947
948 let mut keys = self.keys.clone();
949
950 for (key, other_value_types) in &other.keys {
951 if let Some(existing) = keys.get(key).cloned() {
952 let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
953 keys.insert(key.clone(), merged);
954 } else {
955 keys.insert(key.clone(), other_value_types.clone());
956 }
957 }
958
959 Ok(Schema {
960 defaults: self.defaults.clone(),
961 keys,
962 cmek: other.cmek.clone().or(self.cmek.clone()),
963 })
964 }
965
966 fn merge_override_value_types(
967 key: &str,
968 left: &ValueTypes,
969 right: &ValueTypes,
970 ) -> Result<ValueTypes, SchemaError> {
971 Ok(ValueTypes {
972 string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
973 float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
974 int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
975 boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
976 float_list: Self::merge_float_list_override(
977 key,
978 left.float_list.as_ref(),
979 right.float_list.as_ref(),
980 )?,
981 sparse_vector: Self::merge_sparse_vector_override(
982 key,
983 left.sparse_vector.as_ref(),
984 right.sparse_vector.as_ref(),
985 )?,
986 })
987 }
988
989 fn merge_string_override(
990 key: &str,
991 left: Option<&StringValueType>,
992 right: Option<&StringValueType>,
993 ) -> Result<Option<StringValueType>, SchemaError> {
994 match (left, right) {
995 (Some(l), Some(r)) => Ok(Some(StringValueType {
996 string_inverted_index: Self::merge_index_or_error(
997 l.string_inverted_index.as_ref(),
998 r.string_inverted_index.as_ref(),
999 &format!("key '{key}' string.string_inverted_index"),
1000 )?,
1001 fts_index: Self::merge_index_or_error(
1002 l.fts_index.as_ref(),
1003 r.fts_index.as_ref(),
1004 &format!("key '{key}' string.fts_index"),
1005 )?,
1006 })),
1007 (Some(l), None) => Ok(Some(l.clone())),
1008 (None, Some(r)) => Ok(Some(r.clone())),
1009 (None, None) => Ok(None),
1010 }
1011 }
1012
1013 fn merge_float_override(
1014 key: &str,
1015 left: Option<&FloatValueType>,
1016 right: Option<&FloatValueType>,
1017 ) -> Result<Option<FloatValueType>, SchemaError> {
1018 match (left, right) {
1019 (Some(l), Some(r)) => Ok(Some(FloatValueType {
1020 float_inverted_index: Self::merge_index_or_error(
1021 l.float_inverted_index.as_ref(),
1022 r.float_inverted_index.as_ref(),
1023 &format!("key '{key}' float.float_inverted_index"),
1024 )?,
1025 })),
1026 (Some(l), None) => Ok(Some(l.clone())),
1027 (None, Some(r)) => Ok(Some(r.clone())),
1028 (None, None) => Ok(None),
1029 }
1030 }
1031
1032 fn merge_int_override(
1033 key: &str,
1034 left: Option<&IntValueType>,
1035 right: Option<&IntValueType>,
1036 ) -> Result<Option<IntValueType>, SchemaError> {
1037 match (left, right) {
1038 (Some(l), Some(r)) => Ok(Some(IntValueType {
1039 int_inverted_index: Self::merge_index_or_error(
1040 l.int_inverted_index.as_ref(),
1041 r.int_inverted_index.as_ref(),
1042 &format!("key '{key}' int.int_inverted_index"),
1043 )?,
1044 })),
1045 (Some(l), None) => Ok(Some(l.clone())),
1046 (None, Some(r)) => Ok(Some(r.clone())),
1047 (None, None) => Ok(None),
1048 }
1049 }
1050
1051 fn merge_bool_override(
1052 key: &str,
1053 left: Option<&BoolValueType>,
1054 right: Option<&BoolValueType>,
1055 ) -> Result<Option<BoolValueType>, SchemaError> {
1056 match (left, right) {
1057 (Some(l), Some(r)) => Ok(Some(BoolValueType {
1058 bool_inverted_index: Self::merge_index_or_error(
1059 l.bool_inverted_index.as_ref(),
1060 r.bool_inverted_index.as_ref(),
1061 &format!("key '{key}' bool.bool_inverted_index"),
1062 )?,
1063 })),
1064 (Some(l), None) => Ok(Some(l.clone())),
1065 (None, Some(r)) => Ok(Some(r.clone())),
1066 (None, None) => Ok(None),
1067 }
1068 }
1069
1070 fn merge_float_list_override(
1071 key: &str,
1072 left: Option<&FloatListValueType>,
1073 right: Option<&FloatListValueType>,
1074 ) -> Result<Option<FloatListValueType>, SchemaError> {
1075 match (left, right) {
1076 (Some(l), Some(r)) => Ok(Some(FloatListValueType {
1077 vector_index: Self::merge_index_or_error(
1078 l.vector_index.as_ref(),
1079 r.vector_index.as_ref(),
1080 &format!("key '{key}' float_list.vector_index"),
1081 )?,
1082 })),
1083 (Some(l), None) => Ok(Some(l.clone())),
1084 (None, Some(r)) => Ok(Some(r.clone())),
1085 (None, None) => Ok(None),
1086 }
1087 }
1088
1089 fn merge_sparse_vector_override(
1090 key: &str,
1091 left: Option<&SparseVectorValueType>,
1092 right: Option<&SparseVectorValueType>,
1093 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1094 match (left, right) {
1095 (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
1096 sparse_vector_index: Self::merge_index_or_error(
1097 l.sparse_vector_index.as_ref(),
1098 r.sparse_vector_index.as_ref(),
1099 &format!("key '{key}' sparse_vector.sparse_vector_index"),
1100 )?,
1101 })),
1102 (Some(l), None) => Ok(Some(l.clone())),
1103 (None, Some(r)) => Ok(Some(r.clone())),
1104 (None, None) => Ok(None),
1105 }
1106 }
1107
1108 fn merge_index_or_error<T: Clone + PartialEq>(
1109 left: Option<&T>,
1110 right: Option<&T>,
1111 context: &str,
1112 ) -> Result<Option<T>, SchemaError> {
1113 match (left, right) {
1114 (Some(l), Some(r)) => {
1115 if l == r {
1116 Ok(Some(l.clone()))
1117 } else {
1118 Err(SchemaError::ConfigurationConflict {
1119 context: context.to_string(),
1120 })
1121 }
1122 }
1123 (Some(l), None) => Ok(Some(l.clone())),
1124 (None, Some(r)) => Ok(Some(r.clone())),
1125 (None, None) => Ok(None),
1126 }
1127 }
1128
1129 fn merge_value_types(
1132 default: &ValueTypes,
1133 user: &ValueTypes,
1134 knn_index: KnnIndex,
1135 ) -> Result<ValueTypes, SchemaError> {
1136 let float_list = Self::merge_float_list_type(
1138 default.float_list.as_ref(),
1139 user.float_list.as_ref(),
1140 knn_index,
1141 );
1142
1143 if let Some(ref fl) = float_list {
1145 Self::validate_float_list_value_type(fl)?;
1146 }
1147
1148 Ok(ValueTypes {
1149 string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
1150 float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
1151 int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
1152 boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
1153 float_list,
1154 sparse_vector: Self::merge_sparse_vector_type(
1155 default.sparse_vector.as_ref(),
1156 user.sparse_vector.as_ref(),
1157 )?,
1158 })
1159 }
1160
1161 fn merge_string_type(
1163 default: Option<&StringValueType>,
1164 user: Option<&StringValueType>,
1165 ) -> Result<Option<StringValueType>, SchemaError> {
1166 match (default, user) {
1167 (Some(default), Some(user)) => Ok(Some(StringValueType {
1168 string_inverted_index: Self::merge_string_inverted_index_type(
1169 default.string_inverted_index.as_ref(),
1170 user.string_inverted_index.as_ref(),
1171 )?,
1172 fts_index: Self::merge_fts_index_type(
1173 default.fts_index.as_ref(),
1174 user.fts_index.as_ref(),
1175 )?,
1176 })),
1177 (Some(default), None) => Ok(Some(default.clone())),
1178 (None, Some(user)) => Ok(Some(user.clone())),
1179 (None, None) => Ok(None),
1180 }
1181 }
1182
1183 fn merge_float_type(
1185 default: Option<&FloatValueType>,
1186 user: Option<&FloatValueType>,
1187 ) -> Result<Option<FloatValueType>, SchemaError> {
1188 match (default, user) {
1189 (Some(default), Some(user)) => Ok(Some(FloatValueType {
1190 float_inverted_index: Self::merge_float_inverted_index_type(
1191 default.float_inverted_index.as_ref(),
1192 user.float_inverted_index.as_ref(),
1193 )?,
1194 })),
1195 (Some(default), None) => Ok(Some(default.clone())),
1196 (None, Some(user)) => Ok(Some(user.clone())),
1197 (None, None) => Ok(None),
1198 }
1199 }
1200
1201 fn merge_int_type(
1203 default: Option<&IntValueType>,
1204 user: Option<&IntValueType>,
1205 ) -> Result<Option<IntValueType>, SchemaError> {
1206 match (default, user) {
1207 (Some(default), Some(user)) => Ok(Some(IntValueType {
1208 int_inverted_index: Self::merge_int_inverted_index_type(
1209 default.int_inverted_index.as_ref(),
1210 user.int_inverted_index.as_ref(),
1211 )?,
1212 })),
1213 (Some(default), None) => Ok(Some(default.clone())),
1214 (None, Some(user)) => Ok(Some(user.clone())),
1215 (None, None) => Ok(None),
1216 }
1217 }
1218
1219 fn merge_bool_type(
1221 default: Option<&BoolValueType>,
1222 user: Option<&BoolValueType>,
1223 ) -> Result<Option<BoolValueType>, SchemaError> {
1224 match (default, user) {
1225 (Some(default), Some(user)) => Ok(Some(BoolValueType {
1226 bool_inverted_index: Self::merge_bool_inverted_index_type(
1227 default.bool_inverted_index.as_ref(),
1228 user.bool_inverted_index.as_ref(),
1229 )?,
1230 })),
1231 (Some(default), None) => Ok(Some(default.clone())),
1232 (None, Some(user)) => Ok(Some(user.clone())),
1233 (None, None) => Ok(None),
1234 }
1235 }
1236
1237 fn merge_float_list_type(
1239 default: Option<&FloatListValueType>,
1240 user: Option<&FloatListValueType>,
1241 knn_index: KnnIndex,
1242 ) -> Option<FloatListValueType> {
1243 match (default, user) {
1244 (Some(default), Some(user)) => Some(FloatListValueType {
1245 vector_index: Self::merge_vector_index_type(
1246 default.vector_index.as_ref(),
1247 user.vector_index.as_ref(),
1248 knn_index,
1249 ),
1250 }),
1251 (Some(default), None) => Some(default.clone()),
1252 (None, Some(user)) => Some(user.clone()),
1253 (None, None) => None,
1254 }
1255 }
1256
1257 fn merge_sparse_vector_type(
1259 default: Option<&SparseVectorValueType>,
1260 user: Option<&SparseVectorValueType>,
1261 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1262 match (default, user) {
1263 (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
1264 sparse_vector_index: Self::merge_sparse_vector_index_type(
1265 default.sparse_vector_index.as_ref(),
1266 user.sparse_vector_index.as_ref(),
1267 )?,
1268 })),
1269 (Some(default), None) => Ok(Some(default.clone())),
1270 (None, Some(user)) => Ok(Some(user.clone())),
1271 (None, None) => Ok(None),
1272 }
1273 }
1274
1275 fn merge_string_inverted_index_type(
1277 default: Option<&StringInvertedIndexType>,
1278 user: Option<&StringInvertedIndexType>,
1279 ) -> Result<Option<StringInvertedIndexType>, SchemaError> {
1280 match (default, user) {
1281 (Some(_default), Some(user)) => {
1282 Ok(Some(StringInvertedIndexType {
1283 enabled: user.enabled, config: user.config.clone(), }))
1286 }
1287 (Some(default), None) => Ok(Some(default.clone())),
1288 (None, Some(user)) => Ok(Some(user.clone())),
1289 (None, None) => Ok(None),
1290 }
1291 }
1292
1293 fn merge_fts_index_type(
1294 default: Option<&FtsIndexType>,
1295 user: Option<&FtsIndexType>,
1296 ) -> Result<Option<FtsIndexType>, SchemaError> {
1297 match (default, user) {
1298 (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
1299 enabled: user.enabled,
1300 config: user.config.clone(),
1301 })),
1302 (Some(default), None) => Ok(Some(default.clone())),
1303 (None, Some(user)) => Ok(Some(user.clone())),
1304 (None, None) => Ok(None),
1305 }
1306 }
1307
1308 fn merge_float_inverted_index_type(
1309 default: Option<&FloatInvertedIndexType>,
1310 user: Option<&FloatInvertedIndexType>,
1311 ) -> Result<Option<FloatInvertedIndexType>, SchemaError> {
1312 match (default, user) {
1313 (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
1314 enabled: user.enabled,
1315 config: user.config.clone(),
1316 })),
1317 (Some(default), None) => Ok(Some(default.clone())),
1318 (None, Some(user)) => Ok(Some(user.clone())),
1319 (None, None) => Ok(None),
1320 }
1321 }
1322
1323 fn merge_int_inverted_index_type(
1324 default: Option<&IntInvertedIndexType>,
1325 user: Option<&IntInvertedIndexType>,
1326 ) -> Result<Option<IntInvertedIndexType>, SchemaError> {
1327 match (default, user) {
1328 (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
1329 enabled: user.enabled,
1330 config: user.config.clone(),
1331 })),
1332 (Some(default), None) => Ok(Some(default.clone())),
1333 (None, Some(user)) => Ok(Some(user.clone())),
1334 (None, None) => Ok(None),
1335 }
1336 }
1337
1338 fn merge_bool_inverted_index_type(
1339 default: Option<&BoolInvertedIndexType>,
1340 user: Option<&BoolInvertedIndexType>,
1341 ) -> Result<Option<BoolInvertedIndexType>, SchemaError> {
1342 match (default, user) {
1343 (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
1344 enabled: user.enabled,
1345 config: user.config.clone(),
1346 })),
1347 (Some(default), None) => Ok(Some(default.clone())),
1348 (None, Some(user)) => Ok(Some(user.clone())),
1349 (None, None) => Ok(None),
1350 }
1351 }
1352
1353 fn merge_vector_index_type(
1354 default: Option<&VectorIndexType>,
1355 user: Option<&VectorIndexType>,
1356 knn_index: KnnIndex,
1357 ) -> Option<VectorIndexType> {
1358 match (default, user) {
1359 (Some(default), Some(user)) => Some(VectorIndexType {
1360 enabled: user.enabled,
1361 config: Self::merge_vector_index_config(&default.config, &user.config, knn_index),
1362 }),
1363 (Some(default), None) => Some(default.clone()),
1364 (None, Some(user)) => Some(user.clone()),
1365 (None, None) => None,
1366 }
1367 }
1368
1369 fn merge_sparse_vector_index_type(
1370 default: Option<&SparseVectorIndexType>,
1371 user: Option<&SparseVectorIndexType>,
1372 ) -> Result<Option<SparseVectorIndexType>, SchemaError> {
1373 match (default, user) {
1374 (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
1375 enabled: user.enabled,
1376 config: Self::merge_sparse_vector_index_config(&default.config, &user.config),
1377 })),
1378 (Some(default), None) => Ok(Some(default.clone())),
1379 (None, Some(user)) => Ok(Some(user.clone())),
1380 (None, None) => Ok(None),
1381 }
1382 }
1383
1384 fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), SchemaError> {
1387 if let Some(vector_index) = &float_list.vector_index {
1388 if let Some(hnsw) = &vector_index.config.hnsw {
1389 hnsw.validate().map_err(SchemaError::InvalidHnswConfig)?;
1390 }
1391 if let Some(spann) = &vector_index.config.spann {
1392 spann.validate().map_err(SchemaError::InvalidSpannConfig)?;
1393 }
1394 }
1395 Ok(())
1396 }
1397
1398 fn merge_vector_index_config(
1400 default: &VectorIndexConfig,
1401 user: &VectorIndexConfig,
1402 knn_index: KnnIndex,
1403 ) -> VectorIndexConfig {
1404 match knn_index {
1405 KnnIndex::Hnsw => VectorIndexConfig {
1406 space: user.space.clone().or(default.space.clone()),
1407 embedding_function: user
1408 .embedding_function
1409 .clone()
1410 .or(default.embedding_function.clone()),
1411 source_key: user.source_key.clone().or(default.source_key.clone()),
1412 hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1413 spann: None,
1414 },
1415 KnnIndex::Spann => VectorIndexConfig {
1416 space: user.space.clone().or(default.space.clone()),
1417 embedding_function: user
1418 .embedding_function
1419 .clone()
1420 .or(default.embedding_function.clone()),
1421 source_key: user.source_key.clone().or(default.source_key.clone()),
1422 hnsw: None,
1423 spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
1424 },
1425 }
1426 }
1427
1428 fn merge_sparse_vector_index_config(
1430 default: &SparseVectorIndexConfig,
1431 user: &SparseVectorIndexConfig,
1432 ) -> SparseVectorIndexConfig {
1433 SparseVectorIndexConfig {
1434 embedding_function: user
1435 .embedding_function
1436 .clone()
1437 .or(default.embedding_function.clone()),
1438 source_key: user.source_key.clone().or(default.source_key.clone()),
1439 bm25: user.bm25.or(default.bm25),
1440 }
1441 }
1442
1443 fn merge_hnsw_configs(
1445 default_hnsw: Option<&HnswIndexConfig>,
1446 user_hnsw: Option<&HnswIndexConfig>,
1447 ) -> Option<HnswIndexConfig> {
1448 match (default_hnsw, user_hnsw) {
1449 (Some(default), Some(user)) => Some(HnswIndexConfig {
1450 ef_construction: user.ef_construction.or(default.ef_construction),
1451 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1452 ef_search: user.ef_search.or(default.ef_search),
1453 num_threads: user.num_threads.or(default.num_threads),
1454 batch_size: user.batch_size.or(default.batch_size),
1455 sync_threshold: user.sync_threshold.or(default.sync_threshold),
1456 resize_factor: user.resize_factor.or(default.resize_factor),
1457 }),
1458 (Some(default), None) => Some(default.clone()),
1459 (None, Some(user)) => Some(user.clone()),
1460 (None, None) => None,
1461 }
1462 }
1463
1464 fn merge_spann_configs(
1466 default_spann: Option<&SpannIndexConfig>,
1467 user_spann: Option<&SpannIndexConfig>,
1468 ) -> Option<SpannIndexConfig> {
1469 match (default_spann, user_spann) {
1470 (Some(default), Some(user)) => Some(SpannIndexConfig {
1471 search_nprobe: user.search_nprobe.or(default.search_nprobe),
1472 search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1473 search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1474 nreplica_count: user.nreplica_count.or(default.nreplica_count),
1475 write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1476 write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1477 split_threshold: user.split_threshold.or(default.split_threshold),
1478 num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1479 initial_lambda: user.initial_lambda.or(default.initial_lambda),
1480 reassign_neighbor_count: user
1481 .reassign_neighbor_count
1482 .or(default.reassign_neighbor_count),
1483 merge_threshold: user.merge_threshold.or(default.merge_threshold),
1484 num_centers_to_merge_to: user
1485 .num_centers_to_merge_to
1486 .or(default.num_centers_to_merge_to),
1487 write_nprobe: user.write_nprobe.or(default.write_nprobe),
1488 ef_construction: user.ef_construction.or(default.ef_construction),
1489 ef_search: user.ef_search.or(default.ef_search),
1490 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1491 }),
1492 (Some(default), None) => Some(default.clone()),
1493 (None, Some(user)) => Some(user.clone()),
1494 (None, None) => None,
1495 }
1496 }
1497
1498 pub fn reconcile_with_collection_config(
1506 schema: &Schema,
1507 collection_config: &InternalCollectionConfiguration,
1508 default_knn_index: KnnIndex,
1509 ) -> Result<Schema, SchemaError> {
1510 if collection_config.is_default() {
1512 if schema.is_default() {
1513 let mut new_schema = Schema::new_default(default_knn_index);
1516
1517 if collection_config.embedding_function.is_some() {
1518 if let Some(float_list) = &mut new_schema.defaults.float_list {
1519 if let Some(vector_index) = &mut float_list.vector_index {
1520 vector_index.config.embedding_function =
1521 collection_config.embedding_function.clone();
1522 }
1523 }
1524 if let Some(embedding_types) = new_schema.keys.get_mut(EMBEDDING_KEY) {
1525 if let Some(float_list) = &mut embedding_types.float_list {
1526 if let Some(vector_index) = &mut float_list.vector_index {
1527 vector_index.config.embedding_function =
1528 collection_config.embedding_function.clone();
1529 }
1530 }
1531 }
1532 }
1533 return Ok(new_schema);
1534 } else {
1535 return Ok(schema.clone());
1537 }
1538 }
1539
1540 Self::try_from(collection_config)
1543 }
1544
1545 pub fn reconcile_schema_and_config(
1546 schema: Option<&Schema>,
1547 configuration: Option<&InternalCollectionConfiguration>,
1548 knn_index: KnnIndex,
1549 ) -> Result<Schema, SchemaError> {
1550 if let (Some(user_schema), Some(config)) = (schema, configuration) {
1552 if !user_schema.is_default() && !config.is_default() {
1553 return Err(SchemaError::ConfigAndSchemaConflict);
1554 }
1555 }
1556
1557 let reconciled_schema = Self::reconcile_with_defaults(schema, knn_index)?;
1558 if let Some(config) = configuration {
1559 Self::reconcile_with_collection_config(&reconciled_schema, config, knn_index)
1560 } else {
1561 Ok(reconciled_schema)
1562 }
1563 }
1564
1565 pub fn default_with_embedding_function(
1566 embedding_function: EmbeddingFunctionConfiguration,
1567 ) -> Schema {
1568 let mut schema = Schema::new_default(KnnIndex::Spann);
1569 if let Some(float_list) = &mut schema.defaults.float_list {
1570 if let Some(vector_index) = &mut float_list.vector_index {
1571 vector_index.config.embedding_function = Some(embedding_function.clone());
1572 }
1573 }
1574 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1575 if let Some(float_list) = &mut embedding_types.float_list {
1576 if let Some(vector_index) = &mut float_list.vector_index {
1577 vector_index.config.embedding_function = Some(embedding_function);
1578 }
1579 }
1580 }
1581 schema
1582 }
1583
1584 pub fn is_default(&self) -> bool {
1586 if !Self::is_value_types_default(&self.defaults) {
1588 return false;
1589 }
1590
1591 for key in self.keys.keys() {
1592 if key != EMBEDDING_KEY && key != DOCUMENT_KEY {
1593 return false;
1594 }
1595 }
1596
1597 if let Some(embedding_value) = self.keys.get(EMBEDDING_KEY) {
1599 if !Self::is_embedding_value_types_default(embedding_value) {
1600 return false;
1601 }
1602 }
1603
1604 if let Some(document_value) = self.keys.get(DOCUMENT_KEY) {
1606 if !Self::is_document_value_types_default(document_value) {
1607 return false;
1608 }
1609 }
1610
1611 if self.cmek.is_some() {
1613 return false;
1614 }
1615
1616 true
1617 }
1618
1619 fn is_value_types_default(value_types: &ValueTypes) -> bool {
1621 if let Some(string) = &value_types.string {
1623 if let Some(string_inverted) = &string.string_inverted_index {
1624 if !string_inverted.enabled {
1625 return false;
1626 }
1627 }
1629 if let Some(fts) = &string.fts_index {
1630 if fts.enabled {
1631 return false;
1632 }
1633 }
1635 }
1636
1637 if let Some(float) = &value_types.float {
1639 if let Some(float_inverted) = &float.float_inverted_index {
1640 if !float_inverted.enabled {
1641 return false;
1642 }
1643 }
1645 }
1646
1647 if let Some(int) = &value_types.int {
1649 if let Some(int_inverted) = &int.int_inverted_index {
1650 if !int_inverted.enabled {
1651 return false;
1652 }
1653 }
1655 }
1656
1657 if let Some(boolean) = &value_types.boolean {
1659 if let Some(bool_inverted) = &boolean.bool_inverted_index {
1660 if !bool_inverted.enabled {
1661 return false;
1662 }
1663 }
1665 }
1666
1667 if let Some(float_list) = &value_types.float_list {
1669 if let Some(vector_index) = &float_list.vector_index {
1670 if vector_index.enabled {
1671 return false;
1672 }
1673 if !is_embedding_function_default(&vector_index.config.embedding_function) {
1674 return false;
1675 }
1676 if !is_space_default(&vector_index.config.space) {
1677 return false;
1678 }
1679 if vector_index.config.source_key.is_some() {
1681 return false;
1682 }
1683 match (&vector_index.config.hnsw, &vector_index.config.spann) {
1686 (Some(hnsw_config), None) => {
1687 if !hnsw_config.is_default() {
1688 return false;
1689 }
1690 }
1691 (None, Some(spann_config)) => {
1692 if !spann_config.is_default() {
1693 return false;
1694 }
1695 }
1696 (Some(_), Some(_)) => return false, (None, None) => {}
1698 }
1699 }
1700 }
1701
1702 if let Some(sparse_vector) = &value_types.sparse_vector {
1704 if let Some(sparse_index) = &sparse_vector.sparse_vector_index {
1705 if sparse_index.enabled {
1706 return false;
1707 }
1708 if !is_embedding_function_default(&sparse_index.config.embedding_function) {
1710 return false;
1711 }
1712 if sparse_index.config.source_key.is_some() {
1713 return false;
1714 }
1715 if let Some(bm25) = &sparse_index.config.bm25 {
1716 if bm25 != &false {
1717 return false;
1718 }
1719 }
1720 }
1721 }
1722
1723 true
1724 }
1725
1726 fn is_embedding_value_types_default(value_types: &ValueTypes) -> bool {
1728 if value_types.string.is_some()
1730 || value_types.float.is_some()
1731 || value_types.int.is_some()
1732 || value_types.boolean.is_some()
1733 || value_types.sparse_vector.is_some()
1734 {
1735 return false;
1736 }
1737
1738 if let Some(float_list) = &value_types.float_list {
1740 if let Some(vector_index) = &float_list.vector_index {
1741 if !vector_index.enabled {
1742 return false;
1743 }
1744 if !is_space_default(&vector_index.config.space) {
1745 return false;
1746 }
1747 if !is_embedding_function_default(&vector_index.config.embedding_function) {
1749 return false;
1750 }
1751 if vector_index.config.source_key.as_deref() != Some(DOCUMENT_KEY) {
1753 return false;
1754 }
1755 match (&vector_index.config.hnsw, &vector_index.config.spann) {
1758 (Some(hnsw_config), None) => {
1759 if !hnsw_config.is_default() {
1760 return false;
1761 }
1762 }
1763 (None, Some(spann_config)) => {
1764 if !spann_config.is_default() {
1765 return false;
1766 }
1767 }
1768 (Some(_), Some(_)) => return false, (None, None) => {}
1770 }
1771 }
1772 }
1773
1774 true
1775 }
1776
1777 fn is_document_value_types_default(value_types: &ValueTypes) -> bool {
1779 if value_types.float_list.is_some()
1781 || value_types.float.is_some()
1782 || value_types.int.is_some()
1783 || value_types.boolean.is_some()
1784 || value_types.sparse_vector.is_some()
1785 {
1786 return false;
1787 }
1788
1789 if let Some(string) = &value_types.string {
1791 if let Some(fts) = &string.fts_index {
1792 if !fts.enabled {
1793 return false;
1794 }
1795 }
1797 if let Some(string_inverted) = &string.string_inverted_index {
1798 if string_inverted.enabled {
1799 return false;
1800 }
1801 }
1803 }
1804
1805 true
1806 }
1807
1808 pub fn is_metadata_type_index_enabled(
1810 &self,
1811 key: &str,
1812 value_type: MetadataValueType,
1813 ) -> Result<bool, SchemaError> {
1814 let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1815
1816 match value_type {
1817 MetadataValueType::Bool => match &v_type.boolean {
1818 Some(bool_type) => match &bool_type.bool_inverted_index {
1819 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1820 None => Err(SchemaError::MissingIndexConfiguration {
1821 key: key.to_string(),
1822 value_type: "bool".to_string(),
1823 }),
1824 },
1825 None => match &self.defaults.boolean {
1826 Some(bool_type) => match &bool_type.bool_inverted_index {
1827 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1828 None => Err(SchemaError::MissingIndexConfiguration {
1829 key: key.to_string(),
1830 value_type: "bool".to_string(),
1831 }),
1832 },
1833 None => Err(SchemaError::MissingIndexConfiguration {
1834 key: key.to_string(),
1835 value_type: "bool".to_string(),
1836 }),
1837 },
1838 },
1839 MetadataValueType::Int => match &v_type.int {
1840 Some(int_type) => match &int_type.int_inverted_index {
1841 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1842 None => Err(SchemaError::MissingIndexConfiguration {
1843 key: key.to_string(),
1844 value_type: "int".to_string(),
1845 }),
1846 },
1847 None => match &self.defaults.int {
1848 Some(int_type) => match &int_type.int_inverted_index {
1849 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1850 None => Err(SchemaError::MissingIndexConfiguration {
1851 key: key.to_string(),
1852 value_type: "int".to_string(),
1853 }),
1854 },
1855 None => Err(SchemaError::MissingIndexConfiguration {
1856 key: key.to_string(),
1857 value_type: "int".to_string(),
1858 }),
1859 },
1860 },
1861 MetadataValueType::Float => match &v_type.float {
1862 Some(float_type) => match &float_type.float_inverted_index {
1863 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1864 None => Err(SchemaError::MissingIndexConfiguration {
1865 key: key.to_string(),
1866 value_type: "float".to_string(),
1867 }),
1868 },
1869 None => match &self.defaults.float {
1870 Some(float_type) => match &float_type.float_inverted_index {
1871 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1872 None => Err(SchemaError::MissingIndexConfiguration {
1873 key: key.to_string(),
1874 value_type: "float".to_string(),
1875 }),
1876 },
1877 None => Err(SchemaError::MissingIndexConfiguration {
1878 key: key.to_string(),
1879 value_type: "float".to_string(),
1880 }),
1881 },
1882 },
1883 MetadataValueType::Str => match &v_type.string {
1884 Some(string_type) => match &string_type.string_inverted_index {
1885 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1886 None => Err(SchemaError::MissingIndexConfiguration {
1887 key: key.to_string(),
1888 value_type: "string".to_string(),
1889 }),
1890 },
1891 None => match &self.defaults.string {
1892 Some(string_type) => match &string_type.string_inverted_index {
1893 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1894 None => Err(SchemaError::MissingIndexConfiguration {
1895 key: key.to_string(),
1896 value_type: "string".to_string(),
1897 }),
1898 },
1899 None => Err(SchemaError::MissingIndexConfiguration {
1900 key: key.to_string(),
1901 value_type: "string".to_string(),
1902 }),
1903 },
1904 },
1905 MetadataValueType::SparseVector => match &v_type.sparse_vector {
1906 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1907 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1908 None => Err(SchemaError::MissingIndexConfiguration {
1909 key: key.to_string(),
1910 value_type: "sparse_vector".to_string(),
1911 }),
1912 },
1913 None => match &self.defaults.sparse_vector {
1914 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1915 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1916 None => Err(SchemaError::MissingIndexConfiguration {
1917 key: key.to_string(),
1918 value_type: "sparse_vector".to_string(),
1919 }),
1920 },
1921 None => Err(SchemaError::MissingIndexConfiguration {
1922 key: key.to_string(),
1923 value_type: "sparse_vector".to_string(),
1924 }),
1925 },
1926 },
1927 }
1928 }
1929
1930 pub fn is_metadata_where_indexing_enabled(
1931 &self,
1932 where_clause: &Where,
1933 ) -> Result<(), FilterValidationError> {
1934 match where_clause {
1935 Where::Composite(composite) => {
1936 for child in &composite.children {
1937 self.is_metadata_where_indexing_enabled(child)?;
1938 }
1939 Ok(())
1940 }
1941 Where::Document(_) => Ok(()),
1942 Where::Metadata(expression) => {
1943 let value_type = match &expression.comparison {
1944 MetadataComparison::Primitive(_, value) => value.value_type(),
1945 MetadataComparison::Set(_, set_value) => set_value.value_type(),
1946 };
1947 let is_enabled = self
1948 .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1949 .map_err(FilterValidationError::Schema)?;
1950 if !is_enabled {
1951 return Err(FilterValidationError::IndexingDisabled {
1952 key: expression.key.clone(),
1953 value_type,
1954 });
1955 }
1956 Ok(())
1957 }
1958 }
1959 }
1960
1961 pub fn is_knn_key_indexing_enabled(
1962 &self,
1963 key: &str,
1964 query: &QueryVector,
1965 ) -> Result<(), FilterValidationError> {
1966 match query {
1967 QueryVector::Sparse(_) => {
1968 let is_enabled = self
1969 .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1970 .map_err(FilterValidationError::Schema)?;
1971 if !is_enabled {
1972 return Err(FilterValidationError::IndexingDisabled {
1973 key: key.to_string(),
1974 value_type: MetadataValueType::SparseVector,
1975 });
1976 }
1977 Ok(())
1978 }
1979 QueryVector::Dense(_) => {
1980 Ok(())
1983 }
1984 }
1985 }
1986
1987 pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
1988 if key.starts_with(CHROMA_KEY) {
1989 return false;
1990 }
1991 let value_types = self.keys.entry(key.to_string()).or_default();
1992 match value_type {
1993 MetadataValueType::Bool => {
1994 if value_types.boolean.is_none() {
1995 value_types.boolean = self.defaults.boolean.clone();
1996 return true;
1997 }
1998 }
1999 MetadataValueType::Int => {
2000 if value_types.int.is_none() {
2001 value_types.int = self.defaults.int.clone();
2002 return true;
2003 }
2004 }
2005 MetadataValueType::Float => {
2006 if value_types.float.is_none() {
2007 value_types.float = self.defaults.float.clone();
2008 return true;
2009 }
2010 }
2011 MetadataValueType::Str => {
2012 if value_types.string.is_none() {
2013 value_types.string = self.defaults.string.clone();
2014 return true;
2015 }
2016 }
2017 MetadataValueType::SparseVector => {
2018 if value_types.sparse_vector.is_none() {
2019 value_types.sparse_vector = self.defaults.sparse_vector.clone();
2020 return true;
2021 }
2022 }
2023 }
2024 false
2025 }
2026
2027 pub fn create_index(
2067 mut self,
2068 key: Option<&str>,
2069 config: IndexConfig,
2070 ) -> Result<Self, SchemaBuilderError> {
2071 match (&key, &config) {
2073 (None, IndexConfig::Vector(cfg)) => {
2074 self._set_vector_index_config_builder(cfg.clone());
2075 return Ok(self);
2076 }
2077 (None, IndexConfig::Fts(cfg)) => {
2078 self._set_fts_index_config_builder(cfg.clone());
2079 return Ok(self);
2080 }
2081 (Some(k), IndexConfig::Vector(_)) => {
2082 return Err(SchemaBuilderError::VectorIndexMustBeGlobal { key: k.to_string() });
2083 }
2084 (Some(k), IndexConfig::Fts(_)) => {
2085 return Err(SchemaBuilderError::FtsIndexMustBeGlobal { key: k.to_string() });
2086 }
2087 _ => {}
2088 }
2089
2090 if let Some(k) = key {
2092 if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2093 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2094 key: k.to_string(),
2095 });
2096 }
2097 }
2098
2099 if key.is_none() && matches!(config, IndexConfig::SparseVector(_)) {
2101 return Err(SchemaBuilderError::SparseVectorRequiresKey);
2102 }
2103
2104 match key {
2106 Some(k) => self._set_index_for_key_builder(k, config, true)?,
2107 None => self._set_index_in_defaults_builder(config, true)?,
2108 }
2109
2110 Ok(self)
2111 }
2112
2113 pub fn delete_index(
2141 mut self,
2142 key: Option<&str>,
2143 config: IndexConfig,
2144 ) -> Result<Self, SchemaBuilderError> {
2145 if let Some(k) = key {
2147 if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2148 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2149 key: k.to_string(),
2150 });
2151 }
2152 }
2153
2154 match &config {
2156 IndexConfig::Vector(_) => {
2157 return Err(SchemaBuilderError::VectorIndexDeletionNotSupported);
2158 }
2159 IndexConfig::Fts(_) => {
2160 return Err(SchemaBuilderError::FtsIndexDeletionNotSupported);
2161 }
2162 IndexConfig::SparseVector(_) => {
2163 return Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported);
2164 }
2165 _ => {}
2166 }
2167
2168 match key {
2170 Some(k) => self._set_index_for_key_builder(k, config, false)?,
2171 None => self._set_index_in_defaults_builder(config, false)?,
2172 }
2173
2174 Ok(self)
2175 }
2176
2177 pub fn with_cmek(mut self, cmek: Cmek) -> Self {
2195 self.cmek = Some(cmek);
2196 self
2197 }
2198
2199 fn _set_vector_index_config_builder(&mut self, config: VectorIndexConfig) {
2201 if let Some(float_list) = &mut self.defaults.float_list {
2203 if let Some(vector_index) = &mut float_list.vector_index {
2204 vector_index.config = config.clone();
2205 }
2206 }
2207
2208 if let Some(embedding_types) = self.keys.get_mut(EMBEDDING_KEY) {
2210 if let Some(float_list) = &mut embedding_types.float_list {
2211 if let Some(vector_index) = &mut float_list.vector_index {
2212 let mut updated_config = config;
2213 updated_config.source_key = Some(DOCUMENT_KEY.to_string());
2215 vector_index.config = updated_config;
2216 }
2217 }
2218 }
2219 }
2220
2221 fn _set_fts_index_config_builder(&mut self, config: FtsIndexConfig) {
2223 if let Some(string) = &mut self.defaults.string {
2225 if let Some(fts_index) = &mut string.fts_index {
2226 fts_index.config = config.clone();
2227 }
2228 }
2229
2230 if let Some(document_types) = self.keys.get_mut(DOCUMENT_KEY) {
2232 if let Some(string) = &mut document_types.string {
2233 if let Some(fts_index) = &mut string.fts_index {
2234 fts_index.config = config;
2235 }
2236 }
2237 }
2238 }
2239
2240 fn _set_index_for_key_builder(
2242 &mut self,
2243 key: &str,
2244 config: IndexConfig,
2245 enabled: bool,
2246 ) -> Result<(), SchemaBuilderError> {
2247 if enabled && matches!(config, IndexConfig::SparseVector(_)) {
2249 let existing_key = self
2251 .keys
2252 .iter()
2253 .find(|(k, v)| {
2254 k.as_str() != key
2255 && v.sparse_vector
2256 .as_ref()
2257 .and_then(|sv| sv.sparse_vector_index.as_ref())
2258 .map(|idx| idx.enabled)
2259 .unwrap_or(false)
2260 })
2261 .map(|(k, _)| k.clone());
2262
2263 if let Some(existing_key) = existing_key {
2264 return Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key });
2265 }
2266 }
2267
2268 let value_types = self.keys.entry(key.to_string()).or_default();
2270
2271 match config {
2273 IndexConfig::Vector(_) => {
2274 return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2275 key: key.to_string(),
2276 });
2277 }
2278 IndexConfig::Fts(_) => {
2279 return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2280 key: key.to_string(),
2281 });
2282 }
2283 IndexConfig::SparseVector(cfg) => {
2284 value_types.sparse_vector = Some(SparseVectorValueType {
2285 sparse_vector_index: Some(SparseVectorIndexType {
2286 enabled,
2287 config: cfg,
2288 }),
2289 });
2290 }
2291 IndexConfig::StringInverted(cfg) => {
2292 if value_types.string.is_none() {
2293 value_types.string = Some(StringValueType {
2294 fts_index: None,
2295 string_inverted_index: None,
2296 });
2297 }
2298 if let Some(string) = &mut value_types.string {
2299 string.string_inverted_index = Some(StringInvertedIndexType {
2300 enabled,
2301 config: cfg,
2302 });
2303 }
2304 }
2305 IndexConfig::IntInverted(cfg) => {
2306 value_types.int = Some(IntValueType {
2307 int_inverted_index: Some(IntInvertedIndexType {
2308 enabled,
2309 config: cfg,
2310 }),
2311 });
2312 }
2313 IndexConfig::FloatInverted(cfg) => {
2314 value_types.float = Some(FloatValueType {
2315 float_inverted_index: Some(FloatInvertedIndexType {
2316 enabled,
2317 config: cfg,
2318 }),
2319 });
2320 }
2321 IndexConfig::BoolInverted(cfg) => {
2322 value_types.boolean = Some(BoolValueType {
2323 bool_inverted_index: Some(BoolInvertedIndexType {
2324 enabled,
2325 config: cfg,
2326 }),
2327 });
2328 }
2329 }
2330
2331 Ok(())
2332 }
2333
2334 fn _set_index_in_defaults_builder(
2336 &mut self,
2337 config: IndexConfig,
2338 enabled: bool,
2339 ) -> Result<(), SchemaBuilderError> {
2340 match config {
2341 IndexConfig::Vector(_) => {
2342 return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2343 key: "defaults".to_string(),
2344 });
2345 }
2346 IndexConfig::Fts(_) => {
2347 return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2348 key: "defaults".to_string(),
2349 });
2350 }
2351 IndexConfig::SparseVector(cfg) => {
2352 self.defaults.sparse_vector = Some(SparseVectorValueType {
2353 sparse_vector_index: Some(SparseVectorIndexType {
2354 enabled,
2355 config: cfg,
2356 }),
2357 });
2358 }
2359 IndexConfig::StringInverted(cfg) => {
2360 if self.defaults.string.is_none() {
2361 self.defaults.string = Some(StringValueType {
2362 fts_index: None,
2363 string_inverted_index: None,
2364 });
2365 }
2366 if let Some(string) = &mut self.defaults.string {
2367 string.string_inverted_index = Some(StringInvertedIndexType {
2368 enabled,
2369 config: cfg,
2370 });
2371 }
2372 }
2373 IndexConfig::IntInverted(cfg) => {
2374 self.defaults.int = Some(IntValueType {
2375 int_inverted_index: Some(IntInvertedIndexType {
2376 enabled,
2377 config: cfg,
2378 }),
2379 });
2380 }
2381 IndexConfig::FloatInverted(cfg) => {
2382 self.defaults.float = Some(FloatValueType {
2383 float_inverted_index: Some(FloatInvertedIndexType {
2384 enabled,
2385 config: cfg,
2386 }),
2387 });
2388 }
2389 IndexConfig::BoolInverted(cfg) => {
2390 self.defaults.boolean = Some(BoolValueType {
2391 bool_inverted_index: Some(BoolInvertedIndexType {
2392 enabled,
2393 config: cfg,
2394 }),
2395 });
2396 }
2397 }
2398
2399 Ok(())
2400 }
2401}
2402
2403#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2408#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2409#[serde(deny_unknown_fields)]
2410pub struct VectorIndexConfig {
2411 #[serde(skip_serializing_if = "Option::is_none")]
2413 pub space: Option<Space>,
2414 #[serde(skip_serializing_if = "Option::is_none")]
2416 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
2417 #[serde(skip_serializing_if = "Option::is_none")]
2419 pub source_key: Option<String>,
2420 #[serde(skip_serializing_if = "Option::is_none")]
2422 pub hnsw: Option<HnswIndexConfig>,
2423 #[serde(skip_serializing_if = "Option::is_none")]
2425 pub spann: Option<SpannIndexConfig>,
2426}
2427
2428#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
2430#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2431#[serde(deny_unknown_fields)]
2432pub struct HnswIndexConfig {
2433 #[serde(skip_serializing_if = "Option::is_none")]
2434 pub ef_construction: Option<usize>,
2435 #[serde(skip_serializing_if = "Option::is_none")]
2436 pub max_neighbors: Option<usize>,
2437 #[serde(skip_serializing_if = "Option::is_none")]
2438 pub ef_search: Option<usize>,
2439 #[serde(skip_serializing_if = "Option::is_none")]
2440 pub num_threads: Option<usize>,
2441 #[serde(skip_serializing_if = "Option::is_none")]
2442 #[validate(range(min = 2))]
2443 pub batch_size: Option<usize>,
2444 #[serde(skip_serializing_if = "Option::is_none")]
2445 #[validate(range(min = 2))]
2446 pub sync_threshold: Option<usize>,
2447 #[serde(skip_serializing_if = "Option::is_none")]
2448 pub resize_factor: Option<f64>,
2449}
2450
2451impl HnswIndexConfig {
2452 pub fn is_default(&self) -> bool {
2456 if let Some(ef_construction) = self.ef_construction {
2457 if ef_construction != default_construction_ef() {
2458 return false;
2459 }
2460 }
2461 if let Some(max_neighbors) = self.max_neighbors {
2462 if max_neighbors != default_m() {
2463 return false;
2464 }
2465 }
2466 if let Some(ef_search) = self.ef_search {
2467 if ef_search != default_search_ef() {
2468 return false;
2469 }
2470 }
2471 if let Some(batch_size) = self.batch_size {
2472 if batch_size != default_batch_size() {
2473 return false;
2474 }
2475 }
2476 if let Some(sync_threshold) = self.sync_threshold {
2477 if sync_threshold != default_sync_threshold() {
2478 return false;
2479 }
2480 }
2481 if let Some(resize_factor) = self.resize_factor {
2482 if resize_factor != default_resize_factor() {
2483 return false;
2484 }
2485 }
2486 true
2488 }
2489}
2490
2491#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
2493#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2494#[serde(deny_unknown_fields)]
2495pub struct SpannIndexConfig {
2496 #[serde(skip_serializing_if = "Option::is_none")]
2497 #[validate(range(max = 128))]
2498 pub search_nprobe: Option<u32>,
2499 #[serde(skip_serializing_if = "Option::is_none")]
2500 #[validate(range(min = 1.0, max = 1.0))]
2501 pub search_rng_factor: Option<f32>,
2502 #[serde(skip_serializing_if = "Option::is_none")]
2503 #[validate(range(min = 5.0, max = 10.0))]
2504 pub search_rng_epsilon: Option<f32>,
2505 #[serde(skip_serializing_if = "Option::is_none")]
2506 #[validate(range(max = 8))]
2507 pub nreplica_count: Option<u32>,
2508 #[serde(skip_serializing_if = "Option::is_none")]
2509 #[validate(range(min = 1.0, max = 1.0))]
2510 pub write_rng_factor: Option<f32>,
2511 #[serde(skip_serializing_if = "Option::is_none")]
2512 #[validate(range(min = 5.0, max = 10.0))]
2513 pub write_rng_epsilon: Option<f32>,
2514 #[serde(skip_serializing_if = "Option::is_none")]
2515 #[validate(range(min = 50, max = 200))]
2516 pub split_threshold: Option<u32>,
2517 #[serde(skip_serializing_if = "Option::is_none")]
2518 #[validate(range(max = 1000))]
2519 pub num_samples_kmeans: Option<usize>,
2520 #[serde(skip_serializing_if = "Option::is_none")]
2521 #[validate(range(min = 100.0, max = 100.0))]
2522 pub initial_lambda: Option<f32>,
2523 #[serde(skip_serializing_if = "Option::is_none")]
2524 #[validate(range(max = 64))]
2525 pub reassign_neighbor_count: Option<u32>,
2526 #[serde(skip_serializing_if = "Option::is_none")]
2527 #[validate(range(min = 25, max = 100))]
2528 pub merge_threshold: Option<u32>,
2529 #[serde(skip_serializing_if = "Option::is_none")]
2530 #[validate(range(max = 8))]
2531 pub num_centers_to_merge_to: Option<u32>,
2532 #[serde(skip_serializing_if = "Option::is_none")]
2533 #[validate(range(max = 64))]
2534 pub write_nprobe: Option<u32>,
2535 #[serde(skip_serializing_if = "Option::is_none")]
2536 #[validate(range(max = 200))]
2537 pub ef_construction: Option<usize>,
2538 #[serde(skip_serializing_if = "Option::is_none")]
2539 #[validate(range(max = 200))]
2540 pub ef_search: Option<usize>,
2541 #[serde(skip_serializing_if = "Option::is_none")]
2542 #[validate(range(max = 64))]
2543 pub max_neighbors: Option<usize>,
2544}
2545
2546impl SpannIndexConfig {
2547 pub fn is_default(&self) -> bool {
2550 if let Some(search_nprobe) = self.search_nprobe {
2551 if search_nprobe != default_search_nprobe() {
2552 return false;
2553 }
2554 }
2555 if let Some(search_rng_factor) = self.search_rng_factor {
2556 if search_rng_factor != default_search_rng_factor() {
2557 return false;
2558 }
2559 }
2560 if let Some(search_rng_epsilon) = self.search_rng_epsilon {
2561 if search_rng_epsilon != default_search_rng_epsilon() {
2562 return false;
2563 }
2564 }
2565 if let Some(nreplica_count) = self.nreplica_count {
2566 if nreplica_count != default_nreplica_count() {
2567 return false;
2568 }
2569 }
2570 if let Some(write_rng_factor) = self.write_rng_factor {
2571 if write_rng_factor != default_write_rng_factor() {
2572 return false;
2573 }
2574 }
2575 if let Some(write_rng_epsilon) = self.write_rng_epsilon {
2576 if write_rng_epsilon != default_write_rng_epsilon() {
2577 return false;
2578 }
2579 }
2580 if let Some(split_threshold) = self.split_threshold {
2581 if split_threshold != default_split_threshold() {
2582 return false;
2583 }
2584 }
2585 if let Some(num_samples_kmeans) = self.num_samples_kmeans {
2586 if num_samples_kmeans != default_num_samples_kmeans() {
2587 return false;
2588 }
2589 }
2590 if let Some(initial_lambda) = self.initial_lambda {
2591 if initial_lambda != default_initial_lambda() {
2592 return false;
2593 }
2594 }
2595 if let Some(reassign_neighbor_count) = self.reassign_neighbor_count {
2596 if reassign_neighbor_count != default_reassign_neighbor_count() {
2597 return false;
2598 }
2599 }
2600 if let Some(merge_threshold) = self.merge_threshold {
2601 if merge_threshold != default_merge_threshold() {
2602 return false;
2603 }
2604 }
2605 if let Some(num_centers_to_merge_to) = self.num_centers_to_merge_to {
2606 if num_centers_to_merge_to != default_num_centers_to_merge_to() {
2607 return false;
2608 }
2609 }
2610 if let Some(write_nprobe) = self.write_nprobe {
2611 if write_nprobe != default_write_nprobe() {
2612 return false;
2613 }
2614 }
2615 if let Some(ef_construction) = self.ef_construction {
2616 if ef_construction != default_construction_ef_spann() {
2617 return false;
2618 }
2619 }
2620 if let Some(ef_search) = self.ef_search {
2621 if ef_search != default_search_ef_spann() {
2622 return false;
2623 }
2624 }
2625 if let Some(max_neighbors) = self.max_neighbors {
2626 if max_neighbors != default_m_spann() {
2627 return false;
2628 }
2629 }
2630 true
2631 }
2632}
2633
2634#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2635#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2636#[serde(deny_unknown_fields)]
2637pub struct SparseVectorIndexConfig {
2638 #[serde(skip_serializing_if = "Option::is_none")]
2640 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
2641 #[serde(skip_serializing_if = "Option::is_none")]
2643 pub source_key: Option<String>,
2644 #[serde(skip_serializing_if = "Option::is_none")]
2646 pub bm25: Option<bool>,
2647}
2648
2649#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2650#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2651#[serde(deny_unknown_fields)]
2652pub struct FtsIndexConfig {
2653 }
2655
2656#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2657#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2658#[serde(deny_unknown_fields)]
2659pub struct StringInvertedIndexConfig {
2660 }
2662
2663#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2664#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2665#[serde(deny_unknown_fields)]
2666pub struct IntInvertedIndexConfig {
2667 }
2669
2670#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2671#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2672#[serde(deny_unknown_fields)]
2673pub struct FloatInvertedIndexConfig {
2674 }
2676
2677#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2678#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2679#[serde(deny_unknown_fields)]
2680pub struct BoolInvertedIndexConfig {
2681 }
2683
2684#[derive(Clone, Debug)]
2690pub enum IndexConfig {
2691 Vector(VectorIndexConfig),
2692 SparseVector(SparseVectorIndexConfig),
2693 Fts(FtsIndexConfig),
2694 StringInverted(StringInvertedIndexConfig),
2695 IntInverted(IntInvertedIndexConfig),
2696 FloatInverted(FloatInvertedIndexConfig),
2697 BoolInverted(BoolInvertedIndexConfig),
2698}
2699
2700impl From<VectorIndexConfig> for IndexConfig {
2702 fn from(config: VectorIndexConfig) -> Self {
2703 IndexConfig::Vector(config)
2704 }
2705}
2706
2707impl From<SparseVectorIndexConfig> for IndexConfig {
2708 fn from(config: SparseVectorIndexConfig) -> Self {
2709 IndexConfig::SparseVector(config)
2710 }
2711}
2712
2713impl From<FtsIndexConfig> for IndexConfig {
2714 fn from(config: FtsIndexConfig) -> Self {
2715 IndexConfig::Fts(config)
2716 }
2717}
2718
2719impl From<StringInvertedIndexConfig> for IndexConfig {
2720 fn from(config: StringInvertedIndexConfig) -> Self {
2721 IndexConfig::StringInverted(config)
2722 }
2723}
2724
2725impl From<IntInvertedIndexConfig> for IndexConfig {
2726 fn from(config: IntInvertedIndexConfig) -> Self {
2727 IndexConfig::IntInverted(config)
2728 }
2729}
2730
2731impl From<FloatInvertedIndexConfig> for IndexConfig {
2732 fn from(config: FloatInvertedIndexConfig) -> Self {
2733 IndexConfig::FloatInverted(config)
2734 }
2735}
2736
2737impl From<BoolInvertedIndexConfig> for IndexConfig {
2738 fn from(config: BoolInvertedIndexConfig) -> Self {
2739 IndexConfig::BoolInverted(config)
2740 }
2741}
2742
2743impl TryFrom<&InternalCollectionConfiguration> for Schema {
2744 type Error = SchemaError;
2745
2746 fn try_from(config: &InternalCollectionConfiguration) -> Result<Self, Self::Error> {
2747 let mut schema = match &config.vector_index {
2749 VectorIndexConfiguration::Hnsw(_) => Schema::new_default(KnnIndex::Hnsw),
2750 VectorIndexConfiguration::Spann(_) => Schema::new_default(KnnIndex::Spann),
2751 };
2752 let vector_config = match &config.vector_index {
2754 VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
2755 space: Some(hnsw_config.space.clone()),
2756 embedding_function: config.embedding_function.clone(),
2757 source_key: None,
2758 hnsw: Some(HnswIndexConfig {
2759 ef_construction: Some(hnsw_config.ef_construction),
2760 max_neighbors: Some(hnsw_config.max_neighbors),
2761 ef_search: Some(hnsw_config.ef_search),
2762 num_threads: Some(hnsw_config.num_threads),
2763 batch_size: Some(hnsw_config.batch_size),
2764 sync_threshold: Some(hnsw_config.sync_threshold),
2765 resize_factor: Some(hnsw_config.resize_factor),
2766 }),
2767 spann: None,
2768 },
2769 VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
2770 space: Some(spann_config.space.clone()),
2771 embedding_function: config.embedding_function.clone(),
2772 source_key: None,
2773 hnsw: None,
2774 spann: Some(SpannIndexConfig {
2775 search_nprobe: Some(spann_config.search_nprobe),
2776 search_rng_factor: Some(spann_config.search_rng_factor),
2777 search_rng_epsilon: Some(spann_config.search_rng_epsilon),
2778 nreplica_count: Some(spann_config.nreplica_count),
2779 write_rng_factor: Some(spann_config.write_rng_factor),
2780 write_rng_epsilon: Some(spann_config.write_rng_epsilon),
2781 split_threshold: Some(spann_config.split_threshold),
2782 num_samples_kmeans: Some(spann_config.num_samples_kmeans),
2783 initial_lambda: Some(spann_config.initial_lambda),
2784 reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
2785 merge_threshold: Some(spann_config.merge_threshold),
2786 num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
2787 write_nprobe: Some(spann_config.write_nprobe),
2788 ef_construction: Some(spann_config.ef_construction),
2789 ef_search: Some(spann_config.ef_search),
2790 max_neighbors: Some(spann_config.max_neighbors),
2791 }),
2792 },
2793 };
2794
2795 if let Some(float_list) = &mut schema.defaults.float_list {
2798 if let Some(vector_index) = &mut float_list.vector_index {
2799 vector_index.config = vector_config.clone();
2800 }
2801 }
2802
2803 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
2807 if let Some(float_list) = &mut embedding_types.float_list {
2808 if let Some(vector_index) = &mut float_list.vector_index {
2809 let mut vector_config = vector_config;
2810 vector_config.source_key = Some(DOCUMENT_KEY.to_string());
2811 vector_index.config = vector_config;
2812 }
2813 }
2814 }
2815
2816 Ok(schema)
2817 }
2818}
2819
2820#[cfg(test)]
2821mod tests {
2822 use super::*;
2823 use crate::hnsw_configuration::Space;
2824 use crate::metadata::SparseVector;
2825 use crate::{
2826 EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
2827 };
2828 use serde_json::json;
2829
2830 #[test]
2831 fn test_reconcile_with_defaults_none_user_schema() {
2832 let result = Schema::reconcile_with_defaults(None, KnnIndex::Spann).unwrap();
2834 let expected = Schema::new_default(KnnIndex::Spann);
2835 assert_eq!(result, expected);
2836 }
2837
2838 #[test]
2839 fn test_reconcile_with_defaults_empty_user_schema() {
2840 let user_schema = Schema {
2842 defaults: ValueTypes::default(),
2843 keys: HashMap::new(),
2844 cmek: None,
2845 };
2846
2847 let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
2848 let expected = Schema::new_default(KnnIndex::Spann);
2849 assert_eq!(result, expected);
2850 }
2851
2852 #[test]
2853 fn test_reconcile_with_defaults_user_overrides_string_enabled() {
2854 let mut user_schema = Schema {
2856 defaults: ValueTypes::default(),
2857 keys: HashMap::new(),
2858 cmek: None,
2859 };
2860
2861 user_schema.defaults.string = Some(StringValueType {
2862 string_inverted_index: Some(StringInvertedIndexType {
2863 enabled: false, config: StringInvertedIndexConfig {},
2865 }),
2866 fts_index: None,
2867 });
2868
2869 let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
2870
2871 assert!(
2873 !result
2874 .defaults
2875 .string
2876 .as_ref()
2877 .unwrap()
2878 .string_inverted_index
2879 .as_ref()
2880 .unwrap()
2881 .enabled
2882 );
2883 assert!(result.defaults.float.is_some());
2885 assert!(result.defaults.int.is_some());
2886 }
2887
2888 #[test]
2889 fn test_reconcile_with_defaults_user_overrides_vector_config() {
2890 let mut user_schema = Schema {
2892 defaults: ValueTypes::default(),
2893 keys: HashMap::new(),
2894 cmek: None,
2895 };
2896
2897 user_schema.defaults.float_list = Some(FloatListValueType {
2898 vector_index: Some(VectorIndexType {
2899 enabled: true, config: VectorIndexConfig {
2901 space: Some(Space::L2), embedding_function: None, source_key: Some("custom_key".to_string()), hnsw: Some(HnswIndexConfig {
2905 ef_construction: Some(500), max_neighbors: None, ef_search: None, num_threads: None,
2909 batch_size: None,
2910 sync_threshold: None,
2911 resize_factor: None,
2912 }),
2913 spann: None,
2914 },
2915 }),
2916 });
2917
2918 let result = {
2920 let default_schema = Schema::new_default(KnnIndex::Hnsw);
2921 let merged_defaults = Schema::merge_value_types(
2922 &default_schema.defaults,
2923 &user_schema.defaults,
2924 KnnIndex::Hnsw,
2925 )
2926 .unwrap();
2927 let mut merged_keys = default_schema.keys.clone();
2928 for (key, user_value_types) in user_schema.keys {
2929 if let Some(default_value_types) = merged_keys.get(&key) {
2930 let merged_value_types = Schema::merge_value_types(
2931 default_value_types,
2932 &user_value_types,
2933 KnnIndex::Hnsw,
2934 )
2935 .unwrap();
2936 merged_keys.insert(key, merged_value_types);
2937 } else {
2938 merged_keys.insert(key, user_value_types);
2939 }
2940 }
2941 Schema {
2942 defaults: merged_defaults,
2943 keys: merged_keys,
2944 cmek: None,
2945 }
2946 };
2947
2948 let vector_config = &result
2949 .defaults
2950 .float_list
2951 .as_ref()
2952 .unwrap()
2953 .vector_index
2954 .as_ref()
2955 .unwrap()
2956 .config;
2957
2958 assert_eq!(vector_config.space, Some(Space::L2));
2960 assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
2961 assert_eq!(
2962 vector_config.hnsw.as_ref().unwrap().ef_construction,
2963 Some(500)
2964 );
2965
2966 assert_eq!(vector_config.embedding_function, None);
2968 assert_eq!(
2970 vector_config.hnsw.as_ref().unwrap().max_neighbors,
2971 Some(default_m())
2972 );
2973 }
2974
2975 #[test]
2976 fn test_reconcile_with_defaults_keys() {
2977 let mut user_schema = Schema {
2979 defaults: ValueTypes::default(),
2980 keys: HashMap::new(),
2981 cmek: None,
2982 };
2983
2984 let custom_key_types = ValueTypes {
2986 string: Some(StringValueType {
2987 fts_index: Some(FtsIndexType {
2988 enabled: true,
2989 config: FtsIndexConfig {},
2990 }),
2991 string_inverted_index: Some(StringInvertedIndexType {
2992 enabled: false,
2993 config: StringInvertedIndexConfig {},
2994 }),
2995 }),
2996 ..Default::default()
2997 };
2998 user_schema
2999 .keys
3000 .insert("custom_key".to_string(), custom_key_types);
3001
3002 let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3003
3004 assert!(result.keys.contains_key(EMBEDDING_KEY));
3006 assert!(result.keys.contains_key(DOCUMENT_KEY));
3007
3008 assert!(result.keys.contains_key("custom_key"));
3010 let custom_override = result.keys.get("custom_key").unwrap();
3011 assert!(
3012 custom_override
3013 .string
3014 .as_ref()
3015 .unwrap()
3016 .fts_index
3017 .as_ref()
3018 .unwrap()
3019 .enabled
3020 );
3021 }
3022
3023 #[test]
3024 fn test_reconcile_with_defaults_override_existing_key() {
3025 let mut user_schema = Schema {
3027 defaults: ValueTypes::default(),
3028 keys: HashMap::new(),
3029 cmek: None,
3030 };
3031
3032 let embedding_override = ValueTypes {
3034 float_list: Some(FloatListValueType {
3035 vector_index: Some(VectorIndexType {
3036 enabled: false, config: VectorIndexConfig {
3038 space: Some(Space::Ip), embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3040 source_key: Some("custom_embedding_key".to_string()),
3041 hnsw: None,
3042 spann: None,
3043 },
3044 }),
3045 }),
3046 ..Default::default()
3047 };
3048 user_schema
3049 .keys
3050 .insert(EMBEDDING_KEY.to_string(), embedding_override);
3051
3052 let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3053
3054 let embedding_config = result.keys.get(EMBEDDING_KEY).unwrap();
3055 let vector_config = &embedding_config
3056 .float_list
3057 .as_ref()
3058 .unwrap()
3059 .vector_index
3060 .as_ref()
3061 .unwrap();
3062
3063 assert!(!vector_config.enabled);
3065 assert_eq!(vector_config.config.space, Some(Space::Ip));
3066 assert_eq!(
3067 vector_config.config.source_key,
3068 Some("custom_embedding_key".to_string())
3069 );
3070 }
3071
3072 #[test]
3073 fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
3074 let collection_config = InternalCollectionConfiguration {
3075 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
3076 space: Space::Cosine,
3077 ef_construction: 128,
3078 ef_search: 96,
3079 max_neighbors: 42,
3080 num_threads: 8,
3081 resize_factor: 1.5,
3082 sync_threshold: 2_000,
3083 batch_size: 256,
3084 }),
3085 embedding_function: Some(EmbeddingFunctionConfiguration::Known(
3086 EmbeddingFunctionNewConfiguration {
3087 name: "custom".to_string(),
3088 config: json!({"alpha": 1}),
3089 },
3090 )),
3091 };
3092
3093 let schema = Schema::try_from(&collection_config).unwrap();
3094 let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
3095
3096 assert_eq!(reconstructed, collection_config);
3097 }
3098
3099 #[test]
3100 fn test_convert_schema_to_collection_config_spann_roundtrip() {
3101 let spann_config = InternalSpannConfiguration {
3102 space: Space::Cosine,
3103 search_nprobe: 11,
3104 search_rng_factor: 1.7,
3105 write_nprobe: 5,
3106 nreplica_count: 3,
3107 split_threshold: 150,
3108 merge_threshold: 80,
3109 ef_construction: 120,
3110 ef_search: 90,
3111 max_neighbors: 40,
3112 ..Default::default()
3113 };
3114
3115 let collection_config = InternalCollectionConfiguration {
3116 vector_index: VectorIndexConfiguration::Spann(spann_config.clone()),
3117 embedding_function: Some(EmbeddingFunctionConfiguration::Known(
3118 EmbeddingFunctionNewConfiguration {
3119 name: "custom".to_string(),
3120 config: json!({"beta": true}),
3121 },
3122 )),
3123 };
3124
3125 let schema = Schema::try_from(&collection_config).unwrap();
3126 let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
3127
3128 assert_eq!(reconstructed, collection_config);
3129 }
3130
3131 #[test]
3132 fn test_convert_schema_to_collection_config_rejects_mixed_index() {
3133 let mut schema = Schema::new_default(KnnIndex::Hnsw);
3134 if let Some(embedding) = schema.keys.get_mut(EMBEDDING_KEY) {
3135 if let Some(float_list) = &mut embedding.float_list {
3136 if let Some(vector_index) = &mut float_list.vector_index {
3137 vector_index.config.spann = Some(SpannIndexConfig {
3138 search_nprobe: Some(1),
3139 search_rng_factor: Some(1.0),
3140 search_rng_epsilon: Some(0.1),
3141 nreplica_count: Some(1),
3142 write_rng_factor: Some(1.0),
3143 write_rng_epsilon: Some(0.1),
3144 split_threshold: Some(100),
3145 num_samples_kmeans: Some(10),
3146 initial_lambda: Some(0.5),
3147 reassign_neighbor_count: Some(10),
3148 merge_threshold: Some(50),
3149 num_centers_to_merge_to: Some(3),
3150 write_nprobe: Some(1),
3151 ef_construction: Some(50),
3152 ef_search: Some(40),
3153 max_neighbors: Some(20),
3154 });
3155 }
3156 }
3157 }
3158
3159 let result = InternalCollectionConfiguration::try_from(&schema);
3160 assert!(result.is_err());
3161 }
3162
3163 #[test]
3164 fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
3165 let mut schema = Schema::new_default(KnnIndex::Hnsw);
3166 let before = schema.clone();
3167 let modified = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);
3168 assert!(!modified);
3169 assert_eq!(schema, before);
3170 }
3171
3172 #[test]
3173 fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
3174 let mut schema = Schema::new_default(KnnIndex::Hnsw);
3175 assert!(!schema.keys.contains_key("custom_field"));
3176
3177 let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3178
3179 assert!(modified);
3180 let entry = schema
3181 .keys
3182 .get("custom_field")
3183 .expect("expected new key override to be inserted");
3184 assert_eq!(entry.boolean, schema.defaults.boolean);
3185 assert!(entry.string.is_none());
3186 assert!(entry.int.is_none());
3187 assert!(entry.float.is_none());
3188 assert!(entry.float_list.is_none());
3189 assert!(entry.sparse_vector.is_none());
3190 }
3191
3192 #[test]
3193 fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
3194 let mut schema = Schema::new_default(KnnIndex::Hnsw);
3195 let initial_len = schema.keys.len();
3196 schema.keys.insert(
3197 "custom_field".to_string(),
3198 ValueTypes {
3199 string: schema.defaults.string.clone(),
3200 ..Default::default()
3201 },
3202 );
3203
3204 let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3205
3206 assert!(modified);
3207 assert_eq!(schema.keys.len(), initial_len + 1);
3208 let entry = schema
3209 .keys
3210 .get("custom_field")
3211 .expect("expected key override to exist after ensure call");
3212 assert!(entry.string.is_some());
3213 assert_eq!(entry.boolean, schema.defaults.boolean);
3214 }
3215
3216 #[test]
3217 fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
3218 let schema = Schema::new_default(KnnIndex::Spann);
3219 let result = schema.is_knn_key_indexing_enabled(
3220 "custom_sparse",
3221 &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap()),
3222 );
3223
3224 let err = result.expect_err("expected indexing disabled error");
3225 match err {
3226 FilterValidationError::IndexingDisabled { key, value_type } => {
3227 assert_eq!(key, "custom_sparse");
3228 assert_eq!(value_type, crate::metadata::MetadataValueType::SparseVector);
3229 }
3230 other => panic!("unexpected error variant: {other:?}"),
3231 }
3232 }
3233
3234 #[test]
3235 fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
3236 let mut schema = Schema::new_default(KnnIndex::Spann);
3237 schema.keys.insert(
3238 "sparse_enabled".to_string(),
3239 ValueTypes {
3240 sparse_vector: Some(SparseVectorValueType {
3241 sparse_vector_index: Some(SparseVectorIndexType {
3242 enabled: true,
3243 config: SparseVectorIndexConfig {
3244 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3245 source_key: None,
3246 bm25: None,
3247 },
3248 }),
3249 }),
3250 ..Default::default()
3251 },
3252 );
3253
3254 let result = schema.is_knn_key_indexing_enabled(
3255 "sparse_enabled",
3256 &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap()),
3257 );
3258
3259 assert!(result.is_ok());
3260 }
3261
3262 #[test]
3263 fn test_is_knn_key_indexing_enabled_dense_succeeds() {
3264 let schema = Schema::new_default(KnnIndex::Spann);
3265 let result = schema.is_knn_key_indexing_enabled(
3266 EMBEDDING_KEY,
3267 &QueryVector::Dense(vec![0.1_f32, 0.2_f32]),
3268 );
3269
3270 assert!(result.is_ok());
3271 }
3272
3273 #[test]
3274 fn test_merge_hnsw_configs_field_level() {
3275 let default_hnsw = HnswIndexConfig {
3277 ef_construction: Some(200),
3278 max_neighbors: Some(16),
3279 ef_search: Some(10),
3280 num_threads: Some(4),
3281 batch_size: Some(100),
3282 sync_threshold: Some(1000),
3283 resize_factor: Some(1.2),
3284 };
3285
3286 let user_hnsw = HnswIndexConfig {
3287 ef_construction: Some(300), max_neighbors: None, ef_search: Some(20), num_threads: None, batch_size: None, sync_threshold: Some(2000), resize_factor: None, };
3295
3296 let result = Schema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();
3297
3298 assert_eq!(result.ef_construction, Some(300));
3300 assert_eq!(result.ef_search, Some(20));
3301 assert_eq!(result.sync_threshold, Some(2000));
3302
3303 assert_eq!(result.max_neighbors, Some(16));
3305 assert_eq!(result.num_threads, Some(4));
3306 assert_eq!(result.batch_size, Some(100));
3307 assert_eq!(result.resize_factor, Some(1.2));
3308 }
3309
3310 #[test]
3311 fn test_merge_spann_configs_field_level() {
3312 let default_spann = SpannIndexConfig {
3314 search_nprobe: Some(10),
3315 search_rng_factor: Some(1.0), search_rng_epsilon: Some(7.0), nreplica_count: Some(3),
3318 write_rng_factor: Some(1.0), write_rng_epsilon: Some(6.0), split_threshold: Some(100), num_samples_kmeans: Some(100),
3322 initial_lambda: Some(100.0), reassign_neighbor_count: Some(50),
3324 merge_threshold: Some(50), num_centers_to_merge_to: Some(4), write_nprobe: Some(5),
3327 ef_construction: Some(100),
3328 ef_search: Some(10),
3329 max_neighbors: Some(16),
3330 };
3331
3332 let user_spann = SpannIndexConfig {
3333 search_nprobe: Some(20), search_rng_factor: None, search_rng_epsilon: Some(8.0), nreplica_count: None, write_rng_factor: None,
3338 write_rng_epsilon: None,
3339 split_threshold: Some(150), num_samples_kmeans: None,
3341 initial_lambda: None,
3342 reassign_neighbor_count: None,
3343 merge_threshold: None,
3344 num_centers_to_merge_to: None,
3345 write_nprobe: None,
3346 ef_construction: None,
3347 ef_search: None,
3348 max_neighbors: None,
3349 };
3350
3351 let result = Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann)).unwrap();
3352
3353 assert_eq!(result.search_nprobe, Some(20));
3355 assert_eq!(result.search_rng_epsilon, Some(8.0));
3356 assert_eq!(result.split_threshold, Some(150));
3357
3358 assert_eq!(result.search_rng_factor, Some(1.0));
3360 assert_eq!(result.nreplica_count, Some(3));
3361 assert_eq!(result.initial_lambda, Some(100.0));
3362 }
3363
/// Converts a partially populated `SpannIndexConfig` into an
/// `InternalSpannConfiguration`, once with an explicit space and once without.
#[test]
fn test_spann_index_config_into_internal_configuration() {
    let partial = SpannIndexConfig {
        search_nprobe: Some(33),
        search_rng_factor: Some(1.2),
        search_rng_epsilon: None,
        nreplica_count: None,
        write_rng_factor: Some(1.5),
        write_rng_epsilon: None,
        split_threshold: Some(75),
        num_samples_kmeans: None,
        initial_lambda: Some(0.9),
        reassign_neighbor_count: Some(40),
        merge_threshold: None,
        num_centers_to_merge_to: Some(4),
        write_nprobe: Some(60),
        ef_construction: Some(180),
        ef_search: Some(170),
        max_neighbors: Some(32),
    };

    // Explicit space: it is carried through, along with every `Some` field.
    let cosine: InternalSpannConfiguration = (Some(&Space::Cosine), &partial).into();
    assert_eq!(cosine.space, Space::Cosine);
    assert_eq!(cosine.search_nprobe, 33);
    assert_eq!(cosine.search_rng_factor, 1.2);
    // `search_rng_epsilon` was `None`, so the crate default is expected.
    assert_eq!(cosine.search_rng_epsilon, default_search_rng_epsilon());
    assert_eq!(cosine.write_rng_factor, 1.5);
    assert_eq!(cosine.write_nprobe, 60);
    assert_eq!(cosine.ef_construction, 180);
    assert_eq!(cosine.ef_search, 170);
    assert_eq!(cosine.max_neighbors, 32);
    // `merge_threshold` was `None`, so the crate default is expected.
    assert_eq!(cosine.merge_threshold, default_merge_threshold());

    // No space supplied: the default space is expected.
    let unspecified: InternalSpannConfiguration = (None, &partial).into();
    assert_eq!(unspecified.space, default_space());
}
3400
/// Runs `Schema::merge_string_type` over all four presence combinations of
/// (default, user): both, default only, user only, neither.
#[test]
fn test_merge_string_type_combinations() {
    let base = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: true,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: Some(FtsIndexType {
            enabled: false,
            config: FtsIndexConfig {},
        }),
    };

    let overrides = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: None,
    };

    // Both present: the user's inverted-index flag is expected, and the FTS
    // index (unset by the user) is expected to stay disabled as in `base`.
    let both = Schema::merge_string_type(Some(&base), Some(&overrides)).unwrap();
    let both = both.unwrap();
    assert!(!both.string_inverted_index.as_ref().unwrap().enabled);
    assert!(!both.fts_index.as_ref().unwrap().enabled);

    // Default only: the default's inverted-index flag is expected.
    let default_only = Schema::merge_string_type(Some(&base), None).unwrap();
    let default_only = default_only.unwrap();
    assert!(default_only.string_inverted_index.as_ref().unwrap().enabled);

    // User only: the user's inverted-index flag is expected.
    let user_only = Schema::merge_string_type(None, Some(&overrides)).unwrap();
    let user_only = user_only.unwrap();
    assert!(!user_only.string_inverted_index.as_ref().unwrap().enabled);

    // Neither: the merge succeeds and yields `None`.
    let neither = Schema::merge_string_type(None, None).unwrap();
    assert!(neither.is_none());
}
3447
/// Merges a default and a user `VectorIndexConfig` with `KnnIndex::Hnsw` and
/// checks the top-level fields plus the nested hnsw/spann sections.
#[test]
fn test_merge_vector_index_config_comprehensive() {
    let base = VectorIndexConfig {
        space: Some(Space::Cosine),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(200),
            max_neighbors: Some(16),
            ef_search: Some(10),
            num_threads: Some(4),
            batch_size: Some(100),
            sync_threshold: Some(1000),
            resize_factor: Some(1.2),
        }),
        spann: None,
    };

    // The user sets space, source key and one hnsw field, and also supplies a
    // spann section even though the merge is run for the HNSW index type.
    let overrides = VectorIndexConfig {
        space: Some(Space::L2),
        embedding_function: None,
        source_key: Some("user_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(300),
            max_neighbors: None,
            ef_search: None,
            num_threads: None,
            batch_size: None,
            sync_threshold: None,
            resize_factor: None,
        }),
        spann: Some(SpannIndexConfig {
            search_nprobe: Some(15),
            search_rng_factor: None,
            search_rng_epsilon: None,
            nreplica_count: None,
            write_rng_factor: None,
            write_rng_epsilon: None,
            split_threshold: None,
            num_samples_kmeans: None,
            initial_lambda: None,
            reassign_neighbor_count: None,
            merge_threshold: None,
            num_centers_to_merge_to: None,
            write_nprobe: None,
            ef_construction: None,
            ef_search: None,
            max_neighbors: None,
        }),
    };

    let merged = Schema::merge_vector_index_config(&base, &overrides, KnnIndex::Hnsw);

    // Top-level fields: user values where set, default otherwise.
    assert_eq!(merged.space, Some(Space::L2));
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(merged.source_key, Some("user_key".to_string()));

    // Nested hnsw section: merged per field.
    let merged_hnsw = merged.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(300));
    assert_eq!(merged_hnsw.max_neighbors, Some(16));

    // The spann section is expected to be absent for an HNSW merge.
    assert!(merged.spann.is_none());
}
3518
/// Merges a default and a user `SparseVectorIndexConfig` and checks which side
/// each field is taken from.
#[test]
fn test_merge_sparse_vector_index_config() {
    let base = SparseVectorIndexConfig {
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_sparse_key".to_string()),
        bm25: None,
    };

    // The user only provides a source key.
    let overrides = SparseVectorIndexConfig {
        embedding_function: None,
        source_key: Some("user_sparse_key".to_string()),
        bm25: None,
    };

    let merged = Schema::merge_sparse_vector_index_config(&base, &overrides);

    // Source key comes from the user.
    assert_eq!(merged.source_key, Some("user_sparse_key".to_string()));
    // Embedding function was not set by the user, so the default's is expected.
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
}
3544
/// Builds a user schema with custom string and vector defaults plus one custom
/// key override, merges it onto the default HNSW schema by hand, and checks
/// the merged defaults and keys.
#[test]
fn test_complex_nested_merging_scenario() {
    // User-level defaults: string indexes flipped, custom vector settings.
    let user_defaults = ValueTypes {
        string: Some(StringValueType {
            string_inverted_index: Some(StringInvertedIndexType {
                enabled: false,
                config: StringInvertedIndexConfig {},
            }),
            fts_index: Some(FtsIndexType {
                enabled: true,
                config: FtsIndexConfig {},
            }),
        }),
        float_list: Some(FloatListValueType {
            vector_index: Some(VectorIndexType {
                enabled: true,
                config: VectorIndexConfig {
                    space: Some(Space::Ip),
                    embedding_function: None,
                    source_key: Some("custom_vector_key".to_string()),
                    hnsw: Some(HnswIndexConfig {
                        ef_construction: Some(400),
                        max_neighbors: Some(32),
                        ef_search: None,
                        num_threads: None,
                        batch_size: None,
                        sync_threshold: None,
                        resize_factor: None,
                    }),
                    spann: None,
                },
            }),
        }),
        ..Default::default()
    };

    // One key-level override that only configures the FTS index.
    let mut user_keys = HashMap::new();
    user_keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            string: Some(StringValueType {
                fts_index: Some(FtsIndexType {
                    enabled: true,
                    config: FtsIndexConfig {},
                }),
                string_inverted_index: None,
            }),
            ..Default::default()
        },
    );

    let user_schema = Schema {
        defaults: user_defaults,
        keys: user_keys,
        cmek: None,
    };

    // Manual merge: defaults are merged via `merge_value_types`; each user key
    // is merged with the default schema's entry when one exists, otherwise it
    // is inserted unchanged.
    let base_schema = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults =
        Schema::merge_value_types(&base_schema.defaults, &user_schema.defaults, KnnIndex::Hnsw)
            .unwrap();
    let mut merged_keys = base_schema.keys.clone();
    for (key, user_value_types) in user_schema.keys {
        let entry = match merged_keys.get(&key) {
            Some(existing) => {
                Schema::merge_value_types(existing, &user_value_types, KnnIndex::Hnsw).unwrap()
            }
            None => user_value_types,
        };
        merged_keys.insert(key, entry);
    }
    let merged = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
        cmek: None,
    };

    // String defaults follow the user: inverted index off, FTS on.
    let merged_string = merged.defaults.string.as_ref().unwrap();
    assert!(!merged_string.string_inverted_index.as_ref().unwrap().enabled);
    assert!(merged_string.fts_index.as_ref().unwrap().enabled);

    // Vector defaults: user values where given.
    let merged_float_list = merged.defaults.float_list.as_ref().unwrap();
    let vector_config = &merged_float_list.vector_index.as_ref().unwrap().config;
    assert_eq!(vector_config.space, Some(Space::Ip));
    assert_eq!(vector_config.embedding_function, None);
    assert_eq!(
        vector_config.source_key,
        Some("custom_vector_key".to_string())
    );
    let merged_hnsw = vector_config.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(400));
    assert_eq!(merged_hnsw.max_neighbors, Some(32));
    // `ef_search` was left unset by the user, so the crate default is expected.
    assert_eq!(merged_hnsw.ef_search, Some(default_search_ef()));

    // The default schema's keys survive and the custom key is added.
    assert!(merged.keys.contains_key(EMBEDDING_KEY));
    assert!(merged.keys.contains_key(DOCUMENT_KEY));
    assert!(merged.keys.contains_key("custom_field"));

    // The custom key is stored as supplied: FTS on, no inverted index entry.
    let custom_string = merged
        .keys
        .get("custom_field")
        .unwrap()
        .string
        .as_ref()
        .unwrap();
    assert!(custom_string.fts_index.as_ref().unwrap().enabled);
    assert!(custom_string.string_inverted_index.is_none());
}
3706
/// Reconciling a schema with the very collection config it was derived from
/// must return that schema unchanged.
#[test]
fn test_reconcile_with_collection_config_default_config() {
    let config = InternalCollectionConfiguration::default_hnsw();
    // Derive the schema directly from the config.
    let derived = Schema::try_from(&config).unwrap();

    let reconciled =
        Schema::reconcile_with_collection_config(&derived, &config, KnnIndex::Hnsw).unwrap();

    assert_eq!(reconciled, derived);
}
3718
/// `default_hnsw()` config + default HNSW schema, reconciled with
/// `KnnIndex::Hnsw`: the defaults' vector index must hold hnsw and no spann.
#[test]
fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_hnsw() {
    let config = InternalCollectionConfiguration::default_hnsw();
    let default_schema = Schema::new_default(KnnIndex::Hnsw);

    let reconciled =
        Schema::reconcile_with_collection_config(&default_schema, &config, KnnIndex::Hnsw)
            .unwrap();

    let float_list = reconciled.defaults.float_list.as_ref();
    assert!(float_list.is_some());

    // Inspect the vector index config stored under the schema defaults.
    let vector_config = &float_list.unwrap().vector_index.as_ref().unwrap().config;
    assert!(vector_config.hnsw.is_some());
    assert!(vector_config.spann.is_none());
}
3753
/// `default_hnsw()` config + default HNSW schema, reconciled with
/// `KnnIndex::Spann`: the defaults' vector index must hold spann and no hnsw.
#[test]
fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_spann() {
    let config = InternalCollectionConfiguration::default_hnsw();
    let default_schema = Schema::new_default(KnnIndex::Hnsw);

    let reconciled =
        Schema::reconcile_with_collection_config(&default_schema, &config, KnnIndex::Spann)
            .unwrap();

    let float_list = reconciled.defaults.float_list.as_ref();
    assert!(float_list.is_some());

    // Inspect the vector index config stored under the schema defaults.
    let vector_config = &float_list.unwrap().vector_index.as_ref().unwrap().config;
    assert!(vector_config.spann.is_some());
    assert!(vector_config.hnsw.is_none());
}
3787
/// `default_hnsw()` config + default SPANN schema, reconciled with
/// `KnnIndex::Hnsw`: the defaults' vector index must hold hnsw and no spann.
#[test]
fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_hnsw() {
    let config = InternalCollectionConfiguration::default_hnsw();
    let default_schema = Schema::new_default(KnnIndex::Spann);

    let reconciled =
        Schema::reconcile_with_collection_config(&default_schema, &config, KnnIndex::Hnsw)
            .unwrap();

    let float_list = reconciled.defaults.float_list.as_ref();
    assert!(float_list.is_some());

    // Inspect the vector index config stored under the schema defaults.
    let vector_config = &float_list.unwrap().vector_index.as_ref().unwrap().config;
    assert!(vector_config.hnsw.is_some());
    assert!(vector_config.spann.is_none());
}
3821
/// `default_hnsw()` config + default SPANN schema, reconciled with
/// `KnnIndex::Spann`: the defaults' vector index must hold spann and no hnsw.
#[test]
fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_spann() {
    let config = InternalCollectionConfiguration::default_hnsw();
    let default_schema = Schema::new_default(KnnIndex::Spann);

    let reconciled =
        Schema::reconcile_with_collection_config(&default_schema, &config, KnnIndex::Spann)
            .unwrap();

    let float_list = reconciled.defaults.float_list.as_ref();
    assert!(float_list.is_some());

    // Inspect the vector index config stored under the schema defaults.
    let vector_config = &float_list.unwrap().vector_index.as_ref().unwrap().config;
    assert!(vector_config.spann.is_some());
    assert!(vector_config.hnsw.is_none());
}
3855
/// `default_spann()` config + default SPANN schema, reconciled with
/// `KnnIndex::Hnsw`: the defaults' vector index must hold hnsw and no spann.
#[test]
fn test_reconcile_double_default_spann_config_spann_schema_default_knn_hnsw() {
    let config = InternalCollectionConfiguration::default_spann();
    let default_schema = Schema::new_default(KnnIndex::Spann);

    let reconciled =
        Schema::reconcile_with_collection_config(&default_schema, &config, KnnIndex::Hnsw)
            .unwrap();

    let float_list = reconciled.defaults.float_list.as_ref();
    assert!(float_list.is_some());

    // Inspect the vector index config stored under the schema defaults.
    let vector_config = &float_list.unwrap().vector_index.as_ref().unwrap().config;
    assert!(vector_config.hnsw.is_some());
    assert!(vector_config.spann.is_none());
}
3889
/// `default_spann()` config + default SPANN schema, reconciled with
/// `KnnIndex::Spann`: the defaults' vector index must hold spann, no hnsw,
/// and no source key.
#[test]
fn test_reconcile_double_default_spann_config_spann_schema_default_knn_spann() {
    let config = InternalCollectionConfiguration::default_spann();
    let default_schema = Schema::new_default(KnnIndex::Spann);

    let reconciled =
        Schema::reconcile_with_collection_config(&default_schema, &config, KnnIndex::Spann)
            .unwrap();

    let float_list = reconciled.defaults.float_list.as_ref();
    assert!(float_list.is_some());

    // Inspect the vector index config stored under the schema defaults.
    let vector_config = &float_list.unwrap().vector_index.as_ref().unwrap().config;
    assert!(vector_config.spann.is_some());
    assert!(vector_config.hnsw.is_none());
    // The defaults entry is expected to carry no source key.
    assert_eq!(vector_config.source_key, None);
}
3937
/// `default_spann()` config + default HNSW schema, reconciled with
/// `KnnIndex::Hnsw`: the defaults' vector index must hold hnsw and no spann.
#[test]
fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_hnsw() {
    let config = InternalCollectionConfiguration::default_spann();
    let default_schema = Schema::new_default(KnnIndex::Hnsw);

    let reconciled =
        Schema::reconcile_with_collection_config(&default_schema, &config, KnnIndex::Hnsw)
            .unwrap();

    let float_list = reconciled.defaults.float_list.as_ref();
    assert!(float_list.is_some());

    // Inspect the vector index config stored under the schema defaults.
    let vector_config = &float_list.unwrap().vector_index.as_ref().unwrap().config;
    assert!(vector_config.hnsw.is_some());
    assert!(vector_config.spann.is_none());
}
3971
/// `default_spann()` config + default HNSW schema, reconciled with
/// `KnnIndex::Spann`: the defaults' vector index must hold spann and no hnsw.
#[test]
fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_spann() {
    let config = InternalCollectionConfiguration::default_spann();
    let default_schema = Schema::new_default(KnnIndex::Hnsw);

    let reconciled =
        Schema::reconcile_with_collection_config(&default_schema, &config, KnnIndex::Spann)
            .unwrap();

    let float_list = reconciled.defaults.float_list.as_ref();
    assert!(float_list.is_some());

    // Inspect the vector index config stored under the schema defaults.
    let vector_config = &float_list.unwrap().vector_index.as_ref().unwrap().config;
    assert!(vector_config.spann.is_some());
    assert!(vector_config.hnsw.is_none());
}
4005
/// Checks where the document source key lives: the `defaults` vector index has
/// no source key (before and after reconciliation), while the embedding key's
/// vector index points at `DOCUMENT_KEY`.
#[test]
fn test_defaults_source_key_not_document() {
    let hnsw_schema = Schema::new_default(KnnIndex::Hnsw);
    let spann_schema = Schema::new_default(KnnIndex::Spann);

    // Fresh default schemas (HNSW first, then SPANN): no source key on defaults.
    for fresh in [&hnsw_schema, &spann_schema] {
        let float_list = fresh.defaults.float_list.as_ref().unwrap();
        let vector_index = float_list.vector_index.as_ref().unwrap();
        assert_eq!(vector_index.config.source_key, None);
    }

    // Reconcile the HNSW schema with a non-default HNSW collection config.
    let hnsw_collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };
    let reconciled_hnsw = Schema::reconcile_with_collection_config(
        &hnsw_schema,
        &hnsw_collection_config,
        KnnIndex::Hnsw,
    )
    .unwrap();
    {
        let float_list = reconciled_hnsw.defaults.float_list.as_ref().unwrap();
        let vector_index = float_list.vector_index.as_ref().unwrap();
        assert_eq!(vector_index.config.source_key, None);
    }

    // Reconcile the SPANN schema with a non-default SPANN collection config.
    let spann_collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
            search_nprobe: 20,
            search_rng_factor: 3.0,
            search_rng_epsilon: 0.2,
            nreplica_count: 5,
            write_rng_factor: 2.0,
            write_rng_epsilon: 0.1,
            split_threshold: 2000,
            num_samples_kmeans: 200,
            initial_lambda: 0.8,
            reassign_neighbor_count: 100,
            merge_threshold: 800,
            num_centers_to_merge_to: 20,
            write_nprobe: 10,
            ef_construction: 400,
            ef_search: 60,
            max_neighbors: 24,
            space: Space::Cosine,
        }),
        embedding_function: None,
    };
    let reconciled_spann = Schema::reconcile_with_collection_config(
        &spann_schema,
        &spann_collection_config,
        KnnIndex::Spann,
    )
    .unwrap();
    {
        let float_list = reconciled_spann.defaults.float_list.as_ref().unwrap();
        let vector_index = float_list.vector_index.as_ref().unwrap();
        assert_eq!(vector_index.config.source_key, None);
    }

    // In both reconciled schemas the embedding key's vector index is expected
    // to name `DOCUMENT_KEY` as its source.
    for reconciled in [&reconciled_hnsw, &reconciled_spann] {
        let embedding = reconciled.keys.get(EMBEDDING_KEY).unwrap();
        let float_list = embedding.float_list.as_ref().unwrap();
        let vector_index = float_list.vector_index.as_ref().unwrap();
        assert_eq!(
            vector_index.config.source_key,
            Some(DOCUMENT_KEY.to_string())
        );
    }
}
4130
/// Converts HNSW and SPANN collection configs into schemas via `try_from` and
/// checks that only the embedding key's vector index carries `DOCUMENT_KEY` as
/// its source key; the `defaults` vector index has none.
#[test]
fn test_try_from_source_key() {
    // --- HNSW collection config ---
    let hnsw_collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };
    let hnsw_schema = Schema::try_from(&hnsw_collection_config).unwrap();

    // Defaults: no source key.
    let hnsw_defaults_float_list = hnsw_schema.defaults.float_list.as_ref().unwrap();
    let hnsw_defaults_index = hnsw_defaults_float_list.vector_index.as_ref().unwrap();
    assert_eq!(hnsw_defaults_index.config.source_key, None);

    // Embedding key: source key is the document key.
    let hnsw_embedding = hnsw_schema.keys.get(EMBEDDING_KEY).unwrap();
    let hnsw_embedding_float_list = hnsw_embedding.float_list.as_ref().unwrap();
    let hnsw_embedding_index = hnsw_embedding_float_list.vector_index.as_ref().unwrap();
    assert_eq!(
        hnsw_embedding_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );

    // --- SPANN collection config ---
    let spann_collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
            search_nprobe: 20,
            search_rng_factor: 3.0,
            search_rng_epsilon: 0.2,
            nreplica_count: 5,
            write_rng_factor: 2.0,
            write_rng_epsilon: 0.1,
            split_threshold: 2000,
            num_samples_kmeans: 200,
            initial_lambda: 0.8,
            reassign_neighbor_count: 100,
            merge_threshold: 800,
            num_centers_to_merge_to: 20,
            write_nprobe: 10,
            ef_construction: 400,
            ef_search: 60,
            max_neighbors: 24,
            space: Space::Cosine,
        }),
        embedding_function: None,
    };
    let spann_schema = Schema::try_from(&spann_collection_config).unwrap();

    // Defaults: no source key.
    let spann_defaults_float_list = spann_schema.defaults.float_list.as_ref().unwrap();
    let spann_defaults_index = spann_defaults_float_list.vector_index.as_ref().unwrap();
    assert_eq!(spann_defaults_index.config.source_key, None);

    // Embedding key: source key is the document key.
    let spann_embedding = spann_schema.keys.get(EMBEDDING_KEY).unwrap();
    let spann_embedding_float_list = spann_embedding.float_list.as_ref().unwrap();
    let spann_embedding_index = spann_embedding_float_list.vector_index.as_ref().unwrap();
    assert_eq!(
        spann_embedding_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
}
4227
/// A default HNSW config paired with the "default" known embedding function is
/// still reported as default, and reconciling it with `KnnIndex::Spann` yields
/// a spann-only vector index that keeps the embedding function.
#[test]
fn test_default_hnsw_with_default_embedding_function() {
    use crate::collection_configuration::EmbeddingFunctionNewConfiguration;

    // Builds the embedding function value used both as input and as the
    // expected output below.
    let default_embedding_function = || {
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "default".to_string(),
            config: serde_json::json!({}),
        })
    };

    let collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default()),
        embedding_function: Some(default_embedding_function()),
    };

    // The config must still count as default.
    assert!(collection_config.is_default());

    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &collection_config,
        KnnIndex::Spann,
    )
    .unwrap();

    // Defaults entry: no source key.
    let defaults_float_list = reconciled.defaults.float_list.as_ref().unwrap();
    let defaults_index = defaults_float_list.vector_index.as_ref().unwrap();
    assert_eq!(defaults_index.config.source_key, None);

    // Embedding key entry: source key is the document key.
    let embedding_value_types = reconciled.keys.get(EMBEDDING_KEY).unwrap();
    let embedding_float_list = embedding_value_types.float_list.as_ref().unwrap();
    let embedding_index = embedding_float_list.vector_index.as_ref().unwrap();
    assert_eq!(
        embedding_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );

    // Reconciled for SPANN: spann section present, hnsw section absent.
    let defaults_config = &defaults_index.config;
    assert!(defaults_config.spann.is_some());
    assert!(defaults_config.hnsw.is_none());

    // The embedding function shows up on both the embedding key and defaults.
    assert_eq!(
        embedding_index.config.embedding_function,
        Some(default_embedding_function())
    );
    assert_eq!(
        defaults_config.embedding_function,
        Some(default_embedding_function())
    );
}
4303
/// When both the schema and the collection config have been customised,
/// `reconcile_schema_and_config` must fail with `ConfigAndSchemaConflict`.
#[test]
fn test_reconcile_with_collection_config_both_non_default() {
    // Customise the schema by replacing its string defaults.
    let mut custom_schema = Schema::new_default(KnnIndex::Hnsw);
    custom_schema.defaults.string = Some(StringValueType {
        fts_index: Some(FtsIndexType {
            enabled: true,
            config: FtsIndexConfig {},
        }),
        string_inverted_index: None,
    });

    // Customise the collection config by changing one HNSW parameter.
    let mut custom_config = InternalCollectionConfiguration::default_hnsw();
    if let VectorIndexConfiguration::Hnsw(hnsw) = &mut custom_config.vector_index {
        hnsw.ef_construction = 500;
    }

    let outcome = Schema::reconcile_schema_and_config(
        Some(&custom_schema),
        Some(&custom_config),
        KnnIndex::Spann,
    );

    // The call must fail, and specifically with the conflict variant.
    assert!(matches!(
        outcome,
        Err(SchemaError::ConfigAndSchemaConflict)
    ));
}
4335
/// Reconciles a default HNSW schema with a non-default HNSW collection config
/// and checks that the embedding key's vector index reflects every config value.
#[test]
fn test_reconcile_with_collection_config_hnsw_override() {
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };

    let reconciled =
        Schema::reconcile_with_collection_config(&default_schema, &custom_config, KnnIndex::Hnsw)
            .unwrap();

    // Look at the vector index stored under the embedding key.
    let embedding_value_types = reconciled.keys.get(EMBEDDING_KEY).unwrap();
    let embedding_float_list = embedding_value_types.float_list.as_ref().unwrap();
    let embedding_index = embedding_float_list.vector_index.as_ref().unwrap();
    let index_config = &embedding_index.config;

    assert!(embedding_index.enabled);
    assert_eq!(index_config.space, Some(Space::L2));
    assert_eq!(
        index_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(index_config.source_key, Some(DOCUMENT_KEY.to_string()));

    // Every HNSW parameter from the collection config must be present.
    let hnsw = index_config.hnsw.as_ref().unwrap();
    assert_eq!(hnsw.ef_construction, Some(300));
    assert_eq!(hnsw.max_neighbors, Some(32));
    assert_eq!(hnsw.ef_search, Some(50));
    assert_eq!(hnsw.num_threads, Some(8));
    assert_eq!(hnsw.batch_size, Some(200));
    assert_eq!(hnsw.sync_threshold, Some(2000));
    assert_eq!(hnsw.resize_factor, Some(1.5));

    // No SPANN section is expected alongside it.
    assert!(index_config.spann.is_none());
}
4391
/// Reconciles a default SPANN schema with a non-default SPANN collection config
/// and checks that the embedding key's vector index reflects every config value.
#[test]
fn test_reconcile_with_collection_config_spann_override() {
    let default_schema = Schema::new_default(KnnIndex::Spann);
    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
            search_nprobe: 20,
            search_rng_factor: 3.0,
            search_rng_epsilon: 0.2,
            nreplica_count: 5,
            write_rng_factor: 2.0,
            write_rng_epsilon: 0.1,
            split_threshold: 2000,
            num_samples_kmeans: 200,
            initial_lambda: 0.8,
            reassign_neighbor_count: 100,
            merge_threshold: 800,
            num_centers_to_merge_to: 20,
            write_nprobe: 10,
            ef_construction: 400,
            ef_search: 60,
            max_neighbors: 24,
            space: Space::Cosine,
        }),
        embedding_function: None,
    };

    let reconciled =
        Schema::reconcile_with_collection_config(&default_schema, &custom_config, KnnIndex::Spann)
            .unwrap();

    // Look at the vector index stored under the embedding key.
    let embedding_value_types = reconciled.keys.get(EMBEDDING_KEY).unwrap();
    let embedding_float_list = embedding_value_types.float_list.as_ref().unwrap();
    let embedding_index = embedding_float_list.vector_index.as_ref().unwrap();
    let index_config = &embedding_index.config;

    assert!(embedding_index.enabled);
    assert_eq!(index_config.space, Some(Space::Cosine));
    assert_eq!(index_config.embedding_function, None);
    assert_eq!(index_config.source_key, Some(DOCUMENT_KEY.to_string()));

    // No HNSW section is expected for a SPANN config.
    assert!(index_config.hnsw.is_none());

    // Every SPANN parameter from the collection config must be present.
    let spann = index_config.spann.as_ref().unwrap();
    assert_eq!(spann.search_nprobe, Some(20));
    assert_eq!(spann.search_rng_factor, Some(3.0));
    assert_eq!(spann.search_rng_epsilon, Some(0.2));
    assert_eq!(spann.nreplica_count, Some(5));
    assert_eq!(spann.write_rng_factor, Some(2.0));
    assert_eq!(spann.write_rng_epsilon, Some(0.1));
    assert_eq!(spann.split_threshold, Some(2000));
    assert_eq!(spann.num_samples_kmeans, Some(200));
    assert_eq!(spann.initial_lambda, Some(0.8));
    assert_eq!(spann.reassign_neighbor_count, Some(100));
    assert_eq!(spann.merge_threshold, Some(800));
    assert_eq!(spann.num_centers_to_merge_to, Some(20));
    assert_eq!(spann.write_nprobe, Some(10));
    assert_eq!(spann.ef_construction, Some(400));
    assert_eq!(spann.ef_search, Some(60));
    assert_eq!(spann.max_neighbors, Some(24));
}
4462
/// Reconciling with a non-default HNSW collection config must write the config
/// values into both the `defaults` vector index and the embedding key's vector
/// index, which differ only in `enabled` and `source_key`.
#[test]
fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
    let default_schema = Schema::new_default(KnnIndex::Hnsw);

    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };

    let reconciled =
        Schema::reconcile_with_collection_config(&default_schema, &custom_config, KnnIndex::Hnsw)
            .unwrap();

    // --- defaults entry: disabled, no source key, config values applied ---
    let defaults_float_list = reconciled.defaults.float_list.as_ref().unwrap();
    let defaults_index = defaults_float_list.vector_index.as_ref().unwrap();

    assert!(!defaults_index.enabled);
    assert_eq!(defaults_index.config.space, Some(Space::L2));
    assert_eq!(
        defaults_index.config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(defaults_index.config.source_key, None);
    let defaults_hnsw_section = defaults_index.config.hnsw.as_ref().unwrap();
    assert_eq!(defaults_hnsw_section.ef_construction, Some(300));
    assert_eq!(defaults_hnsw_section.max_neighbors, Some(32));

    // --- embedding key entry: enabled, document source key, same values ---
    let embedding_value_types = reconciled.keys.get(EMBEDDING_KEY).unwrap();
    let embedding_float_list = embedding_value_types.float_list.as_ref().unwrap();
    let embedding_index = embedding_float_list.vector_index.as_ref().unwrap();

    assert!(embedding_index.enabled);
    assert_eq!(embedding_index.config.space, Some(Space::L2));
    assert_eq!(
        embedding_index.config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(
        embedding_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
    let embedding_hnsw_section = embedding_index.config.hnsw.as_ref().unwrap();
    assert_eq!(embedding_hnsw_section.ef_construction, Some(300));
    assert_eq!(embedding_hnsw_section.max_neighbors, Some(32));
}
4536
/// `Schema::is_default` is true for freshly built default schemas and false
/// once an index flag is changed or an extra key is added.
#[test]
fn test_is_schema_default() {
    // Untouched defaults for both index kinds report as default.
    assert!(Schema::new_default(KnnIndex::Hnsw).is_default());
    assert!(Schema::new_default(KnnIndex::Spann).is_default());

    // Disabling the string inverted index (when present) breaks default-ness.
    let mut tweaked = Schema::new_default(KnnIndex::Hnsw);
    let inverted_index = tweaked
        .defaults
        .string
        .as_mut()
        .and_then(|string_type| string_type.string_inverted_index.as_mut());
    if let Some(inverted_index) = inverted_index {
        inverted_index.enabled = false;
    }
    assert!(!tweaked.is_default());

    // Adding an extra key, even with default value types, breaks it too.
    let mut with_extra_key = Schema::new_default(KnnIndex::Hnsw);
    with_extra_key
        .keys
        .insert("custom_key".to_string(), ValueTypes::default());
    assert!(!with_extra_key.is_default());
}
4563
/// Setting a vector `space`, either on the defaults or on the embedding key,
/// makes `Schema::is_default` return false.
#[test]
fn test_is_schema_default_with_space() {
    let untouched = Schema::new_default(KnnIndex::Hnsw);
    assert!(untouched.is_default());

    // Space set on the defaults' vector index (HNSW schema).
    let mut space_on_defaults = Schema::new_default(KnnIndex::Hnsw);
    let defaults_index = space_on_defaults
        .defaults
        .float_list
        .as_mut()
        .and_then(|float_list| float_list.vector_index.as_mut());
    if let Some(vector_index) = defaults_index {
        vector_index.config.space = Some(Space::Cosine);
    }
    assert!(!space_on_defaults.is_default());

    // Space set on the embedding key's vector index (SPANN schema).
    let mut space_on_embedding_key = Schema::new_default(KnnIndex::Spann);
    let embedding_index = space_on_embedding_key
        .keys
        .get_mut(EMBEDDING_KEY)
        .and_then(|value_types| value_types.float_list.as_mut())
        .and_then(|float_list| float_list.vector_index.as_mut());
    if let Some(vector_index) = embedding_index {
        vector_index.config.space = Some(Space::Cosine);
    }
    assert!(!space_on_embedding_key.is_default());
}
4590
/// Setting an embedding function on the vector index (in the defaults or on
/// the embedding key) must make `Schema::is_default` return `false`.
#[test]
fn test_is_schema_default_with_embedding_function() {
    assert!(Schema::new_default(KnnIndex::Hnsw).is_default());

    // Embedding function configured on the default float-list vector index.
    let mut function_in_defaults = Schema::new_default(KnnIndex::Hnsw);
    if let Some(vector_index) = function_in_defaults
        .defaults
        .float_list
        .as_mut()
        .and_then(|float_list| float_list.vector_index.as_mut())
    {
        vector_index.config.embedding_function = Some(EmbeddingFunctionConfiguration::Legacy);
    }
    assert!(!function_in_defaults.is_default());

    // Embedding function configured on the embedding key override.
    let mut function_in_embedding_key = Schema::new_default(KnnIndex::Spann);
    if let Some(vector_index) = function_in_embedding_key
        .keys
        .get_mut(EMBEDDING_KEY)
        .and_then(|value_types| value_types.float_list.as_mut())
        .and_then(|float_list| float_list.vector_index.as_mut())
    {
        vector_index.config.embedding_function = Some(EmbeddingFunctionConfiguration::Legacy);
    }
    assert!(!function_in_embedding_key.is_default());
}
4620
/// Merging two schemas that override the same key for different value types
/// must keep both overrides on the merged key.
#[test]
fn test_add_merges_keys_by_value_type() {
    // Schema A overrides the string inverted index for `custom_field`.
    let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
    schema_a.keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            string: Some(StringValueType {
                string_inverted_index: Some(StringInvertedIndexType {
                    enabled: true,
                    config: StringInvertedIndexConfig {},
                }),
                fts_index: None,
            }),
            ..Default::default()
        },
    );

    // Schema B overrides the float inverted index for the same key.
    let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
    schema_b.keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            float: Some(FloatValueType {
                float_inverted_index: Some(FloatInvertedIndexType {
                    enabled: true,
                    config: FloatInvertedIndexConfig {},
                }),
            }),
            ..Default::default()
        },
    );

    let merged = schema_a.merge(&schema_b).unwrap();
    let merged_types = merged.keys.get("custom_field").unwrap();

    // Project each override down to its `enabled` flag; `None` at any level
    // means the override was lost during the merge.
    let string_enabled = merged_types
        .string
        .as_ref()
        .and_then(|string_type| string_type.string_inverted_index.as_ref())
        .map(|index| index.enabled);
    let float_enabled = merged_types
        .float
        .as_ref()
        .and_then(|float_type| float_type.float_inverted_index.as_ref())
        .map(|index| index.enabled);

    assert_eq!(string_enabled, Some(true));
    assert_eq!(float_enabled, Some(true));
}
4679
/// Merging schemas whose defaults differ must fail with
/// `SchemaError::DefaultsMismatch`.
#[test]
fn test_add_rejects_different_defaults() {
    let baseline = Schema::new_default(KnnIndex::Hnsw);

    // Diverge from the baseline by disabling the default string inverted index.
    let mut diverged = Schema::new_default(KnnIndex::Hnsw);
    if let Some(ref mut string_type) = diverged.defaults.string {
        if let Some(ref mut inverted_index) = string_type.string_inverted_index {
            inverted_index.enabled = false;
        }
    }

    let outcome = baseline.merge(&diverged);
    assert!(matches!(outcome, Err(SchemaError::DefaultsMismatch)));
}
4694
/// Merging schemas that configure the same index on the same key with
/// different `enabled` flags must fail with `SchemaError::ConfigurationConflict`.
#[test]
fn test_add_detects_conflicting_value_type_configuration() {
    // Builds a key override containing only a string inverted index.
    let string_override = |enabled: bool| ValueTypes {
        string: Some(StringValueType {
            string_inverted_index: Some(StringInvertedIndexType {
                enabled,
                config: StringInvertedIndexConfig {},
            }),
            fts_index: None,
        }),
        ..Default::default()
    };

    let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
    schema_a
        .keys
        .insert("custom_field".to_string(), string_override(true));

    let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
    schema_b
        .keys
        .insert("custom_field".to_string(), string_override(false));

    let outcome = schema_a.merge(&schema_b);
    assert!(matches!(
        outcome,
        Err(SchemaError::ConfigurationConflict { .. })
    ));
}
4731
/// The legacy JSON spelling (`#`/`$`-prefixed names and `key_overrides`) must
/// deserialize to the same `Schema` as the current spelling, and serialization
/// must emit only the current names.
#[test]
fn test_backward_compatibility_aliases() {
    // Legacy spelling: `#type` / `$index` names and a `key_overrides` map.
    let old_format_json = r###"{
        "defaults": {
            "#string": {
                "$fts_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "#int": {
                "$int_inverted_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "#float_list": {
                "$vector_index": {
                    "enabled": true,
                    "config": {
                        "spann": {
                            "search_nprobe": 10
                        }
                    }
                }
            }
        },
        "key_overrides": {
            "#document": {
                "#string": {
                    "$fts_index": {
                        "enabled": false,
                        "config": {}
                    }
                }
            }
        }
    }"###;

    // Current spelling: bare names and a `keys` map.
    let new_format_json = r###"{
        "defaults": {
            "string": {
                "fts_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "int": {
                "int_inverted_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "float_list": {
                "vector_index": {
                    "enabled": true,
                    "config": {
                        "spann": {
                            "search_nprobe": 10
                        }
                    }
                }
            }
        },
        "keys": {
            "#document": {
                "string": {
                    "fts_index": {
                        "enabled": false,
                        "config": {}
                    }
                }
            }
        }
    }"###;

    let schema_from_old: Schema = serde_json::from_str(old_format_json).unwrap();
    let schema_from_new: Schema = serde_json::from_str(new_format_json).unwrap();

    // Both spellings describe the same schema.
    assert_eq!(schema_from_old, schema_from_new);

    // Spot-check that the legacy document populated the expected defaults.
    let defaults = &schema_from_old.defaults;
    assert!(defaults
        .string
        .as_ref()
        .and_then(|string_type| string_type.fts_index.as_ref())
        .is_some_and(|fts| fts.enabled));
    assert!(defaults
        .int
        .as_ref()
        .is_some_and(|int_type| int_type.int_inverted_index.is_some()));
    assert!(defaults
        .float_list
        .as_ref()
        .is_some_and(|float_list| float_list.vector_index.is_some()));

    // The legacy `key_overrides` entry must land in `keys` with FTS disabled.
    assert!(schema_from_old.keys.contains_key(DOCUMENT_KEY));
    let document_types = schema_from_old.keys.get(DOCUMENT_KEY).unwrap();
    assert!(document_types
        .string
        .as_ref()
        .and_then(|string_type| string_type.fts_index.as_ref())
        .is_some_and(|fts| !fts.enabled));

    let serialized = serde_json::to_string(&schema_from_old).unwrap();

    // Serialization uses the current names...
    for current_name in [
        r#""keys":"#,
        r#""string":"#,
        r#""fts_index":"#,
        r#""int_inverted_index":"#,
        r#""vector_index":"#,
    ] {
        assert!(serialized.contains(current_name));
    }

    // ...and never the legacy aliases.
    for legacy_name in [
        r#""key_overrides":"#,
        r###""#string":"###,
        r###""$fts_index":"###,
        r###""$int_inverted_index":"###,
        r###""$vector_index":"###,
    ] {
        assert!(!serialized.contains(legacy_name));
    }
}
4888
/// Exercises `HnswIndexConfig::validate`: `batch_size` and `sync_threshold`
/// of 1 are rejected while 2 is accepted, and the other fields accept small
/// values.
#[test]
fn test_hnsw_index_config_validation() {
    use validator::Validate;

    let is_valid = |config: HnswIndexConfig| config.validate().is_ok();

    // A fully reasonable configuration passes.
    assert!(is_valid(HnswIndexConfig {
        batch_size: Some(10),
        sync_threshold: Some(100),
        ef_construction: Some(100),
        max_neighbors: Some(16),
        ..Default::default()
    }));

    // batch_size of 1 is rejected.
    assert!(!is_valid(HnswIndexConfig {
        batch_size: Some(1),
        ..Default::default()
    }));

    // sync_threshold of 1 is rejected.
    assert!(!is_valid(HnswIndexConfig {
        sync_threshold: Some(1),
        ..Default::default()
    }));

    // The value 2 is accepted for both fields.
    assert!(is_valid(HnswIndexConfig {
        batch_size: Some(2),
        sync_threshold: Some(2),
        ..Default::default()
    }));

    // The default configuration passes.
    assert!(is_valid(HnswIndexConfig::default()));

    // Small values in the remaining fields are accepted.
    assert!(is_valid(HnswIndexConfig {
        ef_construction: Some(1),
        max_neighbors: Some(1),
        ef_search: Some(1),
        num_threads: Some(1),
        resize_factor: Some(0.1),
        ..Default::default()
    }));
}
4942
/// Exercises `SpannIndexConfig::validate` field by field: each case sets a
/// single field (or a full valid set) and checks whether validation accepts it.
#[test]
fn test_spann_index_config_validation() {
    use validator::Validate;

    let is_valid = |config: SpannIndexConfig| config.validate().is_ok();

    // A configuration with every checked field set to an accepted value.
    assert!(is_valid(SpannIndexConfig {
        write_nprobe: Some(32),
        nreplica_count: Some(4),
        split_threshold: Some(100),
        merge_threshold: Some(50),
        reassign_neighbor_count: Some(32),
        num_centers_to_merge_to: Some(4),
        ef_construction: Some(100),
        ef_search: Some(100),
        max_neighbors: Some(32),
        search_rng_factor: Some(1.0),
        write_rng_factor: Some(1.0),
        search_rng_epsilon: Some(7.5),
        write_rng_epsilon: Some(7.5),
        ..Default::default()
    }));

    // write_nprobe
    assert!(!is_valid(SpannIndexConfig {
        write_nprobe: Some(200),
        ..Default::default()
    }));

    // split_threshold: too low, then too high
    assert!(!is_valid(SpannIndexConfig {
        split_threshold: Some(10),
        ..Default::default()
    }));
    assert!(!is_valid(SpannIndexConfig {
        split_threshold: Some(250),
        ..Default::default()
    }));

    // nreplica_count
    assert!(!is_valid(SpannIndexConfig {
        nreplica_count: Some(10),
        ..Default::default()
    }));

    // reassign_neighbor_count
    assert!(!is_valid(SpannIndexConfig {
        reassign_neighbor_count: Some(100),
        ..Default::default()
    }));

    // merge_threshold: too low, then too high
    assert!(!is_valid(SpannIndexConfig {
        merge_threshold: Some(5),
        ..Default::default()
    }));
    assert!(!is_valid(SpannIndexConfig {
        merge_threshold: Some(150),
        ..Default::default()
    }));

    // num_centers_to_merge_to
    assert!(!is_valid(SpannIndexConfig {
        num_centers_to_merge_to: Some(10),
        ..Default::default()
    }));

    // ef_construction
    assert!(!is_valid(SpannIndexConfig {
        ef_construction: Some(300),
        ..Default::default()
    }));

    // ef_search
    assert!(!is_valid(SpannIndexConfig {
        ef_search: Some(300),
        ..Default::default()
    }));

    // max_neighbors
    assert!(!is_valid(SpannIndexConfig {
        max_neighbors: Some(100),
        ..Default::default()
    }));

    // search_nprobe
    assert!(!is_valid(SpannIndexConfig {
        search_nprobe: Some(200),
        ..Default::default()
    }));

    // search_rng_factor: below, above, then the accepted value
    assert!(!is_valid(SpannIndexConfig {
        search_rng_factor: Some(0.9),
        ..Default::default()
    }));
    assert!(!is_valid(SpannIndexConfig {
        search_rng_factor: Some(1.1),
        ..Default::default()
    }));
    assert!(is_valid(SpannIndexConfig {
        search_rng_factor: Some(1.0),
        ..Default::default()
    }));

    // search_rng_epsilon: below, above, then an accepted value
    assert!(!is_valid(SpannIndexConfig {
        search_rng_epsilon: Some(4.0),
        ..Default::default()
    }));
    assert!(!is_valid(SpannIndexConfig {
        search_rng_epsilon: Some(11.0),
        ..Default::default()
    }));
    assert!(is_valid(SpannIndexConfig {
        search_rng_epsilon: Some(7.5),
        ..Default::default()
    }));

    // write_rng_factor: below, above, then the accepted value
    assert!(!is_valid(SpannIndexConfig {
        write_rng_factor: Some(0.9),
        ..Default::default()
    }));
    assert!(!is_valid(SpannIndexConfig {
        write_rng_factor: Some(1.1),
        ..Default::default()
    }));
    assert!(is_valid(SpannIndexConfig {
        write_rng_factor: Some(1.0),
        ..Default::default()
    }));

    // write_rng_epsilon: below, above, then an accepted value
    assert!(!is_valid(SpannIndexConfig {
        write_rng_epsilon: Some(4.0),
        ..Default::default()
    }));
    assert!(!is_valid(SpannIndexConfig {
        write_rng_epsilon: Some(11.0),
        ..Default::default()
    }));
    assert!(is_valid(SpannIndexConfig {
        write_rng_epsilon: Some(7.5),
        ..Default::default()
    }));

    // num_samples_kmeans: rejected, then accepted
    assert!(!is_valid(SpannIndexConfig {
        num_samples_kmeans: Some(1500),
        ..Default::default()
    }));
    assert!(is_valid(SpannIndexConfig {
        num_samples_kmeans: Some(500),
        ..Default::default()
    }));

    // initial_lambda: above, below, then the accepted value
    assert!(!is_valid(SpannIndexConfig {
        initial_lambda: Some(150.0),
        ..Default::default()
    }));
    assert!(!is_valid(SpannIndexConfig {
        initial_lambda: Some(50.0),
        ..Default::default()
    }));
    assert!(is_valid(SpannIndexConfig {
        initial_lambda: Some(100.0),
        ..Default::default()
    }));

    // The default configuration passes.
    assert!(is_valid(SpannIndexConfig::default()));
}
5169
/// End-to-end builder workflow: create a global vector index plus per-key
/// inverted indexes, verify them, then delete two and check that they are
/// disabled while the untouched ones stay enabled.
#[test]
fn test_builder_pattern_crud_workflow() {
    let custom_hnsw = HnswIndexConfig {
        ef_construction: Some(200),
        max_neighbors: Some(32),
        ef_search: Some(50),
        num_threads: None,
        batch_size: None,
        sync_threshold: None,
        resize_factor: None,
    };
    let vector_config = VectorIndexConfig {
        space: Some(Space::Cosine),
        embedding_function: None,
        source_key: None,
        hnsw: Some(custom_hnsw),
        spann: None,
    };

    // CREATE: each step consumes the schema and yields the updated one.
    let schema = Schema::new_default(KnnIndex::Hnsw)
        .create_index(None, IndexConfig::Vector(vector_config))
        .expect("vector config should succeed");
    let schema = schema
        .create_index(
            Some("category"),
            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
        )
        .expect("string inverted on key should succeed");
    let schema = schema
        .create_index(
            Some("year"),
            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
        )
        .expect("int inverted on key should succeed");
    let schema = schema
        .create_index(
            Some("rating"),
            IndexConfig::FloatInverted(FloatInvertedIndexConfig {}),
        )
        .expect("float inverted on key should succeed");
    let schema = schema
        .create_index(
            Some("is_active"),
            IndexConfig::BoolInverted(BoolInvertedIndexConfig {}),
        )
        .expect("bool inverted on key should succeed");

    // READ: the global vector config is stored on the embedding key.
    assert!(schema.keys.contains_key(EMBEDDING_KEY));
    let embedding_types = schema.keys.get(EMBEDDING_KEY).unwrap();
    assert!(embedding_types.float_list.is_some());
    let embedding_index = embedding_types
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert!(embedding_index.enabled);
    assert_eq!(embedding_index.config.space, Some(Space::Cosine));
    assert_eq!(
        embedding_index
            .config
            .hnsw
            .as_ref()
            .and_then(|hnsw| hnsw.ef_construction),
        Some(200)
    );

    // READ: every per-key index produced a key override.
    for key in ["category", "year", "rating", "is_active"] {
        assert!(schema.keys.contains_key(key));
    }

    let category_types = schema.keys.get("category").unwrap();
    assert!(category_types
        .string
        .as_ref()
        .and_then(|string_type| string_type.string_inverted_index.as_ref())
        .is_some_and(|index| index.enabled));

    let year_types = schema.keys.get("year").unwrap();
    assert!(year_types
        .int
        .as_ref()
        .and_then(|int_type| int_type.int_inverted_index.as_ref())
        .is_some_and(|index| index.enabled));

    // DELETE: remove the string and int inverted indexes.
    let schema = schema
        .delete_index(
            Some("category"),
            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
        )
        .expect("delete string inverted should succeed");
    let schema = schema
        .delete_index(
            Some("year"),
            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
        )
        .expect("delete int inverted should succeed");

    // Deleted indexes remain present on the key but are disabled.
    let category_types = schema.keys.get("category").unwrap();
    assert!(category_types
        .string
        .as_ref()
        .and_then(|string_type| string_type.string_inverted_index.as_ref())
        .is_some_and(|index| !index.enabled));

    let year_types = schema.keys.get("year").unwrap();
    assert!(year_types
        .int
        .as_ref()
        .and_then(|int_type| int_type.int_inverted_index.as_ref())
        .is_some_and(|index| !index.enabled));

    // Indexes that were not deleted stay enabled.
    let rating_types = schema.keys.get("rating").unwrap();
    assert!(rating_types
        .float
        .as_ref()
        .and_then(|float_type| float_type.float_inverted_index.as_ref())
        .is_some_and(|index| index.enabled));

    let is_active_types = schema.keys.get("is_active").unwrap();
    assert!(is_active_types
        .boolean
        .as_ref()
        .and_then(|bool_type| bool_type.bool_inverted_index.as_ref())
        .is_some_and(|index| index.enabled));
}
5320
/// Each invalid `create_index` call must fail with the matching
/// `SchemaBuilderError` variant.
#[test]
fn test_builder_create_index_validation_errors() {
    // All sparse-vector cases use the same empty configuration.
    let sparse_config = || {
        IndexConfig::SparseVector(SparseVectorIndexConfig {
            embedding_function: None,
            source_key: None,
            bm25: None,
        })
    };

    // A vector index cannot be created on a specific key.
    let result = Schema::new_default(KnnIndex::Hnsw).create_index(
        Some("my_vectors"),
        IndexConfig::Vector(VectorIndexConfig {
            space: Some(Space::L2),
            embedding_function: None,
            source_key: None,
            hnsw: None,
            spann: None,
        }),
    );
    assert!(matches!(
        result,
        Err(SchemaBuilderError::VectorIndexMustBeGlobal { key }) if key == "my_vectors"
    ));

    // An FTS index cannot be created on a specific key.
    let result = Schema::new_default(KnnIndex::Hnsw)
        .create_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
    assert!(matches!(
        result,
        Err(SchemaBuilderError::FtsIndexMustBeGlobal { key }) if key == "my_text"
    ));

    // The document key cannot be modified.
    let result = Schema::new_default(KnnIndex::Hnsw).create_index(
        Some(DOCUMENT_KEY),
        IndexConfig::StringInverted(StringInvertedIndexConfig {}),
    );
    assert!(matches!(
        result,
        Err(SchemaBuilderError::SpecialKeyModificationNotAllowed { .. })
    ));

    // The embedding key cannot be modified.
    let result = Schema::new_default(KnnIndex::Hnsw).create_index(
        Some(EMBEDDING_KEY),
        IndexConfig::IntInverted(IntInvertedIndexConfig {}),
    );
    assert!(matches!(
        result,
        Err(SchemaBuilderError::SpecialKeyModificationNotAllowed { .. })
    ));

    // A sparse vector index requires a key.
    let result = Schema::new_default(KnnIndex::Hnsw).create_index(None, sparse_config());
    assert!(matches!(
        result,
        Err(SchemaBuilderError::SparseVectorRequiresKey)
    ));

    // A second sparse vector index is rejected and names the existing key.
    let result = Schema::new_default(KnnIndex::Hnsw)
        .create_index(Some("sparse1"), sparse_config())
        .expect("first sparse should succeed")
        .create_index(Some("sparse2"), sparse_config());
    assert!(matches!(
        result,
        Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key })
            if existing_key == "sparse1"
    ));
}
5416
/// Each invalid `delete_index` call must fail with the matching
/// `SchemaBuilderError` variant.
#[test]
fn test_builder_delete_index_validation_errors() {
    // Both sparse-vector calls use the same empty configuration.
    let sparse_config = || {
        IndexConfig::SparseVector(SparseVectorIndexConfig {
            embedding_function: None,
            source_key: None,
            bm25: None,
        })
    };

    // The embedding key cannot be modified.
    let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
        Some(EMBEDDING_KEY),
        IndexConfig::StringInverted(StringInvertedIndexConfig {}),
    );
    assert!(matches!(
        result,
        Err(SchemaBuilderError::SpecialKeyModificationNotAllowed { .. })
    ));

    // The document key cannot be modified.
    let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
        Some(DOCUMENT_KEY),
        IndexConfig::IntInverted(IntInvertedIndexConfig {}),
    );
    assert!(matches!(
        result,
        Err(SchemaBuilderError::SpecialKeyModificationNotAllowed { .. })
    ));

    // Deleting the vector index is not supported.
    let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
        None,
        IndexConfig::Vector(VectorIndexConfig {
            space: None,
            embedding_function: None,
            source_key: None,
            hnsw: None,
            spann: None,
        }),
    );
    assert!(matches!(
        result,
        Err(SchemaBuilderError::VectorIndexDeletionNotSupported)
    ));

    // Deleting the FTS index is not supported.
    let result =
        Schema::new_default(KnnIndex::Hnsw).delete_index(None, IndexConfig::Fts(FtsIndexConfig {}));
    assert!(matches!(
        result,
        Err(SchemaBuilderError::FtsIndexDeletionNotSupported)
    ));

    // Deleting a sparse vector index is not supported, even right after creating it.
    let result = Schema::new_default(KnnIndex::Hnsw)
        .create_index(Some("sparse"), sparse_config())
        .expect("create should succeed")
        .delete_index(Some("sparse"), sparse_config());
    assert!(matches!(
        result,
        Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported)
    ));
}
5496
/// A long chain of create and delete calls must leave each key's index in the
/// state set by the last operation that touched it.
#[test]
fn test_builder_pattern_chaining() {
    let schema = Schema::new_default(KnnIndex::Hnsw)
        .create_index(Some("tag1"), StringInvertedIndexConfig {}.into())
        .unwrap()
        .create_index(Some("tag2"), StringInvertedIndexConfig {}.into())
        .unwrap()
        .create_index(Some("tag3"), StringInvertedIndexConfig {}.into())
        .unwrap()
        .create_index(Some("count"), IntInvertedIndexConfig {}.into())
        .unwrap()
        .delete_index(Some("tag2"), StringInvertedIndexConfig {}.into())
        .unwrap()
        .create_index(Some("score"), FloatInvertedIndexConfig {}.into())
        .unwrap();

    // Lookup helpers; each panics if the key or the index entry is missing.
    let string_index_enabled = |key: &str| {
        let value_types = schema.keys.get(key).unwrap();
        let string_type = value_types.string.as_ref().unwrap();
        string_type.string_inverted_index.as_ref().unwrap().enabled
    };
    let int_index_enabled = |key: &str| {
        let value_types = schema.keys.get(key).unwrap();
        let int_type = value_types.int.as_ref().unwrap();
        int_type.int_inverted_index.as_ref().unwrap().enabled
    };
    let float_index_enabled = |key: &str| {
        let value_types = schema.keys.get(key).unwrap();
        let float_type = value_types.float.as_ref().unwrap();
        float_type.float_inverted_index.as_ref().unwrap().enabled
    };

    // tag1 was created and never touched again.
    assert!(string_index_enabled("tag1"));
    // tag2 was created and then deleted, so its index is disabled.
    assert!(!string_index_enabled("tag2"));
    // tag3, count and score were created and left alone.
    assert!(string_index_enabled("tag3"));
    assert!(int_index_enabled("count"));
    assert!(float_index_enabled("score"));
}
5589
/// Pins the exact shape of `Schema::default()`: which default indexes are
/// enabled, and the two built-in key overrides (document and embedding).
#[test]
fn test_schema_default_matches_python() {
    let schema = Schema::default();
    let defaults = &schema.defaults;

    // String defaults: FTS disabled, string inverted index enabled.
    assert!(defaults.string.is_some());
    let string_defaults = defaults.string.as_ref().unwrap();
    assert!(string_defaults
        .fts_index
        .as_ref()
        .is_some_and(|fts| !fts.enabled));
    assert!(string_defaults
        .string_inverted_index
        .as_ref()
        .is_some_and(|index| index.enabled));

    // Float-list defaults: vector index disabled with an empty config.
    assert!(defaults.float_list.is_some());
    let default_vector_index = defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert!(!default_vector_index.enabled);
    let default_vector_config = &default_vector_index.config;
    assert!(default_vector_config.space.is_none());
    assert!(default_vector_config.hnsw.is_none());
    assert!(default_vector_config.spann.is_none());
    assert!(default_vector_config.source_key.is_none());

    // Sparse-vector defaults: index disabled.
    assert!(defaults.sparse_vector.is_some());
    assert!(defaults
        .sparse_vector
        .as_ref()
        .and_then(|sparse| sparse.sparse_vector_index.as_ref())
        .is_some_and(|index| !index.enabled));

    // Int, float and bool defaults: inverted indexes enabled.
    assert!(defaults
        .int
        .as_ref()
        .and_then(|int_type| int_type.int_inverted_index.as_ref())
        .is_some_and(|index| index.enabled));
    assert!(defaults
        .float
        .as_ref()
        .and_then(|float_type| float_type.float_inverted_index.as_ref())
        .is_some_and(|index| index.enabled));
    assert!(defaults
        .boolean
        .as_ref()
        .and_then(|bool_type| bool_type.bool_inverted_index.as_ref())
        .is_some_and(|index| index.enabled));

    // Document key override: FTS enabled, string inverted index disabled.
    assert!(schema.keys.contains_key(DOCUMENT_KEY));
    let document_string = schema
        .keys
        .get(DOCUMENT_KEY)
        .unwrap()
        .string
        .as_ref()
        .unwrap();
    assert!(document_string.fts_index.as_ref().unwrap().enabled);
    assert!(
        !document_string
            .string_inverted_index
            .as_ref()
            .unwrap()
            .enabled
    );

    // Embedding key override: vector index enabled, sourced from the document key.
    assert!(schema.keys.contains_key(EMBEDDING_KEY));
    let embedding_index = schema
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert!(embedding_index.enabled);
    assert_eq!(
        embedding_index.config.source_key.as_deref(),
        Some(DOCUMENT_KEY)
    );
    assert!(embedding_index.config.space.is_none());
    assert!(embedding_index.config.hnsw.is_none());
    assert!(embedding_index.config.spann.is_none());

    // No other key overrides exist.
    assert_eq!(schema.keys.len(), 2);
}
5709
/// `Schema::default()` can be used as the starting point for the builder API:
/// adding one key index yields exactly the two built-in keys plus the new one.
#[test]
fn test_schema_default_works_with_builder() {
    let schema = Schema::default()
        .create_index(Some("category"), StringInvertedIndexConfig {}.into())
        .expect("should succeed");

    for expected_key in ["category", DOCUMENT_KEY, EMBEDDING_KEY] {
        assert!(schema.keys.contains_key(expected_key));
    }
    assert_eq!(schema.keys.len(), 3);
}
5723
5724 #[cfg(feature = "testing")]
5725 mod proptests {
5726 use super::*;
5727 use crate::strategies::{
5728 embedding_function_strategy, internal_collection_configuration_strategy,
5729 internal_hnsw_configuration_strategy, internal_spann_configuration_strategy,
5730 knn_index_strategy, space_strategy, TEST_NAME_PATTERN,
5731 };
5732 use crate::{
5733 HnswIndexConfig, SpannIndexConfig, VectorIndexConfig, DOCUMENT_KEY, EMBEDDING_KEY,
5734 };
5735 use proptest::prelude::*;
5736 use proptest::strategy::BoxedStrategy;
5737 use proptest::string::string_regex;
5738 use serde_json::json;
5739
5740 fn default_embedding_function_strategy(
5741 ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
5742 proptest::option::of(prop_oneof![
5743 Just(EmbeddingFunctionConfiguration::Unknown),
5744 Just(EmbeddingFunctionConfiguration::Known(
5745 EmbeddingFunctionNewConfiguration {
5746 name: "default".to_string(),
5747 config: json!({ "alpha": 1 }),
5748 }
5749 )),
5750 ])
5751 }
5752
5753 fn sparse_embedding_function_strategy(
5754 ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
5755 let known_strategy = string_regex(TEST_NAME_PATTERN).unwrap().prop_map(|name| {
5756 EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
5757 name,
5758 config: json!({ "alpha": 1 }),
5759 })
5760 });
5761
5762 proptest::option::of(prop_oneof![
5763 Just(EmbeddingFunctionConfiguration::Unknown),
5764 known_strategy,
5765 ])
5766 }
5767
5768 fn non_default_internal_collection_configuration_strategy(
5769 ) -> impl Strategy<Value = InternalCollectionConfiguration> {
5770 internal_collection_configuration_strategy()
5771 .prop_filter("non-default configuration", |config| !config.is_default())
5772 }
5773
5774 fn partial_hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
5775 (
5776 proptest::option::of(1usize..=512),
5777 proptest::option::of(1usize..=128),
5778 proptest::option::of(1usize..=512),
5779 proptest::option::of(1usize..=64),
5780 proptest::option::of(2usize..=4096),
5781 proptest::option::of(2usize..=4096),
5782 proptest::option::of(prop_oneof![
5783 Just(0.5f64),
5784 Just(1.0f64),
5785 Just(1.5f64),
5786 Just(2.0f64)
5787 ]),
5788 )
5789 .prop_map(
5790 |(
5791 ef_construction,
5792 max_neighbors,
5793 ef_search,
5794 num_threads,
5795 batch_size,
5796 sync_threshold,
5797 resize_factor,
5798 )| HnswIndexConfig {
5799 ef_construction,
5800 max_neighbors,
5801 ef_search,
5802 num_threads,
5803 batch_size,
5804 sync_threshold,
5805 resize_factor,
5806 },
5807 )
5808 }
5809
5810 fn partial_spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
5811 let epsilon_strategy = prop_oneof![Just(5.0f32), Just(7.5f32), Just(10.0f32)];
5812 (
5813 (
5814 proptest::option::of(1u32..=128), proptest::option::of(Just(1.0f32)), proptest::option::of(epsilon_strategy.clone()), proptest::option::of(1u32..=8), proptest::option::of(Just(1.0f32)), proptest::option::of(epsilon_strategy), proptest::option::of(50u32..=200), proptest::option::of(1usize..=1000), ),
5823 (
5824 proptest::option::of(Just(100.0f32)), proptest::option::of(1u32..=64), proptest::option::of(25u32..=100), proptest::option::of(1u32..=8), proptest::option::of(1u32..=64), proptest::option::of(1usize..=200), proptest::option::of(1usize..=200), proptest::option::of(1usize..=64), ),
5833 )
5834 .prop_map(
5835 |(
5836 (
5837 search_nprobe,
5838 search_rng_factor,
5839 search_rng_epsilon,
5840 nreplica_count,
5841 write_rng_factor,
5842 write_rng_epsilon,
5843 split_threshold,
5844 num_samples_kmeans,
5845 ),
5846 (
5847 initial_lambda,
5848 reassign_neighbor_count,
5849 merge_threshold,
5850 num_centers_to_merge_to,
5851 write_nprobe,
5852 ef_construction,
5853 ef_search,
5854 max_neighbors,
5855 ),
5856 )| SpannIndexConfig {
5857 search_nprobe,
5858 search_rng_factor,
5859 search_rng_epsilon,
5860 nreplica_count,
5861 write_rng_factor,
5862 write_rng_epsilon,
5863 split_threshold,
5864 num_samples_kmeans,
5865 initial_lambda,
5866 reassign_neighbor_count,
5867 merge_threshold,
5868 num_centers_to_merge_to,
5869 write_nprobe,
5870 ef_construction,
5871 ef_search,
5872 max_neighbors,
5873 },
5874 )
5875 }
5876
5877 proptest! {
5878 #[test]
5879 fn merge_hnsw_configs_preserves_user_overrides(
5880 base in partial_hnsw_index_config_strategy(),
5881 user in partial_hnsw_index_config_strategy(),
5882 ) {
5883 let merged = Schema::merge_hnsw_configs(Some(&base), Some(&user))
5884 .expect("merge should return Some when both are Some");
5885
5886 if user.ef_construction.is_some() {
5888 prop_assert_eq!(merged.ef_construction, user.ef_construction);
5889 }
5890 if user.max_neighbors.is_some() {
5891 prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
5892 }
5893 if user.ef_search.is_some() {
5894 prop_assert_eq!(merged.ef_search, user.ef_search);
5895 }
5896 if user.num_threads.is_some() {
5897 prop_assert_eq!(merged.num_threads, user.num_threads);
5898 }
5899 if user.batch_size.is_some() {
5900 prop_assert_eq!(merged.batch_size, user.batch_size);
5901 }
5902 if user.sync_threshold.is_some() {
5903 prop_assert_eq!(merged.sync_threshold, user.sync_threshold);
5904 }
5905 if user.resize_factor.is_some() {
5906 prop_assert_eq!(merged.resize_factor, user.resize_factor);
5907 }
5908 }
5909
5910 #[test]
5911 fn merge_hnsw_configs_falls_back_to_base_when_user_is_none(
5912 base in partial_hnsw_index_config_strategy(),
5913 ) {
5914 let merged = Schema::merge_hnsw_configs(Some(&base), None)
5915 .expect("merge should return Some when base is Some");
5916
5917 prop_assert_eq!(merged, base);
5919 }
5920
5921 #[test]
5922 fn merge_hnsw_configs_returns_user_when_base_is_none(
5923 user in partial_hnsw_index_config_strategy(),
5924 ) {
5925 let merged = Schema::merge_hnsw_configs(None, Some(&user))
5926 .expect("merge should return Some when user is Some");
5927
5928 prop_assert_eq!(merged, user);
5930 }
5931
5932 #[test]
5933 fn merge_spann_configs_preserves_user_overrides(
5934 base in partial_spann_index_config_strategy(),
5935 user in partial_spann_index_config_strategy(),
5936 ) {
5937 let merged = Schema::merge_spann_configs(Some(&base), Some(&user))
5938 .expect("merge should return Some when both are Some");
5939
5940 if user.search_nprobe.is_some() {
5942 prop_assert_eq!(merged.search_nprobe, user.search_nprobe);
5943 }
5944 if user.search_rng_epsilon.is_some() {
5945 prop_assert_eq!(merged.search_rng_epsilon, user.search_rng_epsilon);
5946 }
5947 if user.split_threshold.is_some() {
5948 prop_assert_eq!(merged.split_threshold, user.split_threshold);
5949 }
5950 if user.ef_construction.is_some() {
5951 prop_assert_eq!(merged.ef_construction, user.ef_construction);
5952 }
5953 if user.ef_search.is_some() {
5954 prop_assert_eq!(merged.ef_search, user.ef_search);
5955 }
5956 if user.max_neighbors.is_some() {
5957 prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
5958 }
5959 }
5960
5961 #[test]
5962 fn merge_spann_configs_falls_back_to_base_when_user_is_none(
5963 base in partial_spann_index_config_strategy(),
5964 ) {
5965 let merged = Schema::merge_spann_configs(Some(&base), None)
5966 .expect("merge should return Some when base is Some");
5967
5968 prop_assert_eq!(merged, base);
5970 }
5971
5972 #[test]
5973 fn merge_vector_index_config_preserves_user_overrides(
5974 base in vector_index_config_strategy(),
5975 user in vector_index_config_strategy(),
5976 knn in knn_index_strategy(),
5977 ) {
5978 let merged = Schema::merge_vector_index_config(&base, &user, knn);
5979
5980 if user.space.is_some() {
5982 prop_assert_eq!(merged.space, user.space);
5983 }
5984 if user.embedding_function.is_some() {
5985 prop_assert_eq!(merged.embedding_function, user.embedding_function);
5986 }
5987 if user.source_key.is_some() {
5988 prop_assert_eq!(merged.source_key, user.source_key);
5989 }
5990
5991 match knn {
5993 KnnIndex::Hnsw => {
5994 if let (Some(_base_hnsw), Some(user_hnsw)) = (&base.hnsw, &user.hnsw) {
5995 let merged_hnsw = merged.hnsw.as_ref().expect("hnsw should be Some");
5996 if user_hnsw.ef_construction.is_some() {
5997 prop_assert_eq!(merged_hnsw.ef_construction, user_hnsw.ef_construction);
5998 }
5999 }
6000 }
6001 KnnIndex::Spann => {
6002 if let (Some(_base_spann), Some(user_spann)) = (&base.spann, &user.spann) {
6003 let merged_spann = merged.spann.as_ref().expect("spann should be Some");
6004 if user_spann.search_nprobe.is_some() {
6005 prop_assert_eq!(merged_spann.search_nprobe, user_spann.search_nprobe);
6006 }
6007 }
6008 }
6009 }
6010 }
6011 }
6012
6013 fn expected_vector_index_config(
6014 config: &InternalCollectionConfiguration,
6015 ) -> VectorIndexConfig {
6016 match &config.vector_index {
6017 VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
6018 space: Some(hnsw_config.space.clone()),
6019 embedding_function: config.embedding_function.clone(),
6020 source_key: None,
6021 hnsw: Some(HnswIndexConfig {
6022 ef_construction: Some(hnsw_config.ef_construction),
6023 max_neighbors: Some(hnsw_config.max_neighbors),
6024 ef_search: Some(hnsw_config.ef_search),
6025 num_threads: Some(hnsw_config.num_threads),
6026 batch_size: Some(hnsw_config.batch_size),
6027 sync_threshold: Some(hnsw_config.sync_threshold),
6028 resize_factor: Some(hnsw_config.resize_factor),
6029 }),
6030 spann: None,
6031 },
6032 VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
6033 space: Some(spann_config.space.clone()),
6034 embedding_function: config.embedding_function.clone(),
6035 source_key: None,
6036 hnsw: None,
6037 spann: Some(SpannIndexConfig {
6038 search_nprobe: Some(spann_config.search_nprobe),
6039 search_rng_factor: Some(spann_config.search_rng_factor),
6040 search_rng_epsilon: Some(spann_config.search_rng_epsilon),
6041 nreplica_count: Some(spann_config.nreplica_count),
6042 write_rng_factor: Some(spann_config.write_rng_factor),
6043 write_rng_epsilon: Some(spann_config.write_rng_epsilon),
6044 split_threshold: Some(spann_config.split_threshold),
6045 num_samples_kmeans: Some(spann_config.num_samples_kmeans),
6046 initial_lambda: Some(spann_config.initial_lambda),
6047 reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
6048 merge_threshold: Some(spann_config.merge_threshold),
6049 num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
6050 write_nprobe: Some(spann_config.write_nprobe),
6051 ef_construction: Some(spann_config.ef_construction),
6052 ef_search: Some(spann_config.ef_search),
6053 max_neighbors: Some(spann_config.max_neighbors),
6054 }),
6055 },
6056 }
6057 }
6058
6059 fn non_special_key_strategy() -> BoxedStrategy<String> {
6060 string_regex(TEST_NAME_PATTERN)
6061 .unwrap()
6062 .prop_filter("exclude special keys", |key| {
6063 key != DOCUMENT_KEY && key != EMBEDDING_KEY
6064 })
6065 .boxed()
6066 }
6067
6068 fn source_key_strategy() -> BoxedStrategy<Option<String>> {
6069 proptest::option::of(prop_oneof![
6070 Just(DOCUMENT_KEY.to_string()),
6071 string_regex(TEST_NAME_PATTERN).unwrap(),
6072 ])
6073 .boxed()
6074 }
6075
6076 fn fts_index_type_strategy() -> impl Strategy<Value = FtsIndexType> {
6077 any::<bool>().prop_map(|enabled| FtsIndexType {
6078 enabled,
6079 config: FtsIndexConfig {},
6080 })
6081 }
6082
6083 fn string_inverted_index_type_strategy() -> impl Strategy<Value = StringInvertedIndexType> {
6084 any::<bool>().prop_map(|enabled| StringInvertedIndexType {
6085 enabled,
6086 config: StringInvertedIndexConfig {},
6087 })
6088 }
6089
6090 fn string_value_type_strategy() -> BoxedStrategy<Option<StringValueType>> {
6091 proptest::option::of(
6092 (
6093 proptest::option::of(string_inverted_index_type_strategy()),
6094 proptest::option::of(fts_index_type_strategy()),
6095 )
6096 .prop_map(|(string_inverted_index, fts_index)| {
6097 StringValueType {
6098 string_inverted_index,
6099 fts_index,
6100 }
6101 }),
6102 )
6103 .boxed()
6104 }
6105
6106 fn float_inverted_index_type_strategy() -> impl Strategy<Value = FloatInvertedIndexType> {
6107 any::<bool>().prop_map(|enabled| FloatInvertedIndexType {
6108 enabled,
6109 config: FloatInvertedIndexConfig {},
6110 })
6111 }
6112
6113 fn float_value_type_strategy() -> BoxedStrategy<Option<FloatValueType>> {
6114 proptest::option::of(
6115 proptest::option::of(float_inverted_index_type_strategy()).prop_map(
6116 |float_inverted_index| FloatValueType {
6117 float_inverted_index,
6118 },
6119 ),
6120 )
6121 .boxed()
6122 }
6123
6124 fn int_inverted_index_type_strategy() -> impl Strategy<Value = IntInvertedIndexType> {
6125 any::<bool>().prop_map(|enabled| IntInvertedIndexType {
6126 enabled,
6127 config: IntInvertedIndexConfig {},
6128 })
6129 }
6130
6131 fn int_value_type_strategy() -> BoxedStrategy<Option<IntValueType>> {
6132 proptest::option::of(
6133 proptest::option::of(int_inverted_index_type_strategy())
6134 .prop_map(|int_inverted_index| IntValueType { int_inverted_index }),
6135 )
6136 .boxed()
6137 }
6138
6139 fn bool_inverted_index_type_strategy() -> impl Strategy<Value = BoolInvertedIndexType> {
6140 any::<bool>().prop_map(|enabled| BoolInvertedIndexType {
6141 enabled,
6142 config: BoolInvertedIndexConfig {},
6143 })
6144 }
6145
6146 fn bool_value_type_strategy() -> BoxedStrategy<Option<BoolValueType>> {
6147 proptest::option::of(
6148 proptest::option::of(bool_inverted_index_type_strategy()).prop_map(
6149 |bool_inverted_index| BoolValueType {
6150 bool_inverted_index,
6151 },
6152 ),
6153 )
6154 .boxed()
6155 }
6156
6157 fn sparse_vector_index_config_strategy() -> impl Strategy<Value = SparseVectorIndexConfig> {
6158 (
6159 sparse_embedding_function_strategy(),
6160 source_key_strategy(),
6161 proptest::option::of(any::<bool>()),
6162 )
6163 .prop_map(|(embedding_function, source_key, bm25)| {
6164 SparseVectorIndexConfig {
6165 embedding_function,
6166 source_key,
6167 bm25,
6168 }
6169 })
6170 }
6171
6172 fn sparse_vector_value_type_strategy() -> BoxedStrategy<Option<SparseVectorValueType>> {
6173 proptest::option::of(
6174 (
6175 any::<bool>(),
6176 proptest::option::of(sparse_vector_index_config_strategy()),
6177 )
6178 .prop_map(|(enabled, config)| SparseVectorValueType {
6179 sparse_vector_index: config.map(|cfg| SparseVectorIndexType {
6180 enabled,
6181 config: cfg,
6182 }),
6183 }),
6184 )
6185 .boxed()
6186 }
6187
6188 fn hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
6189 internal_hnsw_configuration_strategy().prop_map(|config| HnswIndexConfig {
6190 ef_construction: Some(config.ef_construction),
6191 max_neighbors: Some(config.max_neighbors),
6192 ef_search: Some(config.ef_search),
6193 num_threads: Some(config.num_threads),
6194 batch_size: Some(config.batch_size),
6195 sync_threshold: Some(config.sync_threshold),
6196 resize_factor: Some(config.resize_factor),
6197 })
6198 }
6199
6200 fn spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
6201 internal_spann_configuration_strategy().prop_map(|config| SpannIndexConfig {
6202 search_nprobe: Some(config.search_nprobe),
6203 search_rng_factor: Some(config.search_rng_factor),
6204 search_rng_epsilon: Some(config.search_rng_epsilon),
6205 nreplica_count: Some(config.nreplica_count),
6206 write_rng_factor: Some(config.write_rng_factor),
6207 write_rng_epsilon: Some(config.write_rng_epsilon),
6208 split_threshold: Some(config.split_threshold),
6209 num_samples_kmeans: Some(config.num_samples_kmeans),
6210 initial_lambda: Some(config.initial_lambda),
6211 reassign_neighbor_count: Some(config.reassign_neighbor_count),
6212 merge_threshold: Some(config.merge_threshold),
6213 num_centers_to_merge_to: Some(config.num_centers_to_merge_to),
6214 write_nprobe: Some(config.write_nprobe),
6215 ef_construction: Some(config.ef_construction),
6216 ef_search: Some(config.ef_search),
6217 max_neighbors: Some(config.max_neighbors),
6218 })
6219 }
6220
6221 fn vector_index_config_strategy() -> impl Strategy<Value = VectorIndexConfig> {
6222 (
6223 proptest::option::of(space_strategy()),
6224 embedding_function_strategy(),
6225 source_key_strategy(),
6226 proptest::option::of(hnsw_index_config_strategy()),
6227 proptest::option::of(spann_index_config_strategy()),
6228 )
6229 .prop_map(|(space, embedding_function, source_key, hnsw, spann)| {
6230 VectorIndexConfig {
6231 space,
6232 embedding_function,
6233 source_key,
6234 hnsw,
6235 spann,
6236 }
6237 })
6238 }
6239
6240 fn vector_index_type_strategy() -> impl Strategy<Value = VectorIndexType> {
6241 (any::<bool>(), vector_index_config_strategy())
6242 .prop_map(|(enabled, config)| VectorIndexType { enabled, config })
6243 }
6244
6245 fn float_list_value_type_strategy() -> BoxedStrategy<Option<FloatListValueType>> {
6246 proptest::option::of(
6247 proptest::option::of(vector_index_type_strategy())
6248 .prop_map(|vector_index| FloatListValueType { vector_index }),
6249 )
6250 .boxed()
6251 }
6252
6253 fn value_types_strategy() -> BoxedStrategy<ValueTypes> {
6254 (
6255 string_value_type_strategy(),
6256 float_list_value_type_strategy(),
6257 sparse_vector_value_type_strategy(),
6258 int_value_type_strategy(),
6259 float_value_type_strategy(),
6260 bool_value_type_strategy(),
6261 )
6262 .prop_map(
6263 |(string, float_list, sparse_vector, int, float, boolean)| ValueTypes {
6264 string,
6265 float_list,
6266 sparse_vector,
6267 int,
6268 float,
6269 boolean,
6270 },
6271 )
6272 .boxed()
6273 }
6274
6275 fn schema_strategy() -> BoxedStrategy<Schema> {
6276 (
6277 value_types_strategy(),
6278 proptest::collection::hash_map(
6279 non_special_key_strategy(),
6280 value_types_strategy(),
6281 0..=3,
6282 ),
6283 proptest::option::of(value_types_strategy()),
6284 proptest::option::of(value_types_strategy()),
6285 )
6286 .prop_map(
6287 |(defaults, mut extra_keys, document_override, embedding_override)| {
6288 if let Some(doc) = document_override {
6289 extra_keys.insert(DOCUMENT_KEY.to_string(), doc);
6290 }
6291 if let Some(embed) = embedding_override {
6292 extra_keys.insert(EMBEDDING_KEY.to_string(), embed);
6293 }
6294 Schema {
6295 defaults,
6296 keys: extra_keys,
6297 cmek: None,
6298 }
6299 },
6300 )
6301 .boxed()
6302 }
6303
6304 fn force_non_default_schema(mut schema: Schema) -> Schema {
6305 if schema.is_default() {
6306 if let Some(string_value) = schema
6307 .defaults
6308 .string
6309 .as_mut()
6310 .and_then(|string_value| string_value.string_inverted_index.as_mut())
6311 {
6312 string_value.enabled = !string_value.enabled;
6313 } else {
6314 schema.defaults.string = Some(StringValueType {
6315 string_inverted_index: Some(StringInvertedIndexType {
6316 enabled: false,
6317 config: StringInvertedIndexConfig {},
6318 }),
6319 fts_index: None,
6320 });
6321 }
6322 }
6323 schema
6324 }
6325
6326 fn non_default_schema_strategy() -> BoxedStrategy<Schema> {
6327 schema_strategy().prop_map(force_non_default_schema).boxed()
6328 }
6329
6330 fn extract_vector_configs(schema: &Schema) -> (VectorIndexConfig, VectorIndexConfig) {
6331 let defaults = schema
6332 .defaults
6333 .float_list
6334 .as_ref()
6335 .and_then(|fl| fl.vector_index.as_ref())
6336 .map(|vi| vi.config.clone())
6337 .expect("defaults vector index missing");
6338
6339 let embedding = schema
6340 .keys
6341 .get(EMBEDDING_KEY)
6342 .and_then(|value_types| value_types.float_list.as_ref())
6343 .and_then(|fl| fl.vector_index.as_ref())
6344 .map(|vi| vi.config.clone())
6345 .expect("#embedding vector index missing");
6346
6347 (defaults, embedding)
6348 }
6349
6350 proptest! {
6351 #[test]
6352 fn reconcile_schema_and_config_matches_convert_for_config_only(
6353 config in internal_collection_configuration_strategy(),
6354 knn in knn_index_strategy(),
6355 ) {
6356 let result = Schema::reconcile_schema_and_config(None, Some(&config), knn)
6357 .expect("reconciliation should succeed");
6358
6359 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6360 let expected_config = expected_vector_index_config(&config);
6361
6362 prop_assert_eq!(defaults_vi, expected_config.clone());
6363
6364 let mut expected_embedding_config = expected_config;
6365 expected_embedding_config.source_key = Some(DOCUMENT_KEY.to_string());
6366 prop_assert_eq!(embedding_vi, expected_embedding_config);
6367
6368 prop_assert_eq!(result.keys.len(), 2);
6369 }
6370 }
6371
6372 proptest! {
6373 #[test]
6374 fn reconcile_schema_and_config_errors_when_both_non_default(
6375 config in non_default_internal_collection_configuration_strategy(),
6376 knn in knn_index_strategy(),
6377 ) {
6378 let schema = Schema::try_from(&config)
6379 .expect("conversion should succeed");
6380 prop_assume!(!schema.is_default());
6381
6382 let result = Schema::reconcile_schema_and_config(Some(&schema), Some(&config), knn);
6383
6384 prop_assert!(matches!(result, Err(SchemaError::ConfigAndSchemaConflict)));
6385 }
6386 }
6387
6388 proptest! {
6389 #[test]
6390 fn reconcile_schema_and_config_matches_schema_only_path(
6391 schema in schema_strategy(),
6392 knn in knn_index_strategy(),
6393 ) {
6394 let result = Schema::reconcile_schema_and_config(Some(&schema), None, knn)
6395 .expect("reconciliation should succeed");
6396
6397 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6398
6399 if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
6401 if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
6402 if let Some(schema_space) = &schema_vi.config.space {
6404 prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
6405 }
6406 if let Some(schema_ef) = &schema_vi.config.embedding_function {
6407 prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
6408 }
6409 match knn {
6411 KnnIndex::Hnsw => {
6412 if let Some(schema_hnsw) = &schema_vi.config.hnsw {
6413 if let Some(merged_hnsw) = &defaults_vi.hnsw {
6414 if let Some(schema_ef_construction) = schema_hnsw.ef_construction {
6415 prop_assert_eq!(merged_hnsw.ef_construction, Some(schema_ef_construction));
6416 }
6417 }
6418 }
6419 }
6420 KnnIndex::Spann => {
6421 if let Some(schema_spann) = &schema_vi.config.spann {
6422 if let Some(merged_spann) = &defaults_vi.spann {
6423 if let Some(schema_search_nprobe) = schema_spann.search_nprobe {
6424 prop_assert_eq!(merged_spann.search_nprobe, Some(schema_search_nprobe));
6425 }
6426 }
6427 }
6428 }
6429 }
6430 }
6431 }
6432
6433 if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
6435 if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
6436 if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
6437 if let Some(schema_space) = &embedding_vi_type.config.space {
6438 prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
6439 }
6440 }
6441 }
6442 }
6443 }
6444 }
6445
6446 proptest! {
6447 #[test]
6448 fn reconcile_schema_and_config_with_default_schema_and_default_config_applies_embedding_function(
6449 embedding_function in default_embedding_function_strategy(),
6450 knn in knn_index_strategy(),
6451 ) {
6452 let schema = Schema::new_default(knn);
6453 let mut config = match knn {
6454 KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
6455 KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
6456 };
6457 config.embedding_function = embedding_function.clone();
6458
6459 let result = Schema::reconcile_schema_and_config(
6460 Some(&schema),
6461 Some(&config),
6462 knn,
6463 )
6464 .expect("reconciliation should succeed");
6465
6466 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6467
6468 if let Some(ef) = embedding_function {
6470 prop_assert_eq!(defaults_vi.embedding_function, Some(ef.clone()));
6471 prop_assert_eq!(embedding_vi.embedding_function, Some(ef));
6472 } else {
6473 prop_assert_eq!(defaults_vi.embedding_function, None);
6475 prop_assert_eq!(embedding_vi.embedding_function, None);
6476 }
6477 }
6478 }
6479
6480 proptest! {
6481 #[test]
6482 fn reconcile_schema_and_config_with_default_config_keeps_non_default_schema(
6483 schema in non_default_schema_strategy(),
6484 knn in knn_index_strategy(),
6485 ) {
6486 let default_config = match knn {
6487 KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
6488 KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
6489 };
6490
6491 let result = Schema::reconcile_schema_and_config(
6492 Some(&schema),
6493 Some(&default_config),
6494 knn,
6495 )
6496 .expect("reconciliation should succeed");
6497
6498 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6499
6500 if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
6503 if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
6504 if let Some(schema_space) = &schema_vi.config.space {
6505 prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
6506 }
6507 if let Some(schema_ef) = &schema_vi.config.embedding_function {
6508 prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
6509 }
6510 }
6511 }
6512
6513 if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
6515 if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
6516 if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
6517 if let Some(schema_space) = &embedding_vi_type.config.space {
6518 prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
6519 }
6520 }
6521 }
6522 }
6523 }
6524 }
6525 }
6526}