1use chroma_error::{ChromaError, ErrorCodes};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::sync::{Arc, LazyLock};
6use thiserror::Error;
7use validator::Validate;
8
9use crate::chroma_proto;
10use crate::collection_configuration::{
11 EmbeddingFunctionConfiguration, InternalCollectionConfiguration,
12 UpdateVectorIndexConfiguration, VectorIndexConfiguration,
13};
14use crate::hnsw_configuration::Space;
15use crate::metadata::{MetadataComparison, MetadataValueType, Where};
16use crate::operator::QueryVector;
17use crate::{
18 default_batch_size, default_construction_ef, default_construction_ef_spann,
19 default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
20 default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
21 default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
22 default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
23 default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
24 default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor, ConversionError,
25 HnswParametersFromSegmentError, InternalHnswConfiguration, InternalSpannConfiguration,
26 InternalUpdateCollectionConfiguration, KnnIndex, Segment, CHROMA_KEY,
27};
28
29impl ChromaError for SchemaError {
30 fn code(&self) -> ErrorCodes {
31 match self {
32 SchemaError::MissingIndexConfiguration { .. } => ErrorCodes::Internal,
35 SchemaError::InvalidSchema { .. } => ErrorCodes::Internal,
36 SchemaError::DefaultsMismatch => ErrorCodes::Internal,
39 SchemaError::ConfigurationConflict { .. } => ErrorCodes::Internal,
40
41 SchemaError::InvalidUserInput { .. } => ErrorCodes::InvalidArgument,
44 SchemaError::ConfigAndSchemaConflict => ErrorCodes::InvalidArgument,
45 SchemaError::InvalidHnswConfig(_) => ErrorCodes::InvalidArgument,
46 SchemaError::InvalidSpannConfig(_) => ErrorCodes::InvalidArgument,
47 SchemaError::Builder(e) => e.code(),
48 }
49 }
50}
51
52#[derive(Debug, Error)]
53pub enum SchemaError {
54 #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
55 MissingIndexConfiguration { key: String, value_type: String },
56 #[error("Schema reconciliation failed: {reason}")]
57 InvalidSchema { reason: String },
58 #[error("Cannot set both collection config and schema simultaneously")]
59 ConfigAndSchemaConflict,
60 #[error("Cannot merge schemas with differing defaults")]
61 DefaultsMismatch,
62 #[error("Conflicting configuration for {context}")]
63 ConfigurationConflict { context: String },
64 #[error("Invalid HNSW configuration: {0}")]
65 InvalidHnswConfig(validator::ValidationErrors),
66 #[error("Invalid SPANN configuration: {0}")]
67 InvalidSpannConfig(validator::ValidationErrors),
68 #[error("Invalid schema input: {reason}")]
69 InvalidUserInput { reason: String },
70 #[error(transparent)]
71 Builder(#[from] SchemaBuilderError),
72}
73
74#[derive(Debug, Error)]
75pub enum SchemaBuilderError {
76 #[error("Vector index must be configured globally using create_index(None, config), not on specific key '{key}'")]
77 VectorIndexMustBeGlobal { key: String },
78 #[error("FTS index must be configured globally using create_index(None, config), not on specific key '{key}'")]
79 FtsIndexMustBeGlobal { key: String },
80 #[error("Cannot modify special key '{key}' - it is managed automatically by the system. To customize vector search, modify the global vector config instead.")]
81 SpecialKeyModificationNotAllowed { key: String },
82 #[error("Sparse vector index requires a specific key. Use create_index(Some(\"key_name\"), config) instead of create_index(None, config)")]
83 SparseVectorRequiresKey,
84 #[error("Only one sparse vector index allowed per collection. Key '{existing_key}' already has a sparse vector index. Remove it first or use that key.")]
85 MultipleSparseVectorIndexes { existing_key: String },
86 #[error("Vector index deletion not supported. The vector index is always enabled on #embedding. To disable vector search, disable the collection instead.")]
87 VectorIndexDeletionNotSupported,
88 #[error("FTS index deletion not supported. The FTS index is always enabled on #document. To disable full-text search, use a different collection without FTS.")]
89 FtsIndexDeletionNotSupported,
90 #[error("Sparse vector index deletion not supported yet. Sparse vector indexes cannot be removed once created.")]
91 SparseVectorIndexDeletionNotSupported,
92}
93
94#[derive(Debug, Error)]
95pub enum FilterValidationError {
96 #[error(
97 "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
98 )]
99 IndexingDisabled {
100 key: String,
101 value_type: MetadataValueType,
102 },
103 #[error(transparent)]
104 Schema(#[from] SchemaError),
105}
106
107impl ChromaError for SchemaBuilderError {
108 fn code(&self) -> ErrorCodes {
109 ErrorCodes::InvalidArgument
110 }
111}
112
113impl ChromaError for FilterValidationError {
114 fn code(&self) -> ErrorCodes {
115 match self {
116 FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
117 FilterValidationError::Schema(_) => ErrorCodes::Internal,
118 }
119 }
120}
121
// ---- Value type names -------------------------------------------------------

/// Name of the string value type.
pub const STRING_VALUE_NAME: &str = "string";
/// Name of the integer value type.
pub const INT_VALUE_NAME: &str = "int";
/// Name of the boolean value type.
pub const BOOL_VALUE_NAME: &str = "bool";
/// Name of the float value type.
pub const FLOAT_VALUE_NAME: &str = "float";
/// Name of the float-list value type.
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
/// Name of the sparse-vector value type.
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// ---- Index names ------------------------------------------------------------

/// Name of the full-text search index.
pub const FTS_INDEX_NAME: &str = "fts_index";
/// Name of the dense vector index.
pub const VECTOR_INDEX_NAME: &str = "vector_index";
/// Name of the sparse vector index.
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
/// Name of the inverted index over string values.
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
/// Name of the inverted index over integer values.
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
/// Name of the inverted index over float values.
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
/// Name of the inverted index over boolean values.
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

// ---- Special keys -----------------------------------------------------------

/// Special key under which the document's index configuration is stored.
pub const DOCUMENT_KEY: &str = "#document";
/// Special key under which the embedding's index configuration is stored.
pub const EMBEDDING_KEY: &str = "#embedding";
147
148static CMEK_GCP_RE: LazyLock<Regex> = LazyLock::new(|| {
150 Regex::new(r"^projects/.+/locations/.+/keyRings/.+/cryptoKeys/.+$")
151 .expect("The CMEK pattern for GCP should be valid")
152});
153
154#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
159#[serde(rename_all = "snake_case")]
160pub enum Cmek {
161 Gcp(Arc<String>),
165}
166
167impl Cmek {
168 pub fn gcp(resource: String) -> Self {
178 Cmek::Gcp(Arc::new(resource))
179 }
180
181 pub fn validate_pattern(&self) -> bool {
187 match self {
188 Cmek::Gcp(resource) => CMEK_GCP_RE.is_match(resource),
189 }
190 }
191}
192
193impl TryFrom<chroma_proto::Cmek> for Cmek {
194 type Error = ConversionError;
195
196 fn try_from(proto: chroma_proto::Cmek) -> Result<Self, Self::Error> {
197 match proto.provider {
198 Some(chroma_proto::cmek::Provider::Gcp(resource)) => Ok(Cmek::gcp(resource)),
199 None => Err(ConversionError::DecodeError),
200 }
201 }
202}
203
204impl From<Cmek> for chroma_proto::Cmek {
205 fn from(cmek: Cmek) -> Self {
206 match cmek {
207 Cmek::Gcp(resource) => chroma_proto::Cmek {
208 provider: Some(chroma_proto::cmek::Provider::Gcp((*resource).clone())),
209 },
210 }
211 }
212}
213
214#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
223#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
224pub struct Schema {
225 pub defaults: ValueTypes,
227 #[serde(rename = "keys", alias = "key_overrides")]
230 pub keys: HashMap<String, ValueTypes>,
231 #[serde(skip_serializing_if = "Option::is_none")]
233 #[cfg_attr(feature = "utoipa", schema(value_type = Option<Object>))]
234 pub cmek: Option<Cmek>,
235 #[serde(skip_serializing_if = "Option::is_none")]
237 pub source_attached_function_id: Option<String>,
238}
239
240impl Schema {
241 pub fn update(&mut self, configuration: &InternalUpdateCollectionConfiguration) {
242 if let Some(vector_update) = &configuration.vector_index {
243 if let Some(default_vector_index) = self.defaults_vector_index_mut() {
244 Self::apply_vector_index_update(default_vector_index, vector_update);
245 }
246 if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
247 Self::apply_vector_index_update(embedding_vector_index, vector_update);
248 }
249 }
250
251 if let Some(embedding_function) = configuration.embedding_function.as_ref() {
252 if let Some(default_vector_index) = self.defaults_vector_index_mut() {
253 default_vector_index.config.embedding_function = Some(embedding_function.clone());
254 }
255 if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
256 embedding_vector_index.config.embedding_function = Some(embedding_function.clone());
257 }
258 }
259 }
260
261 fn defaults_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
262 self.defaults
263 .float_list
264 .as_mut()
265 .and_then(|float_list| float_list.vector_index.as_mut())
266 }
267
268 fn embedding_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
269 self.keys
270 .get_mut(EMBEDDING_KEY)
271 .and_then(|value_types| value_types.float_list.as_mut())
272 .and_then(|float_list| float_list.vector_index.as_mut())
273 }
274
275 fn apply_vector_index_update(
276 vector_index: &mut VectorIndexType,
277 update: &UpdateVectorIndexConfiguration,
278 ) {
279 match update {
280 UpdateVectorIndexConfiguration::Hnsw(Some(hnsw_update)) => {
281 if let Some(hnsw_config) = vector_index.config.hnsw.as_mut() {
282 if let Some(ef_search) = hnsw_update.ef_search {
283 hnsw_config.ef_search = Some(ef_search);
284 }
285 if let Some(max_neighbors) = hnsw_update.max_neighbors {
286 hnsw_config.max_neighbors = Some(max_neighbors);
287 }
288 if let Some(num_threads) = hnsw_update.num_threads {
289 hnsw_config.num_threads = Some(num_threads);
290 }
291 if let Some(resize_factor) = hnsw_update.resize_factor {
292 hnsw_config.resize_factor = Some(resize_factor);
293 }
294 if let Some(sync_threshold) = hnsw_update.sync_threshold {
295 hnsw_config.sync_threshold = Some(sync_threshold);
296 }
297 if let Some(batch_size) = hnsw_update.batch_size {
298 hnsw_config.batch_size = Some(batch_size);
299 }
300 }
301 }
302 UpdateVectorIndexConfiguration::Hnsw(None) => {}
303 UpdateVectorIndexConfiguration::Spann(Some(spann_update)) => {
304 if let Some(spann_config) = vector_index.config.spann.as_mut() {
305 if let Some(search_nprobe) = spann_update.search_nprobe {
306 spann_config.search_nprobe = Some(search_nprobe);
307 }
308 if let Some(ef_search) = spann_update.ef_search {
309 spann_config.ef_search = Some(ef_search);
310 }
311 }
312 }
313 UpdateVectorIndexConfiguration::Spann(None) => {}
314 }
315 }
316
317 pub fn is_sparse_index_enabled(&self) -> bool {
318 let defaults_enabled = self
319 .defaults
320 .sparse_vector
321 .as_ref()
322 .and_then(|sv| sv.sparse_vector_index.as_ref())
323 .is_some_and(|idx| idx.enabled);
324 let key_enabled = self.keys.values().any(|value_types| {
325 value_types
326 .sparse_vector
327 .as_ref()
328 .and_then(|sv| sv.sparse_vector_index.as_ref())
329 .is_some_and(|idx| idx.enabled)
330 });
331 defaults_enabled || key_enabled
332 }
333}
334
335impl Default for Schema {
336 fn default() -> Self {
353 let defaults = ValueTypes {
355 string: Some(StringValueType {
356 fts_index: Some(FtsIndexType {
357 enabled: false,
358 config: FtsIndexConfig {},
359 }),
360 string_inverted_index: Some(StringInvertedIndexType {
361 enabled: true,
362 config: StringInvertedIndexConfig {},
363 }),
364 }),
365 float_list: Some(FloatListValueType {
366 vector_index: Some(VectorIndexType {
367 enabled: false,
368 config: VectorIndexConfig {
369 space: None, embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
371 source_key: None,
372 hnsw: None, spann: None, },
375 }),
376 }),
377 sparse_vector: Some(SparseVectorValueType {
378 sparse_vector_index: Some(SparseVectorIndexType {
379 enabled: false,
380 config: SparseVectorIndexConfig {
381 embedding_function: None,
382 source_key: None,
383 bm25: None,
384 },
385 }),
386 }),
387 int: Some(IntValueType {
388 int_inverted_index: Some(IntInvertedIndexType {
389 enabled: true,
390 config: IntInvertedIndexConfig {},
391 }),
392 }),
393 float: Some(FloatValueType {
394 float_inverted_index: Some(FloatInvertedIndexType {
395 enabled: true,
396 config: FloatInvertedIndexConfig {},
397 }),
398 }),
399 boolean: Some(BoolValueType {
400 bool_inverted_index: Some(BoolInvertedIndexType {
401 enabled: true,
402 config: BoolInvertedIndexConfig {},
403 }),
404 }),
405 };
406
407 let mut keys = HashMap::new();
409
410 keys.insert(
412 DOCUMENT_KEY.to_string(),
413 ValueTypes {
414 string: Some(StringValueType {
415 fts_index: Some(FtsIndexType {
416 enabled: true,
417 config: FtsIndexConfig {},
418 }),
419 string_inverted_index: Some(StringInvertedIndexType {
420 enabled: false,
421 config: StringInvertedIndexConfig {},
422 }),
423 }),
424 ..Default::default()
425 },
426 );
427
428 keys.insert(
430 EMBEDDING_KEY.to_string(),
431 ValueTypes {
432 float_list: Some(FloatListValueType {
433 vector_index: Some(VectorIndexType {
434 enabled: true,
435 config: VectorIndexConfig {
436 space: None, embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
438 source_key: Some(DOCUMENT_KEY.to_string()),
439 hnsw: None, spann: None, },
442 }),
443 }),
444 ..Default::default()
445 },
446 );
447
448 Schema {
449 defaults,
450 keys,
451 cmek: None,
452 source_attached_function_id: None,
453 }
454 }
455}
456
457pub fn is_embedding_function_default(
458 embedding_function: &Option<EmbeddingFunctionConfiguration>,
459) -> bool {
460 match embedding_function {
461 None => true,
462 Some(embedding_function) => embedding_function.is_default(),
463 }
464}
465
466pub fn is_space_default(space: &Option<Space>) -> bool {
468 match space {
469 None => true, Some(s) => *s == default_space(), }
472}
473
474pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
476 hnsw_config.ef_construction == Some(default_construction_ef())
477 && hnsw_config.ef_search == Some(default_search_ef())
478 && hnsw_config.max_neighbors == Some(default_m())
479 && hnsw_config.num_threads == Some(default_num_threads())
480 && hnsw_config.batch_size == Some(default_batch_size())
481 && hnsw_config.sync_threshold == Some(default_sync_threshold())
482 && hnsw_config.resize_factor == Some(default_resize_factor())
483}
484
485#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
492#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
493pub struct ValueTypes {
494 #[serde(
495 rename = "string",
496 alias = "#string",
497 skip_serializing_if = "Option::is_none"
498 )] pub string: Option<StringValueType>,
500
501 #[serde(
502 rename = "float_list",
503 alias = "#float_list",
504 skip_serializing_if = "Option::is_none"
505 )]
506 pub float_list: Option<FloatListValueType>,
508
509 #[serde(
510 rename = "sparse_vector",
511 alias = "#sparse_vector",
512 skip_serializing_if = "Option::is_none"
513 )]
514 pub sparse_vector: Option<SparseVectorValueType>,
516
517 #[serde(
518 rename = "int",
519 alias = "#int",
520 skip_serializing_if = "Option::is_none"
521 )] pub int: Option<IntValueType>,
523
524 #[serde(
525 rename = "float",
526 alias = "#float",
527 skip_serializing_if = "Option::is_none"
528 )] pub float: Option<FloatValueType>,
530
531 #[serde(
532 rename = "bool",
533 alias = "#bool",
534 skip_serializing_if = "Option::is_none"
535 )] pub boolean: Option<BoolValueType>,
537}
538
539#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
541#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
542pub struct StringValueType {
543 #[serde(
544 rename = "fts_index",
545 alias = "$fts_index",
546 skip_serializing_if = "Option::is_none"
547 )] pub fts_index: Option<FtsIndexType>,
549
550 #[serde(
551 rename = "string_inverted_index", alias = "$string_inverted_index",
553 skip_serializing_if = "Option::is_none"
554 )]
555 pub string_inverted_index: Option<StringInvertedIndexType>,
556}
557
558#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
560#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
561pub struct FloatListValueType {
562 #[serde(
563 rename = "vector_index",
564 alias = "$vector_index",
565 skip_serializing_if = "Option::is_none"
566 )] pub vector_index: Option<VectorIndexType>,
568}
569
570#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
572#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
573pub struct SparseVectorValueType {
574 #[serde(
575 rename = "sparse_vector_index", alias = "$sparse_vector_index",
577 skip_serializing_if = "Option::is_none"
578 )]
579 pub sparse_vector_index: Option<SparseVectorIndexType>,
580}
581
582#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
584#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
585pub struct IntValueType {
586 #[serde(
587 rename = "int_inverted_index",
588 alias = "$int_inverted_index",
589 skip_serializing_if = "Option::is_none"
590 )]
591 pub int_inverted_index: Option<IntInvertedIndexType>,
593}
594
595#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
597#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
598pub struct FloatValueType {
599 #[serde(
600 rename = "float_inverted_index", alias = "$float_inverted_index",
602 skip_serializing_if = "Option::is_none"
603 )]
604 pub float_inverted_index: Option<FloatInvertedIndexType>,
605}
606
607#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
609#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
610pub struct BoolValueType {
611 #[serde(
612 rename = "bool_inverted_index", alias = "$bool_inverted_index",
614 skip_serializing_if = "Option::is_none"
615 )]
616 pub bool_inverted_index: Option<BoolInvertedIndexType>,
617}
618
619#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
621#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
622pub struct FtsIndexType {
623 pub enabled: bool,
624 pub config: FtsIndexConfig,
625}
626
627#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
628#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
629pub struct VectorIndexType {
630 pub enabled: bool,
631 pub config: VectorIndexConfig,
632}
633
634#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
635#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
636pub struct SparseVectorIndexType {
637 pub enabled: bool,
638 pub config: SparseVectorIndexConfig,
639}
640
641#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
642#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
643pub struct StringInvertedIndexType {
644 pub enabled: bool,
645 pub config: StringInvertedIndexConfig,
646}
647
648#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
649#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
650pub struct IntInvertedIndexType {
651 pub enabled: bool,
652 pub config: IntInvertedIndexConfig,
653}
654
655#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
656#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
657pub struct FloatInvertedIndexType {
658 pub enabled: bool,
659 pub config: FloatInvertedIndexConfig,
660}
661
662#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
663#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
664pub struct BoolInvertedIndexType {
665 pub enabled: bool,
666 pub config: BoolInvertedIndexConfig,
667}
668
669impl Schema {
670 pub fn new_default(default_knn_index: KnnIndex) -> Self {
672 let vector_config = VectorIndexType {
674 enabled: false,
675 config: VectorIndexConfig {
676 space: Some(default_space()),
677 embedding_function: None,
678 source_key: None,
679 hnsw: match default_knn_index {
680 KnnIndex::Hnsw => Some(HnswIndexConfig {
681 ef_construction: Some(default_construction_ef()),
682 max_neighbors: Some(default_m()),
683 ef_search: Some(default_search_ef()),
684 num_threads: Some(default_num_threads()),
685 batch_size: Some(default_batch_size()),
686 sync_threshold: Some(default_sync_threshold()),
687 resize_factor: Some(default_resize_factor()),
688 }),
689 KnnIndex::Spann => None,
690 },
691 spann: match default_knn_index {
692 KnnIndex::Hnsw => None,
693 KnnIndex::Spann => Some(SpannIndexConfig {
694 search_nprobe: Some(default_search_nprobe()),
695 search_rng_factor: Some(default_search_rng_factor()),
696 search_rng_epsilon: Some(default_search_rng_epsilon()),
697 nreplica_count: Some(default_nreplica_count()),
698 write_rng_factor: Some(default_write_rng_factor()),
699 write_rng_epsilon: Some(default_write_rng_epsilon()),
700 split_threshold: Some(default_split_threshold()),
701 num_samples_kmeans: Some(default_num_samples_kmeans()),
702 initial_lambda: Some(default_initial_lambda()),
703 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
704 merge_threshold: Some(default_merge_threshold()),
705 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
706 write_nprobe: Some(default_write_nprobe()),
707 ef_construction: Some(default_construction_ef_spann()),
708 ef_search: Some(default_search_ef_spann()),
709 max_neighbors: Some(default_m_spann()),
710 }),
711 },
712 },
713 };
714
715 let defaults = ValueTypes {
717 string: Some(StringValueType {
718 string_inverted_index: Some(StringInvertedIndexType {
719 enabled: true,
720 config: StringInvertedIndexConfig {},
721 }),
722 fts_index: Some(FtsIndexType {
723 enabled: false,
724 config: FtsIndexConfig {},
725 }),
726 }),
727 float: Some(FloatValueType {
728 float_inverted_index: Some(FloatInvertedIndexType {
729 enabled: true,
730 config: FloatInvertedIndexConfig {},
731 }),
732 }),
733 int: Some(IntValueType {
734 int_inverted_index: Some(IntInvertedIndexType {
735 enabled: true,
736 config: IntInvertedIndexConfig {},
737 }),
738 }),
739 boolean: Some(BoolValueType {
740 bool_inverted_index: Some(BoolInvertedIndexType {
741 enabled: true,
742 config: BoolInvertedIndexConfig {},
743 }),
744 }),
745 float_list: Some(FloatListValueType {
746 vector_index: Some(vector_config),
747 }),
748 sparse_vector: Some(SparseVectorValueType {
749 sparse_vector_index: Some(SparseVectorIndexType {
750 enabled: false,
751 config: SparseVectorIndexConfig {
752 embedding_function: Some(EmbeddingFunctionConfiguration::Unknown),
753 source_key: None,
754 bm25: Some(false),
755 },
756 }),
757 }),
758 };
759
760 let mut keys = HashMap::new();
762
763 let embedding_defaults = ValueTypes {
765 float_list: Some(FloatListValueType {
766 vector_index: Some(VectorIndexType {
767 enabled: true,
768 config: VectorIndexConfig {
769 space: Some(default_space()),
770 embedding_function: None,
771 source_key: Some(DOCUMENT_KEY.to_string()),
772 hnsw: match default_knn_index {
773 KnnIndex::Hnsw => Some(HnswIndexConfig {
774 ef_construction: Some(default_construction_ef()),
775 max_neighbors: Some(default_m()),
776 ef_search: Some(default_search_ef()),
777 num_threads: Some(default_num_threads()),
778 batch_size: Some(default_batch_size()),
779 sync_threshold: Some(default_sync_threshold()),
780 resize_factor: Some(default_resize_factor()),
781 }),
782 KnnIndex::Spann => None,
783 },
784 spann: match default_knn_index {
785 KnnIndex::Hnsw => None,
786 KnnIndex::Spann => Some(SpannIndexConfig {
787 search_nprobe: Some(default_search_nprobe()),
788 search_rng_factor: Some(default_search_rng_factor()),
789 search_rng_epsilon: Some(default_search_rng_epsilon()),
790 nreplica_count: Some(default_nreplica_count()),
791 write_rng_factor: Some(default_write_rng_factor()),
792 write_rng_epsilon: Some(default_write_rng_epsilon()),
793 split_threshold: Some(default_split_threshold()),
794 num_samples_kmeans: Some(default_num_samples_kmeans()),
795 initial_lambda: Some(default_initial_lambda()),
796 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
797 merge_threshold: Some(default_merge_threshold()),
798 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
799 write_nprobe: Some(default_write_nprobe()),
800 ef_construction: Some(default_construction_ef_spann()),
801 ef_search: Some(default_search_ef_spann()),
802 max_neighbors: Some(default_m_spann()),
803 }),
804 },
805 },
806 }),
807 }),
808 ..Default::default()
809 };
810 keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
811
812 let document_defaults = ValueTypes {
814 string: Some(StringValueType {
815 fts_index: Some(FtsIndexType {
816 enabled: true,
817 config: FtsIndexConfig {},
818 }),
819 string_inverted_index: Some(StringInvertedIndexType {
820 enabled: false,
821 config: StringInvertedIndexConfig {},
822 }),
823 }),
824 ..Default::default()
825 };
826 keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
827
828 Schema {
829 defaults,
830 keys,
831 cmek: None,
832 source_attached_function_id: None,
833 }
834 }
835
836 pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
837 let to_internal = |vector_index: &VectorIndexType| {
838 let space = vector_index.config.space.clone();
839 vector_index
840 .config
841 .spann
842 .clone()
843 .map(|config| (space.as_ref(), &config).into())
844 };
845
846 self.keys
847 .get(EMBEDDING_KEY)
848 .and_then(|value_types| value_types.float_list.as_ref())
849 .and_then(|float_list| float_list.vector_index.as_ref())
850 .and_then(to_internal)
851 .or_else(|| {
852 self.defaults
853 .float_list
854 .as_ref()
855 .and_then(|float_list| float_list.vector_index.as_ref())
856 .and_then(to_internal)
857 })
858 }
859
860 pub fn get_internal_hnsw_config(&self) -> Option<InternalHnswConfiguration> {
861 let to_internal = |vector_index: &VectorIndexType| {
862 if vector_index.config.spann.is_some() {
863 return None;
864 }
865 let space = vector_index.config.space.as_ref();
866 let hnsw_config = vector_index.config.hnsw.as_ref();
867 Some((space, hnsw_config).into())
868 };
869
870 self.keys
871 .get(EMBEDDING_KEY)
872 .and_then(|value_types| value_types.float_list.as_ref())
873 .and_then(|float_list| float_list.vector_index.as_ref())
874 .and_then(to_internal)
875 .or_else(|| {
876 self.defaults
877 .float_list
878 .as_ref()
879 .and_then(|float_list| float_list.vector_index.as_ref())
880 .and_then(to_internal)
881 })
882 }
883
884 pub fn get_internal_hnsw_config_with_legacy_fallback(
885 &self,
886 segment: &Segment,
887 ) -> Result<Option<InternalHnswConfiguration>, HnswParametersFromSegmentError> {
888 if let Some(config) = self.get_internal_hnsw_config() {
889 let config_from_metadata =
890 InternalHnswConfiguration::from_legacy_segment_metadata(&segment.metadata)?;
891
892 if config == InternalHnswConfiguration::default() && config != config_from_metadata {
893 return Ok(Some(config_from_metadata));
894 }
895
896 return Ok(Some(config));
897 }
898
899 Ok(None)
900 }
901
902 pub fn reconcile_with_defaults(
909 user_schema: Option<&Schema>,
910 knn_index: KnnIndex,
911 ) -> Result<Self, SchemaError> {
912 let default_schema = Schema::new_default(knn_index);
913
914 match user_schema {
915 Some(user) => {
916 let merged_defaults =
918 Self::merge_value_types(&default_schema.defaults, &user.defaults, knn_index)?;
919
920 let mut merged_keys = default_schema.keys.clone();
922 for (key, user_value_types) in &user.keys {
923 if let Some(default_value_types) = merged_keys.get(key) {
924 let merged_value_types = Self::merge_value_types(
926 default_value_types,
927 user_value_types,
928 knn_index,
929 )?;
930 merged_keys.insert(key.clone(), merged_value_types);
931 } else {
932 merged_keys.insert(key.clone(), user_value_types.clone());
934 }
935 }
936
937 Ok(Schema {
938 defaults: merged_defaults,
939 keys: merged_keys,
940 cmek: user.cmek.clone().or(default_schema.cmek.clone()),
941 source_attached_function_id: user
942 .source_attached_function_id
943 .clone()
944 .or(default_schema.source_attached_function_id.clone()),
945 })
946 }
947 None => Ok(default_schema),
948 }
949 }
950
951 pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
953 if self.defaults != other.defaults {
954 return Err(SchemaError::DefaultsMismatch);
955 }
956
957 let mut keys = self.keys.clone();
958
959 for (key, other_value_types) in &other.keys {
960 if let Some(existing) = keys.get(key).cloned() {
961 let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
962 keys.insert(key.clone(), merged);
963 } else {
964 keys.insert(key.clone(), other_value_types.clone());
965 }
966 }
967
968 Ok(Schema {
969 defaults: self.defaults.clone(),
970 keys,
971 cmek: other.cmek.clone().or(self.cmek.clone()),
972 source_attached_function_id: other
973 .source_attached_function_id
974 .clone()
975 .or(self.source_attached_function_id.clone()),
976 })
977 }
978
979 fn merge_override_value_types(
980 key: &str,
981 left: &ValueTypes,
982 right: &ValueTypes,
983 ) -> Result<ValueTypes, SchemaError> {
984 Ok(ValueTypes {
985 string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
986 float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
987 int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
988 boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
989 float_list: Self::merge_float_list_override(
990 key,
991 left.float_list.as_ref(),
992 right.float_list.as_ref(),
993 )?,
994 sparse_vector: Self::merge_sparse_vector_override(
995 key,
996 left.sparse_vector.as_ref(),
997 right.sparse_vector.as_ref(),
998 )?,
999 })
1000 }
1001
1002 fn merge_string_override(
1003 key: &str,
1004 left: Option<&StringValueType>,
1005 right: Option<&StringValueType>,
1006 ) -> Result<Option<StringValueType>, SchemaError> {
1007 match (left, right) {
1008 (Some(l), Some(r)) => Ok(Some(StringValueType {
1009 string_inverted_index: Self::merge_index_or_error(
1010 l.string_inverted_index.as_ref(),
1011 r.string_inverted_index.as_ref(),
1012 &format!("key '{key}' string.string_inverted_index"),
1013 )?,
1014 fts_index: Self::merge_index_or_error(
1015 l.fts_index.as_ref(),
1016 r.fts_index.as_ref(),
1017 &format!("key '{key}' string.fts_index"),
1018 )?,
1019 })),
1020 (Some(l), None) => Ok(Some(l.clone())),
1021 (None, Some(r)) => Ok(Some(r.clone())),
1022 (None, None) => Ok(None),
1023 }
1024 }
1025
1026 fn merge_float_override(
1027 key: &str,
1028 left: Option<&FloatValueType>,
1029 right: Option<&FloatValueType>,
1030 ) -> Result<Option<FloatValueType>, SchemaError> {
1031 match (left, right) {
1032 (Some(l), Some(r)) => Ok(Some(FloatValueType {
1033 float_inverted_index: Self::merge_index_or_error(
1034 l.float_inverted_index.as_ref(),
1035 r.float_inverted_index.as_ref(),
1036 &format!("key '{key}' float.float_inverted_index"),
1037 )?,
1038 })),
1039 (Some(l), None) => Ok(Some(l.clone())),
1040 (None, Some(r)) => Ok(Some(r.clone())),
1041 (None, None) => Ok(None),
1042 }
1043 }
1044
1045 fn merge_int_override(
1046 key: &str,
1047 left: Option<&IntValueType>,
1048 right: Option<&IntValueType>,
1049 ) -> Result<Option<IntValueType>, SchemaError> {
1050 match (left, right) {
1051 (Some(l), Some(r)) => Ok(Some(IntValueType {
1052 int_inverted_index: Self::merge_index_or_error(
1053 l.int_inverted_index.as_ref(),
1054 r.int_inverted_index.as_ref(),
1055 &format!("key '{key}' int.int_inverted_index"),
1056 )?,
1057 })),
1058 (Some(l), None) => Ok(Some(l.clone())),
1059 (None, Some(r)) => Ok(Some(r.clone())),
1060 (None, None) => Ok(None),
1061 }
1062 }
1063
1064 fn merge_bool_override(
1065 key: &str,
1066 left: Option<&BoolValueType>,
1067 right: Option<&BoolValueType>,
1068 ) -> Result<Option<BoolValueType>, SchemaError> {
1069 match (left, right) {
1070 (Some(l), Some(r)) => Ok(Some(BoolValueType {
1071 bool_inverted_index: Self::merge_index_or_error(
1072 l.bool_inverted_index.as_ref(),
1073 r.bool_inverted_index.as_ref(),
1074 &format!("key '{key}' bool.bool_inverted_index"),
1075 )?,
1076 })),
1077 (Some(l), None) => Ok(Some(l.clone())),
1078 (None, Some(r)) => Ok(Some(r.clone())),
1079 (None, None) => Ok(None),
1080 }
1081 }
1082
1083 fn merge_float_list_override(
1084 key: &str,
1085 left: Option<&FloatListValueType>,
1086 right: Option<&FloatListValueType>,
1087 ) -> Result<Option<FloatListValueType>, SchemaError> {
1088 match (left, right) {
1089 (Some(l), Some(r)) => Ok(Some(FloatListValueType {
1090 vector_index: Self::merge_index_or_error(
1091 l.vector_index.as_ref(),
1092 r.vector_index.as_ref(),
1093 &format!("key '{key}' float_list.vector_index"),
1094 )?,
1095 })),
1096 (Some(l), None) => Ok(Some(l.clone())),
1097 (None, Some(r)) => Ok(Some(r.clone())),
1098 (None, None) => Ok(None),
1099 }
1100 }
1101
1102 fn merge_sparse_vector_override(
1103 key: &str,
1104 left: Option<&SparseVectorValueType>,
1105 right: Option<&SparseVectorValueType>,
1106 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1107 match (left, right) {
1108 (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
1109 sparse_vector_index: Self::merge_index_or_error(
1110 l.sparse_vector_index.as_ref(),
1111 r.sparse_vector_index.as_ref(),
1112 &format!("key '{key}' sparse_vector.sparse_vector_index"),
1113 )?,
1114 })),
1115 (Some(l), None) => Ok(Some(l.clone())),
1116 (None, Some(r)) => Ok(Some(r.clone())),
1117 (None, None) => Ok(None),
1118 }
1119 }
1120
1121 fn merge_index_or_error<T: Clone + PartialEq>(
1122 left: Option<&T>,
1123 right: Option<&T>,
1124 context: &str,
1125 ) -> Result<Option<T>, SchemaError> {
1126 match (left, right) {
1127 (Some(l), Some(r)) => {
1128 if l == r {
1129 Ok(Some(l.clone()))
1130 } else {
1131 Err(SchemaError::ConfigurationConflict {
1132 context: context.to_string(),
1133 })
1134 }
1135 }
1136 (Some(l), None) => Ok(Some(l.clone())),
1137 (None, Some(r)) => Ok(Some(r.clone())),
1138 (None, None) => Ok(None),
1139 }
1140 }
1141
1142 fn merge_value_types(
1145 default: &ValueTypes,
1146 user: &ValueTypes,
1147 knn_index: KnnIndex,
1148 ) -> Result<ValueTypes, SchemaError> {
1149 let float_list = Self::merge_float_list_type(
1151 default.float_list.as_ref(),
1152 user.float_list.as_ref(),
1153 knn_index,
1154 );
1155
1156 if let Some(ref fl) = float_list {
1158 Self::validate_float_list_value_type(fl)?;
1159 }
1160
1161 Ok(ValueTypes {
1162 string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
1163 float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
1164 int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
1165 boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
1166 float_list,
1167 sparse_vector: Self::merge_sparse_vector_type(
1168 default.sparse_vector.as_ref(),
1169 user.sparse_vector.as_ref(),
1170 )?,
1171 })
1172 }
1173
1174 fn merge_string_type(
1176 default: Option<&StringValueType>,
1177 user: Option<&StringValueType>,
1178 ) -> Result<Option<StringValueType>, SchemaError> {
1179 match (default, user) {
1180 (Some(default), Some(user)) => Ok(Some(StringValueType {
1181 string_inverted_index: Self::merge_string_inverted_index_type(
1182 default.string_inverted_index.as_ref(),
1183 user.string_inverted_index.as_ref(),
1184 )?,
1185 fts_index: Self::merge_fts_index_type(
1186 default.fts_index.as_ref(),
1187 user.fts_index.as_ref(),
1188 )?,
1189 })),
1190 (Some(default), None) => Ok(Some(default.clone())),
1191 (None, Some(user)) => Ok(Some(user.clone())),
1192 (None, None) => Ok(None),
1193 }
1194 }
1195
1196 fn merge_float_type(
1198 default: Option<&FloatValueType>,
1199 user: Option<&FloatValueType>,
1200 ) -> Result<Option<FloatValueType>, SchemaError> {
1201 match (default, user) {
1202 (Some(default), Some(user)) => Ok(Some(FloatValueType {
1203 float_inverted_index: Self::merge_float_inverted_index_type(
1204 default.float_inverted_index.as_ref(),
1205 user.float_inverted_index.as_ref(),
1206 )?,
1207 })),
1208 (Some(default), None) => Ok(Some(default.clone())),
1209 (None, Some(user)) => Ok(Some(user.clone())),
1210 (None, None) => Ok(None),
1211 }
1212 }
1213
1214 fn merge_int_type(
1216 default: Option<&IntValueType>,
1217 user: Option<&IntValueType>,
1218 ) -> Result<Option<IntValueType>, SchemaError> {
1219 match (default, user) {
1220 (Some(default), Some(user)) => Ok(Some(IntValueType {
1221 int_inverted_index: Self::merge_int_inverted_index_type(
1222 default.int_inverted_index.as_ref(),
1223 user.int_inverted_index.as_ref(),
1224 )?,
1225 })),
1226 (Some(default), None) => Ok(Some(default.clone())),
1227 (None, Some(user)) => Ok(Some(user.clone())),
1228 (None, None) => Ok(None),
1229 }
1230 }
1231
1232 fn merge_bool_type(
1234 default: Option<&BoolValueType>,
1235 user: Option<&BoolValueType>,
1236 ) -> Result<Option<BoolValueType>, SchemaError> {
1237 match (default, user) {
1238 (Some(default), Some(user)) => Ok(Some(BoolValueType {
1239 bool_inverted_index: Self::merge_bool_inverted_index_type(
1240 default.bool_inverted_index.as_ref(),
1241 user.bool_inverted_index.as_ref(),
1242 )?,
1243 })),
1244 (Some(default), None) => Ok(Some(default.clone())),
1245 (None, Some(user)) => Ok(Some(user.clone())),
1246 (None, None) => Ok(None),
1247 }
1248 }
1249
1250 fn merge_float_list_type(
1252 default: Option<&FloatListValueType>,
1253 user: Option<&FloatListValueType>,
1254 knn_index: KnnIndex,
1255 ) -> Option<FloatListValueType> {
1256 match (default, user) {
1257 (Some(default), Some(user)) => Some(FloatListValueType {
1258 vector_index: Self::merge_vector_index_type(
1259 default.vector_index.as_ref(),
1260 user.vector_index.as_ref(),
1261 knn_index,
1262 ),
1263 }),
1264 (Some(default), None) => Some(default.clone()),
1265 (None, Some(user)) => Some(user.clone()),
1266 (None, None) => None,
1267 }
1268 }
1269
1270 fn merge_sparse_vector_type(
1272 default: Option<&SparseVectorValueType>,
1273 user: Option<&SparseVectorValueType>,
1274 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1275 match (default, user) {
1276 (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
1277 sparse_vector_index: Self::merge_sparse_vector_index_type(
1278 default.sparse_vector_index.as_ref(),
1279 user.sparse_vector_index.as_ref(),
1280 )?,
1281 })),
1282 (Some(default), None) => Ok(Some(default.clone())),
1283 (None, Some(user)) => Ok(Some(user.clone())),
1284 (None, None) => Ok(None),
1285 }
1286 }
1287
1288 fn merge_string_inverted_index_type(
1290 default: Option<&StringInvertedIndexType>,
1291 user: Option<&StringInvertedIndexType>,
1292 ) -> Result<Option<StringInvertedIndexType>, SchemaError> {
1293 match (default, user) {
1294 (Some(_default), Some(user)) => {
1295 Ok(Some(StringInvertedIndexType {
1296 enabled: user.enabled, config: user.config.clone(), }))
1299 }
1300 (Some(default), None) => Ok(Some(default.clone())),
1301 (None, Some(user)) => Ok(Some(user.clone())),
1302 (None, None) => Ok(None),
1303 }
1304 }
1305
1306 fn merge_fts_index_type(
1307 default: Option<&FtsIndexType>,
1308 user: Option<&FtsIndexType>,
1309 ) -> Result<Option<FtsIndexType>, SchemaError> {
1310 match (default, user) {
1311 (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
1312 enabled: user.enabled,
1313 config: user.config.clone(),
1314 })),
1315 (Some(default), None) => Ok(Some(default.clone())),
1316 (None, Some(user)) => Ok(Some(user.clone())),
1317 (None, None) => Ok(None),
1318 }
1319 }
1320
1321 fn merge_float_inverted_index_type(
1322 default: Option<&FloatInvertedIndexType>,
1323 user: Option<&FloatInvertedIndexType>,
1324 ) -> Result<Option<FloatInvertedIndexType>, SchemaError> {
1325 match (default, user) {
1326 (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
1327 enabled: user.enabled,
1328 config: user.config.clone(),
1329 })),
1330 (Some(default), None) => Ok(Some(default.clone())),
1331 (None, Some(user)) => Ok(Some(user.clone())),
1332 (None, None) => Ok(None),
1333 }
1334 }
1335
1336 fn merge_int_inverted_index_type(
1337 default: Option<&IntInvertedIndexType>,
1338 user: Option<&IntInvertedIndexType>,
1339 ) -> Result<Option<IntInvertedIndexType>, SchemaError> {
1340 match (default, user) {
1341 (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
1342 enabled: user.enabled,
1343 config: user.config.clone(),
1344 })),
1345 (Some(default), None) => Ok(Some(default.clone())),
1346 (None, Some(user)) => Ok(Some(user.clone())),
1347 (None, None) => Ok(None),
1348 }
1349 }
1350
1351 fn merge_bool_inverted_index_type(
1352 default: Option<&BoolInvertedIndexType>,
1353 user: Option<&BoolInvertedIndexType>,
1354 ) -> Result<Option<BoolInvertedIndexType>, SchemaError> {
1355 match (default, user) {
1356 (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
1357 enabled: user.enabled,
1358 config: user.config.clone(),
1359 })),
1360 (Some(default), None) => Ok(Some(default.clone())),
1361 (None, Some(user)) => Ok(Some(user.clone())),
1362 (None, None) => Ok(None),
1363 }
1364 }
1365
1366 fn merge_vector_index_type(
1367 default: Option<&VectorIndexType>,
1368 user: Option<&VectorIndexType>,
1369 knn_index: KnnIndex,
1370 ) -> Option<VectorIndexType> {
1371 match (default, user) {
1372 (Some(default), Some(user)) => Some(VectorIndexType {
1373 enabled: user.enabled,
1374 config: Self::merge_vector_index_config(&default.config, &user.config, knn_index),
1375 }),
1376 (Some(default), None) => Some(default.clone()),
1377 (None, Some(user)) => Some(user.clone()),
1378 (None, None) => None,
1379 }
1380 }
1381
1382 fn merge_sparse_vector_index_type(
1383 default: Option<&SparseVectorIndexType>,
1384 user: Option<&SparseVectorIndexType>,
1385 ) -> Result<Option<SparseVectorIndexType>, SchemaError> {
1386 match (default, user) {
1387 (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
1388 enabled: user.enabled,
1389 config: Self::merge_sparse_vector_index_config(&default.config, &user.config),
1390 })),
1391 (Some(default), None) => Ok(Some(default.clone())),
1392 (None, Some(user)) => Ok(Some(user.clone())),
1393 (None, None) => Ok(None),
1394 }
1395 }
1396
1397 fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), SchemaError> {
1400 if let Some(vector_index) = &float_list.vector_index {
1401 if let Some(hnsw) = &vector_index.config.hnsw {
1402 hnsw.validate().map_err(SchemaError::InvalidHnswConfig)?;
1403 }
1404 if let Some(spann) = &vector_index.config.spann {
1405 spann.validate().map_err(SchemaError::InvalidSpannConfig)?;
1406 }
1407 }
1408 Ok(())
1409 }
1410
1411 fn merge_vector_index_config(
1413 default: &VectorIndexConfig,
1414 user: &VectorIndexConfig,
1415 knn_index: KnnIndex,
1416 ) -> VectorIndexConfig {
1417 match knn_index {
1418 KnnIndex::Hnsw => VectorIndexConfig {
1419 space: user.space.clone().or(default.space.clone()),
1420 embedding_function: user
1421 .embedding_function
1422 .clone()
1423 .or(default.embedding_function.clone()),
1424 source_key: user.source_key.clone().or(default.source_key.clone()),
1425 hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1426 spann: None,
1427 },
1428 KnnIndex::Spann => VectorIndexConfig {
1429 space: user.space.clone().or(default.space.clone()),
1430 embedding_function: user
1431 .embedding_function
1432 .clone()
1433 .or(default.embedding_function.clone()),
1434 source_key: user.source_key.clone().or(default.source_key.clone()),
1435 hnsw: None,
1436 spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
1437 },
1438 }
1439 }
1440
1441 fn merge_sparse_vector_index_config(
1443 default: &SparseVectorIndexConfig,
1444 user: &SparseVectorIndexConfig,
1445 ) -> SparseVectorIndexConfig {
1446 SparseVectorIndexConfig {
1447 embedding_function: user
1448 .embedding_function
1449 .clone()
1450 .or(default.embedding_function.clone()),
1451 source_key: user.source_key.clone().or(default.source_key.clone()),
1452 bm25: user.bm25.or(default.bm25),
1453 }
1454 }
1455
1456 fn merge_hnsw_configs(
1458 default_hnsw: Option<&HnswIndexConfig>,
1459 user_hnsw: Option<&HnswIndexConfig>,
1460 ) -> Option<HnswIndexConfig> {
1461 match (default_hnsw, user_hnsw) {
1462 (Some(default), Some(user)) => Some(HnswIndexConfig {
1463 ef_construction: user.ef_construction.or(default.ef_construction),
1464 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1465 ef_search: user.ef_search.or(default.ef_search),
1466 num_threads: user.num_threads.or(default.num_threads),
1467 batch_size: user.batch_size.or(default.batch_size),
1468 sync_threshold: user.sync_threshold.or(default.sync_threshold),
1469 resize_factor: user.resize_factor.or(default.resize_factor),
1470 }),
1471 (Some(default), None) => Some(default.clone()),
1472 (None, Some(user)) => Some(user.clone()),
1473 (None, None) => None,
1474 }
1475 }
1476
1477 fn merge_spann_configs(
1479 default_spann: Option<&SpannIndexConfig>,
1480 user_spann: Option<&SpannIndexConfig>,
1481 ) -> Option<SpannIndexConfig> {
1482 match (default_spann, user_spann) {
1483 (Some(default), Some(user)) => Some(SpannIndexConfig {
1484 search_nprobe: user.search_nprobe.or(default.search_nprobe),
1485 search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1486 search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1487 nreplica_count: user.nreplica_count.or(default.nreplica_count),
1488 write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1489 write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1490 split_threshold: user.split_threshold.or(default.split_threshold),
1491 num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1492 initial_lambda: user.initial_lambda.or(default.initial_lambda),
1493 reassign_neighbor_count: user
1494 .reassign_neighbor_count
1495 .or(default.reassign_neighbor_count),
1496 merge_threshold: user.merge_threshold.or(default.merge_threshold),
1497 num_centers_to_merge_to: user
1498 .num_centers_to_merge_to
1499 .or(default.num_centers_to_merge_to),
1500 write_nprobe: user.write_nprobe.or(default.write_nprobe),
1501 ef_construction: user.ef_construction.or(default.ef_construction),
1502 ef_search: user.ef_search.or(default.ef_search),
1503 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1504 }),
1505 (Some(default), None) => Some(default.clone()),
1506 (None, Some(user)) => Some(user.clone()),
1507 (None, None) => None,
1508 }
1509 }
1510
1511 pub fn reconcile_with_collection_config(
1519 schema: &Schema,
1520 collection_config: &InternalCollectionConfiguration,
1521 default_knn_index: KnnIndex,
1522 ) -> Result<Schema, SchemaError> {
1523 if collection_config.is_default() {
1525 if schema.is_default() {
1526 let mut new_schema = Schema::new_default(default_knn_index);
1529
1530 if collection_config.embedding_function.is_some() {
1531 if let Some(float_list) = &mut new_schema.defaults.float_list {
1532 if let Some(vector_index) = &mut float_list.vector_index {
1533 vector_index.config.embedding_function =
1534 collection_config.embedding_function.clone();
1535 }
1536 }
1537 if let Some(embedding_types) = new_schema.keys.get_mut(EMBEDDING_KEY) {
1538 if let Some(float_list) = &mut embedding_types.float_list {
1539 if let Some(vector_index) = &mut float_list.vector_index {
1540 vector_index.config.embedding_function =
1541 collection_config.embedding_function.clone();
1542 }
1543 }
1544 }
1545 }
1546 return Ok(new_schema);
1547 } else {
1548 return Ok(schema.clone());
1550 }
1551 }
1552
1553 Self::try_from(collection_config)
1556 }
1557
1558 pub fn reconcile_schema_and_config(
1559 schema: Option<&Schema>,
1560 configuration: Option<&InternalCollectionConfiguration>,
1561 knn_index: KnnIndex,
1562 ) -> Result<Schema, SchemaError> {
1563 if let (Some(user_schema), Some(config)) = (schema, configuration) {
1565 if !user_schema.is_default() && !config.is_default() {
1566 return Err(SchemaError::ConfigAndSchemaConflict);
1567 }
1568 }
1569
1570 let reconciled_schema = Self::reconcile_with_defaults(schema, knn_index)?;
1571 if let Some(config) = configuration {
1572 Self::reconcile_with_collection_config(&reconciled_schema, config, knn_index)
1573 } else {
1574 Ok(reconciled_schema)
1575 }
1576 }
1577
1578 pub fn default_with_embedding_function(
1579 embedding_function: EmbeddingFunctionConfiguration,
1580 ) -> Schema {
1581 let mut schema = Schema::new_default(KnnIndex::Spann);
1582 if let Some(float_list) = &mut schema.defaults.float_list {
1583 if let Some(vector_index) = &mut float_list.vector_index {
1584 vector_index.config.embedding_function = Some(embedding_function.clone());
1585 }
1586 }
1587 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1588 if let Some(float_list) = &mut embedding_types.float_list {
1589 if let Some(vector_index) = &mut float_list.vector_index {
1590 vector_index.config.embedding_function = Some(embedding_function);
1591 }
1592 }
1593 }
1594 schema
1595 }
1596
1597 pub fn is_default(&self) -> bool {
1599 if !Self::is_value_types_default(&self.defaults) {
1601 return false;
1602 }
1603
1604 for key in self.keys.keys() {
1605 if key != EMBEDDING_KEY && key != DOCUMENT_KEY {
1606 return false;
1607 }
1608 }
1609
1610 if let Some(embedding_value) = self.keys.get(EMBEDDING_KEY) {
1612 if !Self::is_embedding_value_types_default(embedding_value) {
1613 return false;
1614 }
1615 }
1616
1617 if let Some(document_value) = self.keys.get(DOCUMENT_KEY) {
1619 if !Self::is_document_value_types_default(document_value) {
1620 return false;
1621 }
1622 }
1623
1624 if self.cmek.is_some() {
1626 return false;
1627 }
1628
1629 true
1630 }
1631
1632 fn is_value_types_default(value_types: &ValueTypes) -> bool {
1634 if let Some(string) = &value_types.string {
1636 if let Some(string_inverted) = &string.string_inverted_index {
1637 if !string_inverted.enabled {
1638 return false;
1639 }
1640 }
1642 if let Some(fts) = &string.fts_index {
1643 if fts.enabled {
1644 return false;
1645 }
1646 }
1648 }
1649
1650 if let Some(float) = &value_types.float {
1652 if let Some(float_inverted) = &float.float_inverted_index {
1653 if !float_inverted.enabled {
1654 return false;
1655 }
1656 }
1658 }
1659
1660 if let Some(int) = &value_types.int {
1662 if let Some(int_inverted) = &int.int_inverted_index {
1663 if !int_inverted.enabled {
1664 return false;
1665 }
1666 }
1668 }
1669
1670 if let Some(boolean) = &value_types.boolean {
1672 if let Some(bool_inverted) = &boolean.bool_inverted_index {
1673 if !bool_inverted.enabled {
1674 return false;
1675 }
1676 }
1678 }
1679
1680 if let Some(float_list) = &value_types.float_list {
1682 if let Some(vector_index) = &float_list.vector_index {
1683 if vector_index.enabled {
1684 return false;
1685 }
1686 if !is_embedding_function_default(&vector_index.config.embedding_function) {
1687 return false;
1688 }
1689 if !is_space_default(&vector_index.config.space) {
1690 return false;
1691 }
1692 if vector_index.config.source_key.is_some() {
1694 return false;
1695 }
1696 match (&vector_index.config.hnsw, &vector_index.config.spann) {
1699 (Some(hnsw_config), None) => {
1700 if !hnsw_config.is_default() {
1701 return false;
1702 }
1703 }
1704 (None, Some(spann_config)) => {
1705 if !spann_config.is_default() {
1706 return false;
1707 }
1708 }
1709 (Some(_), Some(_)) => return false, (None, None) => {}
1711 }
1712 }
1713 }
1714
1715 if let Some(sparse_vector) = &value_types.sparse_vector {
1717 if let Some(sparse_index) = &sparse_vector.sparse_vector_index {
1718 if sparse_index.enabled {
1719 return false;
1720 }
1721 if !is_embedding_function_default(&sparse_index.config.embedding_function) {
1723 return false;
1724 }
1725 if sparse_index.config.source_key.is_some() {
1726 return false;
1727 }
1728 if let Some(bm25) = &sparse_index.config.bm25 {
1729 if bm25 != &false {
1730 return false;
1731 }
1732 }
1733 }
1734 }
1735
1736 true
1737 }
1738
1739 fn is_embedding_value_types_default(value_types: &ValueTypes) -> bool {
1741 if value_types.string.is_some()
1743 || value_types.float.is_some()
1744 || value_types.int.is_some()
1745 || value_types.boolean.is_some()
1746 || value_types.sparse_vector.is_some()
1747 {
1748 return false;
1749 }
1750
1751 if let Some(float_list) = &value_types.float_list {
1753 if let Some(vector_index) = &float_list.vector_index {
1754 if !vector_index.enabled {
1755 return false;
1756 }
1757 if !is_space_default(&vector_index.config.space) {
1758 return false;
1759 }
1760 if !is_embedding_function_default(&vector_index.config.embedding_function) {
1762 return false;
1763 }
1764 if vector_index.config.source_key.as_deref() != Some(DOCUMENT_KEY) {
1766 return false;
1767 }
1768 match (&vector_index.config.hnsw, &vector_index.config.spann) {
1771 (Some(hnsw_config), None) => {
1772 if !hnsw_config.is_default() {
1773 return false;
1774 }
1775 }
1776 (None, Some(spann_config)) => {
1777 if !spann_config.is_default() {
1778 return false;
1779 }
1780 }
1781 (Some(_), Some(_)) => return false, (None, None) => {}
1783 }
1784 }
1785 }
1786
1787 true
1788 }
1789
1790 fn is_document_value_types_default(value_types: &ValueTypes) -> bool {
1792 if value_types.float_list.is_some()
1794 || value_types.float.is_some()
1795 || value_types.int.is_some()
1796 || value_types.boolean.is_some()
1797 || value_types.sparse_vector.is_some()
1798 {
1799 return false;
1800 }
1801
1802 if let Some(string) = &value_types.string {
1804 if let Some(fts) = &string.fts_index {
1805 if !fts.enabled {
1806 return false;
1807 }
1808 }
1810 if let Some(string_inverted) = &string.string_inverted_index {
1811 if string_inverted.enabled {
1812 return false;
1813 }
1814 }
1816 }
1817
1818 true
1819 }
1820
1821 pub fn is_metadata_type_index_enabled(
1823 &self,
1824 key: &str,
1825 value_type: MetadataValueType,
1826 ) -> Result<bool, SchemaError> {
1827 let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1828
1829 match value_type {
1830 MetadataValueType::Bool => match &v_type.boolean {
1831 Some(bool_type) => match &bool_type.bool_inverted_index {
1832 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1833 None => Err(SchemaError::MissingIndexConfiguration {
1834 key: key.to_string(),
1835 value_type: "bool".to_string(),
1836 }),
1837 },
1838 None => match &self.defaults.boolean {
1839 Some(bool_type) => match &bool_type.bool_inverted_index {
1840 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1841 None => Err(SchemaError::MissingIndexConfiguration {
1842 key: key.to_string(),
1843 value_type: "bool".to_string(),
1844 }),
1845 },
1846 None => Err(SchemaError::MissingIndexConfiguration {
1847 key: key.to_string(),
1848 value_type: "bool".to_string(),
1849 }),
1850 },
1851 },
1852 MetadataValueType::Int => match &v_type.int {
1853 Some(int_type) => match &int_type.int_inverted_index {
1854 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1855 None => Err(SchemaError::MissingIndexConfiguration {
1856 key: key.to_string(),
1857 value_type: "int".to_string(),
1858 }),
1859 },
1860 None => match &self.defaults.int {
1861 Some(int_type) => match &int_type.int_inverted_index {
1862 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1863 None => Err(SchemaError::MissingIndexConfiguration {
1864 key: key.to_string(),
1865 value_type: "int".to_string(),
1866 }),
1867 },
1868 None => Err(SchemaError::MissingIndexConfiguration {
1869 key: key.to_string(),
1870 value_type: "int".to_string(),
1871 }),
1872 },
1873 },
1874 MetadataValueType::Float => match &v_type.float {
1875 Some(float_type) => match &float_type.float_inverted_index {
1876 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1877 None => Err(SchemaError::MissingIndexConfiguration {
1878 key: key.to_string(),
1879 value_type: "float".to_string(),
1880 }),
1881 },
1882 None => match &self.defaults.float {
1883 Some(float_type) => match &float_type.float_inverted_index {
1884 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1885 None => Err(SchemaError::MissingIndexConfiguration {
1886 key: key.to_string(),
1887 value_type: "float".to_string(),
1888 }),
1889 },
1890 None => Err(SchemaError::MissingIndexConfiguration {
1891 key: key.to_string(),
1892 value_type: "float".to_string(),
1893 }),
1894 },
1895 },
1896 MetadataValueType::Str => match &v_type.string {
1897 Some(string_type) => match &string_type.string_inverted_index {
1898 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1899 None => Err(SchemaError::MissingIndexConfiguration {
1900 key: key.to_string(),
1901 value_type: "string".to_string(),
1902 }),
1903 },
1904 None => match &self.defaults.string {
1905 Some(string_type) => match &string_type.string_inverted_index {
1906 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1907 None => Err(SchemaError::MissingIndexConfiguration {
1908 key: key.to_string(),
1909 value_type: "string".to_string(),
1910 }),
1911 },
1912 None => Err(SchemaError::MissingIndexConfiguration {
1913 key: key.to_string(),
1914 value_type: "string".to_string(),
1915 }),
1916 },
1917 },
1918 MetadataValueType::SparseVector => match &v_type.sparse_vector {
1919 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1920 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1921 None => Err(SchemaError::MissingIndexConfiguration {
1922 key: key.to_string(),
1923 value_type: "sparse_vector".to_string(),
1924 }),
1925 },
1926 None => match &self.defaults.sparse_vector {
1927 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1928 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1929 None => Err(SchemaError::MissingIndexConfiguration {
1930 key: key.to_string(),
1931 value_type: "sparse_vector".to_string(),
1932 }),
1933 },
1934 None => Err(SchemaError::MissingIndexConfiguration {
1935 key: key.to_string(),
1936 value_type: "sparse_vector".to_string(),
1937 }),
1938 },
1939 },
1940 }
1941 }
1942
1943 pub fn is_metadata_where_indexing_enabled(
1944 &self,
1945 where_clause: &Where,
1946 ) -> Result<(), FilterValidationError> {
1947 match where_clause {
1948 Where::Composite(composite) => {
1949 for child in &composite.children {
1950 self.is_metadata_where_indexing_enabled(child)?;
1951 }
1952 Ok(())
1953 }
1954 Where::Document(_) => Ok(()),
1955 Where::Metadata(expression) => {
1956 let value_type = match &expression.comparison {
1957 MetadataComparison::Primitive(_, value) => value.value_type(),
1958 MetadataComparison::Set(_, set_value) => set_value.value_type(),
1959 };
1960 let is_enabled = self
1961 .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1962 .map_err(FilterValidationError::Schema)?;
1963 if !is_enabled {
1964 return Err(FilterValidationError::IndexingDisabled {
1965 key: expression.key.clone(),
1966 value_type,
1967 });
1968 }
1969 Ok(())
1970 }
1971 }
1972 }
1973
1974 pub fn is_knn_key_indexing_enabled(
1975 &self,
1976 key: &str,
1977 query: &QueryVector,
1978 ) -> Result<(), FilterValidationError> {
1979 match query {
1980 QueryVector::Sparse(_) => {
1981 let is_enabled = self
1982 .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1983 .map_err(FilterValidationError::Schema)?;
1984 if !is_enabled {
1985 return Err(FilterValidationError::IndexingDisabled {
1986 key: key.to_string(),
1987 value_type: MetadataValueType::SparseVector,
1988 });
1989 }
1990 Ok(())
1991 }
1992 QueryVector::Dense(_) => {
1993 Ok(())
1996 }
1997 }
1998 }
1999
2000 pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
2001 if key.starts_with(CHROMA_KEY) {
2002 return false;
2003 }
2004 let value_types = self.keys.entry(key.to_string()).or_default();
2005 match value_type {
2006 MetadataValueType::Bool => {
2007 if value_types.boolean.is_none() {
2008 value_types.boolean = self.defaults.boolean.clone();
2009 return true;
2010 }
2011 }
2012 MetadataValueType::Int => {
2013 if value_types.int.is_none() {
2014 value_types.int = self.defaults.int.clone();
2015 return true;
2016 }
2017 }
2018 MetadataValueType::Float => {
2019 if value_types.float.is_none() {
2020 value_types.float = self.defaults.float.clone();
2021 return true;
2022 }
2023 }
2024 MetadataValueType::Str => {
2025 if value_types.string.is_none() {
2026 value_types.string = self.defaults.string.clone();
2027 return true;
2028 }
2029 }
2030 MetadataValueType::SparseVector => {
2031 if value_types.sparse_vector.is_none() {
2032 value_types.sparse_vector = self.defaults.sparse_vector.clone();
2033 return true;
2034 }
2035 }
2036 }
2037 false
2038 }
2039
2040 pub fn create_index(
2080 mut self,
2081 key: Option<&str>,
2082 config: IndexConfig,
2083 ) -> Result<Self, SchemaBuilderError> {
2084 match (&key, &config) {
2086 (None, IndexConfig::Vector(cfg)) => {
2087 self._set_vector_index_config_builder(cfg.clone());
2088 return Ok(self);
2089 }
2090 (None, IndexConfig::Fts(cfg)) => {
2091 self._set_fts_index_config_builder(cfg.clone());
2092 return Ok(self);
2093 }
2094 (Some(k), IndexConfig::Vector(_)) => {
2095 return Err(SchemaBuilderError::VectorIndexMustBeGlobal { key: k.to_string() });
2096 }
2097 (Some(k), IndexConfig::Fts(_)) => {
2098 return Err(SchemaBuilderError::FtsIndexMustBeGlobal { key: k.to_string() });
2099 }
2100 _ => {}
2101 }
2102
2103 if let Some(k) = key {
2105 if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2106 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2107 key: k.to_string(),
2108 });
2109 }
2110 }
2111
2112 if key.is_none() && matches!(config, IndexConfig::SparseVector(_)) {
2114 return Err(SchemaBuilderError::SparseVectorRequiresKey);
2115 }
2116
2117 match key {
2119 Some(k) => self._set_index_for_key_builder(k, config, true)?,
2120 None => self._set_index_in_defaults_builder(config, true)?,
2121 }
2122
2123 Ok(self)
2124 }
2125
2126 pub fn delete_index(
2154 mut self,
2155 key: Option<&str>,
2156 config: IndexConfig,
2157 ) -> Result<Self, SchemaBuilderError> {
2158 if let Some(k) = key {
2160 if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2161 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2162 key: k.to_string(),
2163 });
2164 }
2165 }
2166
2167 match &config {
2169 IndexConfig::Vector(_) => {
2170 return Err(SchemaBuilderError::VectorIndexDeletionNotSupported);
2171 }
2172 IndexConfig::Fts(_) => {
2173 return Err(SchemaBuilderError::FtsIndexDeletionNotSupported);
2174 }
2175 IndexConfig::SparseVector(_) => {
2176 return Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported);
2177 }
2178 _ => {}
2179 }
2180
2181 match key {
2183 Some(k) => self._set_index_for_key_builder(k, config, false)?,
2184 None => self._set_index_in_defaults_builder(config, false)?,
2185 }
2186
2187 Ok(self)
2188 }
2189
2190 pub fn with_cmek(mut self, cmek: Cmek) -> Self {
2208 self.cmek = Some(cmek);
2209 self
2210 }
2211
2212 fn _set_vector_index_config_builder(&mut self, config: VectorIndexConfig) {
2214 if let Some(float_list) = &mut self.defaults.float_list {
2216 if let Some(vector_index) = &mut float_list.vector_index {
2217 vector_index.config = config.clone();
2218 }
2219 }
2220
2221 if let Some(embedding_types) = self.keys.get_mut(EMBEDDING_KEY) {
2223 if let Some(float_list) = &mut embedding_types.float_list {
2224 if let Some(vector_index) = &mut float_list.vector_index {
2225 let mut updated_config = config;
2226 updated_config.source_key = Some(DOCUMENT_KEY.to_string());
2228 vector_index.config = updated_config;
2229 }
2230 }
2231 }
2232 }
2233
2234 fn _set_fts_index_config_builder(&mut self, config: FtsIndexConfig) {
2236 if let Some(string) = &mut self.defaults.string {
2238 if let Some(fts_index) = &mut string.fts_index {
2239 fts_index.config = config.clone();
2240 }
2241 }
2242
2243 if let Some(document_types) = self.keys.get_mut(DOCUMENT_KEY) {
2245 if let Some(string) = &mut document_types.string {
2246 if let Some(fts_index) = &mut string.fts_index {
2247 fts_index.config = config;
2248 }
2249 }
2250 }
2251 }
2252
2253 fn _set_index_for_key_builder(
2255 &mut self,
2256 key: &str,
2257 config: IndexConfig,
2258 enabled: bool,
2259 ) -> Result<(), SchemaBuilderError> {
2260 if enabled && matches!(config, IndexConfig::SparseVector(_)) {
2262 let existing_key = self
2264 .keys
2265 .iter()
2266 .find(|(k, v)| {
2267 k.as_str() != key
2268 && v.sparse_vector
2269 .as_ref()
2270 .and_then(|sv| sv.sparse_vector_index.as_ref())
2271 .map(|idx| idx.enabled)
2272 .unwrap_or(false)
2273 })
2274 .map(|(k, _)| k.clone());
2275
2276 if let Some(existing_key) = existing_key {
2277 return Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key });
2278 }
2279 }
2280
2281 let value_types = self.keys.entry(key.to_string()).or_default();
2283
2284 match config {
2286 IndexConfig::Vector(_) => {
2287 return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2288 key: key.to_string(),
2289 });
2290 }
2291 IndexConfig::Fts(_) => {
2292 return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2293 key: key.to_string(),
2294 });
2295 }
2296 IndexConfig::SparseVector(cfg) => {
2297 value_types.sparse_vector = Some(SparseVectorValueType {
2298 sparse_vector_index: Some(SparseVectorIndexType {
2299 enabled,
2300 config: cfg,
2301 }),
2302 });
2303 }
2304 IndexConfig::StringInverted(cfg) => {
2305 if value_types.string.is_none() {
2306 value_types.string = Some(StringValueType {
2307 fts_index: None,
2308 string_inverted_index: None,
2309 });
2310 }
2311 if let Some(string) = &mut value_types.string {
2312 string.string_inverted_index = Some(StringInvertedIndexType {
2313 enabled,
2314 config: cfg,
2315 });
2316 }
2317 }
2318 IndexConfig::IntInverted(cfg) => {
2319 value_types.int = Some(IntValueType {
2320 int_inverted_index: Some(IntInvertedIndexType {
2321 enabled,
2322 config: cfg,
2323 }),
2324 });
2325 }
2326 IndexConfig::FloatInverted(cfg) => {
2327 value_types.float = Some(FloatValueType {
2328 float_inverted_index: Some(FloatInvertedIndexType {
2329 enabled,
2330 config: cfg,
2331 }),
2332 });
2333 }
2334 IndexConfig::BoolInverted(cfg) => {
2335 value_types.boolean = Some(BoolValueType {
2336 bool_inverted_index: Some(BoolInvertedIndexType {
2337 enabled,
2338 config: cfg,
2339 }),
2340 });
2341 }
2342 }
2343
2344 Ok(())
2345 }
2346
2347 fn _set_index_in_defaults_builder(
2349 &mut self,
2350 config: IndexConfig,
2351 enabled: bool,
2352 ) -> Result<(), SchemaBuilderError> {
2353 match config {
2354 IndexConfig::Vector(_) => {
2355 return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2356 key: "defaults".to_string(),
2357 });
2358 }
2359 IndexConfig::Fts(_) => {
2360 return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2361 key: "defaults".to_string(),
2362 });
2363 }
2364 IndexConfig::SparseVector(cfg) => {
2365 self.defaults.sparse_vector = Some(SparseVectorValueType {
2366 sparse_vector_index: Some(SparseVectorIndexType {
2367 enabled,
2368 config: cfg,
2369 }),
2370 });
2371 }
2372 IndexConfig::StringInverted(cfg) => {
2373 if self.defaults.string.is_none() {
2374 self.defaults.string = Some(StringValueType {
2375 fts_index: None,
2376 string_inverted_index: None,
2377 });
2378 }
2379 if let Some(string) = &mut self.defaults.string {
2380 string.string_inverted_index = Some(StringInvertedIndexType {
2381 enabled,
2382 config: cfg,
2383 });
2384 }
2385 }
2386 IndexConfig::IntInverted(cfg) => {
2387 self.defaults.int = Some(IntValueType {
2388 int_inverted_index: Some(IntInvertedIndexType {
2389 enabled,
2390 config: cfg,
2391 }),
2392 });
2393 }
2394 IndexConfig::FloatInverted(cfg) => {
2395 self.defaults.float = Some(FloatValueType {
2396 float_inverted_index: Some(FloatInvertedIndexType {
2397 enabled,
2398 config: cfg,
2399 }),
2400 });
2401 }
2402 IndexConfig::BoolInverted(cfg) => {
2403 self.defaults.boolean = Some(BoolValueType {
2404 bool_inverted_index: Some(BoolInvertedIndexType {
2405 enabled,
2406 config: cfg,
2407 }),
2408 });
2409 }
2410 }
2411
2412 Ok(())
2413 }
2414}
2415
2416#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2421#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2422#[serde(deny_unknown_fields)]
2423pub struct VectorIndexConfig {
2424 #[serde(skip_serializing_if = "Option::is_none")]
2426 pub space: Option<Space>,
2427 #[serde(skip_serializing_if = "Option::is_none")]
2429 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
2430 #[serde(skip_serializing_if = "Option::is_none")]
2432 pub source_key: Option<String>,
2433 #[serde(skip_serializing_if = "Option::is_none")]
2435 pub hnsw: Option<HnswIndexConfig>,
2436 #[serde(skip_serializing_if = "Option::is_none")]
2438 pub spann: Option<SpannIndexConfig>,
2439}
2440
2441#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
2443#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2444#[serde(deny_unknown_fields)]
2445pub struct HnswIndexConfig {
2446 #[serde(skip_serializing_if = "Option::is_none")]
2447 pub ef_construction: Option<usize>,
2448 #[serde(skip_serializing_if = "Option::is_none")]
2449 pub max_neighbors: Option<usize>,
2450 #[serde(skip_serializing_if = "Option::is_none")]
2451 pub ef_search: Option<usize>,
2452 #[serde(skip_serializing_if = "Option::is_none")]
2453 pub num_threads: Option<usize>,
2454 #[serde(skip_serializing_if = "Option::is_none")]
2455 #[validate(range(min = 2))]
2456 pub batch_size: Option<usize>,
2457 #[serde(skip_serializing_if = "Option::is_none")]
2458 #[validate(range(min = 2))]
2459 pub sync_threshold: Option<usize>,
2460 #[serde(skip_serializing_if = "Option::is_none")]
2461 pub resize_factor: Option<f64>,
2462}
2463
2464impl HnswIndexConfig {
2465 pub fn is_default(&self) -> bool {
2469 if let Some(ef_construction) = self.ef_construction {
2470 if ef_construction != default_construction_ef() {
2471 return false;
2472 }
2473 }
2474 if let Some(max_neighbors) = self.max_neighbors {
2475 if max_neighbors != default_m() {
2476 return false;
2477 }
2478 }
2479 if let Some(ef_search) = self.ef_search {
2480 if ef_search != default_search_ef() {
2481 return false;
2482 }
2483 }
2484 if let Some(batch_size) = self.batch_size {
2485 if batch_size != default_batch_size() {
2486 return false;
2487 }
2488 }
2489 if let Some(sync_threshold) = self.sync_threshold {
2490 if sync_threshold != default_sync_threshold() {
2491 return false;
2492 }
2493 }
2494 if let Some(resize_factor) = self.resize_factor {
2495 if resize_factor != default_resize_factor() {
2496 return false;
2497 }
2498 }
2499 true
2501 }
2502}
2503
2504#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
2506#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2507#[serde(deny_unknown_fields)]
2508pub struct SpannIndexConfig {
2509 #[serde(skip_serializing_if = "Option::is_none")]
2510 #[validate(range(max = 128))]
2511 pub search_nprobe: Option<u32>,
2512 #[serde(skip_serializing_if = "Option::is_none")]
2513 #[validate(range(min = 1.0, max = 1.0))]
2514 pub search_rng_factor: Option<f32>,
2515 #[serde(skip_serializing_if = "Option::is_none")]
2516 #[validate(range(min = 5.0, max = 10.0))]
2517 pub search_rng_epsilon: Option<f32>,
2518 #[serde(skip_serializing_if = "Option::is_none")]
2519 #[validate(range(max = 8))]
2520 pub nreplica_count: Option<u32>,
2521 #[serde(skip_serializing_if = "Option::is_none")]
2522 #[validate(range(min = 1.0, max = 1.0))]
2523 pub write_rng_factor: Option<f32>,
2524 #[serde(skip_serializing_if = "Option::is_none")]
2525 #[validate(range(min = 5.0, max = 10.0))]
2526 pub write_rng_epsilon: Option<f32>,
2527 #[serde(skip_serializing_if = "Option::is_none")]
2528 #[validate(range(min = 50, max = 200))]
2529 pub split_threshold: Option<u32>,
2530 #[serde(skip_serializing_if = "Option::is_none")]
2531 #[validate(range(max = 1000))]
2532 pub num_samples_kmeans: Option<usize>,
2533 #[serde(skip_serializing_if = "Option::is_none")]
2534 #[validate(range(min = 100.0, max = 100.0))]
2535 pub initial_lambda: Option<f32>,
2536 #[serde(skip_serializing_if = "Option::is_none")]
2537 #[validate(range(max = 64))]
2538 pub reassign_neighbor_count: Option<u32>,
2539 #[serde(skip_serializing_if = "Option::is_none")]
2540 #[validate(range(min = 25, max = 100))]
2541 pub merge_threshold: Option<u32>,
2542 #[serde(skip_serializing_if = "Option::is_none")]
2543 #[validate(range(max = 8))]
2544 pub num_centers_to_merge_to: Option<u32>,
2545 #[serde(skip_serializing_if = "Option::is_none")]
2546 #[validate(range(max = 64))]
2547 pub write_nprobe: Option<u32>,
2548 #[serde(skip_serializing_if = "Option::is_none")]
2549 #[validate(range(max = 200))]
2550 pub ef_construction: Option<usize>,
2551 #[serde(skip_serializing_if = "Option::is_none")]
2552 #[validate(range(max = 200))]
2553 pub ef_search: Option<usize>,
2554 #[serde(skip_serializing_if = "Option::is_none")]
2555 #[validate(range(max = 64))]
2556 pub max_neighbors: Option<usize>,
2557}
2558
2559impl SpannIndexConfig {
2560 pub fn is_default(&self) -> bool {
2563 if let Some(search_nprobe) = self.search_nprobe {
2564 if search_nprobe != default_search_nprobe() {
2565 return false;
2566 }
2567 }
2568 if let Some(search_rng_factor) = self.search_rng_factor {
2569 if search_rng_factor != default_search_rng_factor() {
2570 return false;
2571 }
2572 }
2573 if let Some(search_rng_epsilon) = self.search_rng_epsilon {
2574 if search_rng_epsilon != default_search_rng_epsilon() {
2575 return false;
2576 }
2577 }
2578 if let Some(nreplica_count) = self.nreplica_count {
2579 if nreplica_count != default_nreplica_count() {
2580 return false;
2581 }
2582 }
2583 if let Some(write_rng_factor) = self.write_rng_factor {
2584 if write_rng_factor != default_write_rng_factor() {
2585 return false;
2586 }
2587 }
2588 if let Some(write_rng_epsilon) = self.write_rng_epsilon {
2589 if write_rng_epsilon != default_write_rng_epsilon() {
2590 return false;
2591 }
2592 }
2593 if let Some(split_threshold) = self.split_threshold {
2594 if split_threshold != default_split_threshold() {
2595 return false;
2596 }
2597 }
2598 if let Some(num_samples_kmeans) = self.num_samples_kmeans {
2599 if num_samples_kmeans != default_num_samples_kmeans() {
2600 return false;
2601 }
2602 }
2603 if let Some(initial_lambda) = self.initial_lambda {
2604 if initial_lambda != default_initial_lambda() {
2605 return false;
2606 }
2607 }
2608 if let Some(reassign_neighbor_count) = self.reassign_neighbor_count {
2609 if reassign_neighbor_count != default_reassign_neighbor_count() {
2610 return false;
2611 }
2612 }
2613 if let Some(merge_threshold) = self.merge_threshold {
2614 if merge_threshold != default_merge_threshold() {
2615 return false;
2616 }
2617 }
2618 if let Some(num_centers_to_merge_to) = self.num_centers_to_merge_to {
2619 if num_centers_to_merge_to != default_num_centers_to_merge_to() {
2620 return false;
2621 }
2622 }
2623 if let Some(write_nprobe) = self.write_nprobe {
2624 if write_nprobe != default_write_nprobe() {
2625 return false;
2626 }
2627 }
2628 if let Some(ef_construction) = self.ef_construction {
2629 if ef_construction != default_construction_ef_spann() {
2630 return false;
2631 }
2632 }
2633 if let Some(ef_search) = self.ef_search {
2634 if ef_search != default_search_ef_spann() {
2635 return false;
2636 }
2637 }
2638 if let Some(max_neighbors) = self.max_neighbors {
2639 if max_neighbors != default_m_spann() {
2640 return false;
2641 }
2642 }
2643 true
2644 }
2645}
2646
2647#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2648#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2649#[serde(deny_unknown_fields)]
2650pub struct SparseVectorIndexConfig {
2651 #[serde(skip_serializing_if = "Option::is_none")]
2653 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
2654 #[serde(skip_serializing_if = "Option::is_none")]
2656 pub source_key: Option<String>,
2657 #[serde(skip_serializing_if = "Option::is_none")]
2659 pub bm25: Option<bool>,
2660}
2661
2662#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2663#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2664#[serde(deny_unknown_fields)]
2665pub struct FtsIndexConfig {
2666 }
2668
2669#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2670#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2671#[serde(deny_unknown_fields)]
2672pub struct StringInvertedIndexConfig {
2673 }
2675
2676#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2677#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2678#[serde(deny_unknown_fields)]
2679pub struct IntInvertedIndexConfig {
2680 }
2682
2683#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2684#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2685#[serde(deny_unknown_fields)]
2686pub struct FloatInvertedIndexConfig {
2687 }
2689
2690#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2691#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2692#[serde(deny_unknown_fields)]
2693pub struct BoolInvertedIndexConfig {
2694 }
2696
2697#[derive(Clone, Debug)]
2703pub enum IndexConfig {
2704 Vector(VectorIndexConfig),
2705 SparseVector(SparseVectorIndexConfig),
2706 Fts(FtsIndexConfig),
2707 StringInverted(StringInvertedIndexConfig),
2708 IntInverted(IntInvertedIndexConfig),
2709 FloatInverted(FloatInvertedIndexConfig),
2710 BoolInverted(BoolInvertedIndexConfig),
2711}
2712
2713impl From<VectorIndexConfig> for IndexConfig {
2715 fn from(config: VectorIndexConfig) -> Self {
2716 IndexConfig::Vector(config)
2717 }
2718}
2719
2720impl From<SparseVectorIndexConfig> for IndexConfig {
2721 fn from(config: SparseVectorIndexConfig) -> Self {
2722 IndexConfig::SparseVector(config)
2723 }
2724}
2725
2726impl From<FtsIndexConfig> for IndexConfig {
2727 fn from(config: FtsIndexConfig) -> Self {
2728 IndexConfig::Fts(config)
2729 }
2730}
2731
2732impl From<StringInvertedIndexConfig> for IndexConfig {
2733 fn from(config: StringInvertedIndexConfig) -> Self {
2734 IndexConfig::StringInverted(config)
2735 }
2736}
2737
2738impl From<IntInvertedIndexConfig> for IndexConfig {
2739 fn from(config: IntInvertedIndexConfig) -> Self {
2740 IndexConfig::IntInverted(config)
2741 }
2742}
2743
2744impl From<FloatInvertedIndexConfig> for IndexConfig {
2745 fn from(config: FloatInvertedIndexConfig) -> Self {
2746 IndexConfig::FloatInverted(config)
2747 }
2748}
2749
2750impl From<BoolInvertedIndexConfig> for IndexConfig {
2751 fn from(config: BoolInvertedIndexConfig) -> Self {
2752 IndexConfig::BoolInverted(config)
2753 }
2754}
2755
2756impl TryFrom<&InternalCollectionConfiguration> for Schema {
2757 type Error = SchemaError;
2758
2759 fn try_from(config: &InternalCollectionConfiguration) -> Result<Self, Self::Error> {
2760 let mut schema = match &config.vector_index {
2762 VectorIndexConfiguration::Hnsw(_) => Schema::new_default(KnnIndex::Hnsw),
2763 VectorIndexConfiguration::Spann(_) => Schema::new_default(KnnIndex::Spann),
2764 };
2765 let vector_config = match &config.vector_index {
2767 VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
2768 space: Some(hnsw_config.space.clone()),
2769 embedding_function: config.embedding_function.clone(),
2770 source_key: None,
2771 hnsw: Some(HnswIndexConfig {
2772 ef_construction: Some(hnsw_config.ef_construction),
2773 max_neighbors: Some(hnsw_config.max_neighbors),
2774 ef_search: Some(hnsw_config.ef_search),
2775 num_threads: Some(hnsw_config.num_threads),
2776 batch_size: Some(hnsw_config.batch_size),
2777 sync_threshold: Some(hnsw_config.sync_threshold),
2778 resize_factor: Some(hnsw_config.resize_factor),
2779 }),
2780 spann: None,
2781 },
2782 VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
2783 space: Some(spann_config.space.clone()),
2784 embedding_function: config.embedding_function.clone(),
2785 source_key: None,
2786 hnsw: None,
2787 spann: Some(SpannIndexConfig {
2788 search_nprobe: Some(spann_config.search_nprobe),
2789 search_rng_factor: Some(spann_config.search_rng_factor),
2790 search_rng_epsilon: Some(spann_config.search_rng_epsilon),
2791 nreplica_count: Some(spann_config.nreplica_count),
2792 write_rng_factor: Some(spann_config.write_rng_factor),
2793 write_rng_epsilon: Some(spann_config.write_rng_epsilon),
2794 split_threshold: Some(spann_config.split_threshold),
2795 num_samples_kmeans: Some(spann_config.num_samples_kmeans),
2796 initial_lambda: Some(spann_config.initial_lambda),
2797 reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
2798 merge_threshold: Some(spann_config.merge_threshold),
2799 num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
2800 write_nprobe: Some(spann_config.write_nprobe),
2801 ef_construction: Some(spann_config.ef_construction),
2802 ef_search: Some(spann_config.ef_search),
2803 max_neighbors: Some(spann_config.max_neighbors),
2804 }),
2805 },
2806 };
2807
2808 if let Some(float_list) = &mut schema.defaults.float_list {
2811 if let Some(vector_index) = &mut float_list.vector_index {
2812 vector_index.config = vector_config.clone();
2813 }
2814 }
2815
2816 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
2820 if let Some(float_list) = &mut embedding_types.float_list {
2821 if let Some(vector_index) = &mut float_list.vector_index {
2822 let mut vector_config = vector_config;
2823 vector_config.source_key = Some(DOCUMENT_KEY.to_string());
2824 vector_index.config = vector_config;
2825 }
2826 }
2827 }
2828
2829 Ok(schema)
2830 }
2831}
2832
2833#[cfg(test)]
2834mod tests {
2835 use super::*;
2836 use crate::hnsw_configuration::Space;
2837 use crate::metadata::SparseVector;
2838 use crate::{
2839 EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
2840 };
2841 use serde_json::json;
2842
#[test]
fn test_reconcile_with_defaults_none_user_schema() {
    // Without a user schema, reconciliation must yield the stock SPANN defaults.
    let reconciled = Schema::reconcile_with_defaults(None, KnnIndex::Spann).unwrap();
    let defaults = Schema::new_default(KnnIndex::Spann);
    assert_eq!(reconciled, defaults);
}
2850
#[test]
fn test_reconcile_with_defaults_empty_user_schema() {
    // A user schema that overrides nothing behaves like no user schema at all.
    let empty_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::new(),
        cmek: None,
        source_attached_function_id: None,
    };

    let reconciled =
        Schema::reconcile_with_defaults(Some(&empty_schema), KnnIndex::Spann).unwrap();
    let defaults = Schema::new_default(KnnIndex::Spann);
    assert_eq!(reconciled, defaults);
}
2865
#[test]
fn test_reconcile_with_defaults_user_overrides_string_enabled() {
    // The user disables the default string inverted index and sets nothing else.
    let user_schema = Schema {
        defaults: ValueTypes {
            string: Some(StringValueType {
                string_inverted_index: Some(StringInvertedIndexType {
                    enabled: false,
                    config: StringInvertedIndexConfig {},
                }),
                fts_index: None,
            }),
            ..Default::default()
        },
        keys: HashMap::new(),
        cmek: None,
        source_attached_function_id: None,
    };

    let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();

    // The override wins for the string inverted index...
    let string_defaults = result.defaults.string.as_ref().unwrap();
    let string_inverted_index = string_defaults.string_inverted_index.as_ref().unwrap();
    assert!(!string_inverted_index.enabled);

    // ...while untouched value types are still filled in from the defaults.
    assert!(result.defaults.float.is_some());
    assert!(result.defaults.int.is_some());
}
2902
#[test]
fn test_reconcile_with_defaults_user_overrides_vector_config() {
    // The user overrides only part of the HNSW vector settings.
    let user_vector_config = VectorIndexConfig {
        space: Some(Space::L2),
        embedding_function: None,
        source_key: Some("custom_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(500),
            ..Default::default()
        }),
        spann: None,
    };
    let user_schema = Schema {
        defaults: ValueTypes {
            float_list: Some(FloatListValueType {
                vector_index: Some(VectorIndexType {
                    enabled: true,
                    config: user_vector_config,
                }),
            }),
            ..Default::default()
        },
        keys: HashMap::new(),
        cmek: None,
        source_attached_function_id: None,
    };

    // Merge by hand against the HNSW defaults so there is an HNSW config to
    // merge the user's values into.
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults = Schema::merge_value_types(
        &default_schema.defaults,
        &user_schema.defaults,
        KnnIndex::Hnsw,
    )
    .unwrap();
    let mut merged_keys = default_schema.keys.clone();
    for (key, user_value_types) in user_schema.keys {
        let merged_value_types = match merged_keys.get(&key) {
            Some(default_value_types) => {
                Schema::merge_value_types(default_value_types, &user_value_types, KnnIndex::Hnsw)
                    .unwrap()
            }
            None => user_value_types,
        };
        merged_keys.insert(key, merged_value_types);
    }
    let result = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
        cmek: None,
        source_attached_function_id: None,
    };

    let float_list = result.defaults.float_list.as_ref().unwrap();
    let vector_config = &float_list.vector_index.as_ref().unwrap().config;
    let merged_hnsw = vector_config.hnsw.as_ref().unwrap();

    // Values supplied by the user are kept.
    assert_eq!(vector_config.space, Some(Space::L2));
    assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
    assert_eq!(merged_hnsw.ef_construction, Some(500));

    // Values the user left unset come from the defaults.
    assert_eq!(vector_config.embedding_function, None);
    assert_eq!(merged_hnsw.max_neighbors, Some(default_m()));
}
2991
#[test]
fn test_reconcile_with_defaults_keys() {
    // Override for a brand-new key: FTS on, string inverted index off.
    let custom_string = StringValueType {
        fts_index: Some(FtsIndexType {
            enabled: true,
            config: FtsIndexConfig {},
        }),
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
    };
    let keys = HashMap::from([(
        "custom_key".to_string(),
        ValueTypes {
            string: Some(custom_string),
            ..Default::default()
        },
    )]);
    let user_schema = Schema {
        defaults: ValueTypes::default(),
        keys,
        cmek: None,
        source_attached_function_id: None,
    };

    let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();

    // The built-in keys survive reconciliation.
    assert!(result.keys.contains_key(EMBEDDING_KEY));
    assert!(result.keys.contains_key(DOCUMENT_KEY));

    // The user's key is present with its FTS index still enabled.
    assert!(result.keys.contains_key("custom_key"));
    let custom_override = result.keys.get("custom_key").unwrap();
    let custom_fts = custom_override
        .string
        .as_ref()
        .unwrap()
        .fts_index
        .as_ref()
        .unwrap();
    assert!(custom_fts.enabled);
}
3040
#[test]
fn test_reconcile_with_defaults_override_existing_key() {
    // Override the built-in embedding key: index disabled, custom space/source.
    let embedding_override = ValueTypes {
        float_list: Some(FloatListValueType {
            vector_index: Some(VectorIndexType {
                enabled: false,
                config: VectorIndexConfig {
                    space: Some(Space::Ip),
                    embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
                    source_key: Some("custom_embedding_key".to_string()),
                    hnsw: None,
                    spann: None,
                },
            }),
        }),
        ..Default::default()
    };
    let user_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::from([(EMBEDDING_KEY.to_string(), embedding_override)]),
        cmek: None,
        source_attached_function_id: None,
    };

    let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();

    let embedding_types = result.keys.get(EMBEDDING_KEY).unwrap();
    let float_list = embedding_types.float_list.as_ref().unwrap();
    let vector_index = float_list.vector_index.as_ref().unwrap();

    // Every overridden value must win over the default for the key.
    assert!(!vector_index.enabled);
    assert_eq!(vector_index.config.space, Some(Space::Ip));
    assert_eq!(
        vector_index.config.source_key,
        Some("custom_embedding_key".to_string())
    );
}
3090
#[test]
fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
    // Non-default HNSW values so a lossy conversion would be detected.
    let hnsw = InternalHnswConfiguration {
        space: Space::Cosine,
        ef_construction: 128,
        ef_search: 96,
        max_neighbors: 42,
        num_threads: 8,
        resize_factor: 1.5,
        sync_threshold: 2_000,
        batch_size: 256,
    };
    let embedding_function =
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "custom".to_string(),
            config: json!({"alpha": 1}),
        });
    let collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(hnsw),
        embedding_function: Some(embedding_function),
    };

    // config -> schema -> config must be the identity.
    let schema = Schema::try_from(&collection_config).unwrap();
    let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();

    assert_eq!(reconstructed, collection_config);
}
3117
#[test]
fn test_convert_schema_to_collection_config_spann_roundtrip() {
    // A mix of explicit and defaulted SPANN values.
    let spann = InternalSpannConfiguration {
        space: Space::Cosine,
        search_nprobe: 11,
        search_rng_factor: 1.7,
        write_nprobe: 5,
        nreplica_count: 3,
        split_threshold: 150,
        merge_threshold: 80,
        ef_construction: 120,
        ef_search: 90,
        max_neighbors: 40,
        ..Default::default()
    };
    let embedding_function =
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "custom".to_string(),
            config: json!({"beta": true}),
        });
    let collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(spann),
        embedding_function: Some(embedding_function),
    };

    // config -> schema -> config must be the identity.
    let schema = Schema::try_from(&collection_config).unwrap();
    let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();

    assert_eq!(reconstructed, collection_config);
}
3149
#[test]
fn test_convert_schema_to_collection_config_rejects_mixed_index() {
    // Start from HNSW defaults, then also attach a SPANN config to the
    // embedding key so both index kinds are present at once.
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    let embedding_index = schema
        .keys
        .get_mut(EMBEDDING_KEY)
        .and_then(|embedding| embedding.float_list.as_mut())
        .and_then(|float_list| float_list.vector_index.as_mut());
    if let Some(vector_index) = embedding_index {
        vector_index.config.spann = Some(SpannIndexConfig {
            search_nprobe: Some(1),
            search_rng_factor: Some(1.0),
            search_rng_epsilon: Some(0.1),
            nreplica_count: Some(1),
            write_rng_factor: Some(1.0),
            write_rng_epsilon: Some(0.1),
            split_threshold: Some(100),
            num_samples_kmeans: Some(10),
            initial_lambda: Some(0.5),
            reassign_neighbor_count: Some(10),
            merge_threshold: Some(50),
            num_centers_to_merge_to: Some(3),
            write_nprobe: Some(1),
            ef_construction: Some(50),
            ef_search: Some(40),
            max_neighbors: Some(20),
        });
    }

    let converted = InternalCollectionConfiguration::try_from(&schema);
    assert!(converted.is_err());
}
3181
#[test]
fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
    // The document key already exists, so the call must be a no-op.
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    let snapshot = schema.clone();

    let modified = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);

    assert!(!modified);
    assert_eq!(schema, snapshot);
}
3190
#[test]
fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    assert!(!schema.keys.contains_key("custom_field"));

    let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
    assert!(modified);

    // Only the boolean slot is populated, copied from the defaults.
    let new_entry = schema
        .keys
        .get("custom_field")
        .expect("expected new key override to be inserted");
    assert_eq!(new_entry.boolean, schema.defaults.boolean);
    assert!(new_entry.string.is_none());
    assert!(new_entry.int.is_none());
    assert!(new_entry.float.is_none());
    assert!(new_entry.float_list.is_none());
    assert!(new_entry.sparse_vector.is_none());
}
3210
#[test]
fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    let initial_len = schema.keys.len();

    // Pre-register the key with only a string value type.
    let string_only = ValueTypes {
        string: schema.defaults.string.clone(),
        ..Default::default()
    };
    schema.keys.insert("custom_field".to_string(), string_only);

    let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
    assert!(modified);

    // No extra key is created; the existing entry gains the boolean slot.
    assert_eq!(schema.keys.len(), initial_len + 1);
    let updated_entry = schema
        .keys
        .get("custom_field")
        .expect("expected key override to exist after ensure call");
    assert!(updated_entry.string.is_some());
    assert_eq!(updated_entry.boolean, schema.defaults.boolean);
}
3234
#[test]
fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
    // The default schema has no enabled sparse index for an arbitrary key.
    let schema = Schema::new_default(KnnIndex::Spann);
    let query = QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap());

    let err = schema
        .is_knn_key_indexing_enabled("custom_sparse", &query)
        .expect_err("expected indexing disabled error");

    match err {
        FilterValidationError::IndexingDisabled { key, value_type } => {
            assert_eq!(key, "custom_sparse");
            assert_eq!(value_type, crate::metadata::MetadataValueType::SparseVector);
        }
        other => panic!("unexpected error variant: {other:?}"),
    }
}
3252
#[test]
fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
    // Register a key whose sparse vector index is explicitly enabled.
    let enabled_sparse_index = SparseVectorIndexType {
        enabled: true,
        config: SparseVectorIndexConfig {
            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
            source_key: None,
            bm25: None,
        },
    };
    let mut schema = Schema::new_default(KnnIndex::Spann);
    schema.keys.insert(
        "sparse_enabled".to_string(),
        ValueTypes {
            sparse_vector: Some(SparseVectorValueType {
                sparse_vector_index: Some(enabled_sparse_index),
            }),
            ..Default::default()
        },
    );

    let query = QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap());
    let result = schema.is_knn_key_indexing_enabled("sparse_enabled", &query);

    assert!(result.is_ok());
}
3280
#[test]
fn test_is_knn_key_indexing_enabled_dense_succeeds() {
    // Dense queries against the built-in embedding key work out of the box.
    let schema = Schema::new_default(KnnIndex::Spann);
    let query = QueryVector::Dense(vec![0.1_f32, 0.2_f32]);

    let result = schema.is_knn_key_indexing_enabled(EMBEDDING_KEY, &query);

    assert!(result.is_ok());
}
3291
#[test]
fn test_merge_hnsw_configs_field_level() {
    // Defaults with every field populated.
    let default_hnsw = HnswIndexConfig {
        ef_construction: Some(200),
        max_neighbors: Some(16),
        ef_search: Some(10),
        num_threads: Some(4),
        batch_size: Some(100),
        sync_threshold: Some(1000),
        resize_factor: Some(1.2),
    };
    // The user sets only three fields; the rest stay `None`.
    let user_hnsw = HnswIndexConfig {
        ef_construction: Some(300),
        ef_search: Some(20),
        sync_threshold: Some(2000),
        ..Default::default()
    };

    let merged = Schema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();

    // User-provided fields override the defaults.
    assert_eq!(merged.ef_construction, Some(300));
    assert_eq!(merged.ef_search, Some(20));
    assert_eq!(merged.sync_threshold, Some(2000));

    // Unset fields fall back to the defaults.
    assert_eq!(merged.max_neighbors, Some(16));
    assert_eq!(merged.num_threads, Some(4));
    assert_eq!(merged.batch_size, Some(100));
    assert_eq!(merged.resize_factor, Some(1.2));
}
3328
#[test]
fn test_merge_spann_configs_field_level() {
    // Defaults with every field populated.
    let default_spann = SpannIndexConfig {
        search_nprobe: Some(10),
        search_rng_factor: Some(1.0),
        search_rng_epsilon: Some(7.0),
        nreplica_count: Some(3),
        write_rng_factor: Some(1.0),
        write_rng_epsilon: Some(6.0),
        split_threshold: Some(100),
        num_samples_kmeans: Some(100),
        initial_lambda: Some(100.0),
        reassign_neighbor_count: Some(50),
        merge_threshold: Some(50),
        num_centers_to_merge_to: Some(4),
        write_nprobe: Some(5),
        ef_construction: Some(100),
        ef_search: Some(10),
        max_neighbors: Some(16),
    };
    // The user sets only three fields; the rest stay `None`.
    let user_spann = SpannIndexConfig {
        search_nprobe: Some(20),
        search_rng_epsilon: Some(8.0),
        split_threshold: Some(150),
        ..Default::default()
    };

    let merged = Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann)).unwrap();

    // User-provided fields override the defaults.
    assert_eq!(merged.search_nprobe, Some(20));
    assert_eq!(merged.search_rng_epsilon, Some(8.0));
    assert_eq!(merged.split_threshold, Some(150));

    // Unset fields fall back to the defaults.
    assert_eq!(merged.search_rng_factor, Some(1.0));
    assert_eq!(merged.nreplica_count, Some(3));
    assert_eq!(merged.initial_lambda, Some(100.0));
}
3382
#[test]
fn test_spann_index_config_into_internal_configuration() {
    // Partially populated SPANN config; the unset fields checked below are
    // expected to come out of the conversion holding the crate-level defaults.
    let spann = SpannIndexConfig {
        search_nprobe: Some(33),
        search_rng_factor: Some(1.2),
        search_rng_epsilon: None,
        nreplica_count: None,
        write_rng_factor: Some(1.5),
        write_rng_epsilon: None,
        split_threshold: Some(75),
        num_samples_kmeans: None,
        initial_lambda: Some(0.9),
        reassign_neighbor_count: Some(40),
        merge_threshold: None,
        num_centers_to_merge_to: Some(4),
        write_nprobe: Some(60),
        ef_construction: Some(180),
        ef_search: Some(170),
        max_neighbors: Some(32),
    };

    // Conversion with an explicit distance space.
    let cosine = Space::Cosine;
    let explicit: InternalSpannConfiguration = (Some(&cosine), &spann).into();
    assert_eq!(explicit.space, Space::Cosine);
    assert_eq!(explicit.search_nprobe, 33);
    assert_eq!(explicit.search_rng_factor, 1.2);
    assert_eq!(explicit.search_rng_epsilon, default_search_rng_epsilon());
    assert_eq!(explicit.write_rng_factor, 1.5);
    assert_eq!(explicit.write_nprobe, 60);
    assert_eq!(explicit.ef_construction, 180);
    assert_eq!(explicit.ef_search, 170);
    assert_eq!(explicit.max_neighbors, 32);
    assert_eq!(explicit.merge_threshold, default_merge_threshold());

    // Conversion without a space is expected to use `default_space()`.
    let implicit: InternalSpannConfiguration = (None, &spann).into();
    assert_eq!(implicit.space, default_space());
}
3419
#[test]
fn test_merge_string_type_combinations() {
    // Baseline: inverted index enabled, FTS index disabled.
    let base = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: true,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: Some(FtsIndexType {
            enabled: false,
            config: FtsIndexConfig {},
        }),
    };

    // Override: inverted index disabled, FTS index not specified.
    let overrides = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: None,
    };

    // Both sides present: the override's inverted-index flag wins and the
    // FTS index falls back to the baseline.
    let both = Schema::merge_string_type(Some(&base), Some(&overrides))
        .unwrap()
        .unwrap();
    assert!(!both.string_inverted_index.as_ref().unwrap().enabled);
    assert!(!both.fts_index.as_ref().unwrap().enabled);

    // Only the baseline present.
    let base_only = Schema::merge_string_type(Some(&base), None)
        .unwrap()
        .unwrap();
    assert!(base_only.string_inverted_index.as_ref().unwrap().enabled);

    // Only the override present.
    let override_only = Schema::merge_string_type(None, Some(&overrides))
        .unwrap()
        .unwrap();
    assert!(!override_only.string_inverted_index.as_ref().unwrap().enabled);

    // Neither side present: the merge is expected to yield nothing.
    let neither = Schema::merge_string_type(None, None).unwrap();
    assert!(neither.is_none());
}
3466
#[test]
fn test_merge_vector_index_config_comprehensive() {
    // Baseline: cosine space, legacy embedding function, HNSW fully populated, no SPANN.
    let base = VectorIndexConfig {
        space: Some(Space::Cosine),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(200),
            max_neighbors: Some(16),
            ef_search: Some(10),
            num_threads: Some(4),
            batch_size: Some(100),
            sync_threshold: Some(1000),
            resize_factor: Some(1.2),
        }),
        spann: None,
    };

    // Override: new space and source key, a single HNSW field, and a SPANN
    // section carrying only `search_nprobe`.
    let overrides = VectorIndexConfig {
        space: Some(Space::L2),
        embedding_function: None,
        source_key: Some("user_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(300),
            max_neighbors: None,
            ef_search: None,
            num_threads: None,
            batch_size: None,
            sync_threshold: None,
            resize_factor: None,
        }),
        spann: Some(SpannIndexConfig {
            search_nprobe: Some(15),
            search_rng_factor: None,
            search_rng_epsilon: None,
            nreplica_count: None,
            write_rng_factor: None,
            write_rng_epsilon: None,
            split_threshold: None,
            num_samples_kmeans: None,
            initial_lambda: None,
            reassign_neighbor_count: None,
            merge_threshold: None,
            num_centers_to_merge_to: None,
            write_nprobe: None,
            ef_construction: None,
            ef_search: None,
            max_neighbors: None,
        }),
    };

    let merged = Schema::merge_vector_index_config(&base, &overrides, KnnIndex::Hnsw);

    // Top-level fields: override wins where set, baseline otherwise.
    assert_eq!(merged.space, Some(Space::L2));
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(merged.source_key, Some("user_key".to_string()));

    // HNSW parameters are merged field by field.
    assert_eq!(merged.hnsw.as_ref().unwrap().ef_construction, Some(300));
    assert_eq!(merged.hnsw.as_ref().unwrap().max_neighbors, Some(16));

    // With `KnnIndex::Hnsw` requested, no SPANN section is expected in the result.
    assert!(merged.spann.is_none());
}
3537
#[test]
fn test_merge_sparse_vector_index_config() {
    // Baseline carries both an embedding function and a source key.
    let base = SparseVectorIndexConfig {
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_sparse_key".to_string()),
        bm25: None,
    };

    // Override replaces only the source key.
    let overrides = SparseVectorIndexConfig {
        embedding_function: None,
        source_key: Some("user_sparse_key".to_string()),
        bm25: None,
    };

    let merged = Schema::merge_sparse_vector_index_config(&base, &overrides);

    // The override's source key is expected to win ...
    assert_eq!(merged.source_key, Some("user_sparse_key".to_string()));
    // ... while the unset embedding function falls back to the baseline.
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
}
3563
#[test]
fn test_complex_nested_merging_scenario() {
    // User-supplied defaults: string indexes flipped (inverted off, FTS on)
    // and a partially specified HNSW vector index.
    let user_defaults = ValueTypes {
        string: Some(StringValueType {
            string_inverted_index: Some(StringInvertedIndexType {
                enabled: false,
                config: StringInvertedIndexConfig {},
            }),
            fts_index: Some(FtsIndexType {
                enabled: true,
                config: FtsIndexConfig {},
            }),
        }),
        float_list: Some(FloatListValueType {
            vector_index: Some(VectorIndexType {
                enabled: true,
                config: VectorIndexConfig {
                    space: Some(Space::Ip),
                    embedding_function: None,
                    source_key: Some("custom_vector_key".to_string()),
                    hnsw: Some(HnswIndexConfig {
                        ef_construction: Some(400),
                        max_neighbors: Some(32),
                        ef_search: None,
                        num_threads: None,
                        batch_size: None,
                        sync_threshold: None,
                        resize_factor: None,
                    }),
                    spann: None,
                },
            }),
        }),
        ..Default::default()
    };

    // A per-key override for a key the default schema is not expected to contain.
    let mut user_keys = HashMap::new();
    user_keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            string: Some(StringValueType {
                fts_index: Some(FtsIndexType {
                    enabled: true,
                    config: FtsIndexConfig {},
                }),
                string_inverted_index: None,
            }),
            ..Default::default()
        },
    );

    let user_schema = Schema {
        defaults: user_defaults,
        keys: user_keys,
        cmek: None,
        source_attached_function_id: None,
    };

    // Merge by hand: defaults are merged against the default schema; each user
    // key is merged with the matching default key when one exists, otherwise
    // it is taken verbatim.
    let baseline = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults =
        Schema::merge_value_types(&baseline.defaults, &user_schema.defaults, KnnIndex::Hnsw)
            .unwrap();
    let mut merged_keys = baseline.keys.clone();
    for (key, user_value_types) in user_schema.keys {
        let resolved = match merged_keys.get(&key) {
            Some(existing) => {
                Schema::merge_value_types(existing, &user_value_types, KnnIndex::Hnsw).unwrap()
            }
            None => user_value_types,
        };
        merged_keys.insert(key, resolved);
    }
    let result = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
        cmek: None,
        source_attached_function_id: None,
    };

    // String defaults follow the user's choices.
    let merged_string = result.defaults.string.as_ref().unwrap();
    assert!(!merged_string.string_inverted_index.as_ref().unwrap().enabled);
    assert!(merged_string.fts_index.as_ref().unwrap().enabled);

    // Vector index defaults: user values where given ...
    let vector_config = &result
        .defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    assert_eq!(vector_config.space, Some(Space::Ip));
    assert_eq!(vector_config.embedding_function, None);
    assert_eq!(
        vector_config.source_key,
        Some("custom_vector_key".to_string())
    );
    let merged_hnsw = vector_config.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(400));
    assert_eq!(merged_hnsw.max_neighbors, Some(32));
    // ... and the crate default for the HNSW field the user left unset.
    assert_eq!(merged_hnsw.ef_search, Some(default_search_ef()));

    // Built-in keys survive and the custom key is added.
    assert!(result.keys.contains_key(EMBEDDING_KEY));
    assert!(result.keys.contains_key(DOCUMENT_KEY));
    assert!(result.keys.contains_key("custom_field"));

    // The custom key was inserted as-is: FTS enabled, no inverted index entry.
    let custom_string = result
        .keys
        .get("custom_field")
        .unwrap()
        .string
        .as_ref()
        .unwrap();
    assert!(custom_string.fts_index.as_ref().unwrap().enabled);
    assert!(custom_string.string_inverted_index.is_none());
}
3727
#[test]
fn test_reconcile_with_collection_config_default_config() {
    // A schema derived from the default HNSW collection config is expected to
    // come back unchanged when reconciled against that same config.
    let default_config = InternalCollectionConfiguration::default_hnsw();
    let derived_schema = Schema::try_from(&default_config).unwrap();

    let reconciled = Schema::reconcile_with_collection_config(
        &derived_schema,
        &default_config,
        KnnIndex::Hnsw,
    )
    .unwrap();

    assert_eq!(reconciled, derived_schema);
}
3739
#[test]
fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_hnsw() {
    // Default HNSW collection config + default HNSW schema, HNSW requested.
    let default_config = InternalCollectionConfiguration::default_hnsw();
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &default_config,
        KnnIndex::Hnsw,
    )
    .unwrap();

    let float_list_defaults = &reconciled.defaults.float_list;
    assert!(float_list_defaults.is_some());

    // Only the HNSW section is expected on the defaults' vector index.
    let vector_config = &float_list_defaults
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    assert!(vector_config.hnsw.is_some());
    assert!(vector_config.spann.is_none());
}
3774
#[test]
fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_spann() {
    // Default HNSW collection config + default HNSW schema, SPANN requested.
    let default_config = InternalCollectionConfiguration::default_hnsw();
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &default_config,
        KnnIndex::Spann,
    )
    .unwrap();

    let float_list_defaults = &reconciled.defaults.float_list;
    assert!(float_list_defaults.is_some());

    // Only the SPANN section is expected on the defaults' vector index.
    let vector_config = &float_list_defaults
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    assert!(vector_config.spann.is_some());
    assert!(vector_config.hnsw.is_none());
}
3808
#[test]
fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_hnsw() {
    // Default HNSW collection config + default SPANN schema, HNSW requested.
    let default_config = InternalCollectionConfiguration::default_hnsw();
    let default_schema = Schema::new_default(KnnIndex::Spann);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &default_config,
        KnnIndex::Hnsw,
    )
    .unwrap();

    let float_list_defaults = &reconciled.defaults.float_list;
    assert!(float_list_defaults.is_some());

    // Only the HNSW section is expected on the defaults' vector index.
    let vector_config = &float_list_defaults
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    assert!(vector_config.hnsw.is_some());
    assert!(vector_config.spann.is_none());
}
3842
#[test]
fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_spann() {
    // Default HNSW collection config + default SPANN schema, SPANN requested.
    let default_config = InternalCollectionConfiguration::default_hnsw();
    let default_schema = Schema::new_default(KnnIndex::Spann);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &default_config,
        KnnIndex::Spann,
    )
    .unwrap();

    let float_list_defaults = &reconciled.defaults.float_list;
    assert!(float_list_defaults.is_some());

    // Only the SPANN section is expected on the defaults' vector index.
    let vector_config = &float_list_defaults
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    assert!(vector_config.spann.is_some());
    assert!(vector_config.hnsw.is_none());
}
3876
#[test]
fn test_reconcile_double_default_spann_config_spann_schema_default_knn_hnsw() {
    // Default SPANN collection config + default SPANN schema, HNSW requested.
    let default_config = InternalCollectionConfiguration::default_spann();
    let default_schema = Schema::new_default(KnnIndex::Spann);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &default_config,
        KnnIndex::Hnsw,
    )
    .unwrap();

    let float_list_defaults = &reconciled.defaults.float_list;
    assert!(float_list_defaults.is_some());

    // Only the HNSW section is expected on the defaults' vector index.
    let vector_config = &float_list_defaults
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    assert!(vector_config.hnsw.is_some());
    assert!(vector_config.spann.is_none());
}
3910
#[test]
fn test_reconcile_double_default_spann_config_spann_schema_default_knn_spann() {
    // Default SPANN collection config + default SPANN schema, SPANN requested.
    let default_config = InternalCollectionConfiguration::default_spann();
    let default_schema = Schema::new_default(KnnIndex::Spann);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &default_config,
        KnnIndex::Spann,
    )
    .unwrap();

    let float_list_defaults = &reconciled.defaults.float_list;
    assert!(float_list_defaults.is_some());

    // Only the SPANN section is expected on the defaults' vector index.
    let vector_config = &float_list_defaults
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    assert!(vector_config.spann.is_some());
    assert!(vector_config.hnsw.is_none());

    // The schema-wide defaults are expected to carry no source key.
    assert_eq!(vector_config.source_key, None);
}
3958
#[test]
fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_hnsw() {
    // Default SPANN collection config + default HNSW schema, HNSW requested.
    let default_config = InternalCollectionConfiguration::default_spann();
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &default_config,
        KnnIndex::Hnsw,
    )
    .unwrap();

    let float_list_defaults = &reconciled.defaults.float_list;
    assert!(float_list_defaults.is_some());

    // Only the HNSW section is expected on the defaults' vector index.
    let vector_config = &float_list_defaults
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    assert!(vector_config.hnsw.is_some());
    assert!(vector_config.spann.is_none());
}
3992
#[test]
fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_spann() {
    // Default SPANN collection config + default HNSW schema, SPANN requested.
    let default_config = InternalCollectionConfiguration::default_spann();
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &default_config,
        KnnIndex::Spann,
    )
    .unwrap();

    let float_list_defaults = &reconciled.defaults.float_list;
    assert!(float_list_defaults.is_some());

    // Only the SPANN section is expected on the defaults' vector index.
    let vector_config = &float_list_defaults
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    assert!(vector_config.spann.is_some());
    assert!(vector_config.hnsw.is_none());
}
4026
#[test]
fn test_defaults_source_key_not_document() {
    /// Vector index stored under the schema-wide float-list defaults.
    fn defaults_index(schema: &Schema) -> &VectorIndexType {
        schema
            .defaults
            .float_list
            .as_ref()
            .unwrap()
            .vector_index
            .as_ref()
            .unwrap()
    }

    /// Vector index stored under the `EMBEDDING_KEY` entry of `keys`.
    fn embedding_index(schema: &Schema) -> &VectorIndexType {
        schema
            .keys
            .get(EMBEDDING_KEY)
            .unwrap()
            .float_list
            .as_ref()
            .unwrap()
            .vector_index
            .as_ref()
            .unwrap()
    }

    let schema_hnsw = Schema::new_default(KnnIndex::Hnsw);
    let schema_spann = Schema::new_default(KnnIndex::Spann);

    // Freshly built default schemas: no source key on the defaults.
    assert_eq!(defaults_index(&schema_hnsw).config.source_key, None);
    assert_eq!(defaults_index(&schema_spann).config.source_key, None);

    // Reconcile the HNSW schema with a customised HNSW collection config.
    let collection_config_hnsw = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };
    let result_hnsw = Schema::reconcile_with_collection_config(
        &schema_hnsw,
        &collection_config_hnsw,
        KnnIndex::Hnsw,
    )
    .unwrap();
    assert_eq!(defaults_index(&result_hnsw).config.source_key, None);

    // Reconcile the SPANN schema with a customised SPANN collection config.
    let collection_config_spann = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
            search_nprobe: 20,
            search_rng_factor: 3.0,
            search_rng_epsilon: 0.2,
            nreplica_count: 5,
            write_rng_factor: 2.0,
            write_rng_epsilon: 0.1,
            split_threshold: 2000,
            num_samples_kmeans: 200,
            initial_lambda: 0.8,
            reassign_neighbor_count: 100,
            merge_threshold: 800,
            num_centers_to_merge_to: 20,
            write_nprobe: 10,
            ef_construction: 400,
            ef_search: 60,
            max_neighbors: 24,
            space: Space::Cosine,
        }),
        embedding_function: None,
    };
    let result_spann = Schema::reconcile_with_collection_config(
        &schema_spann,
        &collection_config_spann,
        KnnIndex::Spann,
    )
    .unwrap();
    assert_eq!(defaults_index(&result_spann).config.source_key, None);

    // The embedding key, in contrast, is expected to point at the document key.
    assert_eq!(
        embedding_index(&result_hnsw).config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
    assert_eq!(
        embedding_index(&result_spann).config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
}
4151
#[test]
fn test_try_from_source_key() {
    /// Vector index stored under the schema-wide float-list defaults.
    fn defaults_index(schema: &Schema) -> &VectorIndexType {
        schema
            .defaults
            .float_list
            .as_ref()
            .unwrap()
            .vector_index
            .as_ref()
            .unwrap()
    }

    /// Vector index stored under the `EMBEDDING_KEY` entry of `keys`.
    fn embedding_index(schema: &Schema) -> &VectorIndexType {
        schema
            .keys
            .get(EMBEDDING_KEY)
            .unwrap()
            .float_list
            .as_ref()
            .unwrap()
            .vector_index
            .as_ref()
            .unwrap()
    }

    // --- Schema converted from an HNSW collection configuration ---
    let hnsw_collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };
    let hnsw_schema = Schema::try_from(&hnsw_collection_config).unwrap();

    // Defaults carry no source key; the embedding key points at the document key.
    assert_eq!(defaults_index(&hnsw_schema).config.source_key, None);
    assert_eq!(
        embedding_index(&hnsw_schema).config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );

    // --- Schema converted from a SPANN collection configuration ---
    let spann_collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
            search_nprobe: 20,
            search_rng_factor: 3.0,
            search_rng_epsilon: 0.2,
            nreplica_count: 5,
            write_rng_factor: 2.0,
            write_rng_epsilon: 0.1,
            split_threshold: 2000,
            num_samples_kmeans: 200,
            initial_lambda: 0.8,
            reassign_neighbor_count: 100,
            merge_threshold: 800,
            num_centers_to_merge_to: 20,
            write_nprobe: 10,
            ef_construction: 400,
            ef_search: 60,
            max_neighbors: 24,
            space: Space::Cosine,
        }),
        embedding_function: None,
    };
    let spann_schema = Schema::try_from(&spann_collection_config).unwrap();

    // Same expectations as for the HNSW-derived schema.
    assert_eq!(defaults_index(&spann_schema).config.source_key, None);
    assert_eq!(
        embedding_index(&spann_schema).config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
}
4248
#[test]
fn test_default_hnsw_with_default_embedding_function() {
    use crate::collection_configuration::EmbeddingFunctionNewConfiguration;

    // Builds the "default" known embedding function with an empty JSON config.
    let default_embedding_function = || {
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "default".to_string(),
            config: serde_json::json!({}),
        })
    };

    let collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default()),
        embedding_function: Some(default_embedding_function()),
    };

    // Default HNSW parameters plus the "default" embedding function must
    // still be reported as a default configuration.
    assert!(collection_config.is_default());

    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &collection_config,
        KnnIndex::Spann,
    )
    .unwrap();

    // Schema-wide defaults: no source key.
    let defaults_index = reconciled
        .defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert_eq!(defaults_index.config.source_key, None);

    // Embedding key: source key points at the document key.
    let embedding_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert_eq!(
        embedding_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );

    // SPANN was requested, so the defaults are expected to hold SPANN only.
    let defaults_config = &defaults_index.config;
    assert!(defaults_config.spann.is_some());
    assert!(defaults_config.hnsw.is_none());

    // The embedding function is expected on both the embedding key and the defaults.
    assert_eq!(
        embedding_index.config.embedding_function,
        Some(default_embedding_function())
    );
    assert_eq!(
        defaults_index.config.embedding_function,
        Some(default_embedding_function())
    );
}
4324
#[test]
fn test_reconcile_with_collection_config_both_non_default() {
    // Customised schema: string defaults carry only an enabled FTS index.
    let mut custom_schema = Schema::new_default(KnnIndex::Hnsw);
    custom_schema.defaults.string = Some(StringValueType {
        string_inverted_index: None,
        fts_index: Some(FtsIndexType {
            enabled: true,
            config: FtsIndexConfig {},
        }),
    });

    // Customised collection config: default HNSW with ef_construction changed.
    let mut custom_config = InternalCollectionConfiguration::default_hnsw();
    if let VectorIndexConfiguration::Hnsw(hnsw) = &mut custom_config.vector_index {
        hnsw.ef_construction = 500;
    }

    // With both the schema and the config customised, reconciliation is
    // expected to be rejected as a conflict.
    let outcome = Schema::reconcile_schema_and_config(
        Some(&custom_schema),
        Some(&custom_config),
        KnnIndex::Spann,
    );
    assert!(outcome.is_err());
    assert!(matches!(
        outcome.unwrap_err(),
        SchemaError::ConfigAndSchemaConflict
    ));
}
4356
#[test]
fn test_reconcile_with_collection_config_hnsw_override() {
    let default_schema = Schema::new_default(KnnIndex::Hnsw);

    // Collection config with every HNSW parameter set to a distinctive value.
    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };

    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &custom_config,
        KnnIndex::Hnsw,
    )
    .unwrap();

    // Inspect the vector index registered under the embedding key.
    let embedding_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();

    assert!(embedding_index.enabled);
    let index_config = &embedding_index.config;
    assert_eq!(index_config.space, Some(Space::L2));
    assert_eq!(
        index_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(index_config.source_key, Some(DOCUMENT_KEY.to_string()));

    // Every HNSW parameter from the collection config must have been carried over.
    let hnsw = index_config.hnsw.as_ref().unwrap();
    assert_eq!(hnsw.ef_construction, Some(300));
    assert_eq!(hnsw.max_neighbors, Some(32));
    assert_eq!(hnsw.ef_search, Some(50));
    assert_eq!(hnsw.num_threads, Some(8));
    assert_eq!(hnsw.batch_size, Some(200));
    assert_eq!(hnsw.sync_threshold, Some(2000));
    assert_eq!(hnsw.resize_factor, Some(1.5));

    // No SPANN section is expected alongside HNSW.
    assert!(index_config.spann.is_none());
}
4412
#[test]
fn test_reconcile_with_collection_config_spann_override() {
    let default_schema = Schema::new_default(KnnIndex::Spann);

    // Collection config with every SPANN parameter set to a distinctive value.
    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
            search_nprobe: 20,
            search_rng_factor: 3.0,
            search_rng_epsilon: 0.2,
            nreplica_count: 5,
            write_rng_factor: 2.0,
            write_rng_epsilon: 0.1,
            split_threshold: 2000,
            num_samples_kmeans: 200,
            initial_lambda: 0.8,
            reassign_neighbor_count: 100,
            merge_threshold: 800,
            num_centers_to_merge_to: 20,
            write_nprobe: 10,
            ef_construction: 400,
            ef_search: 60,
            max_neighbors: 24,
            space: Space::Cosine,
        }),
        embedding_function: None,
    };

    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &custom_config,
        KnnIndex::Spann,
    )
    .unwrap();

    // Inspect the vector index registered under the embedding key.
    let embedding_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();

    assert!(embedding_index.enabled);
    let index_config = &embedding_index.config;
    assert_eq!(index_config.space, Some(Space::Cosine));
    assert_eq!(index_config.embedding_function, None);
    assert_eq!(index_config.source_key, Some(DOCUMENT_KEY.to_string()));

    // No HNSW section is expected alongside SPANN.
    assert!(index_config.hnsw.is_none());

    // Every SPANN parameter from the collection config must have been carried over.
    let spann = index_config.spann.as_ref().unwrap();
    assert_eq!(spann.search_nprobe, Some(20));
    assert_eq!(spann.search_rng_factor, Some(3.0));
    assert_eq!(spann.search_rng_epsilon, Some(0.2));
    assert_eq!(spann.nreplica_count, Some(5));
    assert_eq!(spann.write_rng_factor, Some(2.0));
    assert_eq!(spann.write_rng_epsilon, Some(0.1));
    assert_eq!(spann.split_threshold, Some(2000));
    assert_eq!(spann.num_samples_kmeans, Some(200));
    assert_eq!(spann.initial_lambda, Some(0.8));
    assert_eq!(spann.reassign_neighbor_count, Some(100));
    assert_eq!(spann.merge_threshold, Some(800));
    assert_eq!(spann.num_centers_to_merge_to, Some(20));
    assert_eq!(spann.write_nprobe, Some(10));
    assert_eq!(spann.ef_construction, Some(400));
    assert_eq!(spann.ef_search, Some(60));
    assert_eq!(spann.max_neighbors, Some(24));
}
4483
#[test]
fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
    let default_schema = Schema::new_default(KnnIndex::Hnsw);

    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };

    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &custom_config,
        KnnIndex::Hnsw,
    )
    .unwrap();

    // --- Schema-wide float-list defaults ---
    let defaults_index = reconciled
        .defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();

    // The defaults' vector index stays disabled but picks up the collection settings.
    assert!(!defaults_index.enabled);
    assert_eq!(defaults_index.config.space, Some(Space::L2));
    assert_eq!(
        defaults_index.config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(defaults_index.config.source_key, None);
    let hnsw_on_defaults = defaults_index.config.hnsw.as_ref().unwrap();
    assert_eq!(hnsw_on_defaults.ef_construction, Some(300));
    assert_eq!(hnsw_on_defaults.max_neighbors, Some(32));

    // --- Embedding key override ---
    let embedding_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();

    // The embedding key's vector index is enabled, carries the same settings,
    // and additionally sources from the document key.
    assert!(embedding_index.enabled);
    assert_eq!(embedding_index.config.space, Some(Space::L2));
    assert_eq!(
        embedding_index.config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(
        embedding_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
    let hnsw_on_embedding = embedding_index.config.hnsw.as_ref().unwrap();
    assert_eq!(hnsw_on_embedding.ef_construction, Some(300));
    assert_eq!(hnsw_on_embedding.max_neighbors, Some(32));
}
4557
#[test]
fn test_is_schema_default() {
    // Freshly built default schemas must be recognised as default for both index kinds.
    let hnsw_default = Schema::new_default(KnnIndex::Hnsw);
    assert!(hnsw_default.is_default());

    let spann_default = Schema::new_default(KnnIndex::Spann);
    assert!(spann_default.is_default());

    // Turning off the string inverted index in the defaults makes the schema non-default.
    let mut tweaked = Schema::new_default(KnnIndex::Hnsw);
    let inverted_index = tweaked
        .defaults
        .string
        .as_mut()
        .and_then(|string_type| string_type.string_inverted_index.as_mut());
    if let Some(inverted_index) = inverted_index {
        inverted_index.enabled = false;
    }
    assert!(!tweaked.is_default());

    // An extra key override, even an empty one, also makes the schema non-default.
    let mut with_extra_key = Schema::new_default(KnnIndex::Hnsw);
    with_extra_key
        .keys
        .insert("custom_key".to_string(), ValueTypes::default());
    assert!(!with_extra_key.is_default());
}
4584
4585 #[test]
4586 fn test_is_schema_default_with_space() {
4587 let schema = Schema::new_default(KnnIndex::Hnsw);
4588 assert!(schema.is_default());
4589
4590 let mut schema_with_space = Schema::new_default(KnnIndex::Hnsw);
4591 if let Some(ref mut float_list) = schema_with_space.defaults.float_list {
4592 if let Some(ref mut vector_index) = float_list.vector_index {
4593 vector_index.config.space = Some(Space::Cosine);
4594 }
4595 }
4596 assert!(!schema_with_space.is_default());
4597
4598 let mut schema_with_space_in_embedding_key = Schema::new_default(KnnIndex::Spann);
4599 if let Some(ref mut embedding_key) = schema_with_space_in_embedding_key
4600 .keys
4601 .get_mut(EMBEDDING_KEY)
4602 {
4603 if let Some(ref mut float_list) = embedding_key.float_list {
4604 if let Some(ref mut vector_index) = float_list.vector_index {
4605 vector_index.config.space = Some(Space::Cosine);
4606 }
4607 }
4608 }
4609 assert!(!schema_with_space_in_embedding_key.is_default());
4610 }
4611
4612 #[test]
4613 fn test_is_schema_default_with_embedding_function() {
4614 let schema = Schema::new_default(KnnIndex::Hnsw);
4615 assert!(schema.is_default());
4616
4617 let mut schema_with_embedding_function = Schema::new_default(KnnIndex::Hnsw);
4618 if let Some(ref mut float_list) = schema_with_embedding_function.defaults.float_list {
4619 if let Some(ref mut vector_index) = float_list.vector_index {
4620 vector_index.config.embedding_function =
4621 Some(EmbeddingFunctionConfiguration::Legacy);
4622 }
4623 }
4624 assert!(!schema_with_embedding_function.is_default());
4625
4626 let mut schema_with_embedding_function_in_embedding_key =
4627 Schema::new_default(KnnIndex::Spann);
4628 if let Some(ref mut embedding_key) = schema_with_embedding_function_in_embedding_key
4629 .keys
4630 .get_mut(EMBEDDING_KEY)
4631 {
4632 if let Some(ref mut float_list) = embedding_key.float_list {
4633 if let Some(ref mut vector_index) = float_list.vector_index {
4634 vector_index.config.embedding_function =
4635 Some(EmbeddingFunctionConfiguration::Legacy);
4636 }
4637 }
4638 }
4639 assert!(!schema_with_embedding_function_in_embedding_key.is_default());
4640 }
4641
4642 #[test]
4643 fn test_add_merges_keys_by_value_type() {
4644 let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
4645 let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
4646
4647 let string_override = ValueTypes {
4648 string: Some(StringValueType {
4649 string_inverted_index: Some(StringInvertedIndexType {
4650 enabled: true,
4651 config: StringInvertedIndexConfig {},
4652 }),
4653 fts_index: None,
4654 }),
4655 ..Default::default()
4656 };
4657 schema_a
4658 .keys
4659 .insert("custom_field".to_string(), string_override);
4660
4661 let float_override = ValueTypes {
4662 float: Some(FloatValueType {
4663 float_inverted_index: Some(FloatInvertedIndexType {
4664 enabled: true,
4665 config: FloatInvertedIndexConfig {},
4666 }),
4667 }),
4668 ..Default::default()
4669 };
4670 schema_b
4671 .keys
4672 .insert("custom_field".to_string(), float_override);
4673
4674 let merged = schema_a.merge(&schema_b).unwrap();
4675 let merged_override = merged.keys.get("custom_field").unwrap();
4676
4677 assert!(merged_override.string.is_some());
4678 assert!(merged_override.float.is_some());
4679 assert!(
4680 merged_override
4681 .string
4682 .as_ref()
4683 .unwrap()
4684 .string_inverted_index
4685 .as_ref()
4686 .unwrap()
4687 .enabled
4688 );
4689 assert!(
4690 merged_override
4691 .float
4692 .as_ref()
4693 .unwrap()
4694 .float_inverted_index
4695 .as_ref()
4696 .unwrap()
4697 .enabled
4698 );
4699 }
4700
4701 #[test]
4702 fn test_add_rejects_different_defaults() {
4703 let schema_a = Schema::new_default(KnnIndex::Hnsw);
4704 let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
4705
4706 if let Some(string_type) = schema_b.defaults.string.as_mut() {
4707 if let Some(string_index) = string_type.string_inverted_index.as_mut() {
4708 string_index.enabled = false;
4709 }
4710 }
4711
4712 let err = schema_a.merge(&schema_b).unwrap_err();
4713 assert!(matches!(err, SchemaError::DefaultsMismatch));
4714 }
4715
4716 #[test]
4717 fn test_add_detects_conflicting_value_type_configuration() {
4718 let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
4719 let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
4720
4721 let string_override_enabled = ValueTypes {
4722 string: Some(StringValueType {
4723 string_inverted_index: Some(StringInvertedIndexType {
4724 enabled: true,
4725 config: StringInvertedIndexConfig {},
4726 }),
4727 fts_index: None,
4728 }),
4729 ..Default::default()
4730 };
4731 schema_a
4732 .keys
4733 .insert("custom_field".to_string(), string_override_enabled);
4734
4735 let string_override_disabled = ValueTypes {
4736 string: Some(StringValueType {
4737 string_inverted_index: Some(StringInvertedIndexType {
4738 enabled: false,
4739 config: StringInvertedIndexConfig {},
4740 }),
4741 fts_index: None,
4742 }),
4743 ..Default::default()
4744 };
4745 schema_b
4746 .keys
4747 .insert("custom_field".to_string(), string_override_disabled);
4748
4749 let err = schema_a.merge(&schema_b).unwrap_err();
4750 assert!(matches!(err, SchemaError::ConfigurationConflict { .. }));
4751 }
4752
4753 #[test]
4755 fn test_backward_compatibility_aliases() {
4756 let old_format_json = r###"{
4758 "defaults": {
4759 "#string": {
4760 "$fts_index": {
4761 "enabled": true,
4762 "config": {}
4763 }
4764 },
4765 "#int": {
4766 "$int_inverted_index": {
4767 "enabled": true,
4768 "config": {}
4769 }
4770 },
4771 "#float_list": {
4772 "$vector_index": {
4773 "enabled": true,
4774 "config": {
4775 "spann": {
4776 "search_nprobe": 10
4777 }
4778 }
4779 }
4780 }
4781 },
4782 "key_overrides": {
4783 "#document": {
4784 "#string": {
4785 "$fts_index": {
4786 "enabled": false,
4787 "config": {}
4788 }
4789 }
4790 }
4791 }
4792 }"###;
4793
4794 let schema_from_old: Schema = serde_json::from_str(old_format_json).unwrap();
4795
4796 let new_format_json = r###"{
4798 "defaults": {
4799 "string": {
4800 "fts_index": {
4801 "enabled": true,
4802 "config": {}
4803 }
4804 },
4805 "int": {
4806 "int_inverted_index": {
4807 "enabled": true,
4808 "config": {}
4809 }
4810 },
4811 "float_list": {
4812 "vector_index": {
4813 "enabled": true,
4814 "config": {
4815 "spann": {
4816 "search_nprobe": 10
4817 }
4818 }
4819 }
4820 }
4821 },
4822 "keys": {
4823 "#document": {
4824 "string": {
4825 "fts_index": {
4826 "enabled": false,
4827 "config": {}
4828 }
4829 }
4830 }
4831 }
4832 }"###;
4833
4834 let schema_from_new: Schema = serde_json::from_str(new_format_json).unwrap();
4835
4836 assert_eq!(schema_from_old, schema_from_new);
4838
4839 assert!(schema_from_old.defaults.string.is_some());
4841 assert!(schema_from_old
4842 .defaults
4843 .string
4844 .as_ref()
4845 .unwrap()
4846 .fts_index
4847 .is_some());
4848 assert!(
4849 schema_from_old
4850 .defaults
4851 .string
4852 .as_ref()
4853 .unwrap()
4854 .fts_index
4855 .as_ref()
4856 .unwrap()
4857 .enabled
4858 );
4859
4860 assert!(schema_from_old.defaults.int.is_some());
4861 assert!(schema_from_old
4862 .defaults
4863 .int
4864 .as_ref()
4865 .unwrap()
4866 .int_inverted_index
4867 .is_some());
4868
4869 assert!(schema_from_old.defaults.float_list.is_some());
4870 assert!(schema_from_old
4871 .defaults
4872 .float_list
4873 .as_ref()
4874 .unwrap()
4875 .vector_index
4876 .is_some());
4877
4878 assert!(schema_from_old.keys.contains_key(DOCUMENT_KEY));
4879 let doc_override = schema_from_old.keys.get(DOCUMENT_KEY).unwrap();
4880 assert!(doc_override.string.is_some());
4881 assert!(
4882 !doc_override
4883 .string
4884 .as_ref()
4885 .unwrap()
4886 .fts_index
4887 .as_ref()
4888 .unwrap()
4889 .enabled
4890 );
4891
4892 let serialized = serde_json::to_string(&schema_from_old).unwrap();
4894
4895 assert!(serialized.contains(r#""keys":"#));
4897 assert!(serialized.contains(r#""string":"#));
4898 assert!(serialized.contains(r#""fts_index":"#));
4899 assert!(serialized.contains(r#""int_inverted_index":"#));
4900 assert!(serialized.contains(r#""vector_index":"#));
4901
4902 assert!(!serialized.contains(r#""key_overrides":"#));
4904 assert!(!serialized.contains(r###""#string":"###));
4905 assert!(!serialized.contains(r###""$fts_index":"###));
4906 assert!(!serialized.contains(r###""$int_inverted_index":"###));
4907 assert!(!serialized.contains(r###""$vector_index":"###));
4908 }
4909
4910 #[test]
4911 fn test_hnsw_index_config_validation() {
4912 use validator::Validate;
4913
4914 let valid_config = HnswIndexConfig {
4916 batch_size: Some(10),
4917 sync_threshold: Some(100),
4918 ef_construction: Some(100),
4919 max_neighbors: Some(16),
4920 ..Default::default()
4921 };
4922 assert!(valid_config.validate().is_ok());
4923
4924 let invalid_batch_size = HnswIndexConfig {
4926 batch_size: Some(1),
4927 ..Default::default()
4928 };
4929 assert!(invalid_batch_size.validate().is_err());
4930
4931 let invalid_sync_threshold = HnswIndexConfig {
4933 sync_threshold: Some(1),
4934 ..Default::default()
4935 };
4936 assert!(invalid_sync_threshold.validate().is_err());
4937
4938 let boundary_config = HnswIndexConfig {
4940 batch_size: Some(2),
4941 sync_threshold: Some(2),
4942 ..Default::default()
4943 };
4944 assert!(boundary_config.validate().is_ok());
4945
4946 let all_none_config = HnswIndexConfig {
4948 ..Default::default()
4949 };
4950 assert!(all_none_config.validate().is_ok());
4951
4952 let other_fields_config = HnswIndexConfig {
4954 ef_construction: Some(1),
4955 max_neighbors: Some(1),
4956 ef_search: Some(1),
4957 num_threads: Some(1),
4958 resize_factor: Some(0.1),
4959 ..Default::default()
4960 };
4961 assert!(other_fields_config.validate().is_ok());
4962 }
4963
4964 #[test]
4965 fn test_spann_index_config_validation() {
4966 use validator::Validate;
4967
4968 let valid_config = SpannIndexConfig {
4970 write_nprobe: Some(32),
4971 nreplica_count: Some(4),
4972 split_threshold: Some(100),
4973 merge_threshold: Some(50),
4974 reassign_neighbor_count: Some(32),
4975 num_centers_to_merge_to: Some(4),
4976 ef_construction: Some(100),
4977 ef_search: Some(100),
4978 max_neighbors: Some(32),
4979 search_rng_factor: Some(1.0),
4980 write_rng_factor: Some(1.0),
4981 search_rng_epsilon: Some(7.5),
4982 write_rng_epsilon: Some(7.5),
4983 ..Default::default()
4984 };
4985 assert!(valid_config.validate().is_ok());
4986
4987 let invalid_write_nprobe = SpannIndexConfig {
4989 write_nprobe: Some(200),
4990 ..Default::default()
4991 };
4992 assert!(invalid_write_nprobe.validate().is_err());
4993
4994 let invalid_split_threshold = SpannIndexConfig {
4996 split_threshold: Some(10),
4997 ..Default::default()
4998 };
4999 assert!(invalid_split_threshold.validate().is_err());
5000
5001 let invalid_split_threshold_high = SpannIndexConfig {
5003 split_threshold: Some(250),
5004 ..Default::default()
5005 };
5006 assert!(invalid_split_threshold_high.validate().is_err());
5007
5008 let invalid_nreplica = SpannIndexConfig {
5010 nreplica_count: Some(10),
5011 ..Default::default()
5012 };
5013 assert!(invalid_nreplica.validate().is_err());
5014
5015 let invalid_reassign = SpannIndexConfig {
5017 reassign_neighbor_count: Some(100),
5018 ..Default::default()
5019 };
5020 assert!(invalid_reassign.validate().is_err());
5021
5022 let invalid_merge_threshold_low = SpannIndexConfig {
5024 merge_threshold: Some(5),
5025 ..Default::default()
5026 };
5027 assert!(invalid_merge_threshold_low.validate().is_err());
5028
5029 let invalid_merge_threshold_high = SpannIndexConfig {
5030 merge_threshold: Some(150),
5031 ..Default::default()
5032 };
5033 assert!(invalid_merge_threshold_high.validate().is_err());
5034
5035 let invalid_num_centers = SpannIndexConfig {
5037 num_centers_to_merge_to: Some(10),
5038 ..Default::default()
5039 };
5040 assert!(invalid_num_centers.validate().is_err());
5041
5042 let invalid_ef_construction = SpannIndexConfig {
5044 ef_construction: Some(300),
5045 ..Default::default()
5046 };
5047 assert!(invalid_ef_construction.validate().is_err());
5048
5049 let invalid_ef_search = SpannIndexConfig {
5051 ef_search: Some(300),
5052 ..Default::default()
5053 };
5054 assert!(invalid_ef_search.validate().is_err());
5055
5056 let invalid_max_neighbors = SpannIndexConfig {
5058 max_neighbors: Some(100),
5059 ..Default::default()
5060 };
5061 assert!(invalid_max_neighbors.validate().is_err());
5062
5063 let invalid_search_nprobe = SpannIndexConfig {
5065 search_nprobe: Some(200),
5066 ..Default::default()
5067 };
5068 assert!(invalid_search_nprobe.validate().is_err());
5069
5070 let invalid_search_rng_factor_low = SpannIndexConfig {
5072 search_rng_factor: Some(0.9),
5073 ..Default::default()
5074 };
5075 assert!(invalid_search_rng_factor_low.validate().is_err());
5076
5077 let invalid_search_rng_factor_high = SpannIndexConfig {
5078 search_rng_factor: Some(1.1),
5079 ..Default::default()
5080 };
5081 assert!(invalid_search_rng_factor_high.validate().is_err());
5082
5083 let valid_search_rng_factor = SpannIndexConfig {
5085 search_rng_factor: Some(1.0),
5086 ..Default::default()
5087 };
5088 assert!(valid_search_rng_factor.validate().is_ok());
5089
5090 let invalid_search_rng_epsilon_low = SpannIndexConfig {
5092 search_rng_epsilon: Some(4.0),
5093 ..Default::default()
5094 };
5095 assert!(invalid_search_rng_epsilon_low.validate().is_err());
5096
5097 let invalid_search_rng_epsilon_high = SpannIndexConfig {
5098 search_rng_epsilon: Some(11.0),
5099 ..Default::default()
5100 };
5101 assert!(invalid_search_rng_epsilon_high.validate().is_err());
5102
5103 let valid_search_rng_epsilon = SpannIndexConfig {
5105 search_rng_epsilon: Some(7.5),
5106 ..Default::default()
5107 };
5108 assert!(valid_search_rng_epsilon.validate().is_ok());
5109
5110 let invalid_write_rng_factor_low = SpannIndexConfig {
5112 write_rng_factor: Some(0.9),
5113 ..Default::default()
5114 };
5115 assert!(invalid_write_rng_factor_low.validate().is_err());
5116
5117 let invalid_write_rng_factor_high = SpannIndexConfig {
5118 write_rng_factor: Some(1.1),
5119 ..Default::default()
5120 };
5121 assert!(invalid_write_rng_factor_high.validate().is_err());
5122
5123 let valid_write_rng_factor = SpannIndexConfig {
5125 write_rng_factor: Some(1.0),
5126 ..Default::default()
5127 };
5128 assert!(valid_write_rng_factor.validate().is_ok());
5129
5130 let invalid_write_rng_epsilon_low = SpannIndexConfig {
5132 write_rng_epsilon: Some(4.0),
5133 ..Default::default()
5134 };
5135 assert!(invalid_write_rng_epsilon_low.validate().is_err());
5136
5137 let invalid_write_rng_epsilon_high = SpannIndexConfig {
5138 write_rng_epsilon: Some(11.0),
5139 ..Default::default()
5140 };
5141 assert!(invalid_write_rng_epsilon_high.validate().is_err());
5142
5143 let valid_write_rng_epsilon = SpannIndexConfig {
5145 write_rng_epsilon: Some(7.5),
5146 ..Default::default()
5147 };
5148 assert!(valid_write_rng_epsilon.validate().is_ok());
5149
5150 let invalid_num_samples_kmeans = SpannIndexConfig {
5152 num_samples_kmeans: Some(1500),
5153 ..Default::default()
5154 };
5155 assert!(invalid_num_samples_kmeans.validate().is_err());
5156
5157 let valid_num_samples_kmeans = SpannIndexConfig {
5159 num_samples_kmeans: Some(500),
5160 ..Default::default()
5161 };
5162 assert!(valid_num_samples_kmeans.validate().is_ok());
5163
5164 let invalid_initial_lambda_high = SpannIndexConfig {
5166 initial_lambda: Some(150.0),
5167 ..Default::default()
5168 };
5169 assert!(invalid_initial_lambda_high.validate().is_err());
5170
5171 let invalid_initial_lambda_low = SpannIndexConfig {
5172 initial_lambda: Some(50.0),
5173 ..Default::default()
5174 };
5175 assert!(invalid_initial_lambda_low.validate().is_err());
5176
5177 let valid_initial_lambda = SpannIndexConfig {
5179 initial_lambda: Some(100.0),
5180 ..Default::default()
5181 };
5182 assert!(valid_initial_lambda.validate().is_ok());
5183
5184 let all_none_config = SpannIndexConfig {
5186 ..Default::default()
5187 };
5188 assert!(all_none_config.validate().is_ok());
5189 }
5190
5191 #[test]
5192 fn test_builder_pattern_crud_workflow() {
5193 let schema = Schema::new_default(KnnIndex::Hnsw)
5197 .create_index(
5198 None,
5199 IndexConfig::Vector(VectorIndexConfig {
5200 space: Some(Space::Cosine),
5201 embedding_function: None,
5202 source_key: None,
5203 hnsw: Some(HnswIndexConfig {
5204 ef_construction: Some(200),
5205 max_neighbors: Some(32),
5206 ef_search: Some(50),
5207 num_threads: None,
5208 batch_size: None,
5209 sync_threshold: None,
5210 resize_factor: None,
5211 }),
5212 spann: None,
5213 }),
5214 )
5215 .expect("vector config should succeed")
5216 .create_index(
5217 Some("category"),
5218 IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5219 )
5220 .expect("string inverted on key should succeed")
5221 .create_index(
5222 Some("year"),
5223 IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5224 )
5225 .expect("int inverted on key should succeed")
5226 .create_index(
5227 Some("rating"),
5228 IndexConfig::FloatInverted(FloatInvertedIndexConfig {}),
5229 )
5230 .expect("float inverted on key should succeed")
5231 .create_index(
5232 Some("is_active"),
5233 IndexConfig::BoolInverted(BoolInvertedIndexConfig {}),
5234 )
5235 .expect("bool inverted on key should succeed");
5236
5237 assert!(schema.keys.contains_key(EMBEDDING_KEY));
5240 let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
5241 assert!(embedding.float_list.is_some());
5242 let vector_index = embedding
5243 .float_list
5244 .as_ref()
5245 .unwrap()
5246 .vector_index
5247 .as_ref()
5248 .unwrap();
5249 assert!(vector_index.enabled);
5250 assert_eq!(vector_index.config.space, Some(Space::Cosine));
5251 assert_eq!(
5252 vector_index.config.hnsw.as_ref().unwrap().ef_construction,
5253 Some(200)
5254 );
5255
5256 assert!(schema.keys.contains_key("category"));
5258 assert!(schema.keys.contains_key("year"));
5259 assert!(schema.keys.contains_key("rating"));
5260 assert!(schema.keys.contains_key("is_active"));
5261
5262 let category = schema.keys.get("category").unwrap();
5264 assert!(category.string.is_some());
5265 let string_idx = category
5266 .string
5267 .as_ref()
5268 .unwrap()
5269 .string_inverted_index
5270 .as_ref()
5271 .unwrap();
5272 assert!(string_idx.enabled);
5273
5274 let year = schema.keys.get("year").unwrap();
5276 assert!(year.int.is_some());
5277 let int_idx = year
5278 .int
5279 .as_ref()
5280 .unwrap()
5281 .int_inverted_index
5282 .as_ref()
5283 .unwrap();
5284 assert!(int_idx.enabled);
5285
5286 let schema = schema
5288 .delete_index(
5289 Some("category"),
5290 IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5291 )
5292 .expect("delete string inverted should succeed")
5293 .delete_index(
5294 Some("year"),
5295 IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5296 )
5297 .expect("delete int inverted should succeed");
5298
5299 let category = schema.keys.get("category").unwrap();
5301 let string_idx = category
5302 .string
5303 .as_ref()
5304 .unwrap()
5305 .string_inverted_index
5306 .as_ref()
5307 .unwrap();
5308 assert!(!string_idx.enabled); let year = schema.keys.get("year").unwrap();
5311 let int_idx = year
5312 .int
5313 .as_ref()
5314 .unwrap()
5315 .int_inverted_index
5316 .as_ref()
5317 .unwrap();
5318 assert!(!int_idx.enabled); let rating = schema.keys.get("rating").unwrap();
5322 let float_idx = rating
5323 .float
5324 .as_ref()
5325 .unwrap()
5326 .float_inverted_index
5327 .as_ref()
5328 .unwrap();
5329 assert!(float_idx.enabled); let is_active = schema.keys.get("is_active").unwrap();
5332 let bool_idx = is_active
5333 .boolean
5334 .as_ref()
5335 .unwrap()
5336 .bool_inverted_index
5337 .as_ref()
5338 .unwrap();
5339 assert!(bool_idx.enabled); }
5341
5342 #[test]
5343 fn test_builder_create_index_validation_errors() {
5344 let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5351 Some("my_vectors"),
5352 IndexConfig::Vector(VectorIndexConfig {
5353 space: Some(Space::L2),
5354 embedding_function: None,
5355 source_key: None,
5356 hnsw: None,
5357 spann: None,
5358 }),
5359 );
5360 assert!(result.is_err());
5361 assert!(matches!(
5362 result.unwrap_err(),
5363 SchemaBuilderError::VectorIndexMustBeGlobal { key } if key == "my_vectors"
5364 ));
5365
5366 let result = Schema::new_default(KnnIndex::Hnsw)
5368 .create_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
5369 assert!(result.is_err());
5370 assert!(matches!(
5371 result.unwrap_err(),
5372 SchemaBuilderError::FtsIndexMustBeGlobal { key } if key == "my_text"
5373 ));
5374
5375 let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5377 Some(DOCUMENT_KEY),
5378 IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5379 );
5380 assert!(result.is_err());
5381 assert!(matches!(
5382 result.unwrap_err(),
5383 SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5384 ));
5385
5386 let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5388 Some(EMBEDDING_KEY),
5389 IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5390 );
5391 assert!(result.is_err());
5392 assert!(matches!(
5393 result.unwrap_err(),
5394 SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5395 ));
5396
5397 let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5399 None,
5400 IndexConfig::SparseVector(SparseVectorIndexConfig {
5401 embedding_function: None,
5402 source_key: None,
5403 bm25: None,
5404 }),
5405 );
5406 assert!(result.is_err());
5407 assert!(matches!(
5408 result.unwrap_err(),
5409 SchemaBuilderError::SparseVectorRequiresKey
5410 ));
5411
5412 let result = Schema::new_default(KnnIndex::Hnsw)
5414 .create_index(
5415 Some("sparse1"),
5416 IndexConfig::SparseVector(SparseVectorIndexConfig {
5417 embedding_function: None,
5418 source_key: None,
5419 bm25: None,
5420 }),
5421 )
5422 .expect("first sparse should succeed")
5423 .create_index(
5424 Some("sparse2"),
5425 IndexConfig::SparseVector(SparseVectorIndexConfig {
5426 embedding_function: None,
5427 source_key: None,
5428 bm25: None,
5429 }),
5430 );
5431 assert!(result.is_err());
5432 assert!(matches!(
5433 result.unwrap_err(),
5434 SchemaBuilderError::MultipleSparseVectorIndexes { existing_key } if existing_key == "sparse1"
5435 ));
5436 }
5437
5438 #[test]
5439 fn test_builder_delete_index_validation_errors() {
5440 let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5446 Some(EMBEDDING_KEY),
5447 IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5448 );
5449 assert!(result.is_err());
5450 assert!(matches!(
5451 result.unwrap_err(),
5452 SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5453 ));
5454
5455 let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5457 Some(DOCUMENT_KEY),
5458 IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5459 );
5460 assert!(result.is_err());
5461 assert!(matches!(
5462 result.unwrap_err(),
5463 SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5464 ));
5465
5466 let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5468 None,
5469 IndexConfig::Vector(VectorIndexConfig {
5470 space: None,
5471 embedding_function: None,
5472 source_key: None,
5473 hnsw: None,
5474 spann: None,
5475 }),
5476 );
5477 assert!(result.is_err());
5478 assert!(matches!(
5479 result.unwrap_err(),
5480 SchemaBuilderError::VectorIndexDeletionNotSupported
5481 ));
5482
5483 let result = Schema::new_default(KnnIndex::Hnsw)
5485 .delete_index(None, IndexConfig::Fts(FtsIndexConfig {}));
5486 assert!(result.is_err());
5487 assert!(matches!(
5488 result.unwrap_err(),
5489 SchemaBuilderError::FtsIndexDeletionNotSupported
5490 ));
5491
5492 let result = Schema::new_default(KnnIndex::Hnsw)
5494 .create_index(
5495 Some("sparse"),
5496 IndexConfig::SparseVector(SparseVectorIndexConfig {
5497 embedding_function: None,
5498 source_key: None,
5499 bm25: None,
5500 }),
5501 )
5502 .expect("create should succeed")
5503 .delete_index(
5504 Some("sparse"),
5505 IndexConfig::SparseVector(SparseVectorIndexConfig {
5506 embedding_function: None,
5507 source_key: None,
5508 bm25: None,
5509 }),
5510 );
5511 assert!(result.is_err());
5512 assert!(matches!(
5513 result.unwrap_err(),
5514 SchemaBuilderError::SparseVectorIndexDeletionNotSupported
5515 ));
5516 }
5517
5518 #[test]
5519 fn test_builder_pattern_chaining() {
5520 let schema = Schema::new_default(KnnIndex::Hnsw)
5522 .create_index(Some("tag1"), StringInvertedIndexConfig {}.into())
5523 .unwrap()
5524 .create_index(Some("tag2"), StringInvertedIndexConfig {}.into())
5525 .unwrap()
5526 .create_index(Some("tag3"), StringInvertedIndexConfig {}.into())
5527 .unwrap()
5528 .create_index(Some("count"), IntInvertedIndexConfig {}.into())
5529 .unwrap()
5530 .delete_index(Some("tag2"), StringInvertedIndexConfig {}.into())
5531 .unwrap()
5532 .create_index(Some("score"), FloatInvertedIndexConfig {}.into())
5533 .unwrap();
5534
5535 assert!(
5537 schema
5538 .keys
5539 .get("tag1")
5540 .unwrap()
5541 .string
5542 .as_ref()
5543 .unwrap()
5544 .string_inverted_index
5545 .as_ref()
5546 .unwrap()
5547 .enabled
5548 );
5549
5550 assert!(
5552 !schema
5553 .keys
5554 .get("tag2")
5555 .unwrap()
5556 .string
5557 .as_ref()
5558 .unwrap()
5559 .string_inverted_index
5560 .as_ref()
5561 .unwrap()
5562 .enabled
5563 );
5564
5565 assert!(
5567 schema
5568 .keys
5569 .get("tag3")
5570 .unwrap()
5571 .string
5572 .as_ref()
5573 .unwrap()
5574 .string_inverted_index
5575 .as_ref()
5576 .unwrap()
5577 .enabled
5578 );
5579
5580 assert!(
5582 schema
5583 .keys
5584 .get("count")
5585 .unwrap()
5586 .int
5587 .as_ref()
5588 .unwrap()
5589 .int_inverted_index
5590 .as_ref()
5591 .unwrap()
5592 .enabled
5593 );
5594
5595 assert!(
5597 schema
5598 .keys
5599 .get("score")
5600 .unwrap()
5601 .float
5602 .as_ref()
5603 .unwrap()
5604 .float_inverted_index
5605 .as_ref()
5606 .unwrap()
5607 .enabled
5608 );
5609 }
5610
5611 #[test]
5612 fn test_schema_default_matches_python() {
5613 let schema = Schema::default();
5615
5616 assert!(schema.defaults.string.is_some());
5622 let string = schema.defaults.string.as_ref().unwrap();
5623 assert!(!string.fts_index.as_ref().unwrap().enabled);
5624 assert!(string.string_inverted_index.as_ref().unwrap().enabled);
5625
5626 assert!(schema.defaults.float_list.is_some());
5628 let float_list = schema.defaults.float_list.as_ref().unwrap();
5629 assert!(!float_list.vector_index.as_ref().unwrap().enabled);
5630 let vector_config = &float_list.vector_index.as_ref().unwrap().config;
5631 assert_eq!(vector_config.space, None); assert_eq!(vector_config.hnsw, None); assert_eq!(vector_config.spann, None); assert_eq!(vector_config.source_key, None);
5635
5636 assert!(schema.defaults.sparse_vector.is_some());
5638 let sparse = schema.defaults.sparse_vector.as_ref().unwrap();
5639 assert!(!sparse.sparse_vector_index.as_ref().unwrap().enabled);
5640
5641 assert!(schema.defaults.int.is_some());
5643 assert!(
5644 schema
5645 .defaults
5646 .int
5647 .as_ref()
5648 .unwrap()
5649 .int_inverted_index
5650 .as_ref()
5651 .unwrap()
5652 .enabled
5653 );
5654
5655 assert!(schema.defaults.float.is_some());
5657 assert!(
5658 schema
5659 .defaults
5660 .float
5661 .as_ref()
5662 .unwrap()
5663 .float_inverted_index
5664 .as_ref()
5665 .unwrap()
5666 .enabled
5667 );
5668
5669 assert!(schema.defaults.boolean.is_some());
5671 assert!(
5672 schema
5673 .defaults
5674 .boolean
5675 .as_ref()
5676 .unwrap()
5677 .bool_inverted_index
5678 .as_ref()
5679 .unwrap()
5680 .enabled
5681 );
5682
5683 assert!(schema.keys.contains_key(DOCUMENT_KEY));
5689 let doc = schema.keys.get(DOCUMENT_KEY).unwrap();
5690 assert!(doc.string.is_some());
5691 assert!(
5692 doc.string
5693 .as_ref()
5694 .unwrap()
5695 .fts_index
5696 .as_ref()
5697 .unwrap()
5698 .enabled
5699 );
5700 assert!(
5701 !doc.string
5702 .as_ref()
5703 .unwrap()
5704 .string_inverted_index
5705 .as_ref()
5706 .unwrap()
5707 .enabled
5708 );
5709
5710 assert!(schema.keys.contains_key(EMBEDDING_KEY));
5712 let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
5713 assert!(embedding.float_list.is_some());
5714 let vec_idx = embedding
5715 .float_list
5716 .as_ref()
5717 .unwrap()
5718 .vector_index
5719 .as_ref()
5720 .unwrap();
5721 assert!(vec_idx.enabled);
5722 assert_eq!(vec_idx.config.source_key, Some(DOCUMENT_KEY.to_string()));
5723 assert_eq!(vec_idx.config.space, None); assert_eq!(vec_idx.config.hnsw, None); assert_eq!(vec_idx.config.spann, None); assert_eq!(schema.keys.len(), 2);
5729 }
5730
5731 #[test]
5732 fn test_schema_default_works_with_builder() {
5733 let schema = Schema::default()
5735 .create_index(Some("category"), StringInvertedIndexConfig {}.into())
5736 .expect("should succeed");
5737
5738 assert!(schema.keys.contains_key("category"));
5740 assert!(schema.keys.contains_key(DOCUMENT_KEY));
5741 assert!(schema.keys.contains_key(EMBEDDING_KEY));
5742 assert_eq!(schema.keys.len(), 3);
5743 }
5744
5745 #[cfg(feature = "testing")]
5746 mod proptests {
5747 use super::*;
5748 use crate::strategies::{
5749 embedding_function_strategy, internal_collection_configuration_strategy,
5750 internal_hnsw_configuration_strategy, internal_spann_configuration_strategy,
5751 knn_index_strategy, space_strategy, TEST_NAME_PATTERN,
5752 };
5753 use crate::{
5754 HnswIndexConfig, SpannIndexConfig, VectorIndexConfig, DOCUMENT_KEY, EMBEDDING_KEY,
5755 };
5756 use proptest::prelude::*;
5757 use proptest::strategy::BoxedStrategy;
5758 use proptest::string::string_regex;
5759 use serde_json::json;
5760
5761 fn default_embedding_function_strategy(
5762 ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
5763 proptest::option::of(prop_oneof![
5764 Just(EmbeddingFunctionConfiguration::Unknown),
5765 Just(EmbeddingFunctionConfiguration::Known(
5766 EmbeddingFunctionNewConfiguration {
5767 name: "default".to_string(),
5768 config: json!({ "alpha": 1 }),
5769 }
5770 )),
5771 ])
5772 }
5773
5774 fn sparse_embedding_function_strategy(
5775 ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
5776 let known_strategy = string_regex(TEST_NAME_PATTERN).unwrap().prop_map(|name| {
5777 EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
5778 name,
5779 config: json!({ "alpha": 1 }),
5780 })
5781 });
5782
5783 proptest::option::of(prop_oneof![
5784 Just(EmbeddingFunctionConfiguration::Unknown),
5785 known_strategy,
5786 ])
5787 }
5788
5789 fn non_default_internal_collection_configuration_strategy(
5790 ) -> impl Strategy<Value = InternalCollectionConfiguration> {
5791 internal_collection_configuration_strategy()
5792 .prop_filter("non-default configuration", |config| !config.is_default())
5793 }
5794
5795 fn partial_hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
5796 (
5797 proptest::option::of(1usize..=512),
5798 proptest::option::of(1usize..=128),
5799 proptest::option::of(1usize..=512),
5800 proptest::option::of(1usize..=64),
5801 proptest::option::of(2usize..=4096),
5802 proptest::option::of(2usize..=4096),
5803 proptest::option::of(prop_oneof![
5804 Just(0.5f64),
5805 Just(1.0f64),
5806 Just(1.5f64),
5807 Just(2.0f64)
5808 ]),
5809 )
5810 .prop_map(
5811 |(
5812 ef_construction,
5813 max_neighbors,
5814 ef_search,
5815 num_threads,
5816 batch_size,
5817 sync_threshold,
5818 resize_factor,
5819 )| HnswIndexConfig {
5820 ef_construction,
5821 max_neighbors,
5822 ef_search,
5823 num_threads,
5824 batch_size,
5825 sync_threshold,
5826 resize_factor,
5827 },
5828 )
5829 }
5830
5831 fn partial_spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
5832 let epsilon_strategy = prop_oneof![Just(5.0f32), Just(7.5f32), Just(10.0f32)];
5833 (
5834 (
5835 proptest::option::of(1u32..=128), proptest::option::of(Just(1.0f32)), proptest::option::of(epsilon_strategy.clone()), proptest::option::of(1u32..=8), proptest::option::of(Just(1.0f32)), proptest::option::of(epsilon_strategy), proptest::option::of(50u32..=200), proptest::option::of(1usize..=1000), ),
5844 (
5845 proptest::option::of(Just(100.0f32)), proptest::option::of(1u32..=64), proptest::option::of(25u32..=100), proptest::option::of(1u32..=8), proptest::option::of(1u32..=64), proptest::option::of(1usize..=200), proptest::option::of(1usize..=200), proptest::option::of(1usize..=64), ),
5854 )
5855 .prop_map(
5856 |(
5857 (
5858 search_nprobe,
5859 search_rng_factor,
5860 search_rng_epsilon,
5861 nreplica_count,
5862 write_rng_factor,
5863 write_rng_epsilon,
5864 split_threshold,
5865 num_samples_kmeans,
5866 ),
5867 (
5868 initial_lambda,
5869 reassign_neighbor_count,
5870 merge_threshold,
5871 num_centers_to_merge_to,
5872 write_nprobe,
5873 ef_construction,
5874 ef_search,
5875 max_neighbors,
5876 ),
5877 )| SpannIndexConfig {
5878 search_nprobe,
5879 search_rng_factor,
5880 search_rng_epsilon,
5881 nreplica_count,
5882 write_rng_factor,
5883 write_rng_epsilon,
5884 split_threshold,
5885 num_samples_kmeans,
5886 initial_lambda,
5887 reassign_neighbor_count,
5888 merge_threshold,
5889 num_centers_to_merge_to,
5890 write_nprobe,
5891 ef_construction,
5892 ef_search,
5893 max_neighbors,
5894 },
5895 )
5896 }
5897
5898 proptest! {
5899 #[test]
5900 fn merge_hnsw_configs_preserves_user_overrides(
5901 base in partial_hnsw_index_config_strategy(),
5902 user in partial_hnsw_index_config_strategy(),
5903 ) {
5904 let merged = Schema::merge_hnsw_configs(Some(&base), Some(&user))
5905 .expect("merge should return Some when both are Some");
5906
5907 if user.ef_construction.is_some() {
5909 prop_assert_eq!(merged.ef_construction, user.ef_construction);
5910 }
5911 if user.max_neighbors.is_some() {
5912 prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
5913 }
5914 if user.ef_search.is_some() {
5915 prop_assert_eq!(merged.ef_search, user.ef_search);
5916 }
5917 if user.num_threads.is_some() {
5918 prop_assert_eq!(merged.num_threads, user.num_threads);
5919 }
5920 if user.batch_size.is_some() {
5921 prop_assert_eq!(merged.batch_size, user.batch_size);
5922 }
5923 if user.sync_threshold.is_some() {
5924 prop_assert_eq!(merged.sync_threshold, user.sync_threshold);
5925 }
5926 if user.resize_factor.is_some() {
5927 prop_assert_eq!(merged.resize_factor, user.resize_factor);
5928 }
5929 }
5930
5931 #[test]
5932 fn merge_hnsw_configs_falls_back_to_base_when_user_is_none(
5933 base in partial_hnsw_index_config_strategy(),
5934 ) {
5935 let merged = Schema::merge_hnsw_configs(Some(&base), None)
5936 .expect("merge should return Some when base is Some");
5937
5938 prop_assert_eq!(merged, base);
5940 }
5941
5942 #[test]
5943 fn merge_hnsw_configs_returns_user_when_base_is_none(
5944 user in partial_hnsw_index_config_strategy(),
5945 ) {
5946 let merged = Schema::merge_hnsw_configs(None, Some(&user))
5947 .expect("merge should return Some when user is Some");
5948
5949 prop_assert_eq!(merged, user);
5951 }
5952
5953 #[test]
5954 fn merge_spann_configs_preserves_user_overrides(
5955 base in partial_spann_index_config_strategy(),
5956 user in partial_spann_index_config_strategy(),
5957 ) {
5958 let merged = Schema::merge_spann_configs(Some(&base), Some(&user))
5959 .expect("merge should return Some when both are Some");
5960
5961 if user.search_nprobe.is_some() {
5963 prop_assert_eq!(merged.search_nprobe, user.search_nprobe);
5964 }
5965 if user.search_rng_epsilon.is_some() {
5966 prop_assert_eq!(merged.search_rng_epsilon, user.search_rng_epsilon);
5967 }
5968 if user.split_threshold.is_some() {
5969 prop_assert_eq!(merged.split_threshold, user.split_threshold);
5970 }
5971 if user.ef_construction.is_some() {
5972 prop_assert_eq!(merged.ef_construction, user.ef_construction);
5973 }
5974 if user.ef_search.is_some() {
5975 prop_assert_eq!(merged.ef_search, user.ef_search);
5976 }
5977 if user.max_neighbors.is_some() {
5978 prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
5979 }
5980 }
5981
5982 #[test]
5983 fn merge_spann_configs_falls_back_to_base_when_user_is_none(
5984 base in partial_spann_index_config_strategy(),
5985 ) {
5986 let merged = Schema::merge_spann_configs(Some(&base), None)
5987 .expect("merge should return Some when base is Some");
5988
5989 prop_assert_eq!(merged, base);
5991 }
5992
5993 #[test]
5994 fn merge_vector_index_config_preserves_user_overrides(
5995 base in vector_index_config_strategy(),
5996 user in vector_index_config_strategy(),
5997 knn in knn_index_strategy(),
5998 ) {
5999 let merged = Schema::merge_vector_index_config(&base, &user, knn);
6000
6001 if user.space.is_some() {
6003 prop_assert_eq!(merged.space, user.space);
6004 }
6005 if user.embedding_function.is_some() {
6006 prop_assert_eq!(merged.embedding_function, user.embedding_function);
6007 }
6008 if user.source_key.is_some() {
6009 prop_assert_eq!(merged.source_key, user.source_key);
6010 }
6011
6012 match knn {
6014 KnnIndex::Hnsw => {
6015 if let (Some(_base_hnsw), Some(user_hnsw)) = (&base.hnsw, &user.hnsw) {
6016 let merged_hnsw = merged.hnsw.as_ref().expect("hnsw should be Some");
6017 if user_hnsw.ef_construction.is_some() {
6018 prop_assert_eq!(merged_hnsw.ef_construction, user_hnsw.ef_construction);
6019 }
6020 }
6021 }
6022 KnnIndex::Spann => {
6023 if let (Some(_base_spann), Some(user_spann)) = (&base.spann, &user.spann) {
6024 let merged_spann = merged.spann.as_ref().expect("spann should be Some");
6025 if user_spann.search_nprobe.is_some() {
6026 prop_assert_eq!(merged_spann.search_nprobe, user_spann.search_nprobe);
6027 }
6028 }
6029 }
6030 }
6031 }
6032 }
6033
6034 fn expected_vector_index_config(
6035 config: &InternalCollectionConfiguration,
6036 ) -> VectorIndexConfig {
6037 match &config.vector_index {
6038 VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
6039 space: Some(hnsw_config.space.clone()),
6040 embedding_function: config.embedding_function.clone(),
6041 source_key: None,
6042 hnsw: Some(HnswIndexConfig {
6043 ef_construction: Some(hnsw_config.ef_construction),
6044 max_neighbors: Some(hnsw_config.max_neighbors),
6045 ef_search: Some(hnsw_config.ef_search),
6046 num_threads: Some(hnsw_config.num_threads),
6047 batch_size: Some(hnsw_config.batch_size),
6048 sync_threshold: Some(hnsw_config.sync_threshold),
6049 resize_factor: Some(hnsw_config.resize_factor),
6050 }),
6051 spann: None,
6052 },
6053 VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
6054 space: Some(spann_config.space.clone()),
6055 embedding_function: config.embedding_function.clone(),
6056 source_key: None,
6057 hnsw: None,
6058 spann: Some(SpannIndexConfig {
6059 search_nprobe: Some(spann_config.search_nprobe),
6060 search_rng_factor: Some(spann_config.search_rng_factor),
6061 search_rng_epsilon: Some(spann_config.search_rng_epsilon),
6062 nreplica_count: Some(spann_config.nreplica_count),
6063 write_rng_factor: Some(spann_config.write_rng_factor),
6064 write_rng_epsilon: Some(spann_config.write_rng_epsilon),
6065 split_threshold: Some(spann_config.split_threshold),
6066 num_samples_kmeans: Some(spann_config.num_samples_kmeans),
6067 initial_lambda: Some(spann_config.initial_lambda),
6068 reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
6069 merge_threshold: Some(spann_config.merge_threshold),
6070 num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
6071 write_nprobe: Some(spann_config.write_nprobe),
6072 ef_construction: Some(spann_config.ef_construction),
6073 ef_search: Some(spann_config.ef_search),
6074 max_neighbors: Some(spann_config.max_neighbors),
6075 }),
6076 },
6077 }
6078 }
6079
6080 fn non_special_key_strategy() -> BoxedStrategy<String> {
6081 string_regex(TEST_NAME_PATTERN)
6082 .unwrap()
6083 .prop_filter("exclude special keys", |key| {
6084 key != DOCUMENT_KEY && key != EMBEDDING_KEY
6085 })
6086 .boxed()
6087 }
6088
6089 fn source_key_strategy() -> BoxedStrategy<Option<String>> {
6090 proptest::option::of(prop_oneof![
6091 Just(DOCUMENT_KEY.to_string()),
6092 string_regex(TEST_NAME_PATTERN).unwrap(),
6093 ])
6094 .boxed()
6095 }
6096
6097 fn fts_index_type_strategy() -> impl Strategy<Value = FtsIndexType> {
6098 any::<bool>().prop_map(|enabled| FtsIndexType {
6099 enabled,
6100 config: FtsIndexConfig {},
6101 })
6102 }
6103
6104 fn string_inverted_index_type_strategy() -> impl Strategy<Value = StringInvertedIndexType> {
6105 any::<bool>().prop_map(|enabled| StringInvertedIndexType {
6106 enabled,
6107 config: StringInvertedIndexConfig {},
6108 })
6109 }
6110
6111 fn string_value_type_strategy() -> BoxedStrategy<Option<StringValueType>> {
6112 proptest::option::of(
6113 (
6114 proptest::option::of(string_inverted_index_type_strategy()),
6115 proptest::option::of(fts_index_type_strategy()),
6116 )
6117 .prop_map(|(string_inverted_index, fts_index)| {
6118 StringValueType {
6119 string_inverted_index,
6120 fts_index,
6121 }
6122 }),
6123 )
6124 .boxed()
6125 }
6126
6127 fn float_inverted_index_type_strategy() -> impl Strategy<Value = FloatInvertedIndexType> {
6128 any::<bool>().prop_map(|enabled| FloatInvertedIndexType {
6129 enabled,
6130 config: FloatInvertedIndexConfig {},
6131 })
6132 }
6133
6134 fn float_value_type_strategy() -> BoxedStrategy<Option<FloatValueType>> {
6135 proptest::option::of(
6136 proptest::option::of(float_inverted_index_type_strategy()).prop_map(
6137 |float_inverted_index| FloatValueType {
6138 float_inverted_index,
6139 },
6140 ),
6141 )
6142 .boxed()
6143 }
6144
6145 fn int_inverted_index_type_strategy() -> impl Strategy<Value = IntInvertedIndexType> {
6146 any::<bool>().prop_map(|enabled| IntInvertedIndexType {
6147 enabled,
6148 config: IntInvertedIndexConfig {},
6149 })
6150 }
6151
6152 fn int_value_type_strategy() -> BoxedStrategy<Option<IntValueType>> {
6153 proptest::option::of(
6154 proptest::option::of(int_inverted_index_type_strategy())
6155 .prop_map(|int_inverted_index| IntValueType { int_inverted_index }),
6156 )
6157 .boxed()
6158 }
6159
6160 fn bool_inverted_index_type_strategy() -> impl Strategy<Value = BoolInvertedIndexType> {
6161 any::<bool>().prop_map(|enabled| BoolInvertedIndexType {
6162 enabled,
6163 config: BoolInvertedIndexConfig {},
6164 })
6165 }
6166
6167 fn bool_value_type_strategy() -> BoxedStrategy<Option<BoolValueType>> {
6168 proptest::option::of(
6169 proptest::option::of(bool_inverted_index_type_strategy()).prop_map(
6170 |bool_inverted_index| BoolValueType {
6171 bool_inverted_index,
6172 },
6173 ),
6174 )
6175 .boxed()
6176 }
6177
6178 fn sparse_vector_index_config_strategy() -> impl Strategy<Value = SparseVectorIndexConfig> {
6179 (
6180 sparse_embedding_function_strategy(),
6181 source_key_strategy(),
6182 proptest::option::of(any::<bool>()),
6183 )
6184 .prop_map(|(embedding_function, source_key, bm25)| {
6185 SparseVectorIndexConfig {
6186 embedding_function,
6187 source_key,
6188 bm25,
6189 }
6190 })
6191 }
6192
6193 fn sparse_vector_value_type_strategy() -> BoxedStrategy<Option<SparseVectorValueType>> {
6194 proptest::option::of(
6195 (
6196 any::<bool>(),
6197 proptest::option::of(sparse_vector_index_config_strategy()),
6198 )
6199 .prop_map(|(enabled, config)| SparseVectorValueType {
6200 sparse_vector_index: config.map(|cfg| SparseVectorIndexType {
6201 enabled,
6202 config: cfg,
6203 }),
6204 }),
6205 )
6206 .boxed()
6207 }
6208
6209 fn hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
6210 internal_hnsw_configuration_strategy().prop_map(|config| HnswIndexConfig {
6211 ef_construction: Some(config.ef_construction),
6212 max_neighbors: Some(config.max_neighbors),
6213 ef_search: Some(config.ef_search),
6214 num_threads: Some(config.num_threads),
6215 batch_size: Some(config.batch_size),
6216 sync_threshold: Some(config.sync_threshold),
6217 resize_factor: Some(config.resize_factor),
6218 })
6219 }
6220
6221 fn spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
6222 internal_spann_configuration_strategy().prop_map(|config| SpannIndexConfig {
6223 search_nprobe: Some(config.search_nprobe),
6224 search_rng_factor: Some(config.search_rng_factor),
6225 search_rng_epsilon: Some(config.search_rng_epsilon),
6226 nreplica_count: Some(config.nreplica_count),
6227 write_rng_factor: Some(config.write_rng_factor),
6228 write_rng_epsilon: Some(config.write_rng_epsilon),
6229 split_threshold: Some(config.split_threshold),
6230 num_samples_kmeans: Some(config.num_samples_kmeans),
6231 initial_lambda: Some(config.initial_lambda),
6232 reassign_neighbor_count: Some(config.reassign_neighbor_count),
6233 merge_threshold: Some(config.merge_threshold),
6234 num_centers_to_merge_to: Some(config.num_centers_to_merge_to),
6235 write_nprobe: Some(config.write_nprobe),
6236 ef_construction: Some(config.ef_construction),
6237 ef_search: Some(config.ef_search),
6238 max_neighbors: Some(config.max_neighbors),
6239 })
6240 }
6241
6242 fn vector_index_config_strategy() -> impl Strategy<Value = VectorIndexConfig> {
6243 (
6244 proptest::option::of(space_strategy()),
6245 embedding_function_strategy(),
6246 source_key_strategy(),
6247 proptest::option::of(hnsw_index_config_strategy()),
6248 proptest::option::of(spann_index_config_strategy()),
6249 )
6250 .prop_map(|(space, embedding_function, source_key, hnsw, spann)| {
6251 VectorIndexConfig {
6252 space,
6253 embedding_function,
6254 source_key,
6255 hnsw,
6256 spann,
6257 }
6258 })
6259 }
6260
6261 fn vector_index_type_strategy() -> impl Strategy<Value = VectorIndexType> {
6262 (any::<bool>(), vector_index_config_strategy())
6263 .prop_map(|(enabled, config)| VectorIndexType { enabled, config })
6264 }
6265
6266 fn float_list_value_type_strategy() -> BoxedStrategy<Option<FloatListValueType>> {
6267 proptest::option::of(
6268 proptest::option::of(vector_index_type_strategy())
6269 .prop_map(|vector_index| FloatListValueType { vector_index }),
6270 )
6271 .boxed()
6272 }
6273
6274 fn value_types_strategy() -> BoxedStrategy<ValueTypes> {
6275 (
6276 string_value_type_strategy(),
6277 float_list_value_type_strategy(),
6278 sparse_vector_value_type_strategy(),
6279 int_value_type_strategy(),
6280 float_value_type_strategy(),
6281 bool_value_type_strategy(),
6282 )
6283 .prop_map(
6284 |(string, float_list, sparse_vector, int, float, boolean)| ValueTypes {
6285 string,
6286 float_list,
6287 sparse_vector,
6288 int,
6289 float,
6290 boolean,
6291 },
6292 )
6293 .boxed()
6294 }
6295
6296 fn schema_strategy() -> BoxedStrategy<Schema> {
6297 (
6298 value_types_strategy(),
6299 proptest::collection::hash_map(
6300 non_special_key_strategy(),
6301 value_types_strategy(),
6302 0..=3,
6303 ),
6304 proptest::option::of(value_types_strategy()),
6305 proptest::option::of(value_types_strategy()),
6306 )
6307 .prop_map(
6308 |(defaults, mut extra_keys, document_override, embedding_override)| {
6309 if let Some(doc) = document_override {
6310 extra_keys.insert(DOCUMENT_KEY.to_string(), doc);
6311 }
6312 if let Some(embed) = embedding_override {
6313 extra_keys.insert(EMBEDDING_KEY.to_string(), embed);
6314 }
6315 Schema {
6316 defaults,
6317 keys: extra_keys,
6318 cmek: None,
6319 source_attached_function_id: None,
6320 }
6321 },
6322 )
6323 .boxed()
6324 }
6325
6326 fn force_non_default_schema(mut schema: Schema) -> Schema {
6327 if schema.is_default() {
6328 if let Some(string_value) = schema
6329 .defaults
6330 .string
6331 .as_mut()
6332 .and_then(|string_value| string_value.string_inverted_index.as_mut())
6333 {
6334 string_value.enabled = !string_value.enabled;
6335 } else {
6336 schema.defaults.string = Some(StringValueType {
6337 string_inverted_index: Some(StringInvertedIndexType {
6338 enabled: false,
6339 config: StringInvertedIndexConfig {},
6340 }),
6341 fts_index: None,
6342 });
6343 }
6344 }
6345 schema
6346 }
6347
6348 fn non_default_schema_strategy() -> BoxedStrategy<Schema> {
6349 schema_strategy().prop_map(force_non_default_schema).boxed()
6350 }
6351
6352 fn extract_vector_configs(schema: &Schema) -> (VectorIndexConfig, VectorIndexConfig) {
6353 let defaults = schema
6354 .defaults
6355 .float_list
6356 .as_ref()
6357 .and_then(|fl| fl.vector_index.as_ref())
6358 .map(|vi| vi.config.clone())
6359 .expect("defaults vector index missing");
6360
6361 let embedding = schema
6362 .keys
6363 .get(EMBEDDING_KEY)
6364 .and_then(|value_types| value_types.float_list.as_ref())
6365 .and_then(|fl| fl.vector_index.as_ref())
6366 .map(|vi| vi.config.clone())
6367 .expect("#embedding vector index missing");
6368
6369 (defaults, embedding)
6370 }
6371
6372 proptest! {
6373 #[test]
6374 fn reconcile_schema_and_config_matches_convert_for_config_only(
6375 config in internal_collection_configuration_strategy(),
6376 knn in knn_index_strategy(),
6377 ) {
6378 let result = Schema::reconcile_schema_and_config(None, Some(&config), knn)
6379 .expect("reconciliation should succeed");
6380
6381 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6382 let expected_config = expected_vector_index_config(&config);
6383
6384 prop_assert_eq!(defaults_vi, expected_config.clone());
6385
6386 let mut expected_embedding_config = expected_config;
6387 expected_embedding_config.source_key = Some(DOCUMENT_KEY.to_string());
6388 prop_assert_eq!(embedding_vi, expected_embedding_config);
6389
6390 prop_assert_eq!(result.keys.len(), 2);
6391 }
6392 }
6393
6394 proptest! {
6395 #[test]
6396 fn reconcile_schema_and_config_errors_when_both_non_default(
6397 config in non_default_internal_collection_configuration_strategy(),
6398 knn in knn_index_strategy(),
6399 ) {
6400 let schema = Schema::try_from(&config)
6401 .expect("conversion should succeed");
6402 prop_assume!(!schema.is_default());
6403
6404 let result = Schema::reconcile_schema_and_config(Some(&schema), Some(&config), knn);
6405
6406 prop_assert!(matches!(result, Err(SchemaError::ConfigAndSchemaConflict)));
6407 }
6408 }
6409
6410 proptest! {
6411 #[test]
6412 fn reconcile_schema_and_config_matches_schema_only_path(
6413 schema in schema_strategy(),
6414 knn in knn_index_strategy(),
6415 ) {
6416 let result = Schema::reconcile_schema_and_config(Some(&schema), None, knn)
6417 .expect("reconciliation should succeed");
6418
6419 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6420
6421 if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
6423 if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
6424 if let Some(schema_space) = &schema_vi.config.space {
6426 prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
6427 }
6428 if let Some(schema_ef) = &schema_vi.config.embedding_function {
6429 prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
6430 }
6431 match knn {
6433 KnnIndex::Hnsw => {
6434 if let Some(schema_hnsw) = &schema_vi.config.hnsw {
6435 if let Some(merged_hnsw) = &defaults_vi.hnsw {
6436 if let Some(schema_ef_construction) = schema_hnsw.ef_construction {
6437 prop_assert_eq!(merged_hnsw.ef_construction, Some(schema_ef_construction));
6438 }
6439 }
6440 }
6441 }
6442 KnnIndex::Spann => {
6443 if let Some(schema_spann) = &schema_vi.config.spann {
6444 if let Some(merged_spann) = &defaults_vi.spann {
6445 if let Some(schema_search_nprobe) = schema_spann.search_nprobe {
6446 prop_assert_eq!(merged_spann.search_nprobe, Some(schema_search_nprobe));
6447 }
6448 }
6449 }
6450 }
6451 }
6452 }
6453 }
6454
6455 if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
6457 if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
6458 if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
6459 if let Some(schema_space) = &embedding_vi_type.config.space {
6460 prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
6461 }
6462 }
6463 }
6464 }
6465 }
6466 }
6467
6468 proptest! {
6469 #[test]
6470 fn reconcile_schema_and_config_with_default_schema_and_default_config_applies_embedding_function(
6471 embedding_function in default_embedding_function_strategy(),
6472 knn in knn_index_strategy(),
6473 ) {
6474 let schema = Schema::new_default(knn);
6475 let mut config = match knn {
6476 KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
6477 KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
6478 };
6479 config.embedding_function = embedding_function.clone();
6480
6481 let result = Schema::reconcile_schema_and_config(
6482 Some(&schema),
6483 Some(&config),
6484 knn,
6485 )
6486 .expect("reconciliation should succeed");
6487
6488 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6489
6490 if let Some(ef) = embedding_function {
6492 prop_assert_eq!(defaults_vi.embedding_function, Some(ef.clone()));
6493 prop_assert_eq!(embedding_vi.embedding_function, Some(ef));
6494 } else {
6495 prop_assert_eq!(defaults_vi.embedding_function, None);
6497 prop_assert_eq!(embedding_vi.embedding_function, None);
6498 }
6499 }
6500 }
6501
6502 proptest! {
6503 #[test]
6504 fn reconcile_schema_and_config_with_default_config_keeps_non_default_schema(
6505 schema in non_default_schema_strategy(),
6506 knn in knn_index_strategy(),
6507 ) {
6508 let default_config = match knn {
6509 KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
6510 KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
6511 };
6512
6513 let result = Schema::reconcile_schema_and_config(
6514 Some(&schema),
6515 Some(&default_config),
6516 knn,
6517 )
6518 .expect("reconciliation should succeed");
6519
6520 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6521
6522 if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
6525 if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
6526 if let Some(schema_space) = &schema_vi.config.space {
6527 prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
6528 }
6529 if let Some(schema_ef) = &schema_vi.config.embedding_function {
6530 prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
6531 }
6532 }
6533 }
6534
6535 if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
6537 if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
6538 if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
6539 if let Some(schema_space) = &embedding_vi_type.config.space {
6540 prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
6541 }
6542 }
6543 }
6544 }
6545 }
6546 }
6547 }
6548}