1use chroma_error::{ChromaError, ErrorCodes};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::sync::{Arc, LazyLock};
6use thiserror::Error;
7use validator::Validate;
8
9use crate::chroma_proto;
10use crate::collection_configuration::{
11 EmbeddingFunctionConfiguration, InternalCollectionConfiguration,
12 UpdateVectorIndexConfiguration, VectorIndexConfiguration,
13};
14use crate::hnsw_configuration::Space;
15use crate::metadata::{MetadataComparison, MetadataValueType, Where};
16use crate::operator::QueryVector;
17use crate::{
18 default_batch_size, default_construction_ef, default_construction_ef_spann,
19 default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
20 default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
21 default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
22 default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
23 default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
24 default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor, ConversionError,
25 HnswParametersFromSegmentError, InternalHnswConfiguration, InternalSpannConfiguration,
26 InternalUpdateCollectionConfiguration, KnnIndex, Segment, CHROMA_KEY,
27};
28
29impl ChromaError for SchemaError {
30 fn code(&self) -> ErrorCodes {
31 match self {
32 SchemaError::MissingIndexConfiguration { .. } => ErrorCodes::Internal,
35 SchemaError::InvalidSchema { .. } => ErrorCodes::Internal,
36 SchemaError::DefaultsMismatch => ErrorCodes::Internal,
39 SchemaError::ConfigurationConflict { .. } => ErrorCodes::Internal,
40
41 SchemaError::InvalidUserInput { .. } => ErrorCodes::InvalidArgument,
44 SchemaError::ConfigAndSchemaConflict => ErrorCodes::InvalidArgument,
45 SchemaError::InvalidHnswConfig(_) => ErrorCodes::InvalidArgument,
46 SchemaError::InvalidSpannConfig(_) => ErrorCodes::InvalidArgument,
47 SchemaError::Builder(e) => e.code(),
48 }
49 }
50}
51
/// Errors produced by the schema handling in this module.
///
/// The `#[error]` attribute on each variant supplies its display message.
#[derive(Debug, Error)]
pub enum SchemaError {
    /// No index configuration was found for `key` with the given `value_type`.
    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
    MissingIndexConfiguration { key: String, value_type: String },
    /// Schema reconciliation failed; `reason` carries the details.
    #[error("Schema reconciliation failed: {reason}")]
    InvalidSchema { reason: String },
    /// A collection configuration and a schema were both supplied.
    #[error("Cannot set both collection config and schema simultaneously")]
    ConfigAndSchemaConflict,
    /// Two schemas being merged do not share identical defaults.
    #[error("Cannot merge schemas with differing defaults")]
    DefaultsMismatch,
    /// Two index configurations for the same slot differ; `context` names the slot.
    #[error("Conflicting configuration for {context}")]
    ConfigurationConflict { context: String },
    /// HNSW configuration failed validation.
    #[error("Invalid HNSW configuration: {0}")]
    InvalidHnswConfig(validator::ValidationErrors),
    /// SPANN configuration failed validation.
    #[error("Invalid SPANN configuration: {0}")]
    InvalidSpannConfig(validator::ValidationErrors),
    /// Schema input was rejected; `reason` carries the details.
    #[error("Invalid schema input: {reason}")]
    InvalidUserInput { reason: String },
    /// A [`SchemaBuilderError`], displayed transparently and convertible via `From`.
    #[error(transparent)]
    Builder(#[from] SchemaBuilderError),
}
73
/// Errors describing index operations that the schema builder rejects.
///
/// Every variant is reported as `InvalidArgument` by its `ChromaError` impl.
#[derive(Debug, Error)]
pub enum SchemaBuilderError {
    /// A vector index was requested on a specific key instead of globally.
    #[error("Vector index must be configured globally using create_index(None, config), not on specific key '{key}'")]
    VectorIndexMustBeGlobal { key: String },
    /// An FTS index was requested on a specific key instead of globally.
    #[error("FTS index must be configured globally using create_index(None, config), not on specific key '{key}'")]
    FtsIndexMustBeGlobal { key: String },
    /// An attempt was made to modify a system-managed special key.
    #[error("Cannot modify special key '{key}' - it is managed automatically by the system. To customize vector search, modify the global vector config instead.")]
    SpecialKeyModificationNotAllowed { key: String },
    /// A sparse vector index was requested without naming a key.
    #[error("Sparse vector index requires a specific key. Use create_index(Some(\"key_name\"), config) instead of create_index(None, config)")]
    SparseVectorRequiresKey,
    /// A second sparse vector index was requested; `existing_key` already has one.
    #[error("Only one sparse vector index allowed per collection. Key '{existing_key}' already has a sparse vector index. Remove it first or use that key.")]
    MultipleSparseVectorIndexes { existing_key: String },
    /// Deleting the vector index is not supported.
    #[error("Vector index deletion not supported. The vector index is always enabled on #embedding. To disable vector search, disable the collection instead.")]
    VectorIndexDeletionNotSupported,
    /// Deleting the FTS index is not supported.
    #[error("FTS index deletion not supported. The FTS index is always enabled on #document. To disable full-text search, use a different collection without FTS.")]
    FtsIndexDeletionNotSupported,
    /// Deleting a sparse vector index is not supported yet.
    #[error("Sparse vector index deletion not supported yet. Sparse vector indexes cannot be removed once created.")]
    SparseVectorIndexDeletionNotSupported,
}
93
/// Errors reported when a metadata filter cannot be validated.
#[derive(Debug, Error)]
pub enum FilterValidationError {
    /// The filter targets `key` with `value_type`, but indexing is disabled for that pair.
    #[error(
        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
    )]
    IndexingDisabled {
        key: String,
        value_type: MetadataValueType,
    },
    /// A [`SchemaError`], displayed transparently and convertible via `From`.
    #[error(transparent)]
    Schema(#[from] SchemaError),
}
106
impl ChromaError for SchemaBuilderError {
    /// Every builder error is reported as `InvalidArgument`, regardless of variant.
    fn code(&self) -> ErrorCodes {
        ErrorCodes::InvalidArgument
    }
}
112
impl ChromaError for FilterValidationError {
    /// Returns `InvalidArgument` when indexing is disabled for the filtered key and
    /// `Internal` for any wrapped schema error.
    fn code(&self) -> ErrorCodes {
        match self {
            FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
            // NOTE(review): the wrapped SchemaError's own code is not consulted, so
            // schema errors that would report InvalidArgument surface as Internal
            // here — confirm this is intentional.
            FilterValidationError::Schema(_) => ErrorCodes::Internal,
        }
    }
}
121
// Value type names. These match the serialized field names of `ValueTypes`.
pub const STRING_VALUE_NAME: &str = "string";
pub const INT_VALUE_NAME: &str = "int";
pub const BOOL_VALUE_NAME: &str = "bool";
pub const FLOAT_VALUE_NAME: &str = "float";
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// Index names. These match the serialized field names of the per-value-type structs.
pub const FTS_INDEX_NAME: &str = "fts_index";
pub const VECTOR_INDEX_NAME: &str = "vector_index";
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

/// Schema key whose default override enables the FTS index.
pub const DOCUMENT_KEY: &str = "#document";
/// Schema key whose default override enables the vector index.
pub const EMBEDDING_KEY: &str = "#embedding";
147
/// Pattern used to check GCP CMEK resource names, compiled on first use.
///
/// Expected shape: `projects/<..>/locations/<..>/keyRings/<..>/cryptoKeys/<..>`.
/// Each segment only has to be non-empty (`.+`), so this is a structural check.
static CMEK_GCP_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^projects/.+/locations/.+/keyRings/.+/cryptoKeys/.+$")
        .expect("The CMEK pattern for GCP should be valid")
});
153
/// A CMEK reference, tagged by provider.
///
/// Variant names are serialized in `snake_case` (so `Gcp` becomes `gcp`).
// NOTE(review): CMEK presumably stands for customer-managed encryption key — confirm.
#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case")]
pub enum Cmek {
    /// GCP key, stored as a shared resource-name string.
    Gcp(Arc<String>),
}
166
167impl Cmek {
168 pub fn gcp(resource: String) -> Self {
178 Cmek::Gcp(Arc::new(resource))
179 }
180
181 pub fn validate_pattern(&self) -> bool {
187 match self {
188 Cmek::Gcp(resource) => CMEK_GCP_RE.is_match(resource),
189 }
190 }
191}
192
193impl TryFrom<chroma_proto::Cmek> for Cmek {
194 type Error = ConversionError;
195
196 fn try_from(proto: chroma_proto::Cmek) -> Result<Self, Self::Error> {
197 match proto.provider {
198 Some(chroma_proto::cmek::Provider::Gcp(resource)) => Ok(Cmek::gcp(resource)),
199 None => Err(ConversionError::DecodeError),
200 }
201 }
202}
203
204impl From<Cmek> for chroma_proto::Cmek {
205 fn from(cmek: Cmek) -> Self {
206 match cmek {
207 Cmek::Gcp(resource) => chroma_proto::Cmek {
208 provider: Some(chroma_proto::cmek::Provider::Gcp((*resource).clone())),
209 },
210 }
211 }
212}
213
/// Index schema: per-value-type defaults plus per-key overrides.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct Schema {
    /// Index settings for each value type, used as the schema-wide defaults.
    pub defaults: ValueTypes,
    /// Index settings for individual keys. Serialized as `keys`; `key_overrides`
    /// is also accepted when deserializing.
    #[serde(rename = "keys", alias = "key_overrides")]
    pub keys: HashMap<String, ValueTypes>,
    /// Optional CMEK reference; left out of serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(feature = "utoipa", schema(value_type = Option<Object>))]
    pub cmek: Option<Cmek>,
    /// Optional identifier; left out of serialized output when `None`.
    // NOTE(review): presumably the id of the attached function this schema came from — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_attached_function_id: Option<String>,
}
239
240impl Schema {
241 pub fn update(&mut self, configuration: &InternalUpdateCollectionConfiguration) {
242 if let Some(vector_update) = &configuration.vector_index {
243 if let Some(default_vector_index) = self.defaults_vector_index_mut() {
244 Self::apply_vector_index_update(default_vector_index, vector_update);
245 }
246 if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
247 Self::apply_vector_index_update(embedding_vector_index, vector_update);
248 }
249 }
250
251 if let Some(embedding_function) = configuration.embedding_function.as_ref() {
252 if let Some(default_vector_index) = self.defaults_vector_index_mut() {
253 default_vector_index.config.embedding_function = Some(embedding_function.clone());
254 }
255 if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
256 embedding_vector_index.config.embedding_function = Some(embedding_function.clone());
257 }
258 }
259 }
260
261 fn defaults_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
262 self.defaults
263 .float_list
264 .as_mut()
265 .and_then(|float_list| float_list.vector_index.as_mut())
266 }
267
268 fn embedding_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
269 self.keys
270 .get_mut(EMBEDDING_KEY)
271 .and_then(|value_types| value_types.float_list.as_mut())
272 .and_then(|float_list| float_list.vector_index.as_mut())
273 }
274
275 fn apply_vector_index_update(
276 vector_index: &mut VectorIndexType,
277 update: &UpdateVectorIndexConfiguration,
278 ) {
279 match update {
280 UpdateVectorIndexConfiguration::Hnsw(Some(hnsw_update)) => {
281 if let Some(hnsw_config) = vector_index.config.hnsw.as_mut() {
282 if let Some(ef_search) = hnsw_update.ef_search {
283 hnsw_config.ef_search = Some(ef_search);
284 }
285 if let Some(max_neighbors) = hnsw_update.max_neighbors {
286 hnsw_config.max_neighbors = Some(max_neighbors);
287 }
288 if let Some(num_threads) = hnsw_update.num_threads {
289 hnsw_config.num_threads = Some(num_threads);
290 }
291 if let Some(resize_factor) = hnsw_update.resize_factor {
292 hnsw_config.resize_factor = Some(resize_factor);
293 }
294 if let Some(sync_threshold) = hnsw_update.sync_threshold {
295 hnsw_config.sync_threshold = Some(sync_threshold);
296 }
297 if let Some(batch_size) = hnsw_update.batch_size {
298 hnsw_config.batch_size = Some(batch_size);
299 }
300 }
301 }
302 UpdateVectorIndexConfiguration::Hnsw(None) => {}
303 UpdateVectorIndexConfiguration::Spann(Some(spann_update)) => {
304 if let Some(spann_config) = vector_index.config.spann.as_mut() {
305 if let Some(search_nprobe) = spann_update.search_nprobe {
306 spann_config.search_nprobe = Some(search_nprobe);
307 }
308 if let Some(ef_search) = spann_update.ef_search {
309 spann_config.ef_search = Some(ef_search);
310 }
311 }
312 }
313 UpdateVectorIndexConfiguration::Spann(None) => {}
314 }
315 }
316
317 pub fn is_sparse_index_enabled(&self) -> bool {
318 let defaults_enabled = self
319 .defaults
320 .sparse_vector
321 .as_ref()
322 .and_then(|sv| sv.sparse_vector_index.as_ref())
323 .is_some_and(|idx| idx.enabled);
324 let key_enabled = self.keys.values().any(|value_types| {
325 value_types
326 .sparse_vector
327 .as_ref()
328 .and_then(|sv| sv.sparse_vector_index.as_ref())
329 .is_some_and(|idx| idx.enabled)
330 });
331 defaults_enabled || key_enabled
332 }
333}
334
335impl Default for Schema {
336 fn default() -> Self {
353 let defaults = ValueTypes {
355 string: Some(StringValueType {
356 fts_index: Some(FtsIndexType {
357 enabled: false,
358 config: FtsIndexConfig {},
359 }),
360 string_inverted_index: Some(StringInvertedIndexType {
361 enabled: true,
362 config: StringInvertedIndexConfig {},
363 }),
364 }),
365 float_list: Some(FloatListValueType {
366 vector_index: Some(VectorIndexType {
367 enabled: false,
368 config: VectorIndexConfig {
369 space: None, embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
371 source_key: None,
372 hnsw: None, spann: None, },
375 }),
376 }),
377 sparse_vector: Some(SparseVectorValueType {
378 sparse_vector_index: Some(SparseVectorIndexType {
379 enabled: false,
380 config: SparseVectorIndexConfig {
381 embedding_function: None,
382 source_key: None,
383 bm25: None,
384 },
385 }),
386 }),
387 int: Some(IntValueType {
388 int_inverted_index: Some(IntInvertedIndexType {
389 enabled: true,
390 config: IntInvertedIndexConfig {},
391 }),
392 }),
393 float: Some(FloatValueType {
394 float_inverted_index: Some(FloatInvertedIndexType {
395 enabled: true,
396 config: FloatInvertedIndexConfig {},
397 }),
398 }),
399 boolean: Some(BoolValueType {
400 bool_inverted_index: Some(BoolInvertedIndexType {
401 enabled: true,
402 config: BoolInvertedIndexConfig {},
403 }),
404 }),
405 };
406
407 let mut keys = HashMap::new();
409
410 keys.insert(
412 DOCUMENT_KEY.to_string(),
413 ValueTypes {
414 string: Some(StringValueType {
415 fts_index: Some(FtsIndexType {
416 enabled: true,
417 config: FtsIndexConfig {},
418 }),
419 string_inverted_index: Some(StringInvertedIndexType {
420 enabled: false,
421 config: StringInvertedIndexConfig {},
422 }),
423 }),
424 ..Default::default()
425 },
426 );
427
428 keys.insert(
430 EMBEDDING_KEY.to_string(),
431 ValueTypes {
432 float_list: Some(FloatListValueType {
433 vector_index: Some(VectorIndexType {
434 enabled: true,
435 config: VectorIndexConfig {
436 space: None, embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
438 source_key: Some(DOCUMENT_KEY.to_string()),
439 hnsw: None, spann: None, },
442 }),
443 }),
444 ..Default::default()
445 },
446 );
447
448 Schema {
449 defaults,
450 keys,
451 cmek: None,
452 source_attached_function_id: None,
453 }
454 }
455}
456
457pub fn is_embedding_function_default(
458 embedding_function: &Option<EmbeddingFunctionConfiguration>,
459) -> bool {
460 match embedding_function {
461 None => true,
462 Some(embedding_function) => embedding_function.is_default(),
463 }
464}
465
466pub fn is_space_default(space: &Option<Space>) -> bool {
468 match space {
469 None => true, Some(s) => *s == default_space(), }
472}
473
474pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
476 hnsw_config.ef_construction == Some(default_construction_ef())
477 && hnsw_config.ef_search == Some(default_search_ef())
478 && hnsw_config.max_neighbors == Some(default_m())
479 && hnsw_config.num_threads == Some(default_num_threads())
480 && hnsw_config.batch_size == Some(default_batch_size())
481 && hnsw_config.sync_threshold == Some(default_sync_threshold())
482 && hnsw_config.resize_factor == Some(default_resize_factor())
483}
484
/// Optional index settings for each supported value type.
///
/// Every field is left out of serialized output when it is `None`, and each one
/// also deserializes from a `#`-prefixed alias of its name (for example `#string`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct ValueTypes {
    /// Settings for string values.
    #[serde(
        rename = "string",
        alias = "#string",
        skip_serializing_if = "Option::is_none"
    )]
    pub string: Option<StringValueType>,

    /// Settings for float-list values.
    #[serde(
        rename = "float_list",
        alias = "#float_list",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_list: Option<FloatListValueType>,

    /// Settings for sparse-vector values.
    #[serde(
        rename = "sparse_vector",
        alias = "#sparse_vector",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector: Option<SparseVectorValueType>,

    /// Settings for integer values.
    #[serde(
        rename = "int",
        alias = "#int",
        skip_serializing_if = "Option::is_none"
    )]
    pub int: Option<IntValueType>,

    /// Settings for float values.
    #[serde(
        rename = "float",
        alias = "#float",
        skip_serializing_if = "Option::is_none"
    )]
    pub float: Option<FloatValueType>,

    /// Settings for boolean values; the serialized name is `bool`, not `boolean`.
    #[serde(
        rename = "bool",
        alias = "#bool",
        skip_serializing_if = "Option::is_none"
    )]
    pub boolean: Option<BoolValueType>,
}
538
/// Index settings available for string values.
///
/// Each field is left out of serialized output when `None` and also deserializes
/// from a `$`-prefixed alias of its name.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringValueType {
    /// FTS index settings.
    #[serde(
        rename = "fts_index",
        alias = "$fts_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub fts_index: Option<FtsIndexType>,

    /// String inverted index settings.
    #[serde(
        rename = "string_inverted_index",
        alias = "$string_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub string_inverted_index: Option<StringInvertedIndexType>,
}
557
/// Index settings available for float-list values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatListValueType {
    /// Vector index settings; left out of serialized output when `None`, and also
    /// accepted as `$vector_index` on input.
    #[serde(
        rename = "vector_index",
        alias = "$vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub vector_index: Option<VectorIndexType>,
}
569
/// Index settings available for sparse-vector values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorValueType {
    /// Sparse vector index settings; left out of serialized output when `None`, and
    /// also accepted as `$sparse_vector_index` on input.
    #[serde(
        rename = "sparse_vector_index",
        alias = "$sparse_vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector_index: Option<SparseVectorIndexType>,
}
581
/// Index settings available for integer values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntValueType {
    /// Int inverted index settings; left out of serialized output when `None`, and
    /// also accepted as `$int_inverted_index` on input.
    #[serde(
        rename = "int_inverted_index",
        alias = "$int_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub int_inverted_index: Option<IntInvertedIndexType>,
}
594
/// Index settings available for float values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatValueType {
    /// Float inverted index settings; left out of serialized output when `None`, and
    /// also accepted as `$float_inverted_index` on input.
    #[serde(
        rename = "float_inverted_index",
        alias = "$float_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_inverted_index: Option<FloatInvertedIndexType>,
}
606
/// Index settings available for boolean values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolValueType {
    /// Bool inverted index settings; left out of serialized output when `None`, and
    /// also accepted as `$bool_inverted_index` on input.
    #[serde(
        rename = "bool_inverted_index",
        alias = "$bool_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub bool_inverted_index: Option<BoolInvertedIndexType>,
}
618
/// An FTS index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FtsIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Configuration of the index.
    pub config: FtsIndexConfig,
}
626
/// A vector index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct VectorIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Configuration of the index (space, embedding function, source key, HNSW/SPANN).
    pub config: VectorIndexConfig,
}
633
/// A sparse vector index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Configuration of the index.
    pub config: SparseVectorIndexConfig,
}
640
/// A string inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Configuration of the index.
    pub config: StringInvertedIndexConfig,
}
647
/// An int inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Configuration of the index.
    pub config: IntInvertedIndexConfig,
}
654
/// A float inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Configuration of the index.
    pub config: FloatInvertedIndexConfig,
}
661
/// A bool inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Configuration of the index.
    pub config: BoolInvertedIndexConfig,
}
668
669impl Schema {
670 pub fn new_default(default_knn_index: KnnIndex) -> Self {
672 let vector_config = VectorIndexType {
674 enabled: false,
675 config: VectorIndexConfig {
676 space: Some(default_space()),
677 embedding_function: None,
678 source_key: None,
679 hnsw: match default_knn_index {
680 KnnIndex::Hnsw => Some(HnswIndexConfig {
681 ef_construction: Some(default_construction_ef()),
682 max_neighbors: Some(default_m()),
683 ef_search: Some(default_search_ef()),
684 num_threads: Some(default_num_threads()),
685 batch_size: Some(default_batch_size()),
686 sync_threshold: Some(default_sync_threshold()),
687 resize_factor: Some(default_resize_factor()),
688 }),
689 KnnIndex::Spann => None,
690 },
691 spann: match default_knn_index {
692 KnnIndex::Hnsw => None,
693 KnnIndex::Spann => Some(SpannIndexConfig {
694 search_nprobe: Some(default_search_nprobe()),
695 search_rng_factor: Some(default_search_rng_factor()),
696 search_rng_epsilon: Some(default_search_rng_epsilon()),
697 nreplica_count: Some(default_nreplica_count()),
698 write_rng_factor: Some(default_write_rng_factor()),
699 write_rng_epsilon: Some(default_write_rng_epsilon()),
700 split_threshold: Some(default_split_threshold()),
701 num_samples_kmeans: Some(default_num_samples_kmeans()),
702 initial_lambda: Some(default_initial_lambda()),
703 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
704 merge_threshold: Some(default_merge_threshold()),
705 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
706 write_nprobe: Some(default_write_nprobe()),
707 ef_construction: Some(default_construction_ef_spann()),
708 ef_search: Some(default_search_ef_spann()),
709 max_neighbors: Some(default_m_spann()),
710 }),
711 },
712 },
713 };
714
715 let defaults = ValueTypes {
717 string: Some(StringValueType {
718 string_inverted_index: Some(StringInvertedIndexType {
719 enabled: true,
720 config: StringInvertedIndexConfig {},
721 }),
722 fts_index: Some(FtsIndexType {
723 enabled: false,
724 config: FtsIndexConfig {},
725 }),
726 }),
727 float: Some(FloatValueType {
728 float_inverted_index: Some(FloatInvertedIndexType {
729 enabled: true,
730 config: FloatInvertedIndexConfig {},
731 }),
732 }),
733 int: Some(IntValueType {
734 int_inverted_index: Some(IntInvertedIndexType {
735 enabled: true,
736 config: IntInvertedIndexConfig {},
737 }),
738 }),
739 boolean: Some(BoolValueType {
740 bool_inverted_index: Some(BoolInvertedIndexType {
741 enabled: true,
742 config: BoolInvertedIndexConfig {},
743 }),
744 }),
745 float_list: Some(FloatListValueType {
746 vector_index: Some(vector_config),
747 }),
748 sparse_vector: Some(SparseVectorValueType {
749 sparse_vector_index: Some(SparseVectorIndexType {
750 enabled: false,
751 config: SparseVectorIndexConfig {
752 embedding_function: Some(EmbeddingFunctionConfiguration::Unknown),
753 source_key: None,
754 bm25: Some(false),
755 },
756 }),
757 }),
758 };
759
760 let mut keys = HashMap::new();
762
763 let embedding_defaults = ValueTypes {
765 float_list: Some(FloatListValueType {
766 vector_index: Some(VectorIndexType {
767 enabled: true,
768 config: VectorIndexConfig {
769 space: Some(default_space()),
770 embedding_function: None,
771 source_key: Some(DOCUMENT_KEY.to_string()),
772 hnsw: match default_knn_index {
773 KnnIndex::Hnsw => Some(HnswIndexConfig {
774 ef_construction: Some(default_construction_ef()),
775 max_neighbors: Some(default_m()),
776 ef_search: Some(default_search_ef()),
777 num_threads: Some(default_num_threads()),
778 batch_size: Some(default_batch_size()),
779 sync_threshold: Some(default_sync_threshold()),
780 resize_factor: Some(default_resize_factor()),
781 }),
782 KnnIndex::Spann => None,
783 },
784 spann: match default_knn_index {
785 KnnIndex::Hnsw => None,
786 KnnIndex::Spann => Some(SpannIndexConfig {
787 search_nprobe: Some(default_search_nprobe()),
788 search_rng_factor: Some(default_search_rng_factor()),
789 search_rng_epsilon: Some(default_search_rng_epsilon()),
790 nreplica_count: Some(default_nreplica_count()),
791 write_rng_factor: Some(default_write_rng_factor()),
792 write_rng_epsilon: Some(default_write_rng_epsilon()),
793 split_threshold: Some(default_split_threshold()),
794 num_samples_kmeans: Some(default_num_samples_kmeans()),
795 initial_lambda: Some(default_initial_lambda()),
796 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
797 merge_threshold: Some(default_merge_threshold()),
798 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
799 write_nprobe: Some(default_write_nprobe()),
800 ef_construction: Some(default_construction_ef_spann()),
801 ef_search: Some(default_search_ef_spann()),
802 max_neighbors: Some(default_m_spann()),
803 }),
804 },
805 },
806 }),
807 }),
808 ..Default::default()
809 };
810 keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
811
812 let document_defaults = ValueTypes {
814 string: Some(StringValueType {
815 fts_index: Some(FtsIndexType {
816 enabled: true,
817 config: FtsIndexConfig {},
818 }),
819 string_inverted_index: Some(StringInvertedIndexType {
820 enabled: false,
821 config: StringInvertedIndexConfig {},
822 }),
823 }),
824 ..Default::default()
825 };
826 keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
827
828 Schema {
829 defaults,
830 keys,
831 cmek: None,
832 source_attached_function_id: None,
833 }
834 }
835
836 pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
837 let to_internal = |vector_index: &VectorIndexType| {
838 let space = vector_index.config.space.clone();
839 vector_index
840 .config
841 .spann
842 .clone()
843 .map(|config| (space.as_ref(), &config).into())
844 };
845
846 self.keys
847 .get(EMBEDDING_KEY)
848 .and_then(|value_types| value_types.float_list.as_ref())
849 .and_then(|float_list| float_list.vector_index.as_ref())
850 .and_then(to_internal)
851 .or_else(|| {
852 self.defaults
853 .float_list
854 .as_ref()
855 .and_then(|float_list| float_list.vector_index.as_ref())
856 .and_then(to_internal)
857 })
858 }
859
860 pub fn get_internal_hnsw_config(&self) -> Option<InternalHnswConfiguration> {
861 let to_internal = |vector_index: &VectorIndexType| {
862 if vector_index.config.spann.is_some() {
863 return None;
864 }
865 let space = vector_index.config.space.as_ref();
866 let hnsw_config = vector_index.config.hnsw.as_ref();
867 Some((space, hnsw_config).into())
868 };
869
870 self.keys
871 .get(EMBEDDING_KEY)
872 .and_then(|value_types| value_types.float_list.as_ref())
873 .and_then(|float_list| float_list.vector_index.as_ref())
874 .and_then(to_internal)
875 .or_else(|| {
876 self.defaults
877 .float_list
878 .as_ref()
879 .and_then(|float_list| float_list.vector_index.as_ref())
880 .and_then(to_internal)
881 })
882 }
883
884 pub fn get_internal_hnsw_config_with_legacy_fallback(
885 &self,
886 segment: &Segment,
887 ) -> Result<Option<InternalHnswConfiguration>, HnswParametersFromSegmentError> {
888 if let Some(config) = self.get_internal_hnsw_config() {
889 let config_from_metadata =
890 InternalHnswConfiguration::from_legacy_segment_metadata(&segment.metadata)?;
891
892 if config == InternalHnswConfiguration::default() && config != config_from_metadata {
893 return Ok(Some(config_from_metadata));
894 }
895
896 return Ok(Some(config));
897 }
898
899 Ok(None)
900 }
901
902 pub fn reconcile_with_defaults(
909 user_schema: Option<&Schema>,
910 knn_index: KnnIndex,
911 ) -> Result<Self, SchemaError> {
912 let default_schema = Schema::new_default(knn_index);
913
914 match user_schema {
915 Some(user) => {
916 let merged_defaults =
918 Self::merge_value_types(&default_schema.defaults, &user.defaults, knn_index)?;
919
920 let mut merged_keys = default_schema.keys.clone();
922 for (key, user_value_types) in &user.keys {
923 if let Some(default_value_types) = merged_keys.get(key) {
924 let merged_value_types = Self::merge_value_types(
926 default_value_types,
927 user_value_types,
928 knn_index,
929 )?;
930 merged_keys.insert(key.clone(), merged_value_types);
931 } else {
932 merged_keys.insert(key.clone(), user_value_types.clone());
934 }
935 }
936
937 Ok(Schema {
938 defaults: merged_defaults,
939 keys: merged_keys,
940 cmek: user.cmek.clone().or(default_schema.cmek.clone()),
941 source_attached_function_id: user
942 .source_attached_function_id
943 .clone()
944 .or(default_schema.source_attached_function_id.clone()),
945 })
946 }
947 None => Ok(default_schema),
948 }
949 }
950
951 pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
953 if self.defaults != other.defaults {
954 return Err(SchemaError::DefaultsMismatch);
955 }
956
957 let mut keys = self.keys.clone();
958
959 for (key, other_value_types) in &other.keys {
960 if let Some(existing) = keys.get(key).cloned() {
961 let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
962 keys.insert(key.clone(), merged);
963 } else {
964 keys.insert(key.clone(), other_value_types.clone());
965 }
966 }
967
968 Ok(Schema {
969 defaults: self.defaults.clone(),
970 keys,
971 cmek: other.cmek.clone().or(self.cmek.clone()),
972 source_attached_function_id: other
973 .source_attached_function_id
974 .clone()
975 .or(self.source_attached_function_id.clone()),
976 })
977 }
978
979 fn merge_override_value_types(
980 key: &str,
981 left: &ValueTypes,
982 right: &ValueTypes,
983 ) -> Result<ValueTypes, SchemaError> {
984 Ok(ValueTypes {
985 string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
986 float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
987 int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
988 boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
989 float_list: Self::merge_float_list_override(
990 key,
991 left.float_list.as_ref(),
992 right.float_list.as_ref(),
993 )?,
994 sparse_vector: Self::merge_sparse_vector_override(
995 key,
996 left.sparse_vector.as_ref(),
997 right.sparse_vector.as_ref(),
998 )?,
999 })
1000 }
1001
1002 fn merge_string_override(
1003 key: &str,
1004 left: Option<&StringValueType>,
1005 right: Option<&StringValueType>,
1006 ) -> Result<Option<StringValueType>, SchemaError> {
1007 match (left, right) {
1008 (Some(l), Some(r)) => Ok(Some(StringValueType {
1009 string_inverted_index: Self::merge_index_or_error(
1010 l.string_inverted_index.as_ref(),
1011 r.string_inverted_index.as_ref(),
1012 &format!("key '{key}' string.string_inverted_index"),
1013 )?,
1014 fts_index: Self::merge_index_or_error(
1015 l.fts_index.as_ref(),
1016 r.fts_index.as_ref(),
1017 &format!("key '{key}' string.fts_index"),
1018 )?,
1019 })),
1020 (Some(l), None) => Ok(Some(l.clone())),
1021 (None, Some(r)) => Ok(Some(r.clone())),
1022 (None, None) => Ok(None),
1023 }
1024 }
1025
1026 fn merge_float_override(
1027 key: &str,
1028 left: Option<&FloatValueType>,
1029 right: Option<&FloatValueType>,
1030 ) -> Result<Option<FloatValueType>, SchemaError> {
1031 match (left, right) {
1032 (Some(l), Some(r)) => Ok(Some(FloatValueType {
1033 float_inverted_index: Self::merge_index_or_error(
1034 l.float_inverted_index.as_ref(),
1035 r.float_inverted_index.as_ref(),
1036 &format!("key '{key}' float.float_inverted_index"),
1037 )?,
1038 })),
1039 (Some(l), None) => Ok(Some(l.clone())),
1040 (None, Some(r)) => Ok(Some(r.clone())),
1041 (None, None) => Ok(None),
1042 }
1043 }
1044
1045 fn merge_int_override(
1046 key: &str,
1047 left: Option<&IntValueType>,
1048 right: Option<&IntValueType>,
1049 ) -> Result<Option<IntValueType>, SchemaError> {
1050 match (left, right) {
1051 (Some(l), Some(r)) => Ok(Some(IntValueType {
1052 int_inverted_index: Self::merge_index_or_error(
1053 l.int_inverted_index.as_ref(),
1054 r.int_inverted_index.as_ref(),
1055 &format!("key '{key}' int.int_inverted_index"),
1056 )?,
1057 })),
1058 (Some(l), None) => Ok(Some(l.clone())),
1059 (None, Some(r)) => Ok(Some(r.clone())),
1060 (None, None) => Ok(None),
1061 }
1062 }
1063
1064 fn merge_bool_override(
1065 key: &str,
1066 left: Option<&BoolValueType>,
1067 right: Option<&BoolValueType>,
1068 ) -> Result<Option<BoolValueType>, SchemaError> {
1069 match (left, right) {
1070 (Some(l), Some(r)) => Ok(Some(BoolValueType {
1071 bool_inverted_index: Self::merge_index_or_error(
1072 l.bool_inverted_index.as_ref(),
1073 r.bool_inverted_index.as_ref(),
1074 &format!("key '{key}' bool.bool_inverted_index"),
1075 )?,
1076 })),
1077 (Some(l), None) => Ok(Some(l.clone())),
1078 (None, Some(r)) => Ok(Some(r.clone())),
1079 (None, None) => Ok(None),
1080 }
1081 }
1082
1083 fn merge_float_list_override(
1084 key: &str,
1085 left: Option<&FloatListValueType>,
1086 right: Option<&FloatListValueType>,
1087 ) -> Result<Option<FloatListValueType>, SchemaError> {
1088 match (left, right) {
1089 (Some(l), Some(r)) => Ok(Some(FloatListValueType {
1090 vector_index: Self::merge_index_or_error(
1091 l.vector_index.as_ref(),
1092 r.vector_index.as_ref(),
1093 &format!("key '{key}' float_list.vector_index"),
1094 )?,
1095 })),
1096 (Some(l), None) => Ok(Some(l.clone())),
1097 (None, Some(r)) => Ok(Some(r.clone())),
1098 (None, None) => Ok(None),
1099 }
1100 }
1101
1102 fn merge_sparse_vector_override(
1103 key: &str,
1104 left: Option<&SparseVectorValueType>,
1105 right: Option<&SparseVectorValueType>,
1106 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1107 match (left, right) {
1108 (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
1109 sparse_vector_index: Self::merge_index_or_error(
1110 l.sparse_vector_index.as_ref(),
1111 r.sparse_vector_index.as_ref(),
1112 &format!("key '{key}' sparse_vector.sparse_vector_index"),
1113 )?,
1114 })),
1115 (Some(l), None) => Ok(Some(l.clone())),
1116 (None, Some(r)) => Ok(Some(r.clone())),
1117 (None, None) => Ok(None),
1118 }
1119 }
1120
1121 fn merge_index_or_error<T: Clone + PartialEq>(
1122 left: Option<&T>,
1123 right: Option<&T>,
1124 context: &str,
1125 ) -> Result<Option<T>, SchemaError> {
1126 match (left, right) {
1127 (Some(l), Some(r)) => {
1128 if l == r {
1129 Ok(Some(l.clone()))
1130 } else {
1131 Err(SchemaError::ConfigurationConflict {
1132 context: context.to_string(),
1133 })
1134 }
1135 }
1136 (Some(l), None) => Ok(Some(l.clone())),
1137 (None, Some(r)) => Ok(Some(r.clone())),
1138 (None, None) => Ok(None),
1139 }
1140 }
1141
1142 fn merge_value_types(
1145 default: &ValueTypes,
1146 user: &ValueTypes,
1147 knn_index: KnnIndex,
1148 ) -> Result<ValueTypes, SchemaError> {
1149 let float_list = Self::merge_float_list_type(
1151 default.float_list.as_ref(),
1152 user.float_list.as_ref(),
1153 knn_index,
1154 );
1155
1156 if let Some(ref fl) = float_list {
1158 Self::validate_float_list_value_type(fl)?;
1159 }
1160
1161 Ok(ValueTypes {
1162 string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
1163 float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
1164 int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
1165 boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
1166 float_list,
1167 sparse_vector: Self::merge_sparse_vector_type(
1168 default.sparse_vector.as_ref(),
1169 user.sparse_vector.as_ref(),
1170 )?,
1171 })
1172 }
1173
1174 fn merge_string_type(
1176 default: Option<&StringValueType>,
1177 user: Option<&StringValueType>,
1178 ) -> Result<Option<StringValueType>, SchemaError> {
1179 match (default, user) {
1180 (Some(default), Some(user)) => Ok(Some(StringValueType {
1181 string_inverted_index: Self::merge_string_inverted_index_type(
1182 default.string_inverted_index.as_ref(),
1183 user.string_inverted_index.as_ref(),
1184 )?,
1185 fts_index: Self::merge_fts_index_type(
1186 default.fts_index.as_ref(),
1187 user.fts_index.as_ref(),
1188 )?,
1189 })),
1190 (Some(default), None) => Ok(Some(default.clone())),
1191 (None, Some(user)) => Ok(Some(user.clone())),
1192 (None, None) => Ok(None),
1193 }
1194 }
1195
1196 fn merge_float_type(
1198 default: Option<&FloatValueType>,
1199 user: Option<&FloatValueType>,
1200 ) -> Result<Option<FloatValueType>, SchemaError> {
1201 match (default, user) {
1202 (Some(default), Some(user)) => Ok(Some(FloatValueType {
1203 float_inverted_index: Self::merge_float_inverted_index_type(
1204 default.float_inverted_index.as_ref(),
1205 user.float_inverted_index.as_ref(),
1206 )?,
1207 })),
1208 (Some(default), None) => Ok(Some(default.clone())),
1209 (None, Some(user)) => Ok(Some(user.clone())),
1210 (None, None) => Ok(None),
1211 }
1212 }
1213
1214 fn merge_int_type(
1216 default: Option<&IntValueType>,
1217 user: Option<&IntValueType>,
1218 ) -> Result<Option<IntValueType>, SchemaError> {
1219 match (default, user) {
1220 (Some(default), Some(user)) => Ok(Some(IntValueType {
1221 int_inverted_index: Self::merge_int_inverted_index_type(
1222 default.int_inverted_index.as_ref(),
1223 user.int_inverted_index.as_ref(),
1224 )?,
1225 })),
1226 (Some(default), None) => Ok(Some(default.clone())),
1227 (None, Some(user)) => Ok(Some(user.clone())),
1228 (None, None) => Ok(None),
1229 }
1230 }
1231
1232 fn merge_bool_type(
1234 default: Option<&BoolValueType>,
1235 user: Option<&BoolValueType>,
1236 ) -> Result<Option<BoolValueType>, SchemaError> {
1237 match (default, user) {
1238 (Some(default), Some(user)) => Ok(Some(BoolValueType {
1239 bool_inverted_index: Self::merge_bool_inverted_index_type(
1240 default.bool_inverted_index.as_ref(),
1241 user.bool_inverted_index.as_ref(),
1242 )?,
1243 })),
1244 (Some(default), None) => Ok(Some(default.clone())),
1245 (None, Some(user)) => Ok(Some(user.clone())),
1246 (None, None) => Ok(None),
1247 }
1248 }
1249
1250 fn merge_float_list_type(
1252 default: Option<&FloatListValueType>,
1253 user: Option<&FloatListValueType>,
1254 knn_index: KnnIndex,
1255 ) -> Option<FloatListValueType> {
1256 match (default, user) {
1257 (Some(default), Some(user)) => Some(FloatListValueType {
1258 vector_index: Self::merge_vector_index_type(
1259 default.vector_index.as_ref(),
1260 user.vector_index.as_ref(),
1261 knn_index,
1262 ),
1263 }),
1264 (Some(default), None) => Some(default.clone()),
1265 (None, Some(user)) => Some(user.clone()),
1266 (None, None) => None,
1267 }
1268 }
1269
1270 fn merge_sparse_vector_type(
1272 default: Option<&SparseVectorValueType>,
1273 user: Option<&SparseVectorValueType>,
1274 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1275 match (default, user) {
1276 (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
1277 sparse_vector_index: Self::merge_sparse_vector_index_type(
1278 default.sparse_vector_index.as_ref(),
1279 user.sparse_vector_index.as_ref(),
1280 )?,
1281 })),
1282 (Some(default), None) => Ok(Some(default.clone())),
1283 (None, Some(user)) => Ok(Some(user.clone())),
1284 (None, None) => Ok(None),
1285 }
1286 }
1287
1288 fn merge_string_inverted_index_type(
1290 default: Option<&StringInvertedIndexType>,
1291 user: Option<&StringInvertedIndexType>,
1292 ) -> Result<Option<StringInvertedIndexType>, SchemaError> {
1293 match (default, user) {
1294 (Some(_default), Some(user)) => {
1295 Ok(Some(StringInvertedIndexType {
1296 enabled: user.enabled, config: user.config.clone(), }))
1299 }
1300 (Some(default), None) => Ok(Some(default.clone())),
1301 (None, Some(user)) => Ok(Some(user.clone())),
1302 (None, None) => Ok(None),
1303 }
1304 }
1305
1306 fn merge_fts_index_type(
1307 default: Option<&FtsIndexType>,
1308 user: Option<&FtsIndexType>,
1309 ) -> Result<Option<FtsIndexType>, SchemaError> {
1310 match (default, user) {
1311 (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
1312 enabled: user.enabled,
1313 config: user.config.clone(),
1314 })),
1315 (Some(default), None) => Ok(Some(default.clone())),
1316 (None, Some(user)) => Ok(Some(user.clone())),
1317 (None, None) => Ok(None),
1318 }
1319 }
1320
1321 fn merge_float_inverted_index_type(
1322 default: Option<&FloatInvertedIndexType>,
1323 user: Option<&FloatInvertedIndexType>,
1324 ) -> Result<Option<FloatInvertedIndexType>, SchemaError> {
1325 match (default, user) {
1326 (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
1327 enabled: user.enabled,
1328 config: user.config.clone(),
1329 })),
1330 (Some(default), None) => Ok(Some(default.clone())),
1331 (None, Some(user)) => Ok(Some(user.clone())),
1332 (None, None) => Ok(None),
1333 }
1334 }
1335
1336 fn merge_int_inverted_index_type(
1337 default: Option<&IntInvertedIndexType>,
1338 user: Option<&IntInvertedIndexType>,
1339 ) -> Result<Option<IntInvertedIndexType>, SchemaError> {
1340 match (default, user) {
1341 (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
1342 enabled: user.enabled,
1343 config: user.config.clone(),
1344 })),
1345 (Some(default), None) => Ok(Some(default.clone())),
1346 (None, Some(user)) => Ok(Some(user.clone())),
1347 (None, None) => Ok(None),
1348 }
1349 }
1350
1351 fn merge_bool_inverted_index_type(
1352 default: Option<&BoolInvertedIndexType>,
1353 user: Option<&BoolInvertedIndexType>,
1354 ) -> Result<Option<BoolInvertedIndexType>, SchemaError> {
1355 match (default, user) {
1356 (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
1357 enabled: user.enabled,
1358 config: user.config.clone(),
1359 })),
1360 (Some(default), None) => Ok(Some(default.clone())),
1361 (None, Some(user)) => Ok(Some(user.clone())),
1362 (None, None) => Ok(None),
1363 }
1364 }
1365
1366 fn merge_vector_index_type(
1367 default: Option<&VectorIndexType>,
1368 user: Option<&VectorIndexType>,
1369 knn_index: KnnIndex,
1370 ) -> Option<VectorIndexType> {
1371 match (default, user) {
1372 (Some(default), Some(user)) => Some(VectorIndexType {
1373 enabled: user.enabled,
1374 config: Self::merge_vector_index_config(&default.config, &user.config, knn_index),
1375 }),
1376 (Some(default), None) => Some(default.clone()),
1377 (None, Some(user)) => Some(user.clone()),
1378 (None, None) => None,
1379 }
1380 }
1381
1382 fn merge_sparse_vector_index_type(
1383 default: Option<&SparseVectorIndexType>,
1384 user: Option<&SparseVectorIndexType>,
1385 ) -> Result<Option<SparseVectorIndexType>, SchemaError> {
1386 match (default, user) {
1387 (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
1388 enabled: user.enabled,
1389 config: Self::merge_sparse_vector_index_config(&default.config, &user.config),
1390 })),
1391 (Some(default), None) => Ok(Some(default.clone())),
1392 (None, Some(user)) => Ok(Some(user.clone())),
1393 (None, None) => Ok(None),
1394 }
1395 }
1396
1397 fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), SchemaError> {
1400 if let Some(vector_index) = &float_list.vector_index {
1401 if let Some(hnsw) = &vector_index.config.hnsw {
1402 hnsw.validate().map_err(SchemaError::InvalidHnswConfig)?;
1403 }
1404 if let Some(spann) = &vector_index.config.spann {
1405 spann.validate().map_err(SchemaError::InvalidSpannConfig)?;
1406 }
1407 }
1408 Ok(())
1409 }
1410
1411 fn merge_vector_index_config(
1413 default: &VectorIndexConfig,
1414 user: &VectorIndexConfig,
1415 knn_index: KnnIndex,
1416 ) -> VectorIndexConfig {
1417 match knn_index {
1418 KnnIndex::Hnsw => VectorIndexConfig {
1419 space: user.space.clone().or(default.space.clone()),
1420 embedding_function: user
1421 .embedding_function
1422 .clone()
1423 .or(default.embedding_function.clone()),
1424 source_key: user.source_key.clone().or(default.source_key.clone()),
1425 hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1426 spann: None,
1427 },
1428 KnnIndex::Spann => VectorIndexConfig {
1429 space: user.space.clone().or(default.space.clone()),
1430 embedding_function: user
1431 .embedding_function
1432 .clone()
1433 .or(default.embedding_function.clone()),
1434 source_key: user.source_key.clone().or(default.source_key.clone()),
1435 hnsw: None,
1436 spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
1437 },
1438 }
1439 }
1440
1441 fn merge_sparse_vector_index_config(
1443 default: &SparseVectorIndexConfig,
1444 user: &SparseVectorIndexConfig,
1445 ) -> SparseVectorIndexConfig {
1446 SparseVectorIndexConfig {
1447 embedding_function: user
1448 .embedding_function
1449 .clone()
1450 .or(default.embedding_function.clone()),
1451 source_key: user.source_key.clone().or(default.source_key.clone()),
1452 bm25: user.bm25.or(default.bm25),
1453 }
1454 }
1455
1456 fn merge_hnsw_configs(
1458 default_hnsw: Option<&HnswIndexConfig>,
1459 user_hnsw: Option<&HnswIndexConfig>,
1460 ) -> Option<HnswIndexConfig> {
1461 match (default_hnsw, user_hnsw) {
1462 (Some(default), Some(user)) => Some(HnswIndexConfig {
1463 ef_construction: user.ef_construction.or(default.ef_construction),
1464 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1465 ef_search: user.ef_search.or(default.ef_search),
1466 num_threads: user.num_threads.or(default.num_threads),
1467 batch_size: user.batch_size.or(default.batch_size),
1468 sync_threshold: user.sync_threshold.or(default.sync_threshold),
1469 resize_factor: user.resize_factor.or(default.resize_factor),
1470 }),
1471 (Some(default), None) => Some(default.clone()),
1472 (None, Some(user)) => Some(user.clone()),
1473 (None, None) => None,
1474 }
1475 }
1476
1477 fn merge_spann_configs(
1479 default_spann: Option<&SpannIndexConfig>,
1480 user_spann: Option<&SpannIndexConfig>,
1481 ) -> Option<SpannIndexConfig> {
1482 match (default_spann, user_spann) {
1483 (Some(default), Some(user)) => Some(SpannIndexConfig {
1484 search_nprobe: user.search_nprobe.or(default.search_nprobe),
1485 search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1486 search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1487 nreplica_count: user.nreplica_count.or(default.nreplica_count),
1488 write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1489 write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1490 split_threshold: user.split_threshold.or(default.split_threshold),
1491 num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1492 initial_lambda: user.initial_lambda.or(default.initial_lambda),
1493 reassign_neighbor_count: user
1494 .reassign_neighbor_count
1495 .or(default.reassign_neighbor_count),
1496 merge_threshold: user.merge_threshold.or(default.merge_threshold),
1497 num_centers_to_merge_to: user
1498 .num_centers_to_merge_to
1499 .or(default.num_centers_to_merge_to),
1500 write_nprobe: user.write_nprobe.or(default.write_nprobe),
1501 ef_construction: user.ef_construction.or(default.ef_construction),
1502 ef_search: user.ef_search.or(default.ef_search),
1503 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1504 }),
1505 (Some(default), None) => Some(default.clone()),
1506 (None, Some(user)) => Some(user.clone()),
1507 (None, None) => None,
1508 }
1509 }
1510
1511 pub fn reconcile_with_collection_config(
1519 schema: &Schema,
1520 collection_config: &InternalCollectionConfiguration,
1521 default_knn_index: KnnIndex,
1522 ) -> Result<Schema, SchemaError> {
1523 if collection_config.is_default() {
1525 if schema.is_default() {
1526 let mut new_schema = Schema::new_default(default_knn_index);
1529
1530 if collection_config.embedding_function.is_some() {
1531 if let Some(float_list) = &mut new_schema.defaults.float_list {
1532 if let Some(vector_index) = &mut float_list.vector_index {
1533 vector_index.config.embedding_function =
1534 collection_config.embedding_function.clone();
1535 }
1536 }
1537 if let Some(embedding_types) = new_schema.keys.get_mut(EMBEDDING_KEY) {
1538 if let Some(float_list) = &mut embedding_types.float_list {
1539 if let Some(vector_index) = &mut float_list.vector_index {
1540 vector_index.config.embedding_function =
1541 collection_config.embedding_function.clone();
1542 }
1543 }
1544 }
1545 }
1546 return Ok(new_schema);
1547 } else {
1548 return Ok(schema.clone());
1550 }
1551 }
1552
1553 Self::try_from(collection_config)
1556 }
1557
1558 pub fn reconcile_schema_and_config(
1559 schema: Option<&Schema>,
1560 configuration: Option<&InternalCollectionConfiguration>,
1561 knn_index: KnnIndex,
1562 ) -> Result<Schema, SchemaError> {
1563 if let (Some(user_schema), Some(config)) = (schema, configuration) {
1565 if !user_schema.is_default() && !config.is_default() {
1566 return Err(SchemaError::ConfigAndSchemaConflict);
1567 }
1568 }
1569
1570 let reconciled_schema = Self::reconcile_with_defaults(schema, knn_index)?;
1571 if let Some(config) = configuration {
1572 Self::reconcile_with_collection_config(&reconciled_schema, config, knn_index)
1573 } else {
1574 Ok(reconciled_schema)
1575 }
1576 }
1577
1578 pub fn default_with_embedding_function(
1579 embedding_function: EmbeddingFunctionConfiguration,
1580 ) -> Schema {
1581 let mut schema = Schema::new_default(KnnIndex::Spann);
1582 if let Some(float_list) = &mut schema.defaults.float_list {
1583 if let Some(vector_index) = &mut float_list.vector_index {
1584 vector_index.config.embedding_function = Some(embedding_function.clone());
1585 }
1586 }
1587 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1588 if let Some(float_list) = &mut embedding_types.float_list {
1589 if let Some(vector_index) = &mut float_list.vector_index {
1590 vector_index.config.embedding_function = Some(embedding_function);
1591 }
1592 }
1593 }
1594 schema
1595 }
1596
1597 pub fn is_default(&self) -> bool {
1599 if !Self::is_value_types_default(&self.defaults) {
1601 return false;
1602 }
1603
1604 for key in self.keys.keys() {
1605 if key != EMBEDDING_KEY && key != DOCUMENT_KEY {
1606 return false;
1607 }
1608 }
1609
1610 if let Some(embedding_value) = self.keys.get(EMBEDDING_KEY) {
1612 if !Self::is_embedding_value_types_default(embedding_value) {
1613 return false;
1614 }
1615 }
1616
1617 if let Some(document_value) = self.keys.get(DOCUMENT_KEY) {
1619 if !Self::is_document_value_types_default(document_value) {
1620 return false;
1621 }
1622 }
1623
1624 if self.cmek.is_some() {
1626 return false;
1627 }
1628
1629 true
1630 }
1631
1632 fn is_value_types_default(value_types: &ValueTypes) -> bool {
1634 if let Some(string) = &value_types.string {
1636 if let Some(string_inverted) = &string.string_inverted_index {
1637 if !string_inverted.enabled {
1638 return false;
1639 }
1640 }
1642 if let Some(fts) = &string.fts_index {
1643 if fts.enabled {
1644 return false;
1645 }
1646 }
1648 }
1649
1650 if let Some(float) = &value_types.float {
1652 if let Some(float_inverted) = &float.float_inverted_index {
1653 if !float_inverted.enabled {
1654 return false;
1655 }
1656 }
1658 }
1659
1660 if let Some(int) = &value_types.int {
1662 if let Some(int_inverted) = &int.int_inverted_index {
1663 if !int_inverted.enabled {
1664 return false;
1665 }
1666 }
1668 }
1669
1670 if let Some(boolean) = &value_types.boolean {
1672 if let Some(bool_inverted) = &boolean.bool_inverted_index {
1673 if !bool_inverted.enabled {
1674 return false;
1675 }
1676 }
1678 }
1679
1680 if let Some(float_list) = &value_types.float_list {
1682 if let Some(vector_index) = &float_list.vector_index {
1683 if vector_index.enabled {
1684 return false;
1685 }
1686 if !is_embedding_function_default(&vector_index.config.embedding_function) {
1687 return false;
1688 }
1689 if !is_space_default(&vector_index.config.space) {
1690 return false;
1691 }
1692 if vector_index.config.source_key.is_some() {
1694 return false;
1695 }
1696 match (&vector_index.config.hnsw, &vector_index.config.spann) {
1699 (Some(hnsw_config), None) => {
1700 if !hnsw_config.is_default() {
1701 return false;
1702 }
1703 }
1704 (None, Some(spann_config)) => {
1705 if !spann_config.is_default() {
1706 return false;
1707 }
1708 }
1709 (Some(_), Some(_)) => return false, (None, None) => {}
1711 }
1712 }
1713 }
1714
1715 if let Some(sparse_vector) = &value_types.sparse_vector {
1717 if let Some(sparse_index) = &sparse_vector.sparse_vector_index {
1718 if sparse_index.enabled {
1719 return false;
1720 }
1721 if !is_embedding_function_default(&sparse_index.config.embedding_function) {
1723 return false;
1724 }
1725 if sparse_index.config.source_key.is_some() {
1726 return false;
1727 }
1728 if let Some(bm25) = &sparse_index.config.bm25 {
1729 if bm25 != &false {
1730 return false;
1731 }
1732 }
1733 }
1734 }
1735
1736 true
1737 }
1738
1739 fn is_embedding_value_types_default(value_types: &ValueTypes) -> bool {
1741 if value_types.string.is_some()
1743 || value_types.float.is_some()
1744 || value_types.int.is_some()
1745 || value_types.boolean.is_some()
1746 || value_types.sparse_vector.is_some()
1747 {
1748 return false;
1749 }
1750
1751 if let Some(float_list) = &value_types.float_list {
1753 if let Some(vector_index) = &float_list.vector_index {
1754 if !vector_index.enabled {
1755 return false;
1756 }
1757 if !is_space_default(&vector_index.config.space) {
1758 return false;
1759 }
1760 if !is_embedding_function_default(&vector_index.config.embedding_function) {
1762 return false;
1763 }
1764 if vector_index.config.source_key.as_deref() != Some(DOCUMENT_KEY) {
1766 return false;
1767 }
1768 match (&vector_index.config.hnsw, &vector_index.config.spann) {
1771 (Some(hnsw_config), None) => {
1772 if !hnsw_config.is_default() {
1773 return false;
1774 }
1775 }
1776 (None, Some(spann_config)) => {
1777 if !spann_config.is_default() {
1778 return false;
1779 }
1780 }
1781 (Some(_), Some(_)) => return false, (None, None) => {}
1783 }
1784 }
1785 }
1786
1787 true
1788 }
1789
1790 fn is_document_value_types_default(value_types: &ValueTypes) -> bool {
1792 if value_types.float_list.is_some()
1794 || value_types.float.is_some()
1795 || value_types.int.is_some()
1796 || value_types.boolean.is_some()
1797 || value_types.sparse_vector.is_some()
1798 {
1799 return false;
1800 }
1801
1802 if let Some(string) = &value_types.string {
1804 if let Some(fts) = &string.fts_index {
1805 if !fts.enabled {
1806 return false;
1807 }
1808 }
1810 if let Some(string_inverted) = &string.string_inverted_index {
1811 if string_inverted.enabled {
1812 return false;
1813 }
1814 }
1816 }
1817
1818 true
1819 }
1820
1821 pub fn is_metadata_type_index_enabled(
1823 &self,
1824 key: &str,
1825 value_type: MetadataValueType,
1826 ) -> Result<bool, SchemaError> {
1827 let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1828
1829 match value_type {
1830 MetadataValueType::Bool => match &v_type.boolean {
1831 Some(bool_type) => match &bool_type.bool_inverted_index {
1832 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1833 None => Err(SchemaError::MissingIndexConfiguration {
1834 key: key.to_string(),
1835 value_type: "bool".to_string(),
1836 }),
1837 },
1838 None => match &self.defaults.boolean {
1839 Some(bool_type) => match &bool_type.bool_inverted_index {
1840 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1841 None => Err(SchemaError::MissingIndexConfiguration {
1842 key: key.to_string(),
1843 value_type: "bool".to_string(),
1844 }),
1845 },
1846 None => Err(SchemaError::MissingIndexConfiguration {
1847 key: key.to_string(),
1848 value_type: "bool".to_string(),
1849 }),
1850 },
1851 },
1852 MetadataValueType::Int => match &v_type.int {
1853 Some(int_type) => match &int_type.int_inverted_index {
1854 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1855 None => Err(SchemaError::MissingIndexConfiguration {
1856 key: key.to_string(),
1857 value_type: "int".to_string(),
1858 }),
1859 },
1860 None => match &self.defaults.int {
1861 Some(int_type) => match &int_type.int_inverted_index {
1862 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1863 None => Err(SchemaError::MissingIndexConfiguration {
1864 key: key.to_string(),
1865 value_type: "int".to_string(),
1866 }),
1867 },
1868 None => Err(SchemaError::MissingIndexConfiguration {
1869 key: key.to_string(),
1870 value_type: "int".to_string(),
1871 }),
1872 },
1873 },
1874 MetadataValueType::Float => match &v_type.float {
1875 Some(float_type) => match &float_type.float_inverted_index {
1876 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1877 None => Err(SchemaError::MissingIndexConfiguration {
1878 key: key.to_string(),
1879 value_type: "float".to_string(),
1880 }),
1881 },
1882 None => match &self.defaults.float {
1883 Some(float_type) => match &float_type.float_inverted_index {
1884 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1885 None => Err(SchemaError::MissingIndexConfiguration {
1886 key: key.to_string(),
1887 value_type: "float".to_string(),
1888 }),
1889 },
1890 None => Err(SchemaError::MissingIndexConfiguration {
1891 key: key.to_string(),
1892 value_type: "float".to_string(),
1893 }),
1894 },
1895 },
1896 MetadataValueType::Str => match &v_type.string {
1897 Some(string_type) => match &string_type.string_inverted_index {
1898 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1899 None => Err(SchemaError::MissingIndexConfiguration {
1900 key: key.to_string(),
1901 value_type: "string".to_string(),
1902 }),
1903 },
1904 None => match &self.defaults.string {
1905 Some(string_type) => match &string_type.string_inverted_index {
1906 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1907 None => Err(SchemaError::MissingIndexConfiguration {
1908 key: key.to_string(),
1909 value_type: "string".to_string(),
1910 }),
1911 },
1912 None => Err(SchemaError::MissingIndexConfiguration {
1913 key: key.to_string(),
1914 value_type: "string".to_string(),
1915 }),
1916 },
1917 },
1918 MetadataValueType::SparseVector => match &v_type.sparse_vector {
1919 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1920 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1921 None => Err(SchemaError::MissingIndexConfiguration {
1922 key: key.to_string(),
1923 value_type: "sparse_vector".to_string(),
1924 }),
1925 },
1926 None => match &self.defaults.sparse_vector {
1927 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1928 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1929 None => Err(SchemaError::MissingIndexConfiguration {
1930 key: key.to_string(),
1931 value_type: "sparse_vector".to_string(),
1932 }),
1933 },
1934 None => Err(SchemaError::MissingIndexConfiguration {
1935 key: key.to_string(),
1936 value_type: "sparse_vector".to_string(),
1937 }),
1938 },
1939 },
1940 }
1941 }
1942
1943 pub fn is_metadata_where_indexing_enabled(
1944 &self,
1945 where_clause: &Where,
1946 ) -> Result<(), FilterValidationError> {
1947 match where_clause {
1948 Where::Composite(composite) => {
1949 for child in &composite.children {
1950 self.is_metadata_where_indexing_enabled(child)?;
1951 }
1952 Ok(())
1953 }
1954 Where::Document(_) => Ok(()),
1955 Where::Metadata(expression) => {
1956 let value_type = match &expression.comparison {
1957 MetadataComparison::Primitive(_, value) => value.value_type(),
1958 MetadataComparison::Set(_, set_value) => set_value.value_type(),
1959 };
1960 let is_enabled = self
1961 .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1962 .map_err(FilterValidationError::Schema)?;
1963 if !is_enabled {
1964 return Err(FilterValidationError::IndexingDisabled {
1965 key: expression.key.clone(),
1966 value_type,
1967 });
1968 }
1969 Ok(())
1970 }
1971 }
1972 }
1973
1974 pub fn is_knn_key_indexing_enabled(
1975 &self,
1976 key: &str,
1977 query: &QueryVector,
1978 ) -> Result<(), FilterValidationError> {
1979 match query {
1980 QueryVector::Sparse(_) => {
1981 let is_enabled = self
1982 .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1983 .map_err(FilterValidationError::Schema)?;
1984 if !is_enabled {
1985 return Err(FilterValidationError::IndexingDisabled {
1986 key: key.to_string(),
1987 value_type: MetadataValueType::SparseVector,
1988 });
1989 }
1990 Ok(())
1991 }
1992 QueryVector::Dense(_) => {
1993 Ok(())
1996 }
1997 }
1998 }
1999
2000 pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
2001 if key.starts_with(CHROMA_KEY) {
2002 return false;
2003 }
2004 let value_types = self.keys.entry(key.to_string()).or_default();
2005 match value_type {
2006 MetadataValueType::Bool => {
2007 if value_types.boolean.is_none() {
2008 value_types.boolean = self.defaults.boolean.clone();
2009 return true;
2010 }
2011 }
2012 MetadataValueType::Int => {
2013 if value_types.int.is_none() {
2014 value_types.int = self.defaults.int.clone();
2015 return true;
2016 }
2017 }
2018 MetadataValueType::Float => {
2019 if value_types.float.is_none() {
2020 value_types.float = self.defaults.float.clone();
2021 return true;
2022 }
2023 }
2024 MetadataValueType::Str => {
2025 if value_types.string.is_none() {
2026 value_types.string = self.defaults.string.clone();
2027 return true;
2028 }
2029 }
2030 MetadataValueType::SparseVector => {
2031 if value_types.sparse_vector.is_none() {
2032 value_types.sparse_vector = self.defaults.sparse_vector.clone();
2033 return true;
2034 }
2035 }
2036 }
2037 false
2038 }
2039
2040 pub fn create_index(
2080 mut self,
2081 key: Option<&str>,
2082 config: IndexConfig,
2083 ) -> Result<Self, SchemaBuilderError> {
2084 match (&key, &config) {
2086 (None, IndexConfig::Vector(cfg)) => {
2087 self._set_vector_index_config_builder(cfg.clone());
2088 return Ok(self);
2089 }
2090 (None, IndexConfig::Fts(cfg)) => {
2091 self._set_fts_index_config_builder(cfg.clone());
2092 return Ok(self);
2093 }
2094 (Some(k), IndexConfig::Vector(_)) => {
2095 return Err(SchemaBuilderError::VectorIndexMustBeGlobal { key: k.to_string() });
2096 }
2097 (Some(k), IndexConfig::Fts(_)) => {
2098 return Err(SchemaBuilderError::FtsIndexMustBeGlobal { key: k.to_string() });
2099 }
2100 _ => {}
2101 }
2102
2103 if let Some(k) = key {
2105 if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2106 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2107 key: k.to_string(),
2108 });
2109 }
2110 }
2111
2112 if key.is_none() && matches!(config, IndexConfig::SparseVector(_)) {
2114 return Err(SchemaBuilderError::SparseVectorRequiresKey);
2115 }
2116
2117 match key {
2119 Some(k) => self._set_index_for_key_builder(k, config, true)?,
2120 None => self._set_index_in_defaults_builder(config, true)?,
2121 }
2122
2123 Ok(self)
2124 }
2125
2126 pub fn delete_index(
2154 mut self,
2155 key: Option<&str>,
2156 config: IndexConfig,
2157 ) -> Result<Self, SchemaBuilderError> {
2158 if let Some(k) = key {
2160 if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2161 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2162 key: k.to_string(),
2163 });
2164 }
2165 }
2166
2167 match &config {
2169 IndexConfig::Vector(_) => {
2170 return Err(SchemaBuilderError::VectorIndexDeletionNotSupported);
2171 }
2172 IndexConfig::Fts(_) => {
2173 return Err(SchemaBuilderError::FtsIndexDeletionNotSupported);
2174 }
2175 IndexConfig::SparseVector(_) => {
2176 return Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported);
2177 }
2178 _ => {}
2179 }
2180
2181 match key {
2183 Some(k) => self._set_index_for_key_builder(k, config, false)?,
2184 None => self._set_index_in_defaults_builder(config, false)?,
2185 }
2186
2187 Ok(self)
2188 }
2189
2190 pub fn with_cmek(mut self, cmek: Cmek) -> Self {
2208 self.cmek = Some(cmek);
2209 self
2210 }
2211
2212 fn _set_vector_index_config_builder(&mut self, config: VectorIndexConfig) {
2214 if let Some(float_list) = &mut self.defaults.float_list {
2216 if let Some(vector_index) = &mut float_list.vector_index {
2217 vector_index.config = config.clone();
2218 }
2219 }
2220
2221 if let Some(embedding_types) = self.keys.get_mut(EMBEDDING_KEY) {
2223 if let Some(float_list) = &mut embedding_types.float_list {
2224 if let Some(vector_index) = &mut float_list.vector_index {
2225 let mut updated_config = config;
2226 updated_config.source_key = Some(DOCUMENT_KEY.to_string());
2228 vector_index.config = updated_config;
2229 }
2230 }
2231 }
2232 }
2233
2234 fn _set_fts_index_config_builder(&mut self, config: FtsIndexConfig) {
2236 if let Some(string) = &mut self.defaults.string {
2238 if let Some(fts_index) = &mut string.fts_index {
2239 fts_index.config = config.clone();
2240 }
2241 }
2242
2243 if let Some(document_types) = self.keys.get_mut(DOCUMENT_KEY) {
2245 if let Some(string) = &mut document_types.string {
2246 if let Some(fts_index) = &mut string.fts_index {
2247 fts_index.config = config;
2248 }
2249 }
2250 }
2251 }
2252
2253 fn _set_index_for_key_builder(
2255 &mut self,
2256 key: &str,
2257 config: IndexConfig,
2258 enabled: bool,
2259 ) -> Result<(), SchemaBuilderError> {
2260 if enabled && matches!(config, IndexConfig::SparseVector(_)) {
2262 let existing_key = self
2264 .keys
2265 .iter()
2266 .find(|(k, v)| {
2267 k.as_str() != key
2268 && v.sparse_vector
2269 .as_ref()
2270 .and_then(|sv| sv.sparse_vector_index.as_ref())
2271 .map(|idx| idx.enabled)
2272 .unwrap_or(false)
2273 })
2274 .map(|(k, _)| k.clone());
2275
2276 if let Some(existing_key) = existing_key {
2277 return Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key });
2278 }
2279 }
2280
2281 let value_types = self.keys.entry(key.to_string()).or_default();
2283
2284 match config {
2286 IndexConfig::Vector(_) => {
2287 return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2288 key: key.to_string(),
2289 });
2290 }
2291 IndexConfig::Fts(_) => {
2292 return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2293 key: key.to_string(),
2294 });
2295 }
2296 IndexConfig::SparseVector(cfg) => {
2297 value_types.sparse_vector = Some(SparseVectorValueType {
2298 sparse_vector_index: Some(SparseVectorIndexType {
2299 enabled,
2300 config: cfg,
2301 }),
2302 });
2303 }
2304 IndexConfig::StringInverted(cfg) => {
2305 if value_types.string.is_none() {
2306 value_types.string = Some(StringValueType {
2307 fts_index: None,
2308 string_inverted_index: None,
2309 });
2310 }
2311 if let Some(string) = &mut value_types.string {
2312 string.string_inverted_index = Some(StringInvertedIndexType {
2313 enabled,
2314 config: cfg,
2315 });
2316 }
2317 }
2318 IndexConfig::IntInverted(cfg) => {
2319 value_types.int = Some(IntValueType {
2320 int_inverted_index: Some(IntInvertedIndexType {
2321 enabled,
2322 config: cfg,
2323 }),
2324 });
2325 }
2326 IndexConfig::FloatInverted(cfg) => {
2327 value_types.float = Some(FloatValueType {
2328 float_inverted_index: Some(FloatInvertedIndexType {
2329 enabled,
2330 config: cfg,
2331 }),
2332 });
2333 }
2334 IndexConfig::BoolInverted(cfg) => {
2335 value_types.boolean = Some(BoolValueType {
2336 bool_inverted_index: Some(BoolInvertedIndexType {
2337 enabled,
2338 config: cfg,
2339 }),
2340 });
2341 }
2342 }
2343
2344 Ok(())
2345 }
2346
2347 fn _set_index_in_defaults_builder(
2349 &mut self,
2350 config: IndexConfig,
2351 enabled: bool,
2352 ) -> Result<(), SchemaBuilderError> {
2353 match config {
2354 IndexConfig::Vector(_) => {
2355 return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2356 key: "defaults".to_string(),
2357 });
2358 }
2359 IndexConfig::Fts(_) => {
2360 return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2361 key: "defaults".to_string(),
2362 });
2363 }
2364 IndexConfig::SparseVector(cfg) => {
2365 self.defaults.sparse_vector = Some(SparseVectorValueType {
2366 sparse_vector_index: Some(SparseVectorIndexType {
2367 enabled,
2368 config: cfg,
2369 }),
2370 });
2371 }
2372 IndexConfig::StringInverted(cfg) => {
2373 if self.defaults.string.is_none() {
2374 self.defaults.string = Some(StringValueType {
2375 fts_index: None,
2376 string_inverted_index: None,
2377 });
2378 }
2379 if let Some(string) = &mut self.defaults.string {
2380 string.string_inverted_index = Some(StringInvertedIndexType {
2381 enabled,
2382 config: cfg,
2383 });
2384 }
2385 }
2386 IndexConfig::IntInverted(cfg) => {
2387 self.defaults.int = Some(IntValueType {
2388 int_inverted_index: Some(IntInvertedIndexType {
2389 enabled,
2390 config: cfg,
2391 }),
2392 });
2393 }
2394 IndexConfig::FloatInverted(cfg) => {
2395 self.defaults.float = Some(FloatValueType {
2396 float_inverted_index: Some(FloatInvertedIndexType {
2397 enabled,
2398 config: cfg,
2399 }),
2400 });
2401 }
2402 IndexConfig::BoolInverted(cfg) => {
2403 self.defaults.boolean = Some(BoolValueType {
2404 bool_inverted_index: Some(BoolInvertedIndexType {
2405 enabled,
2406 config: cfg,
2407 }),
2408 });
2409 }
2410 }
2411
2412 Ok(())
2413 }
2414}
2415
/// Configuration attached to a vector index in a schema.
///
/// Every field is optional. `None` fields are omitted when serializing, and unknown
/// fields are rejected when deserializing (`deny_unknown_fields`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct VectorIndexConfig {
    /// Distance `Space` for the index, if specified.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub space: Option<Space>,
    /// Embedding function configuration, if specified.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Name of the key this index is sourced from, if specified.
    // NOTE(review): the exact semantics of `source_key` are defined by consumers
    // outside this section; confirm before relying on this description.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// HNSW-specific parameters, if specified.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw: Option<HnswIndexConfig>,
    /// SPANN-specific parameters, if specified.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub spann: Option<SpannIndexConfig>,
}
2440
/// Optional HNSW parameters for a vector index.
///
/// The derived `Default` leaves every field as `None`. `None` fields are omitted when
/// serializing, and unknown fields are rejected when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct HnswIndexConfig {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_construction: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_neighbors: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_search: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_threads: Option<usize>,
    /// Validation requires a value of at least 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub batch_size: Option<usize>,
    /// Validation requires a value of at least 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub sync_threshold: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resize_factor: Option<f64>,
}
2463
2464impl HnswIndexConfig {
2465 pub fn is_default(&self) -> bool {
2469 if let Some(ef_construction) = self.ef_construction {
2470 if ef_construction != default_construction_ef() {
2471 return false;
2472 }
2473 }
2474 if let Some(max_neighbors) = self.max_neighbors {
2475 if max_neighbors != default_m() {
2476 return false;
2477 }
2478 }
2479 if let Some(ef_search) = self.ef_search {
2480 if ef_search != default_search_ef() {
2481 return false;
2482 }
2483 }
2484 if let Some(batch_size) = self.batch_size {
2485 if batch_size != default_batch_size() {
2486 return false;
2487 }
2488 }
2489 if let Some(sync_threshold) = self.sync_threshold {
2490 if sync_threshold != default_sync_threshold() {
2491 return false;
2492 }
2493 }
2494 if let Some(resize_factor) = self.resize_factor {
2495 if resize_factor != default_resize_factor() {
2496 return false;
2497 }
2498 }
2499 true
2501 }
2502}
2503
/// Optional SPANN parameters for a vector index.
///
/// The derived `Default` leaves every field as `None`. `None` fields are omitted when
/// serializing, and unknown fields are rejected when deserializing. The `validate`
/// attributes below declare the accepted range for each field; fields whose `min` and
/// `max` coincide only accept that single value.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SpannIndexConfig {
    /// Declared range: at most 128.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 128))]
    pub search_nprobe: Option<u32>,
    /// Declared range: exactly 1.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub search_rng_factor: Option<f32>,
    /// Declared range: 5.0 to 10.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub search_rng_epsilon: Option<f32>,
    /// Declared range: at most 8.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub nreplica_count: Option<u32>,
    /// Declared range: exactly 1.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub write_rng_factor: Option<f32>,
    /// Declared range: 5.0 to 10.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub write_rng_epsilon: Option<f32>,
    /// Declared range: 50 to 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 50, max = 200))]
    pub split_threshold: Option<u32>,
    /// Declared range: at most 1000.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 1000))]
    pub num_samples_kmeans: Option<usize>,
    /// Declared range: exactly 100.0.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 100.0, max = 100.0))]
    pub initial_lambda: Option<f32>,
    /// Declared range: at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub reassign_neighbor_count: Option<u32>,
    /// Declared range: 25 to 100.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 25, max = 100))]
    pub merge_threshold: Option<u32>,
    /// Declared range: at most 8.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub num_centers_to_merge_to: Option<u32>,
    /// Declared range: at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub write_nprobe: Option<u32>,
    /// Declared range: at most 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_construction: Option<usize>,
    /// Declared range: at most 200.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_search: Option<usize>,
    /// Declared range: at most 64.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub max_neighbors: Option<usize>,
}
2558
2559impl SpannIndexConfig {
2560 pub fn is_default(&self) -> bool {
2563 if let Some(search_nprobe) = self.search_nprobe {
2564 if search_nprobe != default_search_nprobe() {
2565 return false;
2566 }
2567 }
2568 if let Some(search_rng_factor) = self.search_rng_factor {
2569 if search_rng_factor != default_search_rng_factor() {
2570 return false;
2571 }
2572 }
2573 if let Some(search_rng_epsilon) = self.search_rng_epsilon {
2574 if search_rng_epsilon != default_search_rng_epsilon() {
2575 return false;
2576 }
2577 }
2578 if let Some(nreplica_count) = self.nreplica_count {
2579 if nreplica_count != default_nreplica_count() {
2580 return false;
2581 }
2582 }
2583 if let Some(write_rng_factor) = self.write_rng_factor {
2584 if write_rng_factor != default_write_rng_factor() {
2585 return false;
2586 }
2587 }
2588 if let Some(write_rng_epsilon) = self.write_rng_epsilon {
2589 if write_rng_epsilon != default_write_rng_epsilon() {
2590 return false;
2591 }
2592 }
2593 if let Some(split_threshold) = self.split_threshold {
2594 if split_threshold != default_split_threshold() {
2595 return false;
2596 }
2597 }
2598 if let Some(num_samples_kmeans) = self.num_samples_kmeans {
2599 if num_samples_kmeans != default_num_samples_kmeans() {
2600 return false;
2601 }
2602 }
2603 if let Some(initial_lambda) = self.initial_lambda {
2604 if initial_lambda != default_initial_lambda() {
2605 return false;
2606 }
2607 }
2608 if let Some(reassign_neighbor_count) = self.reassign_neighbor_count {
2609 if reassign_neighbor_count != default_reassign_neighbor_count() {
2610 return false;
2611 }
2612 }
2613 if let Some(merge_threshold) = self.merge_threshold {
2614 if merge_threshold != default_merge_threshold() {
2615 return false;
2616 }
2617 }
2618 if let Some(num_centers_to_merge_to) = self.num_centers_to_merge_to {
2619 if num_centers_to_merge_to != default_num_centers_to_merge_to() {
2620 return false;
2621 }
2622 }
2623 if let Some(write_nprobe) = self.write_nprobe {
2624 if write_nprobe != default_write_nprobe() {
2625 return false;
2626 }
2627 }
2628 if let Some(ef_construction) = self.ef_construction {
2629 if ef_construction != default_construction_ef_spann() {
2630 return false;
2631 }
2632 }
2633 if let Some(ef_search) = self.ef_search {
2634 if ef_search != default_search_ef_spann() {
2635 return false;
2636 }
2637 }
2638 if let Some(max_neighbors) = self.max_neighbors {
2639 if max_neighbors != default_m_spann() {
2640 return false;
2641 }
2642 }
2643 true
2644 }
2645}
2646
/// Configuration attached to a sparse vector index in a schema.
///
/// Every field is optional. `None` fields are omitted when serializing, and unknown
/// fields are rejected when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SparseVectorIndexConfig {
    /// Embedding function configuration, if specified.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Name of the key this index is sourced from, if specified.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// Optional BM25 flag.
    // NOTE(review): how consumers interpret `bm25` is not visible here; confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bm25: Option<bool>,
}
2661
/// Configuration for an FTS index.
///
/// Currently carries no fields; with `deny_unknown_fields`, deserialization rejects any
/// field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FtsIndexConfig {
    // Intentionally empty: no configurable parameters are defined.
}
2668
/// Configuration for a string inverted index.
///
/// Currently carries no fields; with `deny_unknown_fields`, deserialization rejects any
/// field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct StringInvertedIndexConfig {
    // Intentionally empty: no configurable parameters are defined.
}
2675
/// Configuration for an integer inverted index.
///
/// Currently carries no fields; with `deny_unknown_fields`, deserialization rejects any
/// field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct IntInvertedIndexConfig {
    // Intentionally empty: no configurable parameters are defined.
}
2682
/// Configuration for a float inverted index.
///
/// Currently carries no fields; with `deny_unknown_fields`, deserialization rejects any
/// field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FloatInvertedIndexConfig {
    // Intentionally empty: no configurable parameters are defined.
}
2689
/// Configuration for a boolean inverted index.
///
/// Currently carries no fields; with `deny_unknown_fields`, deserialization rejects any
/// field that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct BoolInvertedIndexConfig {
    // Intentionally empty: no configurable parameters are defined.
}
2696
/// Sum type over every index configuration the schema builder methods accept.
///
/// Each variant wraps the configuration struct of one index kind; `From` impls exist for
/// every wrapped type so a concrete config can be converted with `.into()`.
#[derive(Clone, Debug)]
// The variants differ considerably in size (the vector config carries nested HNSW/SPANN
// settings while several others are empty), hence the lint is silenced rather than boxed.
#[allow(clippy::large_enum_variant)]
pub enum IndexConfig {
    /// Dense vector index configuration.
    Vector(VectorIndexConfig),
    /// Sparse vector index configuration.
    SparseVector(SparseVectorIndexConfig),
    /// FTS index configuration.
    Fts(FtsIndexConfig),
    /// String inverted index configuration.
    StringInverted(StringInvertedIndexConfig),
    /// Integer inverted index configuration.
    IntInverted(IntInvertedIndexConfig),
    /// Float inverted index configuration.
    FloatInverted(FloatInvertedIndexConfig),
    /// Boolean inverted index configuration.
    BoolInverted(BoolInvertedIndexConfig),
}
2713
2714impl From<VectorIndexConfig> for IndexConfig {
2716 fn from(config: VectorIndexConfig) -> Self {
2717 IndexConfig::Vector(config)
2718 }
2719}
2720
2721impl From<SparseVectorIndexConfig> for IndexConfig {
2722 fn from(config: SparseVectorIndexConfig) -> Self {
2723 IndexConfig::SparseVector(config)
2724 }
2725}
2726
2727impl From<FtsIndexConfig> for IndexConfig {
2728 fn from(config: FtsIndexConfig) -> Self {
2729 IndexConfig::Fts(config)
2730 }
2731}
2732
2733impl From<StringInvertedIndexConfig> for IndexConfig {
2734 fn from(config: StringInvertedIndexConfig) -> Self {
2735 IndexConfig::StringInverted(config)
2736 }
2737}
2738
2739impl From<IntInvertedIndexConfig> for IndexConfig {
2740 fn from(config: IntInvertedIndexConfig) -> Self {
2741 IndexConfig::IntInverted(config)
2742 }
2743}
2744
2745impl From<FloatInvertedIndexConfig> for IndexConfig {
2746 fn from(config: FloatInvertedIndexConfig) -> Self {
2747 IndexConfig::FloatInverted(config)
2748 }
2749}
2750
2751impl From<BoolInvertedIndexConfig> for IndexConfig {
2752 fn from(config: BoolInvertedIndexConfig) -> Self {
2753 IndexConfig::BoolInverted(config)
2754 }
2755}
2756
2757impl TryFrom<&InternalCollectionConfiguration> for Schema {
2758 type Error = SchemaError;
2759
2760 fn try_from(config: &InternalCollectionConfiguration) -> Result<Self, Self::Error> {
2761 let mut schema = match &config.vector_index {
2763 VectorIndexConfiguration::Hnsw(_) => Schema::new_default(KnnIndex::Hnsw),
2764 VectorIndexConfiguration::Spann(_) => Schema::new_default(KnnIndex::Spann),
2765 };
2766 let vector_config = match &config.vector_index {
2768 VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
2769 space: Some(hnsw_config.space.clone()),
2770 embedding_function: config.embedding_function.clone(),
2771 source_key: None,
2772 hnsw: Some(HnswIndexConfig {
2773 ef_construction: Some(hnsw_config.ef_construction),
2774 max_neighbors: Some(hnsw_config.max_neighbors),
2775 ef_search: Some(hnsw_config.ef_search),
2776 num_threads: Some(hnsw_config.num_threads),
2777 batch_size: Some(hnsw_config.batch_size),
2778 sync_threshold: Some(hnsw_config.sync_threshold),
2779 resize_factor: Some(hnsw_config.resize_factor),
2780 }),
2781 spann: None,
2782 },
2783 VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
2784 space: Some(spann_config.space.clone()),
2785 embedding_function: config.embedding_function.clone(),
2786 source_key: None,
2787 hnsw: None,
2788 spann: Some(SpannIndexConfig {
2789 search_nprobe: Some(spann_config.search_nprobe),
2790 search_rng_factor: Some(spann_config.search_rng_factor),
2791 search_rng_epsilon: Some(spann_config.search_rng_epsilon),
2792 nreplica_count: Some(spann_config.nreplica_count),
2793 write_rng_factor: Some(spann_config.write_rng_factor),
2794 write_rng_epsilon: Some(spann_config.write_rng_epsilon),
2795 split_threshold: Some(spann_config.split_threshold),
2796 num_samples_kmeans: Some(spann_config.num_samples_kmeans),
2797 initial_lambda: Some(spann_config.initial_lambda),
2798 reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
2799 merge_threshold: Some(spann_config.merge_threshold),
2800 num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
2801 write_nprobe: Some(spann_config.write_nprobe),
2802 ef_construction: Some(spann_config.ef_construction),
2803 ef_search: Some(spann_config.ef_search),
2804 max_neighbors: Some(spann_config.max_neighbors),
2805 }),
2806 },
2807 };
2808
2809 if let Some(float_list) = &mut schema.defaults.float_list {
2812 if let Some(vector_index) = &mut float_list.vector_index {
2813 vector_index.config = vector_config.clone();
2814 }
2815 }
2816
2817 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
2821 if let Some(float_list) = &mut embedding_types.float_list {
2822 if let Some(vector_index) = &mut float_list.vector_index {
2823 let mut vector_config = vector_config;
2824 vector_config.source_key = Some(DOCUMENT_KEY.to_string());
2825 vector_index.config = vector_config;
2826 }
2827 }
2828 }
2829
2830 Ok(schema)
2831 }
2832}
2833
2834#[cfg(test)]
2835mod tests {
2836 use super::*;
2837 use crate::hnsw_configuration::Space;
2838 use crate::metadata::SparseVector;
2839 use crate::{
2840 EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
2841 };
2842 use serde_json::json;
2843
#[test]
fn test_reconcile_with_defaults_none_user_schema() {
    // Without a user schema, reconciliation must yield exactly the stock SPANN schema.
    let reconciled = Schema::reconcile_with_defaults(None, KnnIndex::Spann).unwrap();

    assert_eq!(reconciled, Schema::new_default(KnnIndex::Spann));
}
2851
#[test]
fn test_reconcile_with_defaults_empty_user_schema() {
    // A user schema that specifies nothing must not alter the stock SPANN schema.
    let empty_schema = Schema {
        defaults: Default::default(),
        keys: Default::default(),
        cmek: None,
        source_attached_function_id: None,
    };

    let reconciled =
        Schema::reconcile_with_defaults(Some(&empty_schema), KnnIndex::Spann).unwrap();

    assert_eq!(reconciled, Schema::new_default(KnnIndex::Spann));
}
2866
#[test]
fn test_reconcile_with_defaults_user_overrides_string_enabled() {
    // The user disables only the default string inverted index.
    let user_schema = Schema {
        defaults: ValueTypes {
            string: Some(StringValueType {
                string_inverted_index: Some(StringInvertedIndexType {
                    enabled: false,
                    config: StringInvertedIndexConfig {},
                }),
                fts_index: None,
            }),
            ..Default::default()
        },
        keys: HashMap::new(),
        cmek: None,
        source_attached_function_id: None,
    };

    let reconciled =
        Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();

    // The override wins for the string inverted index ...
    let string_index_enabled = reconciled
        .defaults
        .string
        .as_ref()
        .and_then(|string| string.string_inverted_index.as_ref())
        .map(|index| index.enabled);
    assert_eq!(string_index_enabled, Some(false));

    // ... while value types the user did not mention are still filled in.
    assert!(reconciled.defaults.float.is_some());
    assert!(reconciled.defaults.int.is_some());
}
2903
#[test]
fn test_reconcile_with_defaults_user_overrides_vector_config() {
    // User overrides a handful of vector settings and leaves the rest unset.
    let user_schema = Schema {
        defaults: ValueTypes {
            float_list: Some(FloatListValueType {
                vector_index: Some(VectorIndexType {
                    enabled: true,
                    config: VectorIndexConfig {
                        space: Some(Space::L2),
                        embedding_function: None,
                        source_key: Some("custom_key".to_string()),
                        hnsw: Some(HnswIndexConfig {
                            ef_construction: Some(500),
                            ..Default::default()
                        }),
                        spann: None,
                    },
                }),
            }),
            ..Default::default()
        },
        keys: HashMap::new(),
        cmek: None,
        source_attached_function_id: None,
    };

    // Merge by hand against the HNSW default schema instead of going through
    // `reconcile_with_defaults`.
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults = Schema::merge_value_types(
        &default_schema.defaults,
        &user_schema.defaults,
        KnnIndex::Hnsw,
    )
    .unwrap();

    let mut merged_keys = default_schema.keys.clone();
    for (key, user_value_types) in user_schema.keys {
        let merged = match merged_keys.get(&key) {
            Some(default_value_types) => {
                Schema::merge_value_types(default_value_types, &user_value_types, KnnIndex::Hnsw)
                    .unwrap()
            }
            None => user_value_types,
        };
        merged_keys.insert(key, merged);
    }

    let merged_schema = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
        cmek: None,
        source_attached_function_id: None,
    };

    let vector_config = &merged_schema
        .defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    let hnsw = vector_config.hnsw.as_ref().unwrap();

    // Values the user supplied are kept.
    assert_eq!(vector_config.space, Some(Space::L2));
    assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
    assert_eq!(hnsw.ef_construction, Some(500));

    // Values the user left unset resolve as expected after the merge.
    assert_eq!(vector_config.embedding_function, None);
    assert_eq!(hnsw.max_neighbors, Some(default_m()));
}
2992
#[test]
fn test_reconcile_with_defaults_keys() {
    // A single custom key override with FTS enabled and the inverted index disabled.
    let custom_override = ValueTypes {
        string: Some(StringValueType {
            fts_index: Some(FtsIndexType {
                enabled: true,
                config: FtsIndexConfig {},
            }),
            string_inverted_index: Some(StringInvertedIndexType {
                enabled: false,
                config: StringInvertedIndexConfig {},
            }),
        }),
        ..Default::default()
    };

    let mut user_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::new(),
        cmek: None,
        source_attached_function_id: None,
    };
    user_schema
        .keys
        .insert("custom_key".to_string(), custom_override);

    let reconciled =
        Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();

    // Built-in key overrides survive reconciliation.
    assert!(reconciled.keys.contains_key(EMBEDDING_KEY));
    assert!(reconciled.keys.contains_key(DOCUMENT_KEY));

    // The user's key override is carried over with its FTS index still enabled.
    assert!(reconciled.keys.contains_key("custom_key"));
    let fts_index = reconciled
        .keys
        .get("custom_key")
        .unwrap()
        .string
        .as_ref()
        .unwrap()
        .fts_index
        .as_ref()
        .unwrap();
    assert!(fts_index.enabled);
}
3041
#[test]
fn test_reconcile_with_defaults_override_existing_key() {
    // Override the built-in embedding key with a disabled, customised vector index.
    let embedding_override = ValueTypes {
        float_list: Some(FloatListValueType {
            vector_index: Some(VectorIndexType {
                enabled: false,
                config: VectorIndexConfig {
                    space: Some(Space::Ip),
                    embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
                    source_key: Some("custom_embedding_key".to_string()),
                    hnsw: None,
                    spann: None,
                },
            }),
        }),
        ..Default::default()
    };

    let mut user_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::new(),
        cmek: None,
        source_attached_function_id: None,
    };
    user_schema
        .keys
        .insert(EMBEDDING_KEY.to_string(), embedding_override);

    let reconciled =
        Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();

    let vector_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();

    // Every user-provided value must win over the default for that key.
    assert!(!vector_index.enabled);
    assert_eq!(vector_index.config.space, Some(Space::Ip));
    assert_eq!(
        vector_index.config.source_key,
        Some("custom_embedding_key".to_string())
    );
}
3091
#[test]
fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
    // Non-default HNSW settings so that a lossy conversion would be detected.
    let hnsw = InternalHnswConfiguration {
        space: Space::Cosine,
        ef_construction: 128,
        ef_search: 96,
        max_neighbors: 42,
        num_threads: 8,
        resize_factor: 1.5,
        sync_threshold: 2_000,
        batch_size: 256,
    };
    let embedding_function =
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "custom".to_string(),
            config: json!({"alpha": 1}),
        });
    let original = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(hnsw),
        embedding_function: Some(embedding_function),
    };

    // configuration -> schema -> configuration must be the identity.
    let schema = Schema::try_from(&original).unwrap();
    let round_tripped = InternalCollectionConfiguration::try_from(&schema).unwrap();

    assert_eq!(round_tripped, original);
}
3118
#[test]
fn test_convert_schema_to_collection_config_spann_roundtrip() {
    // A mix of explicit and defaulted SPANN settings.
    let spann_config = InternalSpannConfiguration {
        space: Space::Cosine,
        search_nprobe: 11,
        search_rng_factor: 1.7,
        write_nprobe: 5,
        nreplica_count: 3,
        split_threshold: 150,
        merge_threshold: 80,
        ef_construction: 120,
        ef_search: 90,
        max_neighbors: 40,
        ..Default::default()
    };
    let embedding_function =
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "custom".to_string(),
            config: json!({"beta": true}),
        });
    let original = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(spann_config.clone()),
        embedding_function: Some(embedding_function),
    };

    // configuration -> schema -> configuration must be the identity.
    let schema = Schema::try_from(&original).unwrap();
    let round_tripped = InternalCollectionConfiguration::try_from(&schema).unwrap();

    assert_eq!(round_tripped, original);
}
3150
#[test]
fn test_convert_schema_to_collection_config_rejects_mixed_index() {
    // Start from an HNSW schema and additionally attach a SPANN config to the
    // embedding key.
    let mut schema = Schema::new_default(KnnIndex::Hnsw);

    let embedding_index = schema
        .keys
        .get_mut(EMBEDDING_KEY)
        .and_then(|embedding| embedding.float_list.as_mut())
        .and_then(|float_list| float_list.vector_index.as_mut());
    if let Some(vector_index) = embedding_index {
        vector_index.config.spann = Some(SpannIndexConfig {
            search_nprobe: Some(1),
            search_rng_factor: Some(1.0),
            search_rng_epsilon: Some(0.1),
            nreplica_count: Some(1),
            write_rng_factor: Some(1.0),
            write_rng_epsilon: Some(0.1),
            split_threshold: Some(100),
            num_samples_kmeans: Some(10),
            initial_lambda: Some(0.5),
            reassign_neighbor_count: Some(10),
            merge_threshold: Some(50),
            num_centers_to_merge_to: Some(3),
            write_nprobe: Some(1),
            ef_construction: Some(50),
            ef_search: Some(40),
            max_neighbors: Some(20),
        });
    }

    // The conversion back to a collection configuration must be refused.
    let conversion = InternalCollectionConfiguration::try_from(&schema);
    assert!(conversion.is_err());
}
3182
#[test]
fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    let snapshot = schema.clone();

    // The document key is already covered, so nothing may change.
    let changed = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);

    assert!(!changed);
    assert_eq!(schema, snapshot);
}
3191
#[test]
fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    assert!(!schema.keys.contains_key("custom_field"));

    let changed = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
    assert!(changed);

    let inserted = schema
        .keys
        .get("custom_field")
        .expect("expected new key override to be inserted");

    // Only the boolean slot is populated, copied from the schema defaults.
    assert_eq!(inserted.boolean, schema.defaults.boolean);
    assert!(inserted.string.is_none());
    assert!(inserted.int.is_none());
    assert!(inserted.float.is_none());
    assert!(inserted.float_list.is_none());
    assert!(inserted.sparse_vector.is_none());
}
3211
#[test]
fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    let key_count_before = schema.keys.len();

    // Pre-register the key with a string slot only.
    let string_only = ValueTypes {
        string: schema.defaults.string.clone(),
        ..Default::default()
    };
    schema.keys.insert("custom_field".to_string(), string_only);

    let changed = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
    assert!(changed);

    // No extra key appears; the existing entry gains the boolean slot.
    assert_eq!(schema.keys.len(), key_count_before + 1);
    let updated = schema
        .keys
        .get("custom_field")
        .expect("expected key override to exist after ensure call");
    assert!(updated.string.is_some());
    assert_eq!(updated.boolean, schema.defaults.boolean);
}
3235
#[test]
fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
    let schema = Schema::new_default(KnnIndex::Spann);
    let query = QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap());

    // "custom_sparse" has no enabled sparse index in the default schema.
    let error = schema
        .is_knn_key_indexing_enabled("custom_sparse", &query)
        .expect_err("expected indexing disabled error");

    match error {
        FilterValidationError::IndexingDisabled { key, value_type } => {
            assert_eq!(key, "custom_sparse");
            assert_eq!(value_type, MetadataValueType::SparseVector);
        }
        other => panic!("unexpected error variant: {other:?}"),
    }
}
3253
#[test]
fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
    // Register a key whose sparse vector index is explicitly enabled.
    let sparse_enabled = ValueTypes {
        sparse_vector: Some(SparseVectorValueType {
            sparse_vector_index: Some(SparseVectorIndexType {
                enabled: true,
                config: SparseVectorIndexConfig {
                    embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
                    source_key: None,
                    bm25: None,
                },
            }),
        }),
        ..Default::default()
    };
    let mut schema = Schema::new_default(KnnIndex::Spann);
    schema
        .keys
        .insert("sparse_enabled".to_string(), sparse_enabled);

    let query = QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap());
    let outcome = schema.is_knn_key_indexing_enabled("sparse_enabled", &query);

    assert!(outcome.is_ok());
}
3281
#[test]
fn test_is_knn_key_indexing_enabled_dense_succeeds() {
    // A dense query against the embedding key is accepted by the default schema.
    let schema = Schema::new_default(KnnIndex::Spann);
    let query = QueryVector::Dense(vec![0.1_f32, 0.2_f32]);

    let outcome = schema.is_knn_key_indexing_enabled(EMBEDDING_KEY, &query);

    assert!(outcome.is_ok());
}
3292
#[test]
fn test_merge_hnsw_configs_field_level() {
    // Fully specified defaults.
    let default_hnsw = HnswIndexConfig {
        ef_construction: Some(200),
        max_neighbors: Some(16),
        ef_search: Some(10),
        num_threads: Some(4),
        batch_size: Some(100),
        sync_threshold: Some(1000),
        resize_factor: Some(1.2),
    };

    // The user sets three fields and leaves the others as `None`.
    let user_hnsw = HnswIndexConfig {
        ef_construction: Some(300),
        ef_search: Some(20),
        sync_threshold: Some(2000),
        ..Default::default()
    };

    let merged = Schema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();

    // User-provided fields win.
    assert_eq!(merged.ef_construction, Some(300));
    assert_eq!(merged.ef_search, Some(20));
    assert_eq!(merged.sync_threshold, Some(2000));

    // Unset fields fall back to the defaults.
    assert_eq!(merged.max_neighbors, Some(16));
    assert_eq!(merged.num_threads, Some(4));
    assert_eq!(merged.batch_size, Some(100));
    assert_eq!(merged.resize_factor, Some(1.2));
}
3329
#[test]
fn test_merge_spann_configs_field_level() {
    // Fully specified defaults.
    let default_spann = SpannIndexConfig {
        search_nprobe: Some(10),
        search_rng_factor: Some(1.0),
        search_rng_epsilon: Some(7.0),
        nreplica_count: Some(3),
        write_rng_factor: Some(1.0),
        write_rng_epsilon: Some(6.0),
        split_threshold: Some(100),
        num_samples_kmeans: Some(100),
        initial_lambda: Some(100.0),
        reassign_neighbor_count: Some(50),
        merge_threshold: Some(50),
        num_centers_to_merge_to: Some(4),
        write_nprobe: Some(5),
        ef_construction: Some(100),
        ef_search: Some(10),
        max_neighbors: Some(16),
    };

    // The user sets three fields and leaves the others as `None`.
    let user_spann = SpannIndexConfig {
        search_nprobe: Some(20),
        search_rng_epsilon: Some(8.0),
        split_threshold: Some(150),
        ..Default::default()
    };

    let merged = Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann)).unwrap();

    // User-provided fields win.
    assert_eq!(merged.search_nprobe, Some(20));
    assert_eq!(merged.search_rng_epsilon, Some(8.0));
    assert_eq!(merged.split_threshold, Some(150));

    // Unset fields fall back to the defaults.
    assert_eq!(merged.search_rng_factor, Some(1.0));
    assert_eq!(merged.nreplica_count, Some(3));
    assert_eq!(merged.initial_lambda, Some(100.0));
}
3383
/// Converts a partially populated `SpannIndexConfig` into an `InternalSpannConfiguration`
/// and checks that populated fields are carried over while two of the unset fields match
/// their `default_*()` helpers. Also checks the space used when no space is supplied.
#[test]
fn test_spann_index_config_into_internal_configuration() {
    let partial_config = SpannIndexConfig {
        search_nprobe: Some(33),
        search_rng_factor: Some(1.2),
        search_rng_epsilon: None,
        nreplica_count: None,
        write_rng_factor: Some(1.5),
        write_rng_epsilon: None,
        split_threshold: Some(75),
        num_samples_kmeans: None,
        initial_lambda: Some(0.9),
        reassign_neighbor_count: Some(40),
        merge_threshold: None,
        num_centers_to_merge_to: Some(4),
        write_nprobe: Some(60),
        ef_construction: Some(180),
        ef_search: Some(170),
        max_neighbors: Some(32),
    };

    // Conversion with an explicit space.
    let cosine_internal: InternalSpannConfiguration =
        (Some(&Space::Cosine), &partial_config).into();
    assert_eq!(cosine_internal.space, Space::Cosine);
    assert_eq!(cosine_internal.search_nprobe, 33);
    assert_eq!(cosine_internal.search_rng_factor, 1.2);
    // `search_rng_epsilon` was `None` on the input.
    assert_eq!(
        cosine_internal.search_rng_epsilon,
        default_search_rng_epsilon()
    );
    assert_eq!(cosine_internal.write_rng_factor, 1.5);
    assert_eq!(cosine_internal.write_nprobe, 60);
    assert_eq!(cosine_internal.ef_construction, 180);
    assert_eq!(cosine_internal.ef_search, 170);
    assert_eq!(cosine_internal.max_neighbors, 32);
    // `merge_threshold` was `None` on the input.
    assert_eq!(cosine_internal.merge_threshold, default_merge_threshold());

    // Conversion without a space is expected to use `default_space()`.
    let spaceless_internal: InternalSpannConfiguration = (None, &partial_config).into();
    assert_eq!(spaceless_internal.space, default_space());
}
3420
/// Exercises `Schema::merge_string_type` with all four combinations of present/absent
/// default and user values.
#[test]
fn test_merge_string_type_combinations() {
    let base = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: true,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: Some(FtsIndexType {
            enabled: false,
            config: FtsIndexConfig {},
        }),
    };

    // The user value disables the inverted index and says nothing about FTS.
    let overrides = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: None,
    };

    // Both present: the user's inverted-index setting is expected, and FTS is expected
    // to stay disabled as in the default.
    let both = Schema::merge_string_type(Some(&base), Some(&overrides))
        .unwrap()
        .unwrap();
    let both_inverted = both.string_inverted_index.as_ref().unwrap();
    assert!(!both_inverted.enabled);
    let both_fts = both.fts_index.as_ref().unwrap();
    assert!(!both_fts.enabled);

    // Default only.
    let default_only = Schema::merge_string_type(Some(&base), None)
        .unwrap()
        .unwrap();
    assert!(default_only.string_inverted_index.as_ref().unwrap().enabled);

    // User only.
    let user_only = Schema::merge_string_type(None, Some(&overrides))
        .unwrap()
        .unwrap();
    assert!(!user_only.string_inverted_index.as_ref().unwrap().enabled);

    // Neither present: the merge is expected to yield `None`.
    let neither = Schema::merge_string_type(None, None).unwrap();
    assert!(neither.is_none());
}
3467
/// Merges a default and a user `VectorIndexConfig` with `KnnIndex::Hnsw` and checks the
/// outcome for each top-level field, including that the SPANN section is absent from the
/// result even though the user config supplied one.
#[test]
fn test_merge_vector_index_config_comprehensive() {
    let base = VectorIndexConfig {
        space: Some(Space::Cosine),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(200),
            max_neighbors: Some(16),
            ef_search: Some(10),
            num_threads: Some(4),
            batch_size: Some(100),
            sync_threshold: Some(1000),
            resize_factor: Some(1.2),
        }),
        spann: None,
    };

    // The user sets space, source key, one HNSW field and one SPANN field.
    let overrides = VectorIndexConfig {
        space: Some(Space::L2),
        embedding_function: None,
        source_key: Some("user_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(300),
            max_neighbors: None,
            ef_search: None,
            num_threads: None,
            batch_size: None,
            sync_threshold: None,
            resize_factor: None,
        }),
        spann: Some(SpannIndexConfig {
            search_nprobe: Some(15),
            search_rng_factor: None,
            search_rng_epsilon: None,
            nreplica_count: None,
            write_rng_factor: None,
            write_rng_epsilon: None,
            split_threshold: None,
            num_samples_kmeans: None,
            initial_lambda: None,
            reassign_neighbor_count: None,
            merge_threshold: None,
            num_centers_to_merge_to: None,
            write_nprobe: None,
            ef_construction: None,
            ef_search: None,
            max_neighbors: None,
        }),
    };

    let merged = Schema::merge_vector_index_config(&base, &overrides, KnnIndex::Hnsw);

    // User-provided space is expected.
    assert_eq!(merged.space, Some(Space::L2));
    // The user left the embedding function unset, so the default's is expected.
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    // User-provided source key is expected.
    assert_eq!(merged.source_key, Some("user_key".to_string()));

    // HNSW section: user value for `ef_construction`, default value for `max_neighbors`.
    let merged_hnsw = merged.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(300));
    assert_eq!(merged_hnsw.max_neighbors, Some(16));

    // No SPANN section is expected when merging with `KnnIndex::Hnsw`.
    assert!(merged.spann.is_none());
}
3538
/// Merges a default and a user `SparseVectorIndexConfig` and checks that the user's
/// source key is used while the unset embedding function comes from the default.
#[test]
fn test_merge_sparse_vector_index_config() {
    let base = SparseVectorIndexConfig {
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_sparse_key".to_string()),
        bm25: None,
    };

    let overrides = SparseVectorIndexConfig {
        embedding_function: None,
        source_key: Some("user_sparse_key".to_string()),
        bm25: None,
    };

    let merged = Schema::merge_sparse_vector_index_config(&base, &overrides);

    // The user supplied a source key.
    assert_eq!(merged.source_key, Some("user_sparse_key".to_string()));
    // The user left the embedding function unset.
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
}
3564
/// End-to-end merge scenario: a user schema with custom string and float-list defaults
/// plus one custom key override is merged onto `Schema::new_default(KnnIndex::Hnsw)`,
/// and the resulting defaults and key overrides are inspected.
#[test]
fn test_complex_nested_merging_scenario() {
    // Per-key override that only enables FTS for strings.
    let custom_key_override = ValueTypes {
        string: Some(StringValueType {
            fts_index: Some(FtsIndexType {
                enabled: true,
                config: FtsIndexConfig {},
            }),
            string_inverted_index: None,
        }),
        ..Default::default()
    };

    // User schema: custom string and float-list defaults, everything else default.
    let user_schema = Schema {
        defaults: ValueTypes {
            string: Some(StringValueType {
                string_inverted_index: Some(StringInvertedIndexType {
                    enabled: false,
                    config: StringInvertedIndexConfig {},
                }),
                fts_index: Some(FtsIndexType {
                    enabled: true,
                    config: FtsIndexConfig {},
                }),
            }),
            float_list: Some(FloatListValueType {
                vector_index: Some(VectorIndexType {
                    enabled: true,
                    config: VectorIndexConfig {
                        space: Some(Space::Ip),
                        embedding_function: None,
                        source_key: Some("custom_vector_key".to_string()),
                        hnsw: Some(HnswIndexConfig {
                            ef_construction: Some(400),
                            max_neighbors: Some(32),
                            ef_search: None,
                            num_threads: None,
                            batch_size: None,
                            sync_threshold: None,
                            resize_factor: None,
                        }),
                        spann: None,
                    },
                }),
            }),
            ..ValueTypes::default()
        },
        keys: HashMap::from([("custom_field".to_string(), custom_key_override)]),
        cmek: None,
        source_attached_function_id: None,
    };

    // Merge the user schema onto the default HNSW schema by hand: defaults first, then
    // each user key (merged when the default schema already has the key, else inserted).
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults = Schema::merge_value_types(
        &default_schema.defaults,
        &user_schema.defaults,
        KnnIndex::Hnsw,
    )
    .unwrap();
    let mut merged_keys = default_schema.keys.clone();
    for (key, user_value_types) in user_schema.keys {
        let value_types = match merged_keys.get(&key) {
            Some(default_value_types) => {
                Schema::merge_value_types(default_value_types, &user_value_types, KnnIndex::Hnsw)
                    .unwrap()
            }
            None => user_value_types,
        };
        merged_keys.insert(key, value_types);
    }
    let result = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
        cmek: None,
        source_attached_function_id: None,
    };

    // String defaults: the user's settings are expected for both indexes.
    let merged_string = result.defaults.string.as_ref().unwrap();
    assert!(!merged_string.string_inverted_index.as_ref().unwrap().enabled);
    assert!(merged_string.fts_index.as_ref().unwrap().enabled);

    // Vector defaults.
    let vector_config = &result
        .defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    assert_eq!(vector_config.space, Some(Space::Ip));
    assert!(vector_config.embedding_function.is_none());
    assert_eq!(
        vector_config.source_key,
        Some("custom_vector_key".to_string())
    );

    // HNSW section: user values where given, `default_search_ef()` for the unset
    // `ef_search`.
    let merged_hnsw = vector_config.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(400));
    assert_eq!(merged_hnsw.max_neighbors, Some(32));
    assert_eq!(merged_hnsw.ef_search, Some(default_search_ef()));

    // Key overrides: the default schema's keys are kept and the custom key is added.
    assert!(result.keys.contains_key(EMBEDDING_KEY));
    assert!(result.keys.contains_key(DOCUMENT_KEY));
    assert!(result.keys.contains_key("custom_field"));

    // The custom key had no counterpart in the default schema, so it is stored as given.
    let custom_string = result
        .keys
        .get("custom_field")
        .unwrap()
        .string
        .as_ref()
        .unwrap();
    assert!(custom_string.fts_index.as_ref().unwrap().enabled);
    assert!(custom_string.string_inverted_index.is_none());
}
3728
/// A schema derived from the default HNSW collection config is expected to be unchanged
/// when reconciled against that same config.
#[test]
fn test_reconcile_with_collection_config_default_config() {
    let default_config = InternalCollectionConfiguration::default_hnsw();
    let derived_schema = Schema::try_from(&default_config).unwrap();

    let reconciled = Schema::reconcile_with_collection_config(
        &derived_schema,
        &default_config,
        KnnIndex::Hnsw,
    )
    .unwrap();

    assert_eq!(reconciled, derived_schema);
}
3740
/// Default HNSW collection config + default HNSW schema, reconciled with
/// `KnnIndex::Hnsw`: the default vector index is expected to carry an HNSW section
/// and no SPANN section.
#[test]
fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_hnsw() {
    let default_config = InternalCollectionConfiguration::default_hnsw();
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &default_config,
        KnnIndex::Hnsw,
    )
    .unwrap();

    let float_list = reconciled.defaults.float_list.as_ref();
    assert!(float_list.is_some());

    let vector_config = &float_list.unwrap().vector_index.as_ref().unwrap().config;
    assert!(vector_config.hnsw.is_some());
    assert!(vector_config.spann.is_none());
}
3775
/// Default HNSW collection config + default HNSW schema, reconciled with
/// `KnnIndex::Spann`: the default vector index is expected to carry a SPANN section
/// and no HNSW section.
#[test]
fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_spann() {
    let default_config = InternalCollectionConfiguration::default_hnsw();
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &default_config,
        KnnIndex::Spann,
    )
    .unwrap();

    let float_list = reconciled.defaults.float_list.as_ref();
    assert!(float_list.is_some());

    let vector_config = &float_list.unwrap().vector_index.as_ref().unwrap().config;
    assert!(vector_config.spann.is_some());
    assert!(vector_config.hnsw.is_none());
}
3809
/// Default HNSW collection config + default SPANN schema, reconciled with
/// `KnnIndex::Hnsw`: the default vector index is expected to carry an HNSW section
/// and no SPANN section.
#[test]
fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_hnsw() {
    let default_config = InternalCollectionConfiguration::default_hnsw();
    let default_schema = Schema::new_default(KnnIndex::Spann);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &default_config,
        KnnIndex::Hnsw,
    )
    .unwrap();

    let float_list = reconciled.defaults.float_list.as_ref();
    assert!(float_list.is_some());

    let vector_config = &float_list.unwrap().vector_index.as_ref().unwrap().config;
    assert!(vector_config.hnsw.is_some());
    assert!(vector_config.spann.is_none());
}
3843
/// Default HNSW collection config + default SPANN schema, reconciled with
/// `KnnIndex::Spann`: the default vector index is expected to carry a SPANN section
/// and no HNSW section.
#[test]
fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_spann() {
    let default_config = InternalCollectionConfiguration::default_hnsw();
    let default_schema = Schema::new_default(KnnIndex::Spann);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &default_config,
        KnnIndex::Spann,
    )
    .unwrap();

    let float_list = reconciled.defaults.float_list.as_ref();
    assert!(float_list.is_some());

    let vector_config = &float_list.unwrap().vector_index.as_ref().unwrap().config;
    assert!(vector_config.spann.is_some());
    assert!(vector_config.hnsw.is_none());
}
3877
/// Default SPANN collection config + default SPANN schema, reconciled with
/// `KnnIndex::Hnsw`: the default vector index is expected to carry an HNSW section
/// and no SPANN section.
#[test]
fn test_reconcile_double_default_spann_config_spann_schema_default_knn_hnsw() {
    let default_config = InternalCollectionConfiguration::default_spann();
    let default_schema = Schema::new_default(KnnIndex::Spann);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &default_config,
        KnnIndex::Hnsw,
    )
    .unwrap();

    let float_list = reconciled.defaults.float_list.as_ref();
    assert!(float_list.is_some());

    let vector_config = &float_list.unwrap().vector_index.as_ref().unwrap().config;
    assert!(vector_config.hnsw.is_some());
    assert!(vector_config.spann.is_none());
}
3911
/// Default SPANN collection config + default SPANN schema, reconciled with
/// `KnnIndex::Spann`: the default vector index is expected to carry a SPANN section,
/// no HNSW section, and no source key.
#[test]
fn test_reconcile_double_default_spann_config_spann_schema_default_knn_spann() {
    let default_config = InternalCollectionConfiguration::default_spann();
    let default_schema = Schema::new_default(KnnIndex::Spann);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &default_config,
        KnnIndex::Spann,
    )
    .unwrap();

    let float_list = reconciled.defaults.float_list.as_ref();
    assert!(float_list.is_some());

    let vector_config = &float_list.unwrap().vector_index.as_ref().unwrap().config;
    assert!(vector_config.spann.is_some());
    assert!(vector_config.hnsw.is_none());
    assert!(vector_config.source_key.is_none());
}
3959
/// Default SPANN collection config + default HNSW schema, reconciled with
/// `KnnIndex::Hnsw`: the default vector index is expected to carry an HNSW section
/// and no SPANN section.
#[test]
fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_hnsw() {
    let default_config = InternalCollectionConfiguration::default_spann();
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &default_config,
        KnnIndex::Hnsw,
    )
    .unwrap();

    let float_list = reconciled.defaults.float_list.as_ref();
    assert!(float_list.is_some());

    let vector_config = &float_list.unwrap().vector_index.as_ref().unwrap().config;
    assert!(vector_config.hnsw.is_some());
    assert!(vector_config.spann.is_none());
}
3993
/// Default SPANN collection config + default HNSW schema, reconciled with
/// `KnnIndex::Spann`: the default vector index is expected to carry a SPANN section
/// and no HNSW section.
#[test]
fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_spann() {
    let default_config = InternalCollectionConfiguration::default_spann();
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &default_config,
        KnnIndex::Spann,
    )
    .unwrap();

    let float_list = reconciled.defaults.float_list.as_ref();
    assert!(float_list.is_some());

    let vector_config = &float_list.unwrap().vector_index.as_ref().unwrap().config;
    assert!(vector_config.spann.is_some());
    assert!(vector_config.hnsw.is_none());
}
4027
/// Checks where `source_key` ends up on dense vector indexes: the schema-wide defaults
/// are expected to have no source key (both on fresh default schemas and after
/// reconciling with an explicit collection config), while the `EMBEDDING_KEY` override
/// is expected to point at `DOCUMENT_KEY`.
#[test]
fn test_defaults_source_key_not_document() {
    let hnsw_schema = Schema::new_default(KnnIndex::Hnsw);
    let spann_schema = Schema::new_default(KnnIndex::Spann);

    // Fresh default schemas: no source key on the defaults.
    let fresh_hnsw_index = hnsw_schema
        .defaults
        .float_list
        .as_ref()
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();
    assert!(fresh_hnsw_index.config.source_key.is_none());

    let fresh_spann_index = spann_schema
        .defaults
        .float_list
        .as_ref()
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();
    assert!(fresh_spann_index.config.source_key.is_none());

    // Reconcile the HNSW schema with an explicitly specified HNSW collection config.
    let hnsw_collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };
    let reconciled_hnsw = Schema::reconcile_with_collection_config(
        &hnsw_schema,
        &hnsw_collection_config,
        KnnIndex::Hnsw,
    )
    .unwrap();
    let reconciled_hnsw_index = reconciled_hnsw
        .defaults
        .float_list
        .as_ref()
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();
    assert!(reconciled_hnsw_index.config.source_key.is_none());

    // Reconcile the SPANN schema with an explicitly specified SPANN collection config.
    let spann_collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
            search_nprobe: 20,
            search_rng_factor: 3.0,
            search_rng_epsilon: 0.2,
            nreplica_count: 5,
            write_rng_factor: 2.0,
            write_rng_epsilon: 0.1,
            split_threshold: 2000,
            num_samples_kmeans: 200,
            initial_lambda: 0.8,
            reassign_neighbor_count: 100,
            merge_threshold: 800,
            num_centers_to_merge_to: 20,
            write_nprobe: 10,
            ef_construction: 400,
            ef_search: 60,
            max_neighbors: 24,
            space: Space::Cosine,
        }),
        embedding_function: None,
    };
    let reconciled_spann = Schema::reconcile_with_collection_config(
        &spann_schema,
        &spann_collection_config,
        KnnIndex::Spann,
    )
    .unwrap();
    let reconciled_spann_index = reconciled_spann
        .defaults
        .float_list
        .as_ref()
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();
    assert!(reconciled_spann_index.config.source_key.is_none());

    // The embedding key override is expected to reference the document key in both
    // reconciled schemas.
    let hnsw_embedding_index = reconciled_hnsw
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert_eq!(
        hnsw_embedding_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );

    let spann_embedding_index = reconciled_spann
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert_eq!(
        spann_embedding_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
}
4152
/// Builds schemas via `Schema::try_from` from explicit HNSW and SPANN collection configs
/// and checks that the defaults have no source key while the `EMBEDDING_KEY` override
/// points at `DOCUMENT_KEY`.
#[test]
fn test_try_from_source_key() {
    // --- HNSW collection config ---
    let hnsw_collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };
    let hnsw_schema = Schema::try_from(&hnsw_collection_config).unwrap();

    // Defaults: no source key expected.
    let hnsw_default_index = hnsw_schema
        .defaults
        .float_list
        .as_ref()
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();
    assert!(hnsw_default_index.config.source_key.is_none());

    // Embedding key override: document key expected as source.
    let hnsw_embedding_index = hnsw_schema
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert_eq!(
        hnsw_embedding_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );

    // --- SPANN collection config ---
    let spann_collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
            search_nprobe: 20,
            search_rng_factor: 3.0,
            search_rng_epsilon: 0.2,
            nreplica_count: 5,
            write_rng_factor: 2.0,
            write_rng_epsilon: 0.1,
            split_threshold: 2000,
            num_samples_kmeans: 200,
            initial_lambda: 0.8,
            reassign_neighbor_count: 100,
            merge_threshold: 800,
            num_centers_to_merge_to: 20,
            write_nprobe: 10,
            ef_construction: 400,
            ef_search: 60,
            max_neighbors: 24,
            space: Space::Cosine,
        }),
        embedding_function: None,
    };
    let spann_schema = Schema::try_from(&spann_collection_config).unwrap();

    // Defaults: no source key expected.
    let spann_default_index = spann_schema
        .defaults
        .float_list
        .as_ref()
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();
    assert!(spann_default_index.config.source_key.is_none());

    // Embedding key override: document key expected as source.
    let spann_embedding_index = spann_schema
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert_eq!(
        spann_embedding_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
}
4249
/// A collection config made of `InternalHnswConfiguration::default()` plus a known
/// embedding function named "default" is expected to count as a default config.
/// Reconciling it with a default HNSW schema under `KnnIndex::Spann` is expected to
/// produce SPANN defaults and to carry the embedding function onto both the defaults
/// and the `EMBEDDING_KEY` override.
#[test]
fn test_default_hnsw_with_default_embedding_function() {
    use crate::collection_configuration::EmbeddingFunctionNewConfiguration;

    // The embedding function used on the input and expected back on the output.
    let default_embedding_function = Some(EmbeddingFunctionConfiguration::Known(
        EmbeddingFunctionNewConfiguration {
            name: "default".to_string(),
            config: serde_json::json!({}),
        },
    ));

    let collection_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default()),
        embedding_function: default_embedding_function.clone(),
    };

    // The test expects this configuration to be reported as default.
    assert!(collection_config.is_default());

    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &collection_config,
        KnnIndex::Spann,
    )
    .unwrap();

    // Defaults: no source key expected.
    let default_index = reconciled
        .defaults
        .float_list
        .as_ref()
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();
    assert!(default_index.config.source_key.is_none());

    // Embedding key override: document key expected as source.
    let embedding_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert_eq!(
        embedding_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );

    // Reconciling with `KnnIndex::Spann` is expected to yield SPANN, not HNSW, defaults.
    let default_vector_config = &default_index.config;
    assert!(default_vector_config.spann.is_some());
    assert!(default_vector_config.hnsw.is_none());

    // The embedding function is expected on both the override and the defaults.
    assert_eq!(
        embedding_index.config.embedding_function,
        default_embedding_function
    );
    assert_eq!(
        default_index.config.embedding_function,
        default_embedding_function
    );
}
4325
/// When both the schema and the collection config have been customised,
/// `Schema::reconcile_schema_and_config` is expected to fail with
/// `SchemaError::ConfigAndSchemaConflict`.
#[test]
fn test_reconcile_with_collection_config_both_non_default() {
    // Customise the schema's string defaults.
    let mut custom_schema = Schema::new_default(KnnIndex::Hnsw);
    custom_schema.defaults.string = Some(StringValueType {
        fts_index: Some(FtsIndexType {
            enabled: true,
            config: FtsIndexConfig {},
        }),
        string_inverted_index: None,
    });

    // Customise the collection config's HNSW parameters.
    let mut custom_config = InternalCollectionConfiguration::default_hnsw();
    if let VectorIndexConfiguration::Hnsw(hnsw_config) = &mut custom_config.vector_index {
        hnsw_config.ef_construction = 500;
    }

    let outcome = Schema::reconcile_schema_and_config(
        Some(&custom_schema),
        Some(&custom_config),
        KnnIndex::Spann,
    );

    assert!(outcome.is_err());
    assert!(matches!(
        outcome,
        Err(SchemaError::ConfigAndSchemaConflict)
    ));
}
4357
/// Reconciles a default HNSW schema with an explicit HNSW collection config and checks
/// that every value from the config appears on the `EMBEDDING_KEY` vector index.
#[test]
fn test_reconcile_with_collection_config_hnsw_override() {
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let explicit_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };

    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &explicit_config,
        KnnIndex::Hnsw,
    )
    .unwrap();

    let embedding_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    let embedding_config = &embedding_index.config;

    // Top-level vector index settings.
    assert!(embedding_index.enabled);
    assert_eq!(embedding_config.space, Some(Space::L2));
    assert_eq!(
        embedding_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(
        embedding_config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );

    // Every HNSW parameter is expected to match the collection config.
    let hnsw_section = embedding_config.hnsw.as_ref().unwrap();
    assert_eq!(hnsw_section.ef_construction, Some(300));
    assert_eq!(hnsw_section.max_neighbors, Some(32));
    assert_eq!(hnsw_section.ef_search, Some(50));
    assert_eq!(hnsw_section.num_threads, Some(8));
    assert_eq!(hnsw_section.batch_size, Some(200));
    assert_eq!(hnsw_section.sync_threshold, Some(2000));
    assert_eq!(hnsw_section.resize_factor, Some(1.5));

    // No SPANN section is expected alongside HNSW.
    assert!(embedding_config.spann.is_none());
}
4413
/// Reconciles a default SPANN schema with an explicit SPANN collection config and checks
/// that every value from the config appears on the `EMBEDDING_KEY` vector index.
#[test]
fn test_reconcile_with_collection_config_spann_override() {
    let default_schema = Schema::new_default(KnnIndex::Spann);
    let explicit_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
            search_nprobe: 20,
            search_rng_factor: 3.0,
            search_rng_epsilon: 0.2,
            nreplica_count: 5,
            write_rng_factor: 2.0,
            write_rng_epsilon: 0.1,
            split_threshold: 2000,
            num_samples_kmeans: 200,
            initial_lambda: 0.8,
            reassign_neighbor_count: 100,
            merge_threshold: 800,
            num_centers_to_merge_to: 20,
            write_nprobe: 10,
            ef_construction: 400,
            ef_search: 60,
            max_neighbors: 24,
            space: Space::Cosine,
        }),
        embedding_function: None,
    };

    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &explicit_config,
        KnnIndex::Spann,
    )
    .unwrap();

    let embedding_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    let embedding_config = &embedding_index.config;

    // Top-level vector index settings.
    assert!(embedding_index.enabled);
    assert_eq!(embedding_config.space, Some(Space::Cosine));
    assert!(embedding_config.embedding_function.is_none());
    assert_eq!(
        embedding_config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );

    // No HNSW section is expected alongside SPANN.
    assert!(embedding_config.hnsw.is_none());

    // Every SPANN parameter is expected to match the collection config.
    let spann_section = embedding_config.spann.as_ref().unwrap();
    assert_eq!(spann_section.search_nprobe, Some(20));
    assert_eq!(spann_section.search_rng_factor, Some(3.0));
    assert_eq!(spann_section.search_rng_epsilon, Some(0.2));
    assert_eq!(spann_section.nreplica_count, Some(5));
    assert_eq!(spann_section.write_rng_factor, Some(2.0));
    assert_eq!(spann_section.write_rng_epsilon, Some(0.1));
    assert_eq!(spann_section.split_threshold, Some(2000));
    assert_eq!(spann_section.num_samples_kmeans, Some(200));
    assert_eq!(spann_section.initial_lambda, Some(0.8));
    assert_eq!(spann_section.reassign_neighbor_count, Some(100));
    assert_eq!(spann_section.merge_threshold, Some(800));
    assert_eq!(spann_section.num_centers_to_merge_to, Some(20));
    assert_eq!(spann_section.write_nprobe, Some(10));
    assert_eq!(spann_section.ef_construction, Some(400));
    assert_eq!(spann_section.ef_search, Some(60));
    assert_eq!(spann_section.max_neighbors, Some(24));
}
4484
/// Reconciling a default HNSW schema with an explicit HNSW collection config is expected
/// to write the config's values to both places: the schema-wide float-list defaults
/// (vector index disabled, no source key) and the `EMBEDDING_KEY` override (vector index
/// enabled, sourced from `DOCUMENT_KEY`).
#[test]
fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
    let default_schema = Schema::new_default(KnnIndex::Hnsw);

    let explicit_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };

    let reconciled = Schema::reconcile_with_collection_config(
        &default_schema,
        &explicit_config,
        KnnIndex::Hnsw,
    )
    .unwrap();

    // --- Schema-wide defaults ---
    let default_index = reconciled
        .defaults
        .float_list
        .as_ref()
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();

    assert!(!default_index.enabled);
    assert_eq!(default_index.config.space, Some(Space::L2));
    assert_eq!(
        default_index.config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert!(default_index.config.source_key.is_none());
    let default_hnsw_section = default_index.config.hnsw.as_ref().unwrap();
    assert_eq!(default_hnsw_section.ef_construction, Some(300));
    assert_eq!(default_hnsw_section.max_neighbors, Some(32));

    // --- Embedding key override ---
    let embedding_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();

    assert!(embedding_index.enabled);
    assert_eq!(embedding_index.config.space, Some(Space::L2));
    assert_eq!(
        embedding_index.config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(
        embedding_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
    let embedding_hnsw_section = embedding_index.config.hnsw.as_ref().unwrap();
    assert_eq!(embedding_hnsw_section.ef_construction, Some(300));
    assert_eq!(embedding_hnsw_section.max_neighbors, Some(32));
}
4558
/// `Schema::is_default` is expected to be true for freshly built HNSW and SPANN default
/// schemas, and false once a default index flag is flipped or an extra key is added.
#[test]
fn test_is_schema_default() {
    // Untouched default schemas.
    assert!(Schema::new_default(KnnIndex::Hnsw).is_default());
    assert!(Schema::new_default(KnnIndex::Spann).is_default());

    // Disabling the default string inverted index (when present) must break default-ness.
    let mut tweaked = Schema::new_default(KnnIndex::Hnsw);
    if let Some(inverted_index) = tweaked
        .defaults
        .string
        .as_mut()
        .and_then(|string_type| string_type.string_inverted_index.as_mut())
    {
        inverted_index.enabled = false;
    }
    assert!(!tweaked.is_default());

    // Adding a key override, even an empty one, must break default-ness.
    let mut with_extra_key = Schema::new_default(KnnIndex::Hnsw);
    with_extra_key
        .keys
        .insert("custom_key".to_string(), ValueTypes::default());
    assert!(!with_extra_key.is_default());
}
4585
/// Setting an explicit vector space, in the defaults or on the embedding key, makes a schema
/// non-default.
#[test]
fn test_is_schema_default_with_space() {
    let baseline = Schema::new_default(KnnIndex::Hnsw);
    assert!(baseline.is_default());

    // Space set on the default float-list vector index.
    let mut defaults_edited = Schema::new_default(KnnIndex::Hnsw);
    if let Some(vector_index) = defaults_edited
        .defaults
        .float_list
        .as_mut()
        .and_then(|float_list| float_list.vector_index.as_mut())
    {
        vector_index.config.space = Some(Space::Cosine);
    }
    assert!(!defaults_edited.is_default());

    // Space set on the embedding key's vector index.
    let mut embedding_edited = Schema::new_default(KnnIndex::Spann);
    if let Some(vector_index) = embedding_edited
        .keys
        .get_mut(EMBEDDING_KEY)
        .and_then(|value_types| value_types.float_list.as_mut())
        .and_then(|float_list| float_list.vector_index.as_mut())
    {
        vector_index.config.space = Some(Space::Cosine);
    }
    assert!(!embedding_edited.is_default());
}
4612
/// Setting an embedding function, in the defaults or on the embedding key, makes a schema
/// non-default.
#[test]
fn test_is_schema_default_with_embedding_function() {
    let baseline = Schema::new_default(KnnIndex::Hnsw);
    assert!(baseline.is_default());

    // Embedding function set on the default float-list vector index.
    let mut defaults_edited = Schema::new_default(KnnIndex::Hnsw);
    if let Some(vector_index) = defaults_edited
        .defaults
        .float_list
        .as_mut()
        .and_then(|float_list| float_list.vector_index.as_mut())
    {
        vector_index.config.embedding_function = Some(EmbeddingFunctionConfiguration::Legacy);
    }
    assert!(!defaults_edited.is_default());

    // Embedding function set on the embedding key's vector index.
    let mut embedding_edited = Schema::new_default(KnnIndex::Spann);
    if let Some(vector_index) = embedding_edited
        .keys
        .get_mut(EMBEDDING_KEY)
        .and_then(|value_types| value_types.float_list.as_mut())
        .and_then(|float_list| float_list.vector_index.as_mut())
    {
        vector_index.config.embedding_function = Some(EmbeddingFunctionConfiguration::Legacy);
    }
    assert!(!embedding_edited.is_default());
}
4642
/// Merging two schemas that override the same key for different value types keeps both
/// overrides on the merged key.
#[test]
fn test_add_merges_keys_by_value_type() {
    // Left side: string inverted index override on "custom_field".
    let mut left = Schema::new_default(KnnIndex::Hnsw);
    left.keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            string: Some(StringValueType {
                string_inverted_index: Some(StringInvertedIndexType {
                    enabled: true,
                    config: StringInvertedIndexConfig {},
                }),
                fts_index: None,
            }),
            ..Default::default()
        },
    );

    // Right side: float inverted index override on the same key.
    let mut right = Schema::new_default(KnnIndex::Hnsw);
    right.keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            float: Some(FloatValueType {
                float_inverted_index: Some(FloatInvertedIndexType {
                    enabled: true,
                    config: FloatInvertedIndexConfig {},
                }),
            }),
            ..Default::default()
        },
    );

    let merged = left.merge(&right).unwrap();
    let merged_entry = merged.keys.get("custom_field").unwrap();

    assert!(merged_entry.string.is_some());
    assert!(merged_entry.float.is_some());

    let string_index = merged_entry
        .string
        .as_ref()
        .unwrap()
        .string_inverted_index
        .as_ref()
        .unwrap();
    assert!(string_index.enabled);

    let float_index = merged_entry
        .float
        .as_ref()
        .unwrap()
        .float_inverted_index
        .as_ref()
        .unwrap();
    assert!(float_index.enabled);
}
4701
/// Merging schemas whose `defaults` differ fails with `SchemaError::DefaultsMismatch`.
#[test]
fn test_add_rejects_different_defaults() {
    let baseline = Schema::new_default(KnnIndex::Hnsw);

    // Same schema, but with the default string inverted index switched off.
    let mut divergent = Schema::new_default(KnnIndex::Hnsw);
    if let Some(string_index) = divergent
        .defaults
        .string
        .as_mut()
        .and_then(|string_type| string_type.string_inverted_index.as_mut())
    {
        string_index.enabled = false;
    }

    let merge_error = baseline.merge(&divergent).unwrap_err();
    assert!(matches!(merge_error, SchemaError::DefaultsMismatch));
}
4716
/// Merging schemas that disagree on the same index of the same key fails with
/// `SchemaError::ConfigurationConflict`.
#[test]
fn test_add_detects_conflicting_value_type_configuration() {
    // Builds a string override whose inverted index has the requested `enabled` flag.
    let string_override = |enabled: bool| ValueTypes {
        string: Some(StringValueType {
            string_inverted_index: Some(StringInvertedIndexType {
                enabled,
                config: StringInvertedIndexConfig {},
            }),
            fts_index: None,
        }),
        ..Default::default()
    };

    let mut enabled_side = Schema::new_default(KnnIndex::Hnsw);
    enabled_side
        .keys
        .insert("custom_field".to_string(), string_override(true));

    let mut disabled_side = Schema::new_default(KnnIndex::Hnsw);
    disabled_side
        .keys
        .insert("custom_field".to_string(), string_override(false));

    let merge_error = enabled_side.merge(&disabled_side).unwrap_err();
    assert!(matches!(
        merge_error,
        SchemaError::ConfigurationConflict { .. }
    ));
}
4753
/// The legacy JSON spelling (`#`/`$` prefixes, `key_overrides`) deserializes to the same
/// schema as the current spelling, and serialization emits only the current names.
#[test]
fn test_backward_compatibility_aliases() {
    // Legacy spelling: prefixed type/index names and "key_overrides".
    let legacy_json = r###"{
        "defaults": {
            "#string": {
                "$fts_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "#int": {
                "$int_inverted_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "#float_list": {
                "$vector_index": {
                    "enabled": true,
                    "config": {
                        "spann": {
                            "search_nprobe": 10
                        }
                    }
                }
            }
        },
        "key_overrides": {
            "#document": {
                "#string": {
                    "$fts_index": {
                        "enabled": false,
                        "config": {}
                    }
                }
            }
        }
    }"###;

    // Current spelling of the same document.
    let current_json = r###"{
        "defaults": {
            "string": {
                "fts_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "int": {
                "int_inverted_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "float_list": {
                "vector_index": {
                    "enabled": true,
                    "config": {
                        "spann": {
                            "search_nprobe": 10
                        }
                    }
                }
            }
        },
        "keys": {
            "#document": {
                "string": {
                    "fts_index": {
                        "enabled": false,
                        "config": {}
                    }
                }
            }
        }
    }"###;

    let schema_from_old: Schema = serde_json::from_str(legacy_json).unwrap();
    let schema_from_new: Schema = serde_json::from_str(current_json).unwrap();

    // Both spellings must produce the same schema.
    assert_eq!(schema_from_old, schema_from_new);

    // Spot-check the defaults parsed from the legacy document.
    let defaults = &schema_from_old.defaults;

    assert!(defaults.string.is_some());
    let default_string = defaults.string.as_ref().unwrap();
    assert!(default_string.fts_index.is_some());
    assert!(default_string.fts_index.as_ref().unwrap().enabled);

    assert!(defaults.int.is_some());
    assert!(defaults.int.as_ref().unwrap().int_inverted_index.is_some());

    assert!(defaults.float_list.is_some());
    assert!(defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .is_some());

    // The legacy "key_overrides" entry must end up under `keys`.
    assert!(schema_from_old.keys.contains_key(DOCUMENT_KEY));
    let document_entry = schema_from_old.keys.get(DOCUMENT_KEY).unwrap();
    assert!(document_entry.string.is_some());
    let document_fts = document_entry
        .string
        .as_ref()
        .unwrap()
        .fts_index
        .as_ref()
        .unwrap();
    assert!(!document_fts.enabled);

    // Serialization uses the current names...
    let serialized = serde_json::to_string(&schema_from_old).unwrap();
    for current_name in [
        r#""keys":"#,
        r#""string":"#,
        r#""fts_index":"#,
        r#""int_inverted_index":"#,
        r#""vector_index":"#,
    ] {
        assert!(serialized.contains(current_name));
    }

    // ...and never the legacy aliases.
    for legacy_name in [
        r#""key_overrides":"#,
        r###""#string":"###,
        r###""$fts_index":"###,
        r###""$int_inverted_index":"###,
        r###""$vector_index":"###,
    ] {
        assert!(!serialized.contains(legacy_name));
    }
}
4910
/// Exercises `HnswIndexConfig::validate` with accepted and rejected field values.
#[test]
fn test_hnsw_index_config_validation() {
    use validator::Validate;

    // True when the given configuration passes validation.
    let accepts = |config: HnswIndexConfig| config.validate().is_ok();

    // A typical, fully valid configuration.
    assert!(accepts(HnswIndexConfig {
        batch_size: Some(10),
        sync_threshold: Some(100),
        ef_construction: Some(100),
        max_neighbors: Some(16),
        ..Default::default()
    }));

    // batch_size of 1 is rejected.
    assert!(!accepts(HnswIndexConfig {
        batch_size: Some(1),
        ..Default::default()
    }));

    // sync_threshold of 1 is rejected.
    assert!(!accepts(HnswIndexConfig {
        sync_threshold: Some(1),
        ..Default::default()
    }));

    // 2 is accepted for both batch_size and sync_threshold.
    assert!(accepts(HnswIndexConfig {
        batch_size: Some(2),
        sync_threshold: Some(2),
        ..Default::default()
    }));

    // The default configuration is valid.
    assert!(accepts(HnswIndexConfig {
        ..Default::default()
    }));

    // Small values in the remaining fields are accepted.
    assert!(accepts(HnswIndexConfig {
        ef_construction: Some(1),
        max_neighbors: Some(1),
        ef_search: Some(1),
        num_threads: Some(1),
        resize_factor: Some(0.1),
        ..Default::default()
    }));
}
4964
/// Exercises `SpannIndexConfig::validate` field by field with accepted and rejected values.
#[test]
fn test_spann_index_config_validation() {
    use validator::Validate;

    // True when the given configuration passes validation.
    let accepts = |config: SpannIndexConfig| config.validate().is_ok();

    // A configuration with many fields set to accepted values.
    assert!(accepts(SpannIndexConfig {
        write_nprobe: Some(32),
        nreplica_count: Some(4),
        split_threshold: Some(100),
        merge_threshold: Some(50),
        reassign_neighbor_count: Some(32),
        num_centers_to_merge_to: Some(4),
        ef_construction: Some(100),
        ef_search: Some(100),
        max_neighbors: Some(32),
        search_rng_factor: Some(1.0),
        write_rng_factor: Some(1.0),
        search_rng_epsilon: Some(7.5),
        write_rng_epsilon: Some(7.5),
        ..Default::default()
    }));

    // write_nprobe: 200 is rejected.
    assert!(!accepts(SpannIndexConfig {
        write_nprobe: Some(200),
        ..Default::default()
    }));

    // split_threshold: 10 and 250 are both rejected.
    assert!(!accepts(SpannIndexConfig {
        split_threshold: Some(10),
        ..Default::default()
    }));
    assert!(!accepts(SpannIndexConfig {
        split_threshold: Some(250),
        ..Default::default()
    }));

    // nreplica_count: 10 is rejected.
    assert!(!accepts(SpannIndexConfig {
        nreplica_count: Some(10),
        ..Default::default()
    }));

    // reassign_neighbor_count: 100 is rejected.
    assert!(!accepts(SpannIndexConfig {
        reassign_neighbor_count: Some(100),
        ..Default::default()
    }));

    // merge_threshold: 5 and 150 are both rejected.
    assert!(!accepts(SpannIndexConfig {
        merge_threshold: Some(5),
        ..Default::default()
    }));
    assert!(!accepts(SpannIndexConfig {
        merge_threshold: Some(150),
        ..Default::default()
    }));

    // num_centers_to_merge_to: 10 is rejected.
    assert!(!accepts(SpannIndexConfig {
        num_centers_to_merge_to: Some(10),
        ..Default::default()
    }));

    // ef_construction: 300 is rejected.
    assert!(!accepts(SpannIndexConfig {
        ef_construction: Some(300),
        ..Default::default()
    }));

    // ef_search: 300 is rejected.
    assert!(!accepts(SpannIndexConfig {
        ef_search: Some(300),
        ..Default::default()
    }));

    // max_neighbors: 100 is rejected.
    assert!(!accepts(SpannIndexConfig {
        max_neighbors: Some(100),
        ..Default::default()
    }));

    // search_nprobe: 200 is rejected.
    assert!(!accepts(SpannIndexConfig {
        search_nprobe: Some(200),
        ..Default::default()
    }));

    // search_rng_factor: 0.9 and 1.1 are rejected, 1.0 is accepted.
    assert!(!accepts(SpannIndexConfig {
        search_rng_factor: Some(0.9),
        ..Default::default()
    }));
    assert!(!accepts(SpannIndexConfig {
        search_rng_factor: Some(1.1),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        search_rng_factor: Some(1.0),
        ..Default::default()
    }));

    // search_rng_epsilon: 4.0 and 11.0 are rejected, 7.5 is accepted.
    assert!(!accepts(SpannIndexConfig {
        search_rng_epsilon: Some(4.0),
        ..Default::default()
    }));
    assert!(!accepts(SpannIndexConfig {
        search_rng_epsilon: Some(11.0),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        search_rng_epsilon: Some(7.5),
        ..Default::default()
    }));

    // write_rng_factor: 0.9 and 1.1 are rejected, 1.0 is accepted.
    assert!(!accepts(SpannIndexConfig {
        write_rng_factor: Some(0.9),
        ..Default::default()
    }));
    assert!(!accepts(SpannIndexConfig {
        write_rng_factor: Some(1.1),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        write_rng_factor: Some(1.0),
        ..Default::default()
    }));

    // write_rng_epsilon: 4.0 and 11.0 are rejected, 7.5 is accepted.
    assert!(!accepts(SpannIndexConfig {
        write_rng_epsilon: Some(4.0),
        ..Default::default()
    }));
    assert!(!accepts(SpannIndexConfig {
        write_rng_epsilon: Some(11.0),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        write_rng_epsilon: Some(7.5),
        ..Default::default()
    }));

    // num_samples_kmeans: 1500 is rejected, 500 is accepted.
    assert!(!accepts(SpannIndexConfig {
        num_samples_kmeans: Some(1500),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        num_samples_kmeans: Some(500),
        ..Default::default()
    }));

    // initial_lambda: 150.0 and 50.0 are rejected, 100.0 is accepted.
    assert!(!accepts(SpannIndexConfig {
        initial_lambda: Some(150.0),
        ..Default::default()
    }));
    assert!(!accepts(SpannIndexConfig {
        initial_lambda: Some(50.0),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        initial_lambda: Some(100.0),
        ..Default::default()
    }));

    // The default configuration is valid.
    assert!(accepts(SpannIndexConfig {
        ..Default::default()
    }));
}
5191
/// Walks the builder API through create / inspect / delete: configures the vector index,
/// adds per-key inverted indexes, then deletes two of them and checks what remains enabled.
#[test]
fn test_builder_pattern_crud_workflow() {
    // --- Create -----------------------------------------------------------------------
    let hnsw_overrides = HnswIndexConfig {
        ef_construction: Some(200),
        max_neighbors: Some(32),
        ef_search: Some(50),
        num_threads: None,
        batch_size: None,
        sync_threshold: None,
        resize_factor: None,
    };
    let vector_config = VectorIndexConfig {
        space: Some(Space::Cosine),
        embedding_function: None,
        source_key: None,
        hnsw: Some(hnsw_overrides),
        spann: None,
    };

    let schema = Schema::new_default(KnnIndex::Hnsw)
        .create_index(None, IndexConfig::Vector(vector_config))
        .expect("vector config should succeed");
    let schema = schema
        .create_index(
            Some("category"),
            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
        )
        .expect("string inverted on key should succeed");
    let schema = schema
        .create_index(
            Some("year"),
            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
        )
        .expect("int inverted on key should succeed");
    let schema = schema
        .create_index(
            Some("rating"),
            IndexConfig::FloatInverted(FloatInvertedIndexConfig {}),
        )
        .expect("float inverted on key should succeed");
    let schema = schema
        .create_index(
            Some("is_active"),
            IndexConfig::BoolInverted(BoolInvertedIndexConfig {}),
        )
        .expect("bool inverted on key should succeed");

    // --- Read: the vector configuration is stored under the embedding key --------------
    assert!(schema.keys.contains_key(EMBEDDING_KEY));
    let embedding_entry = schema.keys.get(EMBEDDING_KEY).unwrap();
    assert!(embedding_entry.float_list.is_some());
    let embedding_vector_index = embedding_entry
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert!(embedding_vector_index.enabled);
    assert_eq!(embedding_vector_index.config.space, Some(Space::Cosine));
    let stored_hnsw = embedding_vector_index.config.hnsw.as_ref().unwrap();
    assert_eq!(stored_hnsw.ef_construction, Some(200));

    // --- Read: every per-key index produced a key entry --------------------------------
    for key in ["category", "year", "rating", "is_active"] {
        assert!(schema.keys.contains_key(key));
    }

    let category_entry = schema.keys.get("category").unwrap();
    assert!(category_entry.string.is_some());
    let category_index = category_entry
        .string
        .as_ref()
        .unwrap()
        .string_inverted_index
        .as_ref()
        .unwrap();
    assert!(category_index.enabled);

    let year_entry = schema.keys.get("year").unwrap();
    assert!(year_entry.int.is_some());
    let year_index = year_entry
        .int
        .as_ref()
        .unwrap()
        .int_inverted_index
        .as_ref()
        .unwrap();
    assert!(year_index.enabled);

    // --- Delete -----------------------------------------------------------------------
    let schema = schema
        .delete_index(
            Some("category"),
            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
        )
        .expect("delete string inverted should succeed");
    let schema = schema
        .delete_index(
            Some("year"),
            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
        )
        .expect("delete int inverted should succeed");

    // Deleted indexes are still present on their keys, but disabled.
    let category_index = schema
        .keys
        .get("category")
        .unwrap()
        .string
        .as_ref()
        .unwrap()
        .string_inverted_index
        .as_ref()
        .unwrap();
    assert!(!category_index.enabled);

    let year_index = schema
        .keys
        .get("year")
        .unwrap()
        .int
        .as_ref()
        .unwrap()
        .int_inverted_index
        .as_ref()
        .unwrap();
    assert!(!year_index.enabled);

    // Indexes that were not deleted stay enabled.
    let rating_index = schema
        .keys
        .get("rating")
        .unwrap()
        .float
        .as_ref()
        .unwrap()
        .float_inverted_index
        .as_ref()
        .unwrap();
    assert!(rating_index.enabled);

    let is_active_index = schema
        .keys
        .get("is_active")
        .unwrap()
        .boolean
        .as_ref()
        .unwrap()
        .bool_inverted_index
        .as_ref()
        .unwrap();
    assert!(is_active_index.enabled);
}
5342
/// Each invalid `create_index` request is rejected with the matching `SchemaBuilderError`.
#[test]
fn test_builder_create_index_validation_errors() {
    // Produces a fresh, empty sparse-vector index request.
    let sparse_config = || {
        IndexConfig::SparseVector(SparseVectorIndexConfig {
            embedding_function: None,
            source_key: None,
            bm25: None,
        })
    };

    // A vector index requested on a specific key.
    let vector_on_key = Schema::new_default(KnnIndex::Hnsw).create_index(
        Some("my_vectors"),
        IndexConfig::Vector(VectorIndexConfig {
            space: Some(Space::L2),
            embedding_function: None,
            source_key: None,
            hnsw: None,
            spann: None,
        }),
    );
    assert!(vector_on_key.is_err());
    assert!(matches!(
        vector_on_key.unwrap_err(),
        SchemaBuilderError::VectorIndexMustBeGlobal { key } if key == "my_vectors"
    ));

    // An FTS index requested on a specific key.
    let fts_on_key = Schema::new_default(KnnIndex::Hnsw)
        .create_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
    assert!(fts_on_key.is_err());
    assert!(matches!(
        fts_on_key.unwrap_err(),
        SchemaBuilderError::FtsIndexMustBeGlobal { key } if key == "my_text"
    ));

    // An index requested on the document key.
    let on_document_key = Schema::new_default(KnnIndex::Hnsw).create_index(
        Some(DOCUMENT_KEY),
        IndexConfig::StringInverted(StringInvertedIndexConfig {}),
    );
    assert!(on_document_key.is_err());
    assert!(matches!(
        on_document_key.unwrap_err(),
        SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
    ));

    // An index requested on the embedding key.
    let on_embedding_key = Schema::new_default(KnnIndex::Hnsw).create_index(
        Some(EMBEDDING_KEY),
        IndexConfig::IntInverted(IntInvertedIndexConfig {}),
    );
    assert!(on_embedding_key.is_err());
    assert!(matches!(
        on_embedding_key.unwrap_err(),
        SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
    ));

    // A sparse vector index requested without a key.
    let sparse_without_key =
        Schema::new_default(KnnIndex::Hnsw).create_index(None, sparse_config());
    assert!(sparse_without_key.is_err());
    assert!(matches!(
        sparse_without_key.unwrap_err(),
        SchemaBuilderError::SparseVectorRequiresKey
    ));

    // A second sparse vector index requested after a first one succeeded.
    let second_sparse = Schema::new_default(KnnIndex::Hnsw)
        .create_index(Some("sparse1"), sparse_config())
        .expect("first sparse should succeed")
        .create_index(Some("sparse2"), sparse_config());
    assert!(second_sparse.is_err());
    assert!(matches!(
        second_sparse.unwrap_err(),
        SchemaBuilderError::MultipleSparseVectorIndexes { existing_key } if existing_key == "sparse1"
    ));
}
5438
/// Each invalid `delete_index` request is rejected with the matching `SchemaBuilderError`.
#[test]
fn test_builder_delete_index_validation_errors() {
    // Produces a fresh, empty sparse-vector index request.
    let sparse_config = || {
        IndexConfig::SparseVector(SparseVectorIndexConfig {
            embedding_function: None,
            source_key: None,
            bm25: None,
        })
    };

    // Deletion targeting the embedding key.
    let on_embedding_key = Schema::new_default(KnnIndex::Hnsw).delete_index(
        Some(EMBEDDING_KEY),
        IndexConfig::StringInverted(StringInvertedIndexConfig {}),
    );
    assert!(on_embedding_key.is_err());
    assert!(matches!(
        on_embedding_key.unwrap_err(),
        SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
    ));

    // Deletion targeting the document key.
    let on_document_key = Schema::new_default(KnnIndex::Hnsw).delete_index(
        Some(DOCUMENT_KEY),
        IndexConfig::IntInverted(IntInvertedIndexConfig {}),
    );
    assert!(on_document_key.is_err());
    assert!(matches!(
        on_document_key.unwrap_err(),
        SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
    ));

    // Deletion of the vector index.
    let delete_vector = Schema::new_default(KnnIndex::Hnsw).delete_index(
        None,
        IndexConfig::Vector(VectorIndexConfig {
            space: None,
            embedding_function: None,
            source_key: None,
            hnsw: None,
            spann: None,
        }),
    );
    assert!(delete_vector.is_err());
    assert!(matches!(
        delete_vector.unwrap_err(),
        SchemaBuilderError::VectorIndexDeletionNotSupported
    ));

    // Deletion of the FTS index.
    let delete_fts = Schema::new_default(KnnIndex::Hnsw)
        .delete_index(None, IndexConfig::Fts(FtsIndexConfig {}));
    assert!(delete_fts.is_err());
    assert!(matches!(
        delete_fts.unwrap_err(),
        SchemaBuilderError::FtsIndexDeletionNotSupported
    ));

    // Deletion of a sparse vector index that was just created.
    let delete_sparse = Schema::new_default(KnnIndex::Hnsw)
        .create_index(Some("sparse"), sparse_config())
        .expect("create should succeed")
        .delete_index(Some("sparse"), sparse_config());
    assert!(delete_sparse.is_err());
    assert!(matches!(
        delete_sparse.unwrap_err(),
        SchemaBuilderError::SparseVectorIndexDeletionNotSupported
    ));
}
5518
/// A chain of create/delete calls leaves each key's index in the state set by the last call
/// that touched it.
#[test]
fn test_builder_pattern_chaining() {
    let schema = Schema::new_default(KnnIndex::Hnsw)
        .create_index(Some("tag1"), StringInvertedIndexConfig {}.into())
        .unwrap()
        .create_index(Some("tag2"), StringInvertedIndexConfig {}.into())
        .unwrap()
        .create_index(Some("tag3"), StringInvertedIndexConfig {}.into())
        .unwrap()
        .create_index(Some("count"), IntInvertedIndexConfig {}.into())
        .unwrap()
        .delete_index(Some("tag2"), StringInvertedIndexConfig {}.into())
        .unwrap()
        .create_index(Some("score"), FloatInvertedIndexConfig {}.into())
        .unwrap();

    // Lookup helpers: the `enabled` flag of a key's inverted index for each value type.
    let string_index_enabled = |key: &str| {
        schema
            .keys
            .get(key)
            .unwrap()
            .string
            .as_ref()
            .unwrap()
            .string_inverted_index
            .as_ref()
            .unwrap()
            .enabled
    };
    let int_index_enabled = |key: &str| {
        schema
            .keys
            .get(key)
            .unwrap()
            .int
            .as_ref()
            .unwrap()
            .int_inverted_index
            .as_ref()
            .unwrap()
            .enabled
    };
    let float_index_enabled = |key: &str| {
        schema
            .keys
            .get(key)
            .unwrap()
            .float
            .as_ref()
            .unwrap()
            .float_inverted_index
            .as_ref()
            .unwrap()
            .enabled
    };

    // tag1 was created and never touched again.
    assert!(string_index_enabled("tag1"));
    // tag2 was created and then deleted.
    assert!(!string_index_enabled("tag2"));
    // tag3, count and score were only created.
    assert!(string_index_enabled("tag3"));
    assert!(int_index_enabled("count"));
    assert!(float_index_enabled("score"));
}
5611
/// Pins the exact shape of `Schema::default()`: which default indexes are enabled, and the
/// two key entries (document and embedding) it ships with.
#[test]
fn test_schema_default_matches_python() {
    let schema = Schema::default();
    let defaults = &schema.defaults;

    // String defaults: FTS off, string inverted index on.
    assert!(defaults.string.is_some());
    let default_string = defaults.string.as_ref().unwrap();
    assert!(!default_string.fts_index.as_ref().unwrap().enabled);
    assert!(default_string.string_inverted_index.as_ref().unwrap().enabled);

    // Float-list defaults: vector index off, with an entirely unset configuration.
    assert!(defaults.float_list.is_some());
    let default_float_list = defaults.float_list.as_ref().unwrap();
    assert!(!default_float_list.vector_index.as_ref().unwrap().enabled);
    let default_vector_config = &default_float_list.vector_index.as_ref().unwrap().config;
    assert!(default_vector_config.space.is_none());
    assert!(default_vector_config.hnsw.is_none());
    assert!(default_vector_config.spann.is_none());
    assert!(default_vector_config.source_key.is_none());

    // Sparse-vector defaults: index off.
    assert!(defaults.sparse_vector.is_some());
    let default_sparse = defaults.sparse_vector.as_ref().unwrap();
    assert!(!default_sparse.sparse_vector_index.as_ref().unwrap().enabled);

    // Int, float and bool defaults: inverted index on.
    assert!(defaults.int.is_some());
    let default_int_index = defaults
        .int
        .as_ref()
        .unwrap()
        .int_inverted_index
        .as_ref()
        .unwrap();
    assert!(default_int_index.enabled);

    assert!(defaults.float.is_some());
    let default_float_index = defaults
        .float
        .as_ref()
        .unwrap()
        .float_inverted_index
        .as_ref()
        .unwrap();
    assert!(default_float_index.enabled);

    assert!(defaults.boolean.is_some());
    let default_bool_index = defaults
        .boolean
        .as_ref()
        .unwrap()
        .bool_inverted_index
        .as_ref()
        .unwrap();
    assert!(default_bool_index.enabled);

    // Document key: FTS on, string inverted index off.
    assert!(schema.keys.contains_key(DOCUMENT_KEY));
    let document_entry = schema.keys.get(DOCUMENT_KEY).unwrap();
    assert!(document_entry.string.is_some());
    let document_string = document_entry.string.as_ref().unwrap();
    assert!(document_string.fts_index.as_ref().unwrap().enabled);
    assert!(!document_string.string_inverted_index.as_ref().unwrap().enabled);

    // Embedding key: vector index on, sourced from the document key, otherwise unset.
    assert!(schema.keys.contains_key(EMBEDDING_KEY));
    let embedding_entry = schema.keys.get(EMBEDDING_KEY).unwrap();
    assert!(embedding_entry.float_list.is_some());
    let embedding_vector_index = embedding_entry
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert!(embedding_vector_index.enabled);
    assert_eq!(
        embedding_vector_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
    assert!(embedding_vector_index.config.space.is_none());
    assert!(embedding_vector_index.config.hnsw.is_none());
    assert!(embedding_vector_index.config.spann.is_none());

    // No key entries besides the two checked above.
    assert_eq!(schema.keys.len(), 2);
}
5731
/// `Schema::default()` can be extended through the builder without losing its own keys.
#[test]
fn test_schema_default_works_with_builder() {
    let schema = Schema::default()
        .create_index(Some("category"), StringInvertedIndexConfig {}.into())
        .expect("should succeed");

    // The new key sits next to the two key entries the default schema already had.
    for key in ["category", DOCUMENT_KEY, EMBEDDING_KEY] {
        assert!(schema.keys.contains_key(key));
    }
    assert_eq!(schema.keys.len(), 3);
}
5745
5746 #[cfg(feature = "testing")]
5747 mod proptests {
5748 use super::*;
5749 use crate::strategies::{
5750 embedding_function_strategy, internal_collection_configuration_strategy,
5751 internal_hnsw_configuration_strategy, internal_spann_configuration_strategy,
5752 knn_index_strategy, space_strategy, TEST_NAME_PATTERN,
5753 };
5754 use crate::{
5755 HnswIndexConfig, SpannIndexConfig, VectorIndexConfig, DOCUMENT_KEY, EMBEDDING_KEY,
5756 };
5757 use proptest::prelude::*;
5758 use proptest::strategy::BoxedStrategy;
5759 use proptest::string::string_regex;
5760 use serde_json::json;
5761
5762 fn default_embedding_function_strategy(
5763 ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
5764 proptest::option::of(prop_oneof![
5765 Just(EmbeddingFunctionConfiguration::Unknown),
5766 Just(EmbeddingFunctionConfiguration::Known(
5767 EmbeddingFunctionNewConfiguration {
5768 name: "default".to_string(),
5769 config: json!({ "alpha": 1 }),
5770 }
5771 )),
5772 ])
5773 }
5774
5775 fn sparse_embedding_function_strategy(
5776 ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
5777 let known_strategy = string_regex(TEST_NAME_PATTERN).unwrap().prop_map(|name| {
5778 EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
5779 name,
5780 config: json!({ "alpha": 1 }),
5781 })
5782 });
5783
5784 proptest::option::of(prop_oneof![
5785 Just(EmbeddingFunctionConfiguration::Unknown),
5786 known_strategy,
5787 ])
5788 }
5789
5790 fn non_default_internal_collection_configuration_strategy(
5791 ) -> impl Strategy<Value = InternalCollectionConfiguration> {
5792 internal_collection_configuration_strategy()
5793 .prop_filter("non-default configuration", |config| !config.is_default())
5794 }
5795
5796 fn partial_hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
5797 (
5798 proptest::option::of(1usize..=512),
5799 proptest::option::of(1usize..=128),
5800 proptest::option::of(1usize..=512),
5801 proptest::option::of(1usize..=64),
5802 proptest::option::of(2usize..=4096),
5803 proptest::option::of(2usize..=4096),
5804 proptest::option::of(prop_oneof![
5805 Just(0.5f64),
5806 Just(1.0f64),
5807 Just(1.5f64),
5808 Just(2.0f64)
5809 ]),
5810 )
5811 .prop_map(
5812 |(
5813 ef_construction,
5814 max_neighbors,
5815 ef_search,
5816 num_threads,
5817 batch_size,
5818 sync_threshold,
5819 resize_factor,
5820 )| HnswIndexConfig {
5821 ef_construction,
5822 max_neighbors,
5823 ef_search,
5824 num_threads,
5825 batch_size,
5826 sync_threshold,
5827 resize_factor,
5828 },
5829 )
5830 }
5831
5832 fn partial_spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
5833 let epsilon_strategy = prop_oneof![Just(5.0f32), Just(7.5f32), Just(10.0f32)];
5834 (
5835 (
5836 proptest::option::of(1u32..=128), proptest::option::of(Just(1.0f32)), proptest::option::of(epsilon_strategy.clone()), proptest::option::of(1u32..=8), proptest::option::of(Just(1.0f32)), proptest::option::of(epsilon_strategy), proptest::option::of(50u32..=200), proptest::option::of(1usize..=1000), ),
5845 (
5846 proptest::option::of(Just(100.0f32)), proptest::option::of(1u32..=64), proptest::option::of(25u32..=100), proptest::option::of(1u32..=8), proptest::option::of(1u32..=64), proptest::option::of(1usize..=200), proptest::option::of(1usize..=200), proptest::option::of(1usize..=64), ),
5855 )
5856 .prop_map(
5857 |(
5858 (
5859 search_nprobe,
5860 search_rng_factor,
5861 search_rng_epsilon,
5862 nreplica_count,
5863 write_rng_factor,
5864 write_rng_epsilon,
5865 split_threshold,
5866 num_samples_kmeans,
5867 ),
5868 (
5869 initial_lambda,
5870 reassign_neighbor_count,
5871 merge_threshold,
5872 num_centers_to_merge_to,
5873 write_nprobe,
5874 ef_construction,
5875 ef_search,
5876 max_neighbors,
5877 ),
5878 )| SpannIndexConfig {
5879 search_nprobe,
5880 search_rng_factor,
5881 search_rng_epsilon,
5882 nreplica_count,
5883 write_rng_factor,
5884 write_rng_epsilon,
5885 split_threshold,
5886 num_samples_kmeans,
5887 initial_lambda,
5888 reassign_neighbor_count,
5889 merge_threshold,
5890 num_centers_to_merge_to,
5891 write_nprobe,
5892 ef_construction,
5893 ef_search,
5894 max_neighbors,
5895 },
5896 )
5897 }
5898
5899 proptest! {
5900 #[test]
5901 fn merge_hnsw_configs_preserves_user_overrides(
5902 base in partial_hnsw_index_config_strategy(),
5903 user in partial_hnsw_index_config_strategy(),
5904 ) {
5905 let merged = Schema::merge_hnsw_configs(Some(&base), Some(&user))
5906 .expect("merge should return Some when both are Some");
5907
5908 if user.ef_construction.is_some() {
5910 prop_assert_eq!(merged.ef_construction, user.ef_construction);
5911 }
5912 if user.max_neighbors.is_some() {
5913 prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
5914 }
5915 if user.ef_search.is_some() {
5916 prop_assert_eq!(merged.ef_search, user.ef_search);
5917 }
5918 if user.num_threads.is_some() {
5919 prop_assert_eq!(merged.num_threads, user.num_threads);
5920 }
5921 if user.batch_size.is_some() {
5922 prop_assert_eq!(merged.batch_size, user.batch_size);
5923 }
5924 if user.sync_threshold.is_some() {
5925 prop_assert_eq!(merged.sync_threshold, user.sync_threshold);
5926 }
5927 if user.resize_factor.is_some() {
5928 prop_assert_eq!(merged.resize_factor, user.resize_factor);
5929 }
5930 }
5931
5932 #[test]
5933 fn merge_hnsw_configs_falls_back_to_base_when_user_is_none(
5934 base in partial_hnsw_index_config_strategy(),
5935 ) {
5936 let merged = Schema::merge_hnsw_configs(Some(&base), None)
5937 .expect("merge should return Some when base is Some");
5938
5939 prop_assert_eq!(merged, base);
5941 }
5942
5943 #[test]
5944 fn merge_hnsw_configs_returns_user_when_base_is_none(
5945 user in partial_hnsw_index_config_strategy(),
5946 ) {
5947 let merged = Schema::merge_hnsw_configs(None, Some(&user))
5948 .expect("merge should return Some when user is Some");
5949
5950 prop_assert_eq!(merged, user);
5952 }
5953
5954 #[test]
5955 fn merge_spann_configs_preserves_user_overrides(
5956 base in partial_spann_index_config_strategy(),
5957 user in partial_spann_index_config_strategy(),
5958 ) {
5959 let merged = Schema::merge_spann_configs(Some(&base), Some(&user))
5960 .expect("merge should return Some when both are Some");
5961
5962 if user.search_nprobe.is_some() {
5964 prop_assert_eq!(merged.search_nprobe, user.search_nprobe);
5965 }
5966 if user.search_rng_epsilon.is_some() {
5967 prop_assert_eq!(merged.search_rng_epsilon, user.search_rng_epsilon);
5968 }
5969 if user.split_threshold.is_some() {
5970 prop_assert_eq!(merged.split_threshold, user.split_threshold);
5971 }
5972 if user.ef_construction.is_some() {
5973 prop_assert_eq!(merged.ef_construction, user.ef_construction);
5974 }
5975 if user.ef_search.is_some() {
5976 prop_assert_eq!(merged.ef_search, user.ef_search);
5977 }
5978 if user.max_neighbors.is_some() {
5979 prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
5980 }
5981 }
5982
5983 #[test]
5984 fn merge_spann_configs_falls_back_to_base_when_user_is_none(
5985 base in partial_spann_index_config_strategy(),
5986 ) {
5987 let merged = Schema::merge_spann_configs(Some(&base), None)
5988 .expect("merge should return Some when base is Some");
5989
5990 prop_assert_eq!(merged, base);
5992 }
5993
5994 #[test]
5995 fn merge_vector_index_config_preserves_user_overrides(
5996 base in vector_index_config_strategy(),
5997 user in vector_index_config_strategy(),
5998 knn in knn_index_strategy(),
5999 ) {
6000 let merged = Schema::merge_vector_index_config(&base, &user, knn);
6001
6002 if user.space.is_some() {
6004 prop_assert_eq!(merged.space, user.space);
6005 }
6006 if user.embedding_function.is_some() {
6007 prop_assert_eq!(merged.embedding_function, user.embedding_function);
6008 }
6009 if user.source_key.is_some() {
6010 prop_assert_eq!(merged.source_key, user.source_key);
6011 }
6012
6013 match knn {
6015 KnnIndex::Hnsw => {
6016 if let (Some(_base_hnsw), Some(user_hnsw)) = (&base.hnsw, &user.hnsw) {
6017 let merged_hnsw = merged.hnsw.as_ref().expect("hnsw should be Some");
6018 if user_hnsw.ef_construction.is_some() {
6019 prop_assert_eq!(merged_hnsw.ef_construction, user_hnsw.ef_construction);
6020 }
6021 }
6022 }
6023 KnnIndex::Spann => {
6024 if let (Some(_base_spann), Some(user_spann)) = (&base.spann, &user.spann) {
6025 let merged_spann = merged.spann.as_ref().expect("spann should be Some");
6026 if user_spann.search_nprobe.is_some() {
6027 prop_assert_eq!(merged_spann.search_nprobe, user_spann.search_nprobe);
6028 }
6029 }
6030 }
6031 }
6032 }
6033 }
6034
6035 fn expected_vector_index_config(
6036 config: &InternalCollectionConfiguration,
6037 ) -> VectorIndexConfig {
6038 match &config.vector_index {
6039 VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
6040 space: Some(hnsw_config.space.clone()),
6041 embedding_function: config.embedding_function.clone(),
6042 source_key: None,
6043 hnsw: Some(HnswIndexConfig {
6044 ef_construction: Some(hnsw_config.ef_construction),
6045 max_neighbors: Some(hnsw_config.max_neighbors),
6046 ef_search: Some(hnsw_config.ef_search),
6047 num_threads: Some(hnsw_config.num_threads),
6048 batch_size: Some(hnsw_config.batch_size),
6049 sync_threshold: Some(hnsw_config.sync_threshold),
6050 resize_factor: Some(hnsw_config.resize_factor),
6051 }),
6052 spann: None,
6053 },
6054 VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
6055 space: Some(spann_config.space.clone()),
6056 embedding_function: config.embedding_function.clone(),
6057 source_key: None,
6058 hnsw: None,
6059 spann: Some(SpannIndexConfig {
6060 search_nprobe: Some(spann_config.search_nprobe),
6061 search_rng_factor: Some(spann_config.search_rng_factor),
6062 search_rng_epsilon: Some(spann_config.search_rng_epsilon),
6063 nreplica_count: Some(spann_config.nreplica_count),
6064 write_rng_factor: Some(spann_config.write_rng_factor),
6065 write_rng_epsilon: Some(spann_config.write_rng_epsilon),
6066 split_threshold: Some(spann_config.split_threshold),
6067 num_samples_kmeans: Some(spann_config.num_samples_kmeans),
6068 initial_lambda: Some(spann_config.initial_lambda),
6069 reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
6070 merge_threshold: Some(spann_config.merge_threshold),
6071 num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
6072 write_nprobe: Some(spann_config.write_nprobe),
6073 ef_construction: Some(spann_config.ef_construction),
6074 ef_search: Some(spann_config.ef_search),
6075 max_neighbors: Some(spann_config.max_neighbors),
6076 }),
6077 },
6078 }
6079 }
6080
6081 fn non_special_key_strategy() -> BoxedStrategy<String> {
6082 string_regex(TEST_NAME_PATTERN)
6083 .unwrap()
6084 .prop_filter("exclude special keys", |key| {
6085 key != DOCUMENT_KEY && key != EMBEDDING_KEY
6086 })
6087 .boxed()
6088 }
6089
6090 fn source_key_strategy() -> BoxedStrategy<Option<String>> {
6091 proptest::option::of(prop_oneof![
6092 Just(DOCUMENT_KEY.to_string()),
6093 string_regex(TEST_NAME_PATTERN).unwrap(),
6094 ])
6095 .boxed()
6096 }
6097
6098 fn fts_index_type_strategy() -> impl Strategy<Value = FtsIndexType> {
6099 any::<bool>().prop_map(|enabled| FtsIndexType {
6100 enabled,
6101 config: FtsIndexConfig {},
6102 })
6103 }
6104
6105 fn string_inverted_index_type_strategy() -> impl Strategy<Value = StringInvertedIndexType> {
6106 any::<bool>().prop_map(|enabled| StringInvertedIndexType {
6107 enabled,
6108 config: StringInvertedIndexConfig {},
6109 })
6110 }
6111
6112 fn string_value_type_strategy() -> BoxedStrategy<Option<StringValueType>> {
6113 proptest::option::of(
6114 (
6115 proptest::option::of(string_inverted_index_type_strategy()),
6116 proptest::option::of(fts_index_type_strategy()),
6117 )
6118 .prop_map(|(string_inverted_index, fts_index)| {
6119 StringValueType {
6120 string_inverted_index,
6121 fts_index,
6122 }
6123 }),
6124 )
6125 .boxed()
6126 }
6127
6128 fn float_inverted_index_type_strategy() -> impl Strategy<Value = FloatInvertedIndexType> {
6129 any::<bool>().prop_map(|enabled| FloatInvertedIndexType {
6130 enabled,
6131 config: FloatInvertedIndexConfig {},
6132 })
6133 }
6134
6135 fn float_value_type_strategy() -> BoxedStrategy<Option<FloatValueType>> {
6136 proptest::option::of(
6137 proptest::option::of(float_inverted_index_type_strategy()).prop_map(
6138 |float_inverted_index| FloatValueType {
6139 float_inverted_index,
6140 },
6141 ),
6142 )
6143 .boxed()
6144 }
6145
6146 fn int_inverted_index_type_strategy() -> impl Strategy<Value = IntInvertedIndexType> {
6147 any::<bool>().prop_map(|enabled| IntInvertedIndexType {
6148 enabled,
6149 config: IntInvertedIndexConfig {},
6150 })
6151 }
6152
6153 fn int_value_type_strategy() -> BoxedStrategy<Option<IntValueType>> {
6154 proptest::option::of(
6155 proptest::option::of(int_inverted_index_type_strategy())
6156 .prop_map(|int_inverted_index| IntValueType { int_inverted_index }),
6157 )
6158 .boxed()
6159 }
6160
6161 fn bool_inverted_index_type_strategy() -> impl Strategy<Value = BoolInvertedIndexType> {
6162 any::<bool>().prop_map(|enabled| BoolInvertedIndexType {
6163 enabled,
6164 config: BoolInvertedIndexConfig {},
6165 })
6166 }
6167
6168 fn bool_value_type_strategy() -> BoxedStrategy<Option<BoolValueType>> {
6169 proptest::option::of(
6170 proptest::option::of(bool_inverted_index_type_strategy()).prop_map(
6171 |bool_inverted_index| BoolValueType {
6172 bool_inverted_index,
6173 },
6174 ),
6175 )
6176 .boxed()
6177 }
6178
6179 fn sparse_vector_index_config_strategy() -> impl Strategy<Value = SparseVectorIndexConfig> {
6180 (
6181 sparse_embedding_function_strategy(),
6182 source_key_strategy(),
6183 proptest::option::of(any::<bool>()),
6184 )
6185 .prop_map(|(embedding_function, source_key, bm25)| {
6186 SparseVectorIndexConfig {
6187 embedding_function,
6188 source_key,
6189 bm25,
6190 }
6191 })
6192 }
6193
6194 fn sparse_vector_value_type_strategy() -> BoxedStrategy<Option<SparseVectorValueType>> {
6195 proptest::option::of(
6196 (
6197 any::<bool>(),
6198 proptest::option::of(sparse_vector_index_config_strategy()),
6199 )
6200 .prop_map(|(enabled, config)| SparseVectorValueType {
6201 sparse_vector_index: config.map(|cfg| SparseVectorIndexType {
6202 enabled,
6203 config: cfg,
6204 }),
6205 }),
6206 )
6207 .boxed()
6208 }
6209
6210 fn hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
6211 internal_hnsw_configuration_strategy().prop_map(|config| HnswIndexConfig {
6212 ef_construction: Some(config.ef_construction),
6213 max_neighbors: Some(config.max_neighbors),
6214 ef_search: Some(config.ef_search),
6215 num_threads: Some(config.num_threads),
6216 batch_size: Some(config.batch_size),
6217 sync_threshold: Some(config.sync_threshold),
6218 resize_factor: Some(config.resize_factor),
6219 })
6220 }
6221
6222 fn spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
6223 internal_spann_configuration_strategy().prop_map(|config| SpannIndexConfig {
6224 search_nprobe: Some(config.search_nprobe),
6225 search_rng_factor: Some(config.search_rng_factor),
6226 search_rng_epsilon: Some(config.search_rng_epsilon),
6227 nreplica_count: Some(config.nreplica_count),
6228 write_rng_factor: Some(config.write_rng_factor),
6229 write_rng_epsilon: Some(config.write_rng_epsilon),
6230 split_threshold: Some(config.split_threshold),
6231 num_samples_kmeans: Some(config.num_samples_kmeans),
6232 initial_lambda: Some(config.initial_lambda),
6233 reassign_neighbor_count: Some(config.reassign_neighbor_count),
6234 merge_threshold: Some(config.merge_threshold),
6235 num_centers_to_merge_to: Some(config.num_centers_to_merge_to),
6236 write_nprobe: Some(config.write_nprobe),
6237 ef_construction: Some(config.ef_construction),
6238 ef_search: Some(config.ef_search),
6239 max_neighbors: Some(config.max_neighbors),
6240 })
6241 }
6242
6243 fn vector_index_config_strategy() -> impl Strategy<Value = VectorIndexConfig> {
6244 (
6245 proptest::option::of(space_strategy()),
6246 embedding_function_strategy(),
6247 source_key_strategy(),
6248 proptest::option::of(hnsw_index_config_strategy()),
6249 proptest::option::of(spann_index_config_strategy()),
6250 )
6251 .prop_map(|(space, embedding_function, source_key, hnsw, spann)| {
6252 VectorIndexConfig {
6253 space,
6254 embedding_function,
6255 source_key,
6256 hnsw,
6257 spann,
6258 }
6259 })
6260 }
6261
6262 fn vector_index_type_strategy() -> impl Strategy<Value = VectorIndexType> {
6263 (any::<bool>(), vector_index_config_strategy())
6264 .prop_map(|(enabled, config)| VectorIndexType { enabled, config })
6265 }
6266
6267 fn float_list_value_type_strategy() -> BoxedStrategy<Option<FloatListValueType>> {
6268 proptest::option::of(
6269 proptest::option::of(vector_index_type_strategy())
6270 .prop_map(|vector_index| FloatListValueType { vector_index }),
6271 )
6272 .boxed()
6273 }
6274
6275 fn value_types_strategy() -> BoxedStrategy<ValueTypes> {
6276 (
6277 string_value_type_strategy(),
6278 float_list_value_type_strategy(),
6279 sparse_vector_value_type_strategy(),
6280 int_value_type_strategy(),
6281 float_value_type_strategy(),
6282 bool_value_type_strategy(),
6283 )
6284 .prop_map(
6285 |(string, float_list, sparse_vector, int, float, boolean)| ValueTypes {
6286 string,
6287 float_list,
6288 sparse_vector,
6289 int,
6290 float,
6291 boolean,
6292 },
6293 )
6294 .boxed()
6295 }
6296
6297 fn schema_strategy() -> BoxedStrategy<Schema> {
6298 (
6299 value_types_strategy(),
6300 proptest::collection::hash_map(
6301 non_special_key_strategy(),
6302 value_types_strategy(),
6303 0..=3,
6304 ),
6305 proptest::option::of(value_types_strategy()),
6306 proptest::option::of(value_types_strategy()),
6307 )
6308 .prop_map(
6309 |(defaults, mut extra_keys, document_override, embedding_override)| {
6310 if let Some(doc) = document_override {
6311 extra_keys.insert(DOCUMENT_KEY.to_string(), doc);
6312 }
6313 if let Some(embed) = embedding_override {
6314 extra_keys.insert(EMBEDDING_KEY.to_string(), embed);
6315 }
6316 Schema {
6317 defaults,
6318 keys: extra_keys,
6319 cmek: None,
6320 source_attached_function_id: None,
6321 }
6322 },
6323 )
6324 .boxed()
6325 }
6326
6327 fn force_non_default_schema(mut schema: Schema) -> Schema {
6328 if schema.is_default() {
6329 if let Some(string_value) = schema
6330 .defaults
6331 .string
6332 .as_mut()
6333 .and_then(|string_value| string_value.string_inverted_index.as_mut())
6334 {
6335 string_value.enabled = !string_value.enabled;
6336 } else {
6337 schema.defaults.string = Some(StringValueType {
6338 string_inverted_index: Some(StringInvertedIndexType {
6339 enabled: false,
6340 config: StringInvertedIndexConfig {},
6341 }),
6342 fts_index: None,
6343 });
6344 }
6345 }
6346 schema
6347 }
6348
6349 fn non_default_schema_strategy() -> BoxedStrategy<Schema> {
6350 schema_strategy().prop_map(force_non_default_schema).boxed()
6351 }
6352
6353 fn extract_vector_configs(schema: &Schema) -> (VectorIndexConfig, VectorIndexConfig) {
6354 let defaults = schema
6355 .defaults
6356 .float_list
6357 .as_ref()
6358 .and_then(|fl| fl.vector_index.as_ref())
6359 .map(|vi| vi.config.clone())
6360 .expect("defaults vector index missing");
6361
6362 let embedding = schema
6363 .keys
6364 .get(EMBEDDING_KEY)
6365 .and_then(|value_types| value_types.float_list.as_ref())
6366 .and_then(|fl| fl.vector_index.as_ref())
6367 .map(|vi| vi.config.clone())
6368 .expect("#embedding vector index missing");
6369
6370 (defaults, embedding)
6371 }
6372
6373 proptest! {
6374 #[test]
6375 fn reconcile_schema_and_config_matches_convert_for_config_only(
6376 config in internal_collection_configuration_strategy(),
6377 knn in knn_index_strategy(),
6378 ) {
6379 let result = Schema::reconcile_schema_and_config(None, Some(&config), knn)
6380 .expect("reconciliation should succeed");
6381
6382 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6383 let expected_config = expected_vector_index_config(&config);
6384
6385 prop_assert_eq!(defaults_vi, expected_config.clone());
6386
6387 let mut expected_embedding_config = expected_config;
6388 expected_embedding_config.source_key = Some(DOCUMENT_KEY.to_string());
6389 prop_assert_eq!(embedding_vi, expected_embedding_config);
6390
6391 prop_assert_eq!(result.keys.len(), 2);
6392 }
6393 }
6394
6395 proptest! {
6396 #[test]
6397 fn reconcile_schema_and_config_errors_when_both_non_default(
6398 config in non_default_internal_collection_configuration_strategy(),
6399 knn in knn_index_strategy(),
6400 ) {
6401 let schema = Schema::try_from(&config)
6402 .expect("conversion should succeed");
6403 prop_assume!(!schema.is_default());
6404
6405 let result = Schema::reconcile_schema_and_config(Some(&schema), Some(&config), knn);
6406
6407 prop_assert!(matches!(result, Err(SchemaError::ConfigAndSchemaConflict)));
6408 }
6409 }
6410
6411 proptest! {
6412 #[test]
6413 fn reconcile_schema_and_config_matches_schema_only_path(
6414 schema in schema_strategy(),
6415 knn in knn_index_strategy(),
6416 ) {
6417 let result = Schema::reconcile_schema_and_config(Some(&schema), None, knn)
6418 .expect("reconciliation should succeed");
6419
6420 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6421
6422 if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
6424 if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
6425 if let Some(schema_space) = &schema_vi.config.space {
6427 prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
6428 }
6429 if let Some(schema_ef) = &schema_vi.config.embedding_function {
6430 prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
6431 }
6432 match knn {
6434 KnnIndex::Hnsw => {
6435 if let Some(schema_hnsw) = &schema_vi.config.hnsw {
6436 if let Some(merged_hnsw) = &defaults_vi.hnsw {
6437 if let Some(schema_ef_construction) = schema_hnsw.ef_construction {
6438 prop_assert_eq!(merged_hnsw.ef_construction, Some(schema_ef_construction));
6439 }
6440 }
6441 }
6442 }
6443 KnnIndex::Spann => {
6444 if let Some(schema_spann) = &schema_vi.config.spann {
6445 if let Some(merged_spann) = &defaults_vi.spann {
6446 if let Some(schema_search_nprobe) = schema_spann.search_nprobe {
6447 prop_assert_eq!(merged_spann.search_nprobe, Some(schema_search_nprobe));
6448 }
6449 }
6450 }
6451 }
6452 }
6453 }
6454 }
6455
6456 if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
6458 if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
6459 if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
6460 if let Some(schema_space) = &embedding_vi_type.config.space {
6461 prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
6462 }
6463 }
6464 }
6465 }
6466 }
6467 }
6468
6469 proptest! {
6470 #[test]
6471 fn reconcile_schema_and_config_with_default_schema_and_default_config_applies_embedding_function(
6472 embedding_function in default_embedding_function_strategy(),
6473 knn in knn_index_strategy(),
6474 ) {
6475 let schema = Schema::new_default(knn);
6476 let mut config = match knn {
6477 KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
6478 KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
6479 };
6480 config.embedding_function = embedding_function.clone();
6481
6482 let result = Schema::reconcile_schema_and_config(
6483 Some(&schema),
6484 Some(&config),
6485 knn,
6486 )
6487 .expect("reconciliation should succeed");
6488
6489 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6490
6491 if let Some(ef) = embedding_function {
6493 prop_assert_eq!(defaults_vi.embedding_function, Some(ef.clone()));
6494 prop_assert_eq!(embedding_vi.embedding_function, Some(ef));
6495 } else {
6496 prop_assert_eq!(defaults_vi.embedding_function, None);
6498 prop_assert_eq!(embedding_vi.embedding_function, None);
6499 }
6500 }
6501 }
6502
6503 proptest! {
6504 #[test]
6505 fn reconcile_schema_and_config_with_default_config_keeps_non_default_schema(
6506 schema in non_default_schema_strategy(),
6507 knn in knn_index_strategy(),
6508 ) {
6509 let default_config = match knn {
6510 KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
6511 KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
6512 };
6513
6514 let result = Schema::reconcile_schema_and_config(
6515 Some(&schema),
6516 Some(&default_config),
6517 knn,
6518 )
6519 .expect("reconciliation should succeed");
6520
6521 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6522
6523 if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
6526 if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
6527 if let Some(schema_space) = &schema_vi.config.space {
6528 prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
6529 }
6530 if let Some(schema_ef) = &schema_vi.config.embedding_function {
6531 prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
6532 }
6533 }
6534 }
6535
6536 if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
6538 if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
6539 if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
6540 if let Some(schema_space) = &embedding_vi_type.config.space {
6541 prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
6542 }
6543 }
6544 }
6545 }
6546 }
6547 }
6548 }
6549}