1use chroma_error::{ChromaError, ErrorCodes};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use thiserror::Error;
5use validator::Validate;
6
7use crate::collection_configuration::{
8 EmbeddingFunctionConfiguration, InternalCollectionConfiguration, VectorIndexConfiguration,
9};
10use crate::hnsw_configuration::Space;
11use crate::metadata::{MetadataComparison, MetadataValueType, Where};
12use crate::operator::QueryVector;
13use crate::{
14 default_batch_size, default_construction_ef, default_construction_ef_spann,
15 default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
16 default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
17 default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
18 default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
19 default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
20 default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor,
21 InternalSpannConfiguration, KnnIndex,
22};
23
24impl ChromaError for SchemaError {
25 fn code(&self) -> ErrorCodes {
26 ErrorCodes::Internal
27 }
28}
29
/// Errors produced while interpreting or combining schemas.
#[derive(Debug, Error)]
pub enum SchemaError {
    /// No index configuration exists for metadata key `key` with value type
    /// `value_type`.
    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
    MissingIndexConfiguration { key: String, value_type: String },
    /// Reconciling or merging schemas failed; `reason` carries the details.
    #[error("Schema reconciliation failed: {reason}")]
    InvalidSchema { reason: String },
}
37
/// Errors produced while validating a metadata filter.
#[derive(Debug, Error)]
pub enum FilterValidationError {
    /// The filter targets metadata key `key` with a value of type
    /// `value_type`, but indexing is disabled for it.
    #[error(
        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
    )]
    IndexingDisabled {
        key: String,
        value_type: MetadataValueType,
    },
    /// A wrapped [`SchemaError`]; its message is displayed unchanged
    /// (`transparent`) and it converts automatically via `From`.
    #[error(transparent)]
    Schema(#[from] SchemaError),
}
50
51impl ChromaError for FilterValidationError {
52 fn code(&self) -> ErrorCodes {
53 match self {
54 FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
55 FilterValidationError::Schema(_) => ErrorCodes::Internal,
56 }
57 }
58}
59
// Value type names. These strings match the serde field names used by
// `ValueTypes` ("string", "int", "bool", "float", "float_list",
// "sparse_vector").
pub const STRING_VALUE_NAME: &str = "string";
pub const INT_VALUE_NAME: &str = "int";
pub const BOOL_VALUE_NAME: &str = "bool";
pub const FLOAT_VALUE_NAME: &str = "float";
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// Index names. These strings match the serde field names used by the
// per-value-type structs (e.g. `StringValueType::fts_index`).
pub const FTS_INDEX_NAME: &str = "fts_index";
pub const VECTOR_INDEX_NAME: &str = "vector_index";
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

// Special keys that `Schema::new_default` registers in `Schema::keys`.
// `DOCUMENT_KEY` is also the default `source_key` of the `EMBEDDING_KEY`
// vector index.
pub const DOCUMENT_KEY: &str = "#document";
pub const EMBEDDING_KEY: &str = "#embedding";
85
/// Index schema: a set of default per-value-type index settings plus
/// per-key settings.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct Schema {
    /// Index settings per value type that act as the schema-wide defaults.
    pub defaults: ValueTypes,
    /// Index settings for individual keys. Serialized as `keys`; the legacy
    /// name `key_overrides` is also accepted when deserializing.
    #[serde(rename = "keys", alias = "key_overrides")]
    pub keys: HashMap<String, ValueTypes>,
}
104
105pub fn is_embedding_function_default(
106 embedding_function: &Option<EmbeddingFunctionConfiguration>,
107) -> bool {
108 match embedding_function {
109 None => true,
110 Some(embedding_function) => embedding_function.is_default(),
111 }
112}
113
114pub fn is_space_default(space: &Option<Space>) -> bool {
116 match space {
117 None => true, Some(s) => *s == default_space(), }
120}
121
122pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
124 hnsw_config.ef_construction == Some(default_construction_ef())
125 && hnsw_config.ef_search == Some(default_search_ef())
126 && hnsw_config.max_neighbors == Some(default_m())
127 && hnsw_config.num_threads == Some(default_num_threads())
128 && hnsw_config.batch_size == Some(default_batch_size())
129 && hnsw_config.sync_threshold == Some(default_sync_threshold())
130 && hnsw_config.resize_factor == Some(default_resize_factor())
131}
132
/// Index settings grouped by value type.
///
/// Every field is optional. `None` fields are omitted when serializing, and
/// each field also accepts a `#`-prefixed alias when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct ValueTypes {
    /// Settings for string values.
    #[serde(
        rename = "string",
        alias = "#string",
        skip_serializing_if = "Option::is_none"
    )]
    pub string: Option<StringValueType>,

    /// Settings for float-list values (holds the vector index).
    #[serde(
        rename = "float_list",
        alias = "#float_list",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_list: Option<FloatListValueType>,

    /// Settings for sparse-vector values.
    #[serde(
        rename = "sparse_vector",
        alias = "#sparse_vector",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector: Option<SparseVectorValueType>,

    /// Settings for integer values.
    #[serde(
        rename = "int",
        alias = "#int",
        skip_serializing_if = "Option::is_none"
    )]
    pub int: Option<IntValueType>,

    /// Settings for float values.
    #[serde(
        rename = "float",
        alias = "#float",
        skip_serializing_if = "Option::is_none"
    )]
    pub float: Option<FloatValueType>,

    /// Settings for boolean values. Note the serialized name is `bool`, not
    /// `boolean`.
    #[serde(
        rename = "bool",
        alias = "#bool",
        skip_serializing_if = "Option::is_none"
    )]
    pub boolean: Option<BoolValueType>,
}
186
/// Indexes that can be configured for string values.
///
/// `None` fields are omitted when serializing; each field also accepts a
/// `$`-prefixed alias when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringValueType {
    /// Full-text-search index settings.
    #[serde(
        rename = "fts_index",
        alias = "$fts_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub fts_index: Option<FtsIndexType>,

    /// String inverted index settings.
    #[serde(
        rename = "string_inverted_index",
        alias = "$string_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub string_inverted_index: Option<StringInvertedIndexType>,
}
205
/// Indexes that can be configured for float-list values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatListValueType {
    /// Vector index settings. Omitted from serialized output when `None`;
    /// `$vector_index` is accepted as an alias when deserializing.
    #[serde(
        rename = "vector_index",
        alias = "$vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub vector_index: Option<VectorIndexType>,
}
217
/// Indexes that can be configured for sparse-vector values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorValueType {
    /// Sparse vector index settings. Omitted from serialized output when
    /// `None`; `$sparse_vector_index` is accepted as an alias when
    /// deserializing.
    #[serde(
        rename = "sparse_vector_index",
        alias = "$sparse_vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector_index: Option<SparseVectorIndexType>,
}
229
/// Indexes that can be configured for integer values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntValueType {
    /// Integer inverted index settings. Omitted from serialized output when
    /// `None`; `$int_inverted_index` is accepted as an alias when
    /// deserializing.
    #[serde(
        rename = "int_inverted_index",
        alias = "$int_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub int_inverted_index: Option<IntInvertedIndexType>,
}
242
/// Indexes that can be configured for float values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatValueType {
    /// Float inverted index settings. Omitted from serialized output when
    /// `None`; `$float_inverted_index` is accepted as an alias when
    /// deserializing.
    #[serde(
        rename = "float_inverted_index",
        alias = "$float_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_inverted_index: Option<FloatInvertedIndexType>,
}
254
/// Indexes that can be configured for boolean values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolValueType {
    /// Boolean inverted index settings. Omitted from serialized output when
    /// `None`; `$bool_inverted_index` is accepted as an alias when
    /// deserializing.
    #[serde(
        rename = "bool_inverted_index",
        alias = "$bool_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub bool_inverted_index: Option<BoolInvertedIndexType>,
}
266
/// Full-text-search index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FtsIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: FtsIndexConfig,
}
274
/// Vector index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct VectorIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration (space, embedding function, source key,
    /// HNSW/SPANN parameters).
    pub config: VectorIndexConfig,
}
281
/// Sparse vector index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration (embedding function, source key, BM25
    /// flag).
    pub config: SparseVectorIndexConfig,
}
288
/// String inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: StringInvertedIndexConfig,
}
295
/// Integer inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: IntInvertedIndexConfig,
}
302
/// Float inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: FloatInvertedIndexConfig,
}
309
/// Boolean inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: BoolInvertedIndexConfig,
}
316
317impl Schema {
318 pub fn new_default(default_knn_index: KnnIndex) -> Self {
320 let vector_config = VectorIndexType {
322 enabled: false,
323 config: VectorIndexConfig {
324 space: Some(default_space()),
325 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
326 source_key: None,
327 hnsw: match default_knn_index {
328 KnnIndex::Hnsw => Some(HnswIndexConfig {
329 ef_construction: Some(default_construction_ef()),
330 max_neighbors: Some(default_m()),
331 ef_search: Some(default_search_ef()),
332 num_threads: Some(default_num_threads()),
333 batch_size: Some(default_batch_size()),
334 sync_threshold: Some(default_sync_threshold()),
335 resize_factor: Some(default_resize_factor()),
336 }),
337 KnnIndex::Spann => None,
338 },
339 spann: match default_knn_index {
340 KnnIndex::Hnsw => None,
341 KnnIndex::Spann => Some(SpannIndexConfig {
342 search_nprobe: Some(default_search_nprobe()),
343 search_rng_factor: Some(default_search_rng_factor()),
344 search_rng_epsilon: Some(default_search_rng_epsilon()),
345 nreplica_count: Some(default_nreplica_count()),
346 write_rng_factor: Some(default_write_rng_factor()),
347 write_rng_epsilon: Some(default_write_rng_epsilon()),
348 split_threshold: Some(default_split_threshold()),
349 num_samples_kmeans: Some(default_num_samples_kmeans()),
350 initial_lambda: Some(default_initial_lambda()),
351 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
352 merge_threshold: Some(default_merge_threshold()),
353 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
354 write_nprobe: Some(default_write_nprobe()),
355 ef_construction: Some(default_construction_ef_spann()),
356 ef_search: Some(default_search_ef_spann()),
357 max_neighbors: Some(default_m_spann()),
358 }),
359 },
360 },
361 };
362
363 let defaults = ValueTypes {
365 string: Some(StringValueType {
366 string_inverted_index: Some(StringInvertedIndexType {
367 enabled: true,
368 config: StringInvertedIndexConfig {},
369 }),
370 fts_index: Some(FtsIndexType {
371 enabled: false,
372 config: FtsIndexConfig {},
373 }),
374 }),
375 float: Some(FloatValueType {
376 float_inverted_index: Some(FloatInvertedIndexType {
377 enabled: true,
378 config: FloatInvertedIndexConfig {},
379 }),
380 }),
381 int: Some(IntValueType {
382 int_inverted_index: Some(IntInvertedIndexType {
383 enabled: true,
384 config: IntInvertedIndexConfig {},
385 }),
386 }),
387 boolean: Some(BoolValueType {
388 bool_inverted_index: Some(BoolInvertedIndexType {
389 enabled: true,
390 config: BoolInvertedIndexConfig {},
391 }),
392 }),
393 float_list: Some(FloatListValueType {
394 vector_index: Some(vector_config),
395 }),
396 sparse_vector: Some(SparseVectorValueType {
397 sparse_vector_index: Some(SparseVectorIndexType {
398 enabled: false,
399 config: SparseVectorIndexConfig {
400 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
401 source_key: None,
402 bm25: Some(false),
403 },
404 }),
405 }),
406 };
407
408 let mut keys = HashMap::new();
410
411 let embedding_defaults = ValueTypes {
413 float_list: Some(FloatListValueType {
414 vector_index: Some(VectorIndexType {
415 enabled: true,
416 config: VectorIndexConfig {
417 space: Some(default_space()),
418 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
419 source_key: Some(DOCUMENT_KEY.to_string()),
420 hnsw: match default_knn_index {
421 KnnIndex::Hnsw => Some(HnswIndexConfig {
422 ef_construction: Some(default_construction_ef()),
423 max_neighbors: Some(default_m()),
424 ef_search: Some(default_search_ef()),
425 num_threads: Some(default_num_threads()),
426 batch_size: Some(default_batch_size()),
427 sync_threshold: Some(default_sync_threshold()),
428 resize_factor: Some(default_resize_factor()),
429 }),
430 KnnIndex::Spann => None,
431 },
432 spann: match default_knn_index {
433 KnnIndex::Hnsw => None,
434 KnnIndex::Spann => Some(SpannIndexConfig {
435 search_nprobe: Some(default_search_nprobe()),
436 search_rng_factor: Some(default_search_rng_factor()),
437 search_rng_epsilon: Some(default_search_rng_epsilon()),
438 nreplica_count: Some(default_nreplica_count()),
439 write_rng_factor: Some(default_write_rng_factor()),
440 write_rng_epsilon: Some(default_write_rng_epsilon()),
441 split_threshold: Some(default_split_threshold()),
442 num_samples_kmeans: Some(default_num_samples_kmeans()),
443 initial_lambda: Some(default_initial_lambda()),
444 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
445 merge_threshold: Some(default_merge_threshold()),
446 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
447 write_nprobe: Some(default_write_nprobe()),
448 ef_construction: Some(default_construction_ef_spann()),
449 ef_search: Some(default_search_ef_spann()),
450 max_neighbors: Some(default_m_spann()),
451 }),
452 },
453 },
454 }),
455 }),
456 ..Default::default()
457 };
458 keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
459
460 let document_defaults = ValueTypes {
462 string: Some(StringValueType {
463 fts_index: Some(FtsIndexType {
464 enabled: true,
465 config: FtsIndexConfig {},
466 }),
467 string_inverted_index: Some(StringInvertedIndexType {
468 enabled: false,
469 config: StringInvertedIndexConfig {},
470 }),
471 }),
472 ..Default::default()
473 };
474 keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
475
476 Schema { defaults, keys }
477 }
478
479 pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
480 let to_internal = |vector_index: &VectorIndexType| {
481 let space = vector_index.config.space.clone();
482 vector_index
483 .config
484 .spann
485 .clone()
486 .map(|config| (space.as_ref(), &config).into())
487 };
488
489 self.keys
490 .get(EMBEDDING_KEY)
491 .and_then(|value_types| value_types.float_list.as_ref())
492 .and_then(|float_list| float_list.vector_index.as_ref())
493 .and_then(to_internal)
494 .or_else(|| {
495 self.defaults
496 .float_list
497 .as_ref()
498 .and_then(|float_list| float_list.vector_index.as_ref())
499 .and_then(to_internal)
500 })
501 }
502
503 pub fn reconcile_with_defaults(user_schema: Option<Schema>) -> Result<Self, String> {
510 let default_schema = Schema::new_default(KnnIndex::Spann);
511
512 match user_schema {
513 Some(user) => {
514 let merged_defaults =
516 Self::merge_value_types(&default_schema.defaults, &user.defaults)?;
517
518 let mut merged_keys = default_schema.keys.clone();
520 for (key, user_value_types) in user.keys {
521 if let Some(default_value_types) = merged_keys.get(&key) {
522 let merged_value_types =
524 Self::merge_value_types(default_value_types, &user_value_types)?;
525 merged_keys.insert(key, merged_value_types);
526 } else {
527 merged_keys.insert(key, user_value_types);
529 }
530 }
531
532 Ok(Schema {
533 defaults: merged_defaults,
534 keys: merged_keys,
535 })
536 }
537 None => Ok(default_schema),
538 }
539 }
540
541 pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
543 if self.defaults != other.defaults {
544 return Err(SchemaError::InvalidSchema {
545 reason: "Cannot merge schemas with differing defaults".to_string(),
546 });
547 }
548
549 let mut keys = self.keys.clone();
550
551 for (key, other_value_types) in &other.keys {
552 if let Some(existing) = keys.get(key).cloned() {
553 let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
554 keys.insert(key.clone(), merged);
555 } else {
556 keys.insert(key.clone(), other_value_types.clone());
557 }
558 }
559
560 Ok(Schema {
561 defaults: self.defaults.clone(),
562 keys,
563 })
564 }
565
566 fn merge_override_value_types(
567 key: &str,
568 left: &ValueTypes,
569 right: &ValueTypes,
570 ) -> Result<ValueTypes, SchemaError> {
571 Ok(ValueTypes {
572 string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
573 float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
574 int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
575 boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
576 float_list: Self::merge_float_list_override(
577 key,
578 left.float_list.as_ref(),
579 right.float_list.as_ref(),
580 )?,
581 sparse_vector: Self::merge_sparse_vector_override(
582 key,
583 left.sparse_vector.as_ref(),
584 right.sparse_vector.as_ref(),
585 )?,
586 })
587 }
588
589 fn merge_string_override(
590 key: &str,
591 left: Option<&StringValueType>,
592 right: Option<&StringValueType>,
593 ) -> Result<Option<StringValueType>, SchemaError> {
594 match (left, right) {
595 (Some(l), Some(r)) => Ok(Some(StringValueType {
596 string_inverted_index: Self::merge_index_or_error(
597 l.string_inverted_index.as_ref(),
598 r.string_inverted_index.as_ref(),
599 &format!("key '{key}' string.string_inverted_index"),
600 )?,
601 fts_index: Self::merge_index_or_error(
602 l.fts_index.as_ref(),
603 r.fts_index.as_ref(),
604 &format!("key '{key}' string.fts_index"),
605 )?,
606 })),
607 (Some(l), None) => Ok(Some(l.clone())),
608 (None, Some(r)) => Ok(Some(r.clone())),
609 (None, None) => Ok(None),
610 }
611 }
612
613 fn merge_float_override(
614 key: &str,
615 left: Option<&FloatValueType>,
616 right: Option<&FloatValueType>,
617 ) -> Result<Option<FloatValueType>, SchemaError> {
618 match (left, right) {
619 (Some(l), Some(r)) => Ok(Some(FloatValueType {
620 float_inverted_index: Self::merge_index_or_error(
621 l.float_inverted_index.as_ref(),
622 r.float_inverted_index.as_ref(),
623 &format!("key '{key}' float.float_inverted_index"),
624 )?,
625 })),
626 (Some(l), None) => Ok(Some(l.clone())),
627 (None, Some(r)) => Ok(Some(r.clone())),
628 (None, None) => Ok(None),
629 }
630 }
631
632 fn merge_int_override(
633 key: &str,
634 left: Option<&IntValueType>,
635 right: Option<&IntValueType>,
636 ) -> Result<Option<IntValueType>, SchemaError> {
637 match (left, right) {
638 (Some(l), Some(r)) => Ok(Some(IntValueType {
639 int_inverted_index: Self::merge_index_or_error(
640 l.int_inverted_index.as_ref(),
641 r.int_inverted_index.as_ref(),
642 &format!("key '{key}' int.int_inverted_index"),
643 )?,
644 })),
645 (Some(l), None) => Ok(Some(l.clone())),
646 (None, Some(r)) => Ok(Some(r.clone())),
647 (None, None) => Ok(None),
648 }
649 }
650
651 fn merge_bool_override(
652 key: &str,
653 left: Option<&BoolValueType>,
654 right: Option<&BoolValueType>,
655 ) -> Result<Option<BoolValueType>, SchemaError> {
656 match (left, right) {
657 (Some(l), Some(r)) => Ok(Some(BoolValueType {
658 bool_inverted_index: Self::merge_index_or_error(
659 l.bool_inverted_index.as_ref(),
660 r.bool_inverted_index.as_ref(),
661 &format!("key '{key}' bool.bool_inverted_index"),
662 )?,
663 })),
664 (Some(l), None) => Ok(Some(l.clone())),
665 (None, Some(r)) => Ok(Some(r.clone())),
666 (None, None) => Ok(None),
667 }
668 }
669
670 fn merge_float_list_override(
671 key: &str,
672 left: Option<&FloatListValueType>,
673 right: Option<&FloatListValueType>,
674 ) -> Result<Option<FloatListValueType>, SchemaError> {
675 match (left, right) {
676 (Some(l), Some(r)) => Ok(Some(FloatListValueType {
677 vector_index: Self::merge_index_or_error(
678 l.vector_index.as_ref(),
679 r.vector_index.as_ref(),
680 &format!("key '{key}' float_list.vector_index"),
681 )?,
682 })),
683 (Some(l), None) => Ok(Some(l.clone())),
684 (None, Some(r)) => Ok(Some(r.clone())),
685 (None, None) => Ok(None),
686 }
687 }
688
689 fn merge_sparse_vector_override(
690 key: &str,
691 left: Option<&SparseVectorValueType>,
692 right: Option<&SparseVectorValueType>,
693 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
694 match (left, right) {
695 (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
696 sparse_vector_index: Self::merge_index_or_error(
697 l.sparse_vector_index.as_ref(),
698 r.sparse_vector_index.as_ref(),
699 &format!("key '{key}' sparse_vector.sparse_vector_index"),
700 )?,
701 })),
702 (Some(l), None) => Ok(Some(l.clone())),
703 (None, Some(r)) => Ok(Some(r.clone())),
704 (None, None) => Ok(None),
705 }
706 }
707
708 fn merge_index_or_error<T: Clone + PartialEq>(
709 left: Option<&T>,
710 right: Option<&T>,
711 context: &str,
712 ) -> Result<Option<T>, SchemaError> {
713 match (left, right) {
714 (Some(l), Some(r)) => {
715 if l == r {
716 Ok(Some(l.clone()))
717 } else {
718 Err(SchemaError::InvalidSchema {
719 reason: format!("Conflicting configuration for {context}"),
720 })
721 }
722 }
723 (Some(l), None) => Ok(Some(l.clone())),
724 (None, Some(r)) => Ok(Some(r.clone())),
725 (None, None) => Ok(None),
726 }
727 }
728
729 fn merge_value_types(default: &ValueTypes, user: &ValueTypes) -> Result<ValueTypes, String> {
732 let float_list =
734 Self::merge_float_list_type(default.float_list.as_ref(), user.float_list.as_ref());
735
736 if let Some(ref fl) = float_list {
738 Self::validate_float_list_value_type(fl)?;
739 }
740
741 Ok(ValueTypes {
742 string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
743 float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
744 int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
745 boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
746 float_list,
747 sparse_vector: Self::merge_sparse_vector_type(
748 default.sparse_vector.as_ref(),
749 user.sparse_vector.as_ref(),
750 )?,
751 })
752 }
753
754 fn merge_string_type(
756 default: Option<&StringValueType>,
757 user: Option<&StringValueType>,
758 ) -> Result<Option<StringValueType>, String> {
759 match (default, user) {
760 (Some(default), Some(user)) => Ok(Some(StringValueType {
761 string_inverted_index: Self::merge_string_inverted_index_type(
762 default.string_inverted_index.as_ref(),
763 user.string_inverted_index.as_ref(),
764 )?,
765 fts_index: Self::merge_fts_index_type(
766 default.fts_index.as_ref(),
767 user.fts_index.as_ref(),
768 )?,
769 })),
770 (Some(default), None) => Ok(Some(default.clone())),
771 (None, Some(user)) => Ok(Some(user.clone())),
772 (None, None) => Ok(None),
773 }
774 }
775
776 fn merge_float_type(
778 default: Option<&FloatValueType>,
779 user: Option<&FloatValueType>,
780 ) -> Result<Option<FloatValueType>, String> {
781 match (default, user) {
782 (Some(default), Some(user)) => Ok(Some(FloatValueType {
783 float_inverted_index: Self::merge_float_inverted_index_type(
784 default.float_inverted_index.as_ref(),
785 user.float_inverted_index.as_ref(),
786 )?,
787 })),
788 (Some(default), None) => Ok(Some(default.clone())),
789 (None, Some(user)) => Ok(Some(user.clone())),
790 (None, None) => Ok(None),
791 }
792 }
793
794 fn merge_int_type(
796 default: Option<&IntValueType>,
797 user: Option<&IntValueType>,
798 ) -> Result<Option<IntValueType>, String> {
799 match (default, user) {
800 (Some(default), Some(user)) => Ok(Some(IntValueType {
801 int_inverted_index: Self::merge_int_inverted_index_type(
802 default.int_inverted_index.as_ref(),
803 user.int_inverted_index.as_ref(),
804 )?,
805 })),
806 (Some(default), None) => Ok(Some(default.clone())),
807 (None, Some(user)) => Ok(Some(user.clone())),
808 (None, None) => Ok(None),
809 }
810 }
811
812 fn merge_bool_type(
814 default: Option<&BoolValueType>,
815 user: Option<&BoolValueType>,
816 ) -> Result<Option<BoolValueType>, String> {
817 match (default, user) {
818 (Some(default), Some(user)) => Ok(Some(BoolValueType {
819 bool_inverted_index: Self::merge_bool_inverted_index_type(
820 default.bool_inverted_index.as_ref(),
821 user.bool_inverted_index.as_ref(),
822 )?,
823 })),
824 (Some(default), None) => Ok(Some(default.clone())),
825 (None, Some(user)) => Ok(Some(user.clone())),
826 (None, None) => Ok(None),
827 }
828 }
829
830 fn merge_float_list_type(
832 default: Option<&FloatListValueType>,
833 user: Option<&FloatListValueType>,
834 ) -> Option<FloatListValueType> {
835 match (default, user) {
836 (Some(default), Some(user)) => Some(FloatListValueType {
837 vector_index: Self::merge_vector_index_type(
838 default.vector_index.as_ref(),
839 user.vector_index.as_ref(),
840 ),
841 }),
842 (Some(default), None) => Some(default.clone()),
843 (None, Some(user)) => Some(user.clone()),
844 (None, None) => None,
845 }
846 }
847
848 fn merge_sparse_vector_type(
850 default: Option<&SparseVectorValueType>,
851 user: Option<&SparseVectorValueType>,
852 ) -> Result<Option<SparseVectorValueType>, String> {
853 match (default, user) {
854 (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
855 sparse_vector_index: Self::merge_sparse_vector_index_type(
856 default.sparse_vector_index.as_ref(),
857 user.sparse_vector_index.as_ref(),
858 )?,
859 })),
860 (Some(default), None) => Ok(Some(default.clone())),
861 (None, Some(user)) => Ok(Some(user.clone())),
862 (None, None) => Ok(None),
863 }
864 }
865
866 fn merge_string_inverted_index_type(
868 default: Option<&StringInvertedIndexType>,
869 user: Option<&StringInvertedIndexType>,
870 ) -> Result<Option<StringInvertedIndexType>, String> {
871 match (default, user) {
872 (Some(_default), Some(user)) => {
873 Ok(Some(StringInvertedIndexType {
874 enabled: user.enabled, config: user.config.clone(), }))
877 }
878 (Some(default), None) => Ok(Some(default.clone())),
879 (None, Some(user)) => Ok(Some(user.clone())),
880 (None, None) => Ok(None),
881 }
882 }
883
884 fn merge_fts_index_type(
885 default: Option<&FtsIndexType>,
886 user: Option<&FtsIndexType>,
887 ) -> Result<Option<FtsIndexType>, String> {
888 match (default, user) {
889 (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
890 enabled: user.enabled,
891 config: user.config.clone(),
892 })),
893 (Some(default), None) => Ok(Some(default.clone())),
894 (None, Some(user)) => Ok(Some(user.clone())),
895 (None, None) => Ok(None),
896 }
897 }
898
899 fn merge_float_inverted_index_type(
900 default: Option<&FloatInvertedIndexType>,
901 user: Option<&FloatInvertedIndexType>,
902 ) -> Result<Option<FloatInvertedIndexType>, String> {
903 match (default, user) {
904 (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
905 enabled: user.enabled,
906 config: user.config.clone(),
907 })),
908 (Some(default), None) => Ok(Some(default.clone())),
909 (None, Some(user)) => Ok(Some(user.clone())),
910 (None, None) => Ok(None),
911 }
912 }
913
914 fn merge_int_inverted_index_type(
915 default: Option<&IntInvertedIndexType>,
916 user: Option<&IntInvertedIndexType>,
917 ) -> Result<Option<IntInvertedIndexType>, String> {
918 match (default, user) {
919 (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
920 enabled: user.enabled,
921 config: user.config.clone(),
922 })),
923 (Some(default), None) => Ok(Some(default.clone())),
924 (None, Some(user)) => Ok(Some(user.clone())),
925 (None, None) => Ok(None),
926 }
927 }
928
929 fn merge_bool_inverted_index_type(
930 default: Option<&BoolInvertedIndexType>,
931 user: Option<&BoolInvertedIndexType>,
932 ) -> Result<Option<BoolInvertedIndexType>, String> {
933 match (default, user) {
934 (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
935 enabled: user.enabled,
936 config: user.config.clone(),
937 })),
938 (Some(default), None) => Ok(Some(default.clone())),
939 (None, Some(user)) => Ok(Some(user.clone())),
940 (None, None) => Ok(None),
941 }
942 }
943
944 fn merge_vector_index_type(
945 default: Option<&VectorIndexType>,
946 user: Option<&VectorIndexType>,
947 ) -> Option<VectorIndexType> {
948 match (default, user) {
949 (Some(default), Some(user)) => Some(VectorIndexType {
950 enabled: user.enabled,
951 config: Self::merge_vector_index_config(&default.config, &user.config),
952 }),
953 (Some(default), None) => Some(default.clone()),
954 (None, Some(user)) => Some(user.clone()),
955 (None, None) => None,
956 }
957 }
958
959 fn merge_sparse_vector_index_type(
960 default: Option<&SparseVectorIndexType>,
961 user: Option<&SparseVectorIndexType>,
962 ) -> Result<Option<SparseVectorIndexType>, String> {
963 match (default, user) {
964 (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
965 enabled: user.enabled,
966 config: Self::merge_sparse_vector_index_config(&default.config, &user.config)?,
967 })),
968 (Some(default), None) => Ok(Some(default.clone())),
969 (None, Some(user)) => Ok(Some(user.clone())),
970 (None, None) => Ok(None),
971 }
972 }
973
974 fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), String> {
977 if let Some(vector_index) = &float_list.vector_index {
978 if let Some(hnsw) = &vector_index.config.hnsw {
979 hnsw.validate()
980 .map_err(|e| format!("Invalid HNSW configuration: {}", e))?;
981 }
982 if let Some(spann) = &vector_index.config.spann {
983 spann
984 .validate()
985 .map_err(|e| format!("Invalid SPANN configuration: {}", e))?;
986 }
987 }
988 Ok(())
989 }
990
991 fn merge_vector_index_config(
993 default: &VectorIndexConfig,
994 user: &VectorIndexConfig,
995 ) -> VectorIndexConfig {
996 VectorIndexConfig {
997 space: user.space.clone().or(default.space.clone()),
998 embedding_function: user
999 .embedding_function
1000 .clone()
1001 .or(default.embedding_function.clone()),
1002 source_key: user.source_key.clone().or(default.source_key.clone()),
1003 hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1004 spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
1005 }
1006 }
1007
1008 fn merge_sparse_vector_index_config(
1010 default: &SparseVectorIndexConfig,
1011 user: &SparseVectorIndexConfig,
1012 ) -> Result<SparseVectorIndexConfig, String> {
1013 Ok(SparseVectorIndexConfig {
1014 embedding_function: user
1015 .embedding_function
1016 .clone()
1017 .or(default.embedding_function.clone()),
1018 source_key: user.source_key.clone().or(default.source_key.clone()),
1019 bm25: user.bm25.or(default.bm25),
1020 })
1021 }
1022
1023 fn merge_hnsw_configs(
1025 default_hnsw: Option<&HnswIndexConfig>,
1026 user_hnsw: Option<&HnswIndexConfig>,
1027 ) -> Option<HnswIndexConfig> {
1028 match (default_hnsw, user_hnsw) {
1029 (Some(default), Some(user)) => Some(HnswIndexConfig {
1030 ef_construction: user.ef_construction.or(default.ef_construction),
1031 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1032 ef_search: user.ef_search.or(default.ef_search),
1033 num_threads: user.num_threads.or(default.num_threads),
1034 batch_size: user.batch_size.or(default.batch_size),
1035 sync_threshold: user.sync_threshold.or(default.sync_threshold),
1036 resize_factor: user.resize_factor.or(default.resize_factor),
1037 }),
1038 (Some(default), None) => Some(default.clone()),
1039 (None, Some(user)) => Some(user.clone()),
1040 (None, None) => None,
1041 }
1042 }
1043
1044 fn merge_spann_configs(
1046 default_spann: Option<&SpannIndexConfig>,
1047 user_spann: Option<&SpannIndexConfig>,
1048 ) -> Option<SpannIndexConfig> {
1049 match (default_spann, user_spann) {
1050 (Some(default), Some(user)) => Some(SpannIndexConfig {
1051 search_nprobe: user.search_nprobe.or(default.search_nprobe),
1052 search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1053 search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1054 nreplica_count: user.nreplica_count.or(default.nreplica_count),
1055 write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1056 write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1057 split_threshold: user.split_threshold.or(default.split_threshold),
1058 num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1059 initial_lambda: user.initial_lambda.or(default.initial_lambda),
1060 reassign_neighbor_count: user
1061 .reassign_neighbor_count
1062 .or(default.reassign_neighbor_count),
1063 merge_threshold: user.merge_threshold.or(default.merge_threshold),
1064 num_centers_to_merge_to: user
1065 .num_centers_to_merge_to
1066 .or(default.num_centers_to_merge_to),
1067 write_nprobe: user.write_nprobe.or(default.write_nprobe),
1068 ef_construction: user.ef_construction.or(default.ef_construction),
1069 ef_search: user.ef_search.or(default.ef_search),
1070 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1071 }),
1072 (Some(default), None) => Some(default.clone()),
1073 (None, Some(user)) => Some(user.clone()),
1074 (None, None) => None,
1075 }
1076 }
1077
1078 pub fn reconcile_with_collection_config(
1085 schema: Schema,
1086 collection_config: InternalCollectionConfiguration,
1087 ) -> Result<Schema, String> {
1088 if collection_config.is_default() {
1090 return Ok(schema);
1092 }
1093
1094 if !Self::is_schema_default(&schema) {
1096 return Err(
1098 "Cannot set both collection config and schema at the same time".to_string(),
1099 );
1100 }
1101
1102 Self::convert_collection_config_to_schema(collection_config)
1104 }
1105
1106 pub fn reconcile_schema_and_config(
1107 schema: Option<Schema>,
1108 configuration: Option<InternalCollectionConfiguration>,
1109 ) -> Result<Schema, String> {
1110 let reconciled_schema = Self::reconcile_with_defaults(schema)?;
1111 if let Some(config) = configuration {
1112 Self::reconcile_with_collection_config(reconciled_schema, config)
1113 } else {
1114 Ok(reconciled_schema)
1115 }
1116 }
1117
1118 pub fn default_with_embedding_function(
1119 embedding_function: EmbeddingFunctionConfiguration,
1120 ) -> Schema {
1121 let mut schema = Schema::new_default(KnnIndex::Spann);
1122 if let Some(float_list) = &mut schema.defaults.float_list {
1123 if let Some(vector_index) = &mut float_list.vector_index {
1124 vector_index.config.embedding_function = Some(embedding_function.clone());
1125 }
1126 }
1127 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1128 if let Some(float_list) = &mut embedding_types.float_list {
1129 if let Some(vector_index) = &mut float_list.vector_index {
1130 vector_index.config.embedding_function = Some(embedding_function);
1131 }
1132 }
1133 }
1134 schema
1135 }
1136
1137 fn is_schema_default(schema: &Schema) -> bool {
1139 let default_hnsw = Schema::new_default(KnnIndex::Hnsw);
1141 let default_spann = Schema::new_default(KnnIndex::Spann);
1142
1143 schema == &default_hnsw || schema == &default_spann
1144 }
1145
1146 fn convert_collection_config_to_schema(
1148 collection_config: InternalCollectionConfiguration,
1149 ) -> Result<Schema, String> {
1150 let mut schema = Schema::new_default(KnnIndex::Spann); let vector_config = match collection_config.vector_index {
1155 VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
1156 space: Some(hnsw_config.space),
1157 embedding_function: collection_config.embedding_function,
1158 source_key: Some(DOCUMENT_KEY.to_string()), hnsw: Some(HnswIndexConfig {
1160 ef_construction: Some(hnsw_config.ef_construction),
1161 max_neighbors: Some(hnsw_config.max_neighbors),
1162 ef_search: Some(hnsw_config.ef_search),
1163 num_threads: Some(hnsw_config.num_threads),
1164 batch_size: Some(hnsw_config.batch_size),
1165 sync_threshold: Some(hnsw_config.sync_threshold),
1166 resize_factor: Some(hnsw_config.resize_factor),
1167 }),
1168 spann: None,
1169 },
1170 VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
1171 space: Some(spann_config.space),
1172 embedding_function: collection_config.embedding_function,
1173 source_key: Some(DOCUMENT_KEY.to_string()), hnsw: None,
1175 spann: Some(SpannIndexConfig {
1176 search_nprobe: Some(spann_config.search_nprobe),
1177 search_rng_factor: Some(spann_config.search_rng_factor),
1178 search_rng_epsilon: Some(spann_config.search_rng_epsilon),
1179 nreplica_count: Some(spann_config.nreplica_count),
1180 write_rng_factor: Some(spann_config.write_rng_factor),
1181 write_rng_epsilon: Some(spann_config.write_rng_epsilon),
1182 split_threshold: Some(spann_config.split_threshold),
1183 num_samples_kmeans: Some(spann_config.num_samples_kmeans),
1184 initial_lambda: Some(spann_config.initial_lambda),
1185 reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
1186 merge_threshold: Some(spann_config.merge_threshold),
1187 num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
1188 write_nprobe: Some(spann_config.write_nprobe),
1189 ef_construction: Some(spann_config.ef_construction),
1190 ef_search: Some(spann_config.ef_search),
1191 max_neighbors: Some(spann_config.max_neighbors),
1192 }),
1193 },
1194 };
1195
1196 if let Some(float_list) = &mut schema.defaults.float_list {
1199 if let Some(vector_index) = &mut float_list.vector_index {
1200 vector_index.config = vector_config.clone();
1201 }
1202 }
1203
1204 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1207 if let Some(float_list) = &mut embedding_types.float_list {
1208 if let Some(vector_index) = &mut float_list.vector_index {
1209 vector_index.config = vector_config;
1210 }
1211 }
1212 }
1213
1214 Ok(schema)
1215 }
1216
1217 pub fn is_metadata_type_index_enabled(
1219 &self,
1220 key: &str,
1221 value_type: MetadataValueType,
1222 ) -> Result<bool, SchemaError> {
1223 let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1224
1225 match value_type {
1226 MetadataValueType::Bool => match &v_type.boolean {
1227 Some(bool_type) => match &bool_type.bool_inverted_index {
1228 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1229 None => Err(SchemaError::MissingIndexConfiguration {
1230 key: key.to_string(),
1231 value_type: "bool".to_string(),
1232 }),
1233 },
1234 None => match &self.defaults.boolean {
1235 Some(bool_type) => match &bool_type.bool_inverted_index {
1236 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1237 None => Err(SchemaError::MissingIndexConfiguration {
1238 key: key.to_string(),
1239 value_type: "bool".to_string(),
1240 }),
1241 },
1242 None => Err(SchemaError::MissingIndexConfiguration {
1243 key: key.to_string(),
1244 value_type: "bool".to_string(),
1245 }),
1246 },
1247 },
1248 MetadataValueType::Int => match &v_type.int {
1249 Some(int_type) => match &int_type.int_inverted_index {
1250 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1251 None => Err(SchemaError::MissingIndexConfiguration {
1252 key: key.to_string(),
1253 value_type: "int".to_string(),
1254 }),
1255 },
1256 None => match &self.defaults.int {
1257 Some(int_type) => match &int_type.int_inverted_index {
1258 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1259 None => Err(SchemaError::MissingIndexConfiguration {
1260 key: key.to_string(),
1261 value_type: "int".to_string(),
1262 }),
1263 },
1264 None => Err(SchemaError::MissingIndexConfiguration {
1265 key: key.to_string(),
1266 value_type: "int".to_string(),
1267 }),
1268 },
1269 },
1270 MetadataValueType::Float => match &v_type.float {
1271 Some(float_type) => match &float_type.float_inverted_index {
1272 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1273 None => Err(SchemaError::MissingIndexConfiguration {
1274 key: key.to_string(),
1275 value_type: "float".to_string(),
1276 }),
1277 },
1278 None => match &self.defaults.float {
1279 Some(float_type) => match &float_type.float_inverted_index {
1280 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1281 None => Err(SchemaError::MissingIndexConfiguration {
1282 key: key.to_string(),
1283 value_type: "float".to_string(),
1284 }),
1285 },
1286 None => Err(SchemaError::MissingIndexConfiguration {
1287 key: key.to_string(),
1288 value_type: "float".to_string(),
1289 }),
1290 },
1291 },
1292 MetadataValueType::Str => match &v_type.string {
1293 Some(string_type) => match &string_type.string_inverted_index {
1294 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1295 None => Err(SchemaError::MissingIndexConfiguration {
1296 key: key.to_string(),
1297 value_type: "string".to_string(),
1298 }),
1299 },
1300 None => match &self.defaults.string {
1301 Some(string_type) => match &string_type.string_inverted_index {
1302 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1303 None => Err(SchemaError::MissingIndexConfiguration {
1304 key: key.to_string(),
1305 value_type: "string".to_string(),
1306 }),
1307 },
1308 None => Err(SchemaError::MissingIndexConfiguration {
1309 key: key.to_string(),
1310 value_type: "string".to_string(),
1311 }),
1312 },
1313 },
1314 MetadataValueType::SparseVector => match &v_type.sparse_vector {
1315 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1316 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1317 None => Err(SchemaError::MissingIndexConfiguration {
1318 key: key.to_string(),
1319 value_type: "sparse_vector".to_string(),
1320 }),
1321 },
1322 None => match &self.defaults.sparse_vector {
1323 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1324 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1325 None => Err(SchemaError::MissingIndexConfiguration {
1326 key: key.to_string(),
1327 value_type: "sparse_vector".to_string(),
1328 }),
1329 },
1330 None => Err(SchemaError::MissingIndexConfiguration {
1331 key: key.to_string(),
1332 value_type: "sparse_vector".to_string(),
1333 }),
1334 },
1335 },
1336 }
1337 }
1338
1339 pub fn is_metadata_where_indexing_enabled(
1340 &self,
1341 where_clause: &Where,
1342 ) -> Result<(), FilterValidationError> {
1343 match where_clause {
1344 Where::Composite(composite) => {
1345 for child in &composite.children {
1346 self.is_metadata_where_indexing_enabled(child)?;
1347 }
1348 Ok(())
1349 }
1350 Where::Document(_) => Ok(()),
1351 Where::Metadata(expression) => {
1352 let value_type = match &expression.comparison {
1353 MetadataComparison::Primitive(_, value) => value.value_type(),
1354 MetadataComparison::Set(_, set_value) => set_value.value_type(),
1355 };
1356 let is_enabled = self
1357 .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1358 .map_err(FilterValidationError::Schema)?;
1359 if !is_enabled {
1360 return Err(FilterValidationError::IndexingDisabled {
1361 key: expression.key.clone(),
1362 value_type,
1363 });
1364 }
1365 Ok(())
1366 }
1367 }
1368 }
1369
1370 pub fn is_knn_key_indexing_enabled(
1371 &self,
1372 key: &str,
1373 query: &QueryVector,
1374 ) -> Result<(), FilterValidationError> {
1375 match query {
1376 QueryVector::Sparse(_) => {
1377 let is_enabled = self
1378 .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1379 .map_err(FilterValidationError::Schema)?;
1380 if !is_enabled {
1381 return Err(FilterValidationError::IndexingDisabled {
1382 key: key.to_string(),
1383 value_type: MetadataValueType::SparseVector,
1384 });
1385 }
1386 Ok(())
1387 }
1388 QueryVector::Dense(_) => {
1389 Ok(())
1392 }
1393 }
1394 }
1395
1396 pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
1397 let value_types = self.keys.entry(key.to_string()).or_default();
1398 match value_type {
1399 MetadataValueType::Bool => {
1400 if value_types.boolean.is_none() {
1401 value_types.boolean = self.defaults.boolean.clone();
1402 return true;
1403 }
1404 }
1405 MetadataValueType::Int => {
1406 if value_types.int.is_none() {
1407 value_types.int = self.defaults.int.clone();
1408 return true;
1409 }
1410 }
1411 MetadataValueType::Float => {
1412 if value_types.float.is_none() {
1413 value_types.float = self.defaults.float.clone();
1414 return true;
1415 }
1416 }
1417 MetadataValueType::Str => {
1418 if value_types.string.is_none() {
1419 value_types.string = self.defaults.string.clone();
1420 return true;
1421 }
1422 }
1423 MetadataValueType::SparseVector => {
1424 if value_types.sparse_vector.is_none() {
1425 value_types.sparse_vector = self.defaults.sparse_vector.clone();
1426 return true;
1427 }
1428 }
1429 }
1430 false
1431 }
1432}
1433
1434#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1439#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1440#[serde(deny_unknown_fields)]
1441pub struct VectorIndexConfig {
1442 #[serde(skip_serializing_if = "Option::is_none")]
1444 pub space: Option<Space>,
1445 #[serde(skip_serializing_if = "Option::is_none")]
1447 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
1448 #[serde(skip_serializing_if = "Option::is_none")]
1450 pub source_key: Option<String>,
1451 #[serde(skip_serializing_if = "Option::is_none")]
1453 pub hnsw: Option<HnswIndexConfig>,
1454 #[serde(skip_serializing_if = "Option::is_none")]
1456 pub spann: Option<SpannIndexConfig>,
1457}
1458
1459#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
1461#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1462#[serde(deny_unknown_fields)]
1463pub struct HnswIndexConfig {
1464 #[serde(skip_serializing_if = "Option::is_none")]
1465 pub ef_construction: Option<usize>,
1466 #[serde(skip_serializing_if = "Option::is_none")]
1467 pub max_neighbors: Option<usize>,
1468 #[serde(skip_serializing_if = "Option::is_none")]
1469 pub ef_search: Option<usize>,
1470 #[serde(skip_serializing_if = "Option::is_none")]
1471 pub num_threads: Option<usize>,
1472 #[serde(skip_serializing_if = "Option::is_none")]
1473 #[validate(range(min = 2))]
1474 pub batch_size: Option<usize>,
1475 #[serde(skip_serializing_if = "Option::is_none")]
1476 #[validate(range(min = 2))]
1477 pub sync_threshold: Option<usize>,
1478 #[serde(skip_serializing_if = "Option::is_none")]
1479 pub resize_factor: Option<f64>,
1480}
1481
1482#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
1484#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1485#[serde(deny_unknown_fields)]
1486pub struct SpannIndexConfig {
1487 #[serde(skip_serializing_if = "Option::is_none")]
1488 #[validate(range(max = 128))]
1489 pub search_nprobe: Option<u32>,
1490 #[serde(skip_serializing_if = "Option::is_none")]
1491 #[validate(range(min = 1.0, max = 1.0))]
1492 pub search_rng_factor: Option<f32>,
1493 #[serde(skip_serializing_if = "Option::is_none")]
1494 #[validate(range(min = 5.0, max = 10.0))]
1495 pub search_rng_epsilon: Option<f32>,
1496 #[serde(skip_serializing_if = "Option::is_none")]
1497 #[validate(range(max = 8))]
1498 pub nreplica_count: Option<u32>,
1499 #[serde(skip_serializing_if = "Option::is_none")]
1500 #[validate(range(min = 1.0, max = 1.0))]
1501 pub write_rng_factor: Option<f32>,
1502 #[serde(skip_serializing_if = "Option::is_none")]
1503 #[validate(range(min = 5.0, max = 10.0))]
1504 pub write_rng_epsilon: Option<f32>,
1505 #[serde(skip_serializing_if = "Option::is_none")]
1506 #[validate(range(min = 50, max = 200))]
1507 pub split_threshold: Option<u32>,
1508 #[serde(skip_serializing_if = "Option::is_none")]
1509 #[validate(range(max = 1000))]
1510 pub num_samples_kmeans: Option<usize>,
1511 #[serde(skip_serializing_if = "Option::is_none")]
1512 #[validate(range(min = 100.0, max = 100.0))]
1513 pub initial_lambda: Option<f32>,
1514 #[serde(skip_serializing_if = "Option::is_none")]
1515 #[validate(range(max = 64))]
1516 pub reassign_neighbor_count: Option<u32>,
1517 #[serde(skip_serializing_if = "Option::is_none")]
1518 #[validate(range(min = 25, max = 100))]
1519 pub merge_threshold: Option<u32>,
1520 #[serde(skip_serializing_if = "Option::is_none")]
1521 #[validate(range(max = 8))]
1522 pub num_centers_to_merge_to: Option<u32>,
1523 #[serde(skip_serializing_if = "Option::is_none")]
1524 #[validate(range(max = 64))]
1525 pub write_nprobe: Option<u32>,
1526 #[serde(skip_serializing_if = "Option::is_none")]
1527 #[validate(range(max = 200))]
1528 pub ef_construction: Option<usize>,
1529 #[serde(skip_serializing_if = "Option::is_none")]
1530 #[validate(range(max = 200))]
1531 pub ef_search: Option<usize>,
1532 #[serde(skip_serializing_if = "Option::is_none")]
1533 #[validate(range(max = 64))]
1534 pub max_neighbors: Option<usize>,
1535}
1536
1537#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1538#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1539#[serde(deny_unknown_fields)]
1540pub struct SparseVectorIndexConfig {
1541 #[serde(skip_serializing_if = "Option::is_none")]
1543 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
1544 #[serde(skip_serializing_if = "Option::is_none")]
1546 pub source_key: Option<String>,
1547 #[serde(skip_serializing_if = "Option::is_none")]
1549 pub bm25: Option<bool>,
1550}
1551
1552#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1553#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1554#[serde(deny_unknown_fields)]
1555pub struct FtsIndexConfig {
1556 }
1558
1559#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1560#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1561#[serde(deny_unknown_fields)]
1562pub struct StringInvertedIndexConfig {
1563 }
1565
1566#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1567#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1568#[serde(deny_unknown_fields)]
1569pub struct IntInvertedIndexConfig {
1570 }
1572
1573#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1574#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1575#[serde(deny_unknown_fields)]
1576pub struct FloatInvertedIndexConfig {
1577 }
1579
1580#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1581#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1582#[serde(deny_unknown_fields)]
1583pub struct BoolInvertedIndexConfig {
1584 }
1586
1587#[cfg(test)]
1588mod tests {
1589 use super::*;
1590 use crate::hnsw_configuration::Space;
1591 use crate::metadata::SparseVector;
1592 use crate::{
1593 EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
1594 };
1595 use serde_json::json;
1596
#[test]
fn test_reconcile_with_defaults_none_user_schema() {
    // With no user schema, reconciliation yields the default SPANN schema.
    let reconciled = Schema::reconcile_with_defaults(None).unwrap();
    assert_eq!(reconciled, Schema::new_default(KnnIndex::Spann));
}
1604
#[test]
fn test_reconcile_with_defaults_empty_user_schema() {
    // An empty user schema must not change anything relative to the defaults.
    let empty_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::new(),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(empty_schema)).unwrap();

    assert_eq!(reconciled, Schema::new_default(KnnIndex::Spann));
}
1617
#[test]
fn test_reconcile_with_defaults_user_overrides_string_enabled() {
    // The user disables the string inverted index in the schema defaults.
    let user_schema = Schema {
        defaults: ValueTypes {
            string: Some(StringValueType {
                string_inverted_index: Some(StringInvertedIndexType {
                    enabled: false,
                    config: StringInvertedIndexConfig {},
                }),
                fts_index: None,
            }),
            ..Default::default()
        },
        keys: HashMap::new(),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();

    // The user's override survives the merge.
    let string_index_enabled = reconciled
        .defaults
        .string
        .as_ref()
        .and_then(|string_type| string_type.string_inverted_index.as_ref())
        .map(|index| index.enabled);
    assert_eq!(string_index_enabled, Some(false));

    // Types the user did not mention are still filled in from the defaults.
    assert!(reconciled.defaults.float.is_some());
    assert!(reconciled.defaults.int.is_some());
}
1652
#[test]
fn test_reconcile_with_defaults_user_overrides_vector_config() {
    // The user overrides part of the float-list vector index config.
    let user_schema = Schema {
        defaults: ValueTypes {
            float_list: Some(FloatListValueType {
                vector_index: Some(VectorIndexType {
                    enabled: true,
                    config: VectorIndexConfig {
                        space: Some(Space::L2),
                        embedding_function: None,
                        source_key: Some("custom_key".to_string()),
                        hnsw: Some(HnswIndexConfig {
                            ef_construction: Some(500),
                            ..Default::default()
                        }),
                        spann: None,
                    },
                }),
            }),
            ..Default::default()
        },
        keys: HashMap::new(),
    };

    // Merge by hand against the HNSW default schema.
    let hnsw_defaults = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults =
        Schema::merge_value_types(&hnsw_defaults.defaults, &user_schema.defaults).unwrap();
    let mut merged_keys = hnsw_defaults.keys.clone();
    for (key, user_value_types) in user_schema.keys {
        let merged_entry = match merged_keys.get(&key) {
            Some(existing) => Schema::merge_value_types(existing, &user_value_types).unwrap(),
            None => user_value_types,
        };
        merged_keys.insert(key, merged_entry);
    }
    let merged = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
    };

    let vector_config = &merged
        .defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    let hnsw = vector_config.hnsw.as_ref().unwrap();

    // Values the user supplied win.
    assert_eq!(vector_config.space, Some(Space::L2));
    assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
    assert_eq!(hnsw.ef_construction, Some(500));

    // Values the user left out come from the defaults.
    assert_eq!(
        vector_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(hnsw.max_neighbors, Some(default_m()));
}
1732
#[test]
fn test_reconcile_with_defaults_keys() {
    // A user-defined key that enables FTS and disables the string inverted index.
    let custom_key_types = ValueTypes {
        string: Some(StringValueType {
            fts_index: Some(FtsIndexType {
                enabled: true,
                config: FtsIndexConfig {},
            }),
            string_inverted_index: Some(StringInvertedIndexType {
                enabled: false,
                config: StringInvertedIndexConfig {},
            }),
        }),
        ..Default::default()
    };
    let user_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::from([("custom_key".to_string(), custom_key_types)]),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();

    // The built-in key overrides are kept.
    assert!(reconciled.keys.contains_key(EMBEDDING_KEY));
    assert!(reconciled.keys.contains_key(DOCUMENT_KEY));

    // The user's key is present with its FTS index still enabled.
    assert!(reconciled.keys.contains_key("custom_key"));
    let fts_enabled = reconciled
        .keys
        .get("custom_key")
        .and_then(|value_types| value_types.string.as_ref())
        .and_then(|string_type| string_type.fts_index.as_ref())
        .map(|index| index.enabled);
    assert_eq!(fts_enabled, Some(true));
}
1779
#[test]
fn test_reconcile_with_defaults_override_existing_key() {
    // The user overrides the built-in embedding key's vector index.
    let embedding_override = ValueTypes {
        float_list: Some(FloatListValueType {
            vector_index: Some(VectorIndexType {
                enabled: false,
                config: VectorIndexConfig {
                    space: Some(Space::Ip),
                    embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
                    source_key: Some("custom_embedding_key".to_string()),
                    hnsw: None,
                    spann: None,
                },
            }),
        }),
        ..Default::default()
    };
    let user_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::from([(EMBEDDING_KEY.to_string(), embedding_override)]),
    };

    let reconciled = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();

    let vector_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();

    // The user's values replace the defaults for that key.
    assert!(!vector_index.enabled);
    assert_eq!(vector_index.config.space, Some(Space::Ip));
    assert_eq!(
        vector_index.config.source_key,
        Some("custom_embedding_key".to_string())
    );
}
1827
#[test]
fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
    // An HNSW collection config must survive config -> schema -> config.
    let hnsw = InternalHnswConfiguration {
        space: Space::Cosine,
        ef_construction: 128,
        ef_search: 96,
        max_neighbors: 42,
        num_threads: 8,
        resize_factor: 1.5,
        sync_threshold: 2_000,
        batch_size: 256,
    };
    let embedding_function =
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "custom".to_string(),
            config: json!({"alpha": 1}),
        });
    let original = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(hnsw),
        embedding_function: Some(embedding_function),
    };

    let schema = Schema::convert_collection_config_to_schema(original.clone()).unwrap();
    let round_tripped = InternalCollectionConfiguration::try_from(&schema).unwrap();

    assert_eq!(round_tripped, original);
}
1855
#[test]
fn test_convert_schema_to_collection_config_spann_roundtrip() {
    // A SPANN collection config must survive config -> schema -> config.
    let spann = InternalSpannConfiguration {
        space: Space::Cosine,
        search_nprobe: 11,
        search_rng_factor: 1.7,
        write_nprobe: 5,
        nreplica_count: 3,
        split_threshold: 150,
        merge_threshold: 80,
        ef_construction: 120,
        ef_search: 90,
        max_neighbors: 40,
        ..Default::default()
    };
    let embedding_function =
        EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
            name: "custom".to_string(),
            config: json!({"beta": true}),
        });
    let original = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(spann),
        embedding_function: Some(embedding_function),
    };

    let schema = Schema::convert_collection_config_to_schema(original.clone()).unwrap();
    let round_tripped = InternalCollectionConfiguration::try_from(&schema).unwrap();

    assert_eq!(round_tripped, original);
}
1888
#[test]
fn test_convert_schema_to_collection_config_rejects_mixed_index() {
    // Start from the HNSW default and additionally attach SPANN parameters
    // to the embedding key, so that both index kinds are configured at once.
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    let extra_spann = SpannIndexConfig {
        search_nprobe: Some(1),
        search_rng_factor: Some(1.0),
        search_rng_epsilon: Some(0.1),
        nreplica_count: Some(1),
        write_rng_factor: Some(1.0),
        write_rng_epsilon: Some(0.1),
        split_threshold: Some(100),
        num_samples_kmeans: Some(10),
        initial_lambda: Some(0.5),
        reassign_neighbor_count: Some(10),
        merge_threshold: Some(50),
        num_centers_to_merge_to: Some(3),
        write_nprobe: Some(1),
        ef_construction: Some(50),
        ef_search: Some(40),
        max_neighbors: Some(20),
    };

    let embedding_index = schema
        .keys
        .get_mut(EMBEDDING_KEY)
        .and_then(|value_types| value_types.float_list.as_mut())
        .and_then(|float_list| float_list.vector_index.as_mut());
    if let Some(vector_index) = embedding_index {
        vector_index.config.spann = Some(extra_spann);
    }

    assert!(InternalCollectionConfiguration::try_from(&schema).is_err());
}
1920
#[test]
fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
    // The document key already has a string section, so nothing should change.
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    let snapshot = schema.clone();

    let changed = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);

    assert!(!changed);
    assert_eq!(schema, snapshot);
}
1929
#[test]
fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    assert!(!schema.keys.contains_key("custom_field"));

    let changed = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
    assert!(changed);

    let created = schema
        .keys
        .get("custom_field")
        .expect("expected new key override to be inserted");

    // Only the requested value type is populated, and it mirrors the default.
    assert_eq!(created.boolean, schema.defaults.boolean);
    assert!(created.string.is_none());
    assert!(created.int.is_none());
    assert!(created.float.is_none());
    assert!(created.float_list.is_none());
    assert!(created.sparse_vector.is_none());
}
1949
#[test]
fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    let keys_before = schema.keys.len();

    // Pre-register the key with only a string section.
    let string_only = ValueTypes {
        string: schema.defaults.string.clone(),
        ..Default::default()
    };
    schema.keys.insert("custom_field".to_string(), string_only);

    let changed = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
    assert!(changed);

    // No extra key was created beyond the one inserted above.
    assert_eq!(schema.keys.len(), keys_before + 1);

    let updated = schema
        .keys
        .get("custom_field")
        .expect("expected key override to exist after ensure call");
    assert!(updated.string.is_some());
    assert_eq!(updated.boolean, schema.defaults.boolean);
}
1973
#[test]
fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
    // A sparse query against a key with no override in the default schema
    // must be rejected as "indexing disabled".
    let schema = Schema::new_default(KnnIndex::Spann);
    let sparse_query = QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]));

    let err = schema
        .is_knn_key_indexing_enabled("custom_sparse", &sparse_query)
        .expect_err("expected indexing disabled error");

    if let FilterValidationError::IndexingDisabled { key, value_type } = &err {
        assert_eq!(key.as_str(), "custom_sparse");
        assert_eq!(*value_type, MetadataValueType::SparseVector);
    } else {
        panic!("unexpected error variant: {err:?}");
    }
}
1991
#[test]
fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
    // Register a key whose sparse vector index is explicitly enabled.
    let enabled_sparse_index = SparseVectorIndexType {
        enabled: true,
        config: SparseVectorIndexConfig {
            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
            source_key: None,
            bm25: None,
        },
    };
    let key_types = ValueTypes {
        sparse_vector: Some(SparseVectorValueType {
            sparse_vector_index: Some(enabled_sparse_index),
        }),
        ..Default::default()
    };

    let mut schema = Schema::new_default(KnnIndex::Spann);
    schema.keys.insert("sparse_enabled".to_string(), key_types);

    let sparse_query = QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]));
    let outcome = schema.is_knn_key_indexing_enabled("sparse_enabled", &sparse_query);

    assert!(outcome.is_ok());
}
2019
#[test]
fn test_is_knn_key_indexing_enabled_dense_succeeds() {
    // Dense queries are accepted on the default schema.
    let schema = Schema::new_default(KnnIndex::Spann);
    let dense_query = QueryVector::Dense(vec![0.1_f32, 0.2_f32]);

    let outcome = schema.is_knn_key_indexing_enabled(EMBEDDING_KEY, &dense_query);

    assert!(outcome.is_ok());
}
2030
#[test]
fn test_merge_hnsw_configs_field_level() {
    let default_hnsw = HnswIndexConfig {
        ef_construction: Some(200),
        max_neighbors: Some(16),
        ef_search: Some(10),
        num_threads: Some(4),
        batch_size: Some(100),
        sync_threshold: Some(1000),
        resize_factor: Some(1.2),
    };
    // The user sets only three of the seven fields.
    let user_hnsw = HnswIndexConfig {
        ef_construction: Some(300),
        ef_search: Some(20),
        sync_threshold: Some(2000),
        ..Default::default()
    };

    let merged = Schema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();

    // User-set fields win; all others fall back to the default.
    let expected = HnswIndexConfig {
        ef_construction: Some(300),
        max_neighbors: Some(16),
        ef_search: Some(20),
        num_threads: Some(4),
        batch_size: Some(100),
        sync_threshold: Some(2000),
        resize_factor: Some(1.2),
    };
    assert_eq!(merged, expected);
}
2067
#[test]
fn test_merge_spann_configs_field_level() {
    let default_spann = SpannIndexConfig {
        search_nprobe: Some(10),
        search_rng_factor: Some(1.0),
        search_rng_epsilon: Some(7.0),
        nreplica_count: Some(3),
        write_rng_factor: Some(1.0),
        write_rng_epsilon: Some(6.0),
        split_threshold: Some(100),
        num_samples_kmeans: Some(100),
        initial_lambda: Some(100.0),
        reassign_neighbor_count: Some(50),
        merge_threshold: Some(50),
        num_centers_to_merge_to: Some(4),
        write_nprobe: Some(5),
        ef_construction: Some(100),
        ef_search: Some(10),
        max_neighbors: Some(16),
    };
    // The user sets only three fields; everything else stays `None`.
    let user_spann = SpannIndexConfig {
        search_nprobe: Some(20),
        search_rng_epsilon: Some(8.0),
        split_threshold: Some(150),
        ..Default::default()
    };

    let merged = Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann)).unwrap();

    // User-set fields win.
    assert_eq!(merged.search_nprobe, Some(20));
    assert_eq!(merged.search_rng_epsilon, Some(8.0));
    assert_eq!(merged.split_threshold, Some(150));

    // Fields the user left unset fall back to the default.
    assert_eq!(merged.search_rng_factor, Some(1.0));
    assert_eq!(merged.nreplica_count, Some(3));
    assert_eq!(merged.initial_lambda, Some(100.0));
}
2121
/// Converting `(Option<&Space>, &SpannIndexConfig)` into an
/// `InternalSpannConfiguration` copies the fields that are set, substitutes
/// the `default_*` helpers for unset ones, and uses `default_space()` when no
/// space is supplied.
#[test]
fn test_spann_index_config_into_internal_configuration() {
    let partial = SpannIndexConfig {
        search_nprobe: Some(33),
        search_rng_factor: Some(1.2),
        search_rng_epsilon: None,
        nreplica_count: None,
        write_rng_factor: Some(1.5),
        write_rng_epsilon: None,
        split_threshold: Some(75),
        num_samples_kmeans: None,
        initial_lambda: Some(0.9),
        reassign_neighbor_count: Some(40),
        merge_threshold: None,
        num_centers_to_merge_to: Some(4),
        write_nprobe: Some(60),
        ef_construction: Some(180),
        ef_search: Some(170),
        max_neighbors: Some(32),
    };

    // Explicit space: it is carried through to the internal configuration.
    let cosine: InternalSpannConfiguration = (Some(&Space::Cosine), &partial).into();
    assert_eq!(cosine.space, Space::Cosine);

    // Explicitly set fields are copied verbatim.
    assert_eq!(cosine.search_nprobe, 33);
    assert_eq!(cosine.search_rng_factor, 1.2);
    // Unset field falls back to its default helper.
    assert_eq!(cosine.search_rng_epsilon, default_search_rng_epsilon());
    assert_eq!(cosine.write_rng_factor, 1.5);
    assert_eq!(cosine.write_nprobe, 60);
    assert_eq!(cosine.ef_construction, 180);
    assert_eq!(cosine.ef_search, 170);
    assert_eq!(cosine.max_neighbors, 32);
    // Unset field falls back to its default helper.
    assert_eq!(cosine.merge_threshold, default_merge_threshold());

    // No space supplied: the default space is used.
    let spaceless: InternalSpannConfiguration = (None, &partial).into();
    assert_eq!(spaceless.space, default_space());
}
2158
/// Exercises `Schema::merge_string_type` for all four presence combinations
/// of the default and user string value types.
#[test]
fn test_merge_string_type_combinations() {
    let default = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: true,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: Some(FtsIndexType {
            enabled: false,
            config: FtsIndexConfig {},
        }),
    };

    // The user disables the inverted index and says nothing about FTS.
    let user = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: None,
    };

    // Small accessor to keep the assertions below readable.
    let inverted_enabled =
        |value: &StringValueType| value.string_inverted_index.as_ref().unwrap().enabled;

    // Both present: user's inverted-index setting wins, FTS comes from the default.
    let both = Schema::merge_string_type(Some(&default), Some(&user))
        .unwrap()
        .unwrap();
    assert!(!inverted_enabled(&both));
    assert!(!both.fts_index.as_ref().unwrap().enabled);

    // Default only.
    let default_only = Schema::merge_string_type(Some(&default), None)
        .unwrap()
        .unwrap();
    assert!(inverted_enabled(&default_only));

    // User only.
    let user_only = Schema::merge_string_type(None, Some(&user))
        .unwrap()
        .unwrap();
    assert!(!inverted_enabled(&user_only));

    // Neither present: nothing to merge.
    let neither = Schema::merge_string_type(None, None).unwrap();
    assert!(neither.is_none());
}
2205
/// Merging vector index configs: top-level fields prefer the user value and
/// fall back to the default, the nested HNSW config is merged per field, and
/// a user-only SPANN config is carried into the result.
#[test]
fn test_merge_vector_index_config_comprehensive() {
    let default_config = VectorIndexConfig {
        space: Some(Space::Cosine),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(200),
            max_neighbors: Some(16),
            ef_search: Some(10),
            num_threads: Some(4),
            batch_size: Some(100),
            sync_threshold: Some(1000),
            resize_factor: Some(1.2),
        }),
        spann: None,
    };

    // User HNSW section: only `ef_construction` is set.
    let user_hnsw = HnswIndexConfig {
        ef_construction: Some(300),
        max_neighbors: None,
        ef_search: None,
        num_threads: None,
        batch_size: None,
        sync_threshold: None,
        resize_factor: None,
    };
    // User SPANN section: only `search_nprobe` is set.
    let user_spann = SpannIndexConfig {
        search_nprobe: Some(15),
        search_rng_factor: None,
        search_rng_epsilon: None,
        nreplica_count: None,
        write_rng_factor: None,
        write_rng_epsilon: None,
        split_threshold: None,
        num_samples_kmeans: None,
        initial_lambda: None,
        reassign_neighbor_count: None,
        merge_threshold: None,
        num_centers_to_merge_to: None,
        write_nprobe: None,
        ef_construction: None,
        ef_search: None,
        max_neighbors: None,
    };
    let user_config = VectorIndexConfig {
        space: Some(Space::L2),
        embedding_function: None,
        source_key: Some("user_key".to_string()),
        hnsw: Some(user_hnsw),
        spann: Some(user_spann),
    };

    let merged = Schema::merge_vector_index_config(&default_config, &user_config);

    // Top-level fields: user value wins, otherwise the default is kept.
    assert_eq!(merged.space, Some(Space::L2));
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(merged.source_key, Some("user_key".to_string()));

    // Nested HNSW: user's `ef_construction`, default's `max_neighbors`.
    let merged_hnsw = merged.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(300));
    assert_eq!(merged_hnsw.max_neighbors, Some(16));

    // SPANN exists only on the user side and must survive the merge.
    assert!(merged.spann.is_some());
    assert_eq!(merged.spann.as_ref().unwrap().search_nprobe, Some(15));
}
2276
/// Merging sparse vector index configs keeps the user's `source_key` and
/// falls back to the default `embedding_function` when the user sets none.
#[test]
fn test_merge_sparse_vector_index_config() {
    let base = SparseVectorIndexConfig {
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_sparse_key".to_string()),
        bm25: None,
    };
    let overrides = SparseVectorIndexConfig {
        embedding_function: None,
        source_key: Some("user_sparse_key".to_string()),
        bm25: None,
    };

    let merged = Schema::merge_sparse_vector_index_config(&base, &overrides).unwrap();

    // User-provided value wins.
    assert_eq!(merged.source_key, Some("user_sparse_key".to_string()));
    // Unset user value falls back to the default.
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
}
2303
/// End-to-end merge of a user schema (custom string defaults, custom vector
/// defaults, and one extra key override) onto the default HNSW schema, using
/// `Schema::merge_value_types` for the defaults and for every key that both
/// schemas define.
#[test]
fn test_complex_nested_merging_scenario() {
    // User defaults for strings: inverted index off, FTS on.
    let user_string_defaults = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: Some(FtsIndexType {
            enabled: true,
            config: FtsIndexConfig {},
        }),
    };

    // User defaults for float lists: a partially specified HNSW vector index.
    let user_float_list_defaults = FloatListValueType {
        vector_index: Some(VectorIndexType {
            enabled: true,
            config: VectorIndexConfig {
                space: Some(Space::Ip),
                embedding_function: None,
                source_key: Some("custom_vector_key".to_string()),
                hnsw: Some(HnswIndexConfig {
                    ef_construction: Some(400),
                    max_neighbors: Some(32),
                    ef_search: None,
                    num_threads: None,
                    batch_size: None,
                    sync_threshold: None,
                    resize_factor: None,
                }),
                spann: None,
            },
        }),
    };

    // Per-key override that only configures FTS for "custom_field".
    let custom_key_override = ValueTypes {
        string: Some(StringValueType {
            fts_index: Some(FtsIndexType {
                enabled: true,
                config: FtsIndexConfig {},
            }),
            string_inverted_index: None,
        }),
        ..Default::default()
    };

    let user_schema = Schema {
        defaults: ValueTypes {
            string: Some(user_string_defaults),
            float_list: Some(user_float_list_defaults),
            ..Default::default()
        },
        keys: HashMap::from([("custom_field".to_string(), custom_key_override)]),
    };

    // Merge the user schema on top of the default HNSW schema.
    let base_schema = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults =
        Schema::merge_value_types(&base_schema.defaults, &user_schema.defaults).unwrap();
    let mut merged_keys = base_schema.keys.clone();
    for (key, user_value_types) in user_schema.keys {
        // Keys known to the default schema are merged; new keys are taken as-is.
        let resolved = match merged_keys.get(&key) {
            Some(existing) => Schema::merge_value_types(existing, &user_value_types).unwrap(),
            None => user_value_types,
        };
        merged_keys.insert(key, resolved);
    }
    let result = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
    };

    // String defaults reflect the user's choices.
    let string_defaults = result.defaults.string.as_ref().unwrap();
    assert!(!string_defaults.string_inverted_index.as_ref().unwrap().enabled);
    assert!(string_defaults.fts_index.as_ref().unwrap().enabled);

    // Vector defaults: user values where given, defaults elsewhere.
    let vector_config = &result
        .defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    assert_eq!(vector_config.space, Some(Space::Ip));
    assert_eq!(
        vector_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(
        vector_config.source_key,
        Some("custom_vector_key".to_string())
    );
    let merged_hnsw = vector_config.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(400));
    assert_eq!(merged_hnsw.max_neighbors, Some(32));
    assert_eq!(merged_hnsw.ef_search, Some(default_search_ef()));

    // Default key overrides survive and the custom key was added.
    assert!(result.keys.contains_key(EMBEDDING_KEY));
    assert!(result.keys.contains_key(DOCUMENT_KEY));
    assert!(result.keys.contains_key("custom_field"));

    // The custom key was inserted verbatim: FTS on, no inverted index entry.
    let custom_string = result
        .keys
        .get("custom_field")
        .unwrap()
        .string
        .as_ref()
        .unwrap();
    assert!(custom_string.fts_index.as_ref().unwrap().enabled);
    assert!(custom_string.string_inverted_index.is_none());
}
2458
/// Reconciling a default HNSW schema with the default HNSW collection
/// configuration must return the schema unchanged.
#[test]
fn test_reconcile_with_collection_config_default_config() {
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let default_config = InternalCollectionConfiguration::default_hnsw();

    let reconciled =
        Schema::reconcile_with_collection_config(default_schema.clone(), default_config).unwrap();

    assert_eq!(reconciled, default_schema);
}
2469
/// When both the schema and the collection configuration differ from their
/// defaults, reconciliation must fail with the expected message.
#[test]
fn test_reconcile_with_collection_config_both_non_default() {
    // Make the schema non-default by replacing the string defaults.
    let mut custom_schema = Schema::new_default(KnnIndex::Hnsw);
    custom_schema.defaults.string = Some(StringValueType {
        fts_index: Some(FtsIndexType {
            enabled: true,
            config: FtsIndexConfig {},
        }),
        string_inverted_index: None,
    });

    // Make the collection config non-default by changing `ef_construction`.
    let mut custom_config = InternalCollectionConfiguration::default_hnsw();
    if let VectorIndexConfiguration::Hnsw(hnsw) = &mut custom_config.vector_index {
        hnsw.ef_construction = 500;
    }

    let outcome = Schema::reconcile_with_collection_config(custom_schema, custom_config);

    assert!(outcome.is_err());
    assert_eq!(
        outcome.unwrap_err(),
        "Cannot set both collection config and schema at the same time"
    );
}
2496
/// A non-default HNSW collection configuration reconciled with a default
/// schema must show up in the vector index stored under `EMBEDDING_KEY`.
#[test]
fn test_reconcile_with_collection_config_hnsw_override() {
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };

    let reconciled =
        Schema::reconcile_with_collection_config(default_schema, custom_config).unwrap();

    // Walk down to the vector index registered for the embedding key.
    let embedding_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .and_then(|value_types| value_types.float_list.as_ref())
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();

    assert!(embedding_index.enabled);
    let index_config = &embedding_index.config;
    assert_eq!(index_config.space, Some(Space::L2));
    assert_eq!(
        index_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(index_config.source_key, Some(DOCUMENT_KEY.to_string()));

    // Every HNSW parameter mirrors the collection configuration.
    let hnsw = index_config.hnsw.as_ref().unwrap();
    assert_eq!(hnsw.ef_construction, Some(300));
    assert_eq!(hnsw.max_neighbors, Some(32));
    assert_eq!(hnsw.ef_search, Some(50));
    assert_eq!(hnsw.num_threads, Some(8));
    assert_eq!(hnsw.batch_size, Some(200));
    assert_eq!(hnsw.sync_threshold, Some(2000));
    assert_eq!(hnsw.resize_factor, Some(1.5));

    // No SPANN section is produced for an HNSW configuration.
    assert!(index_config.spann.is_none());
}
2550
/// A non-default SPANN collection configuration reconciled with a default
/// schema must show up in the vector index stored under `EMBEDDING_KEY`.
#[test]
fn test_reconcile_with_collection_config_spann_override() {
    let default_schema = Schema::new_default(KnnIndex::Spann);
    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
            search_nprobe: 20,
            search_rng_factor: 3.0,
            search_rng_epsilon: 0.2,
            nreplica_count: 5,
            write_rng_factor: 2.0,
            write_rng_epsilon: 0.1,
            split_threshold: 2000,
            num_samples_kmeans: 200,
            initial_lambda: 0.8,
            reassign_neighbor_count: 100,
            merge_threshold: 800,
            num_centers_to_merge_to: 20,
            write_nprobe: 10,
            ef_construction: 400,
            ef_search: 60,
            max_neighbors: 24,
            space: Space::Cosine,
        }),
        embedding_function: None,
    };

    let reconciled =
        Schema::reconcile_with_collection_config(default_schema, custom_config).unwrap();

    // Walk down to the vector index registered for the embedding key.
    let embedding_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .and_then(|value_types| value_types.float_list.as_ref())
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();

    assert!(embedding_index.enabled);
    let index_config = &embedding_index.config;
    assert_eq!(index_config.space, Some(Space::Cosine));
    assert_eq!(index_config.embedding_function, None);
    assert_eq!(index_config.source_key, Some(DOCUMENT_KEY.to_string()));

    // No HNSW section is produced for a SPANN configuration.
    assert!(index_config.hnsw.is_none());

    // Every SPANN parameter mirrors the collection configuration.
    let spann = index_config.spann.as_ref().unwrap();
    assert_eq!(spann.search_nprobe, Some(20));
    assert_eq!(spann.search_rng_factor, Some(3.0));
    assert_eq!(spann.search_rng_epsilon, Some(0.2));
    assert_eq!(spann.nreplica_count, Some(5));
    assert_eq!(spann.write_rng_factor, Some(2.0));
    assert_eq!(spann.write_rng_epsilon, Some(0.1));
    assert_eq!(spann.split_threshold, Some(2000));
    assert_eq!(spann.num_samples_kmeans, Some(200));
    assert_eq!(spann.initial_lambda, Some(0.8));
    assert_eq!(spann.reassign_neighbor_count, Some(100));
    assert_eq!(spann.merge_threshold, Some(800));
    assert_eq!(spann.num_centers_to_merge_to, Some(20));
    assert_eq!(spann.write_nprobe, Some(10));
    assert_eq!(spann.ef_construction, Some(400));
    assert_eq!(spann.ef_search, Some(60));
    assert_eq!(spann.max_neighbors, Some(24));
}
2619
/// Reconciling a custom HNSW collection configuration must update the vector
/// index in two places: the schema-wide float-list defaults (which stay
/// disabled) and the `EMBEDDING_KEY` override (which stays enabled).
#[test]
fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
    let default_schema = Schema::new_default(KnnIndex::Hnsw);

    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };

    let reconciled =
        Schema::reconcile_with_collection_config(default_schema, custom_config).unwrap();

    // --- Schema-wide defaults -------------------------------------------
    let defaults_index = reconciled
        .defaults
        .float_list
        .as_ref()
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();

    assert!(!defaults_index.enabled);
    assert_eq!(defaults_index.config.space, Some(Space::L2));
    assert_eq!(
        defaults_index.config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(
        defaults_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
    let defaults_hnsw = defaults_index.config.hnsw.as_ref().unwrap();
    assert_eq!(defaults_hnsw.ef_construction, Some(300));
    assert_eq!(defaults_hnsw.max_neighbors, Some(32));

    // --- Embedding key override -----------------------------------------
    let embedding_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .and_then(|value_types| value_types.float_list.as_ref())
        .and_then(|float_list| float_list.vector_index.as_ref())
        .unwrap();

    assert!(embedding_index.enabled);
    assert_eq!(embedding_index.config.space, Some(Space::L2));
    assert_eq!(
        embedding_index.config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(
        embedding_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
    let embedding_hnsw = embedding_index.config.hnsw.as_ref().unwrap();
    assert_eq!(embedding_hnsw.ef_construction, Some(300));
    assert_eq!(embedding_hnsw.max_neighbors, Some(32));
}
2694
/// `Schema::is_schema_default` accepts the default HNSW and SPANN schemas and
/// rejects an empty schema, a schema with a modified default, and a schema
/// with an extra key override.
#[test]
fn test_is_schema_default() {
    // Both freshly built default schemas count as default.
    assert!(Schema::is_schema_default(&Schema::new_default(
        KnnIndex::Hnsw
    )));
    assert!(Schema::is_schema_default(&Schema::new_default(
        KnnIndex::Spann
    )));

    // A schema with nothing configured is not the default.
    let blank = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::new(),
    };
    assert!(!Schema::is_schema_default(&blank));

    // Disabling the string inverted index makes the schema non-default.
    let mut tweaked = Schema::new_default(KnnIndex::Hnsw);
    if let Some(inverted) = tweaked
        .defaults
        .string
        .as_mut()
        .and_then(|string_type| string_type.string_inverted_index.as_mut())
    {
        inverted.enabled = false;
    }
    assert!(!Schema::is_schema_default(&tweaked));

    // An additional key override also makes the schema non-default.
    let mut extended = Schema::new_default(KnnIndex::Hnsw);
    extended
        .keys
        .insert("custom_key".to_string(), ValueTypes::default());
    assert!(!Schema::is_schema_default(&extended));
}
2728
/// When two schemas override the same key for different value types (string
/// on one side, float on the other), `merge` keeps both overrides.
#[test]
fn test_add_merges_keys_by_value_type() {
    // Left schema: string override for "custom_field".
    let mut left = Schema::new_default(KnnIndex::Hnsw);
    left.keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            string: Some(StringValueType {
                string_inverted_index: Some(StringInvertedIndexType {
                    enabled: true,
                    config: StringInvertedIndexConfig {},
                }),
                fts_index: None,
            }),
            ..Default::default()
        },
    );

    // Right schema: float override for the same key.
    let mut right = Schema::new_default(KnnIndex::Hnsw);
    right.keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            float: Some(FloatValueType {
                float_inverted_index: Some(FloatInvertedIndexType {
                    enabled: true,
                    config: FloatInvertedIndexConfig {},
                }),
            }),
            ..Default::default()
        },
    );

    let merged = left.merge(&right).unwrap();
    let combined = merged.keys.get("custom_field").unwrap();

    // Both value types are present on the merged key...
    assert!(combined.string.is_some());
    assert!(combined.float.is_some());

    // ...and each keeps its enabled index.
    let string_index = combined
        .string
        .as_ref()
        .unwrap()
        .string_inverted_index
        .as_ref()
        .unwrap();
    assert!(string_index.enabled);

    let float_index = combined
        .float
        .as_ref()
        .unwrap()
        .float_inverted_index
        .as_ref()
        .unwrap();
    assert!(float_index.enabled);
}
2787
/// `merge` must fail with `SchemaError::InvalidSchema` when the two schemas
/// have different defaults.
#[test]
fn test_add_rejects_different_defaults() {
    let left = Schema::new_default(KnnIndex::Hnsw);

    // Diverge the right-hand defaults by disabling the string inverted index.
    let mut right = Schema::new_default(KnnIndex::Hnsw);
    if let Some(inverted) = right
        .defaults
        .string
        .as_mut()
        .and_then(|string_type| string_type.string_inverted_index.as_mut())
    {
        inverted.enabled = false;
    }

    let failure = left.merge(&right).unwrap_err();
    if let SchemaError::InvalidSchema { reason } = failure {
        assert_eq!(reason, "Cannot merge schemas with differing defaults")
    } else {
        panic!("Expected InvalidSchema error")
    }
}
2807
/// `merge` must fail with `SchemaError::InvalidSchema` when both schemas
/// override the same key and value type with different settings.
#[test]
fn test_add_detects_conflicting_value_type_configuration() {
    // Builds a string override whose inverted index has the given state.
    let string_override = |enabled: bool| ValueTypes {
        string: Some(StringValueType {
            string_inverted_index: Some(StringInvertedIndexType {
                enabled,
                config: StringInvertedIndexConfig {},
            }),
            fts_index: None,
        }),
        ..Default::default()
    };

    // Left enables the index for "custom_field", right disables it.
    let mut left = Schema::new_default(KnnIndex::Hnsw);
    let mut right = Schema::new_default(KnnIndex::Hnsw);
    left.keys
        .insert("custom_field".to_string(), string_override(true));
    right
        .keys
        .insert("custom_field".to_string(), string_override(false));

    let failure = left.merge(&right).unwrap_err();
    if let SchemaError::InvalidSchema { reason } = failure {
        assert!(reason.contains("Conflicting configuration"));
    } else {
        panic!("Expected InvalidSchema error")
    }
}
2849
/// The legacy JSON spelling (`key_overrides`, `#`-prefixed value types and
/// `$`-prefixed index names) must deserialize to the same `Schema` as the
/// current spelling, and serialization must emit only the current names.
#[test]
fn test_backward_compatibility_aliases() {
    // Legacy field names.
    let old_format_json = r###"{
        "defaults": {
            "#string": {
                "$fts_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "#int": {
                "$int_inverted_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "#float_list": {
                "$vector_index": {
                    "enabled": true,
                    "config": {
                        "spann": {
                            "search_nprobe": 10
                        }
                    }
                }
            }
        },
        "key_overrides": {
            "#document": {
                "#string": {
                    "$fts_index": {
                        "enabled": false,
                        "config": {}
                    }
                }
            }
        }
    }"###;

    // Same content using the current field names.
    let new_format_json = r###"{
        "defaults": {
            "string": {
                "fts_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "int": {
                "int_inverted_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "float_list": {
                "vector_index": {
                    "enabled": true,
                    "config": {
                        "spann": {
                            "search_nprobe": 10
                        }
                    }
                }
            }
        },
        "keys": {
            "#document": {
                "string": {
                    "fts_index": {
                        "enabled": false,
                        "config": {}
                    }
                }
            }
        }
    }"###;

    let legacy_schema: Schema = serde_json::from_str(old_format_json).unwrap();
    let current_schema: Schema = serde_json::from_str(new_format_json).unwrap();

    // Both spellings describe the same schema.
    assert_eq!(legacy_schema, current_schema);

    // Spot-check the legacy result: string defaults.
    let defaults = &legacy_schema.defaults;
    assert!(defaults.string.is_some());
    let string_defaults = defaults.string.as_ref().unwrap();
    assert!(string_defaults.fts_index.is_some());
    assert!(string_defaults.fts_index.as_ref().unwrap().enabled);

    // Int defaults.
    assert!(defaults.int.is_some());
    assert!(defaults.int.as_ref().unwrap().int_inverted_index.is_some());

    // Float-list defaults.
    assert!(defaults.float_list.is_some());
    assert!(defaults.float_list.as_ref().unwrap().vector_index.is_some());

    // The document key override was picked up through `key_overrides`.
    assert!(legacy_schema.keys.contains_key(DOCUMENT_KEY));
    let document_override = legacy_schema.keys.get(DOCUMENT_KEY).unwrap();
    assert!(document_override.string.is_some());
    let document_fts = document_override
        .string
        .as_ref()
        .unwrap()
        .fts_index
        .as_ref()
        .unwrap();
    assert!(!document_fts.enabled);

    let serialized = serde_json::to_string(&legacy_schema).unwrap();

    // Serialization uses the current names...
    for current_name in [
        r#""keys":"#,
        r#""string":"#,
        r#""fts_index":"#,
        r#""int_inverted_index":"#,
        r#""vector_index":"#,
    ] {
        assert!(serialized.contains(current_name));
    }

    // ...and never the legacy aliases.
    for legacy_name in [
        r#""key_overrides":"#,
        r###""#string":"###,
        r###""$fts_index":"###,
        r###""$int_inverted_index":"###,
        r###""$vector_index":"###,
    ] {
        assert!(!serialized.contains(legacy_name));
    }
}
3006
/// Checks which `HnswIndexConfig` values pass `validate()`: `batch_size` and
/// `sync_threshold` of 1 are rejected while 2 is accepted, and the remaining
/// fields accept small values.
#[test]
fn test_hnsw_index_config_validation() {
    use validator::Validate;

    let accepted = |config: HnswIndexConfig| config.validate().is_ok();
    let rejected = |config: HnswIndexConfig| config.validate().is_err();

    // A typical, fully valid configuration.
    assert!(accepted(HnswIndexConfig {
        batch_size: Some(10),
        sync_threshold: Some(100),
        ef_construction: Some(100),
        max_neighbors: Some(16),
        ..Default::default()
    }));

    // `batch_size` of 1 is rejected.
    assert!(rejected(HnswIndexConfig {
        batch_size: Some(1),
        ..Default::default()
    }));

    // `sync_threshold` of 1 is rejected.
    assert!(rejected(HnswIndexConfig {
        sync_threshold: Some(1),
        ..Default::default()
    }));

    // A value of 2 is accepted for both fields.
    assert!(accepted(HnswIndexConfig {
        batch_size: Some(2),
        sync_threshold: Some(2),
        ..Default::default()
    }));

    // A configuration built purely from `Default` validates.
    assert!(accepted(HnswIndexConfig::default()));

    // Small values on the remaining fields are accepted.
    assert!(accepted(HnswIndexConfig {
        ef_construction: Some(1),
        max_neighbors: Some(1),
        ef_search: Some(1),
        num_threads: Some(1),
        resize_factor: Some(0.1),
        ..Default::default()
    }));
}
3060
/// Checks which `SpannIndexConfig` values pass `validate()`. Each case sets
/// one field (the rest come from `Default`) and asserts whether that value is
/// accepted or rejected.
#[test]
fn test_spann_index_config_validation() {
    use validator::Validate;

    let accepted = |config: SpannIndexConfig| config.validate().is_ok();
    let rejected = |config: SpannIndexConfig| config.validate().is_err();

    // A fully valid configuration.
    assert!(accepted(SpannIndexConfig {
        write_nprobe: Some(32),
        nreplica_count: Some(4),
        split_threshold: Some(100),
        merge_threshold: Some(50),
        reassign_neighbor_count: Some(32),
        num_centers_to_merge_to: Some(4),
        ef_construction: Some(100),
        ef_search: Some(100),
        max_neighbors: Some(32),
        search_rng_factor: Some(1.0),
        write_rng_factor: Some(1.0),
        search_rng_epsilon: Some(7.5),
        write_rng_epsilon: Some(7.5),
        ..Default::default()
    }));

    // write_nprobe
    assert!(rejected(SpannIndexConfig {
        write_nprobe: Some(200),
        ..Default::default()
    }));

    // split_threshold: one value rejected as too low, one as too high.
    assert!(rejected(SpannIndexConfig {
        split_threshold: Some(10),
        ..Default::default()
    }));
    assert!(rejected(SpannIndexConfig {
        split_threshold: Some(250),
        ..Default::default()
    }));

    // nreplica_count
    assert!(rejected(SpannIndexConfig {
        nreplica_count: Some(10),
        ..Default::default()
    }));

    // reassign_neighbor_count
    assert!(rejected(SpannIndexConfig {
        reassign_neighbor_count: Some(100),
        ..Default::default()
    }));

    // merge_threshold: one value rejected as too low, one as too high.
    assert!(rejected(SpannIndexConfig {
        merge_threshold: Some(5),
        ..Default::default()
    }));
    assert!(rejected(SpannIndexConfig {
        merge_threshold: Some(150),
        ..Default::default()
    }));

    // num_centers_to_merge_to
    assert!(rejected(SpannIndexConfig {
        num_centers_to_merge_to: Some(10),
        ..Default::default()
    }));

    // ef_construction
    assert!(rejected(SpannIndexConfig {
        ef_construction: Some(300),
        ..Default::default()
    }));

    // ef_search
    assert!(rejected(SpannIndexConfig {
        ef_search: Some(300),
        ..Default::default()
    }));

    // max_neighbors
    assert!(rejected(SpannIndexConfig {
        max_neighbors: Some(100),
        ..Default::default()
    }));

    // search_nprobe
    assert!(rejected(SpannIndexConfig {
        search_nprobe: Some(200),
        ..Default::default()
    }));

    // search_rng_factor: 0.9 and 1.1 are rejected, 1.0 is accepted.
    assert!(rejected(SpannIndexConfig {
        search_rng_factor: Some(0.9),
        ..Default::default()
    }));
    assert!(rejected(SpannIndexConfig {
        search_rng_factor: Some(1.1),
        ..Default::default()
    }));
    assert!(accepted(SpannIndexConfig {
        search_rng_factor: Some(1.0),
        ..Default::default()
    }));

    // search_rng_epsilon: 4.0 and 11.0 are rejected, 7.5 is accepted.
    assert!(rejected(SpannIndexConfig {
        search_rng_epsilon: Some(4.0),
        ..Default::default()
    }));
    assert!(rejected(SpannIndexConfig {
        search_rng_epsilon: Some(11.0),
        ..Default::default()
    }));
    assert!(accepted(SpannIndexConfig {
        search_rng_epsilon: Some(7.5),
        ..Default::default()
    }));

    // write_rng_factor: 0.9 and 1.1 are rejected, 1.0 is accepted.
    assert!(rejected(SpannIndexConfig {
        write_rng_factor: Some(0.9),
        ..Default::default()
    }));
    assert!(rejected(SpannIndexConfig {
        write_rng_factor: Some(1.1),
        ..Default::default()
    }));
    assert!(accepted(SpannIndexConfig {
        write_rng_factor: Some(1.0),
        ..Default::default()
    }));

    // write_rng_epsilon: 4.0 and 11.0 are rejected, 7.5 is accepted.
    assert!(rejected(SpannIndexConfig {
        write_rng_epsilon: Some(4.0),
        ..Default::default()
    }));
    assert!(rejected(SpannIndexConfig {
        write_rng_epsilon: Some(11.0),
        ..Default::default()
    }));
    assert!(accepted(SpannIndexConfig {
        write_rng_epsilon: Some(7.5),
        ..Default::default()
    }));

    // num_samples_kmeans: 1500 is rejected, 500 is accepted.
    assert!(rejected(SpannIndexConfig {
        num_samples_kmeans: Some(1500),
        ..Default::default()
    }));
    assert!(accepted(SpannIndexConfig {
        num_samples_kmeans: Some(500),
        ..Default::default()
    }));

    // initial_lambda: 150.0 and 50.0 are rejected, 100.0 is accepted.
    assert!(rejected(SpannIndexConfig {
        initial_lambda: Some(150.0),
        ..Default::default()
    }));
    assert!(rejected(SpannIndexConfig {
        initial_lambda: Some(50.0),
        ..Default::default()
    }));
    assert!(accepted(SpannIndexConfig {
        initial_lambda: Some(100.0),
        ..Default::default()
    }));

    // A configuration built purely from `Default` validates.
    assert!(accepted(SpannIndexConfig::default()));
}
3287}