1use chroma_error::{ChromaError, ErrorCodes};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use thiserror::Error;
5use validator::Validate;
6
7use crate::collection_configuration::{
8 EmbeddingFunctionConfiguration, InternalCollectionConfiguration, VectorIndexConfiguration,
9};
10use crate::hnsw_configuration::Space;
11use crate::metadata::{MetadataComparison, MetadataValueType, Where};
12use crate::operator::QueryVector;
13use crate::{
14 default_batch_size, default_construction_ef, default_construction_ef_spann,
15 default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
16 default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
17 default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
18 default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
19 default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
20 default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor,
21 InternalSpannConfiguration, KnnIndex,
22};
23
24impl ChromaError for SchemaError {
25 fn code(&self) -> ErrorCodes {
26 ErrorCodes::Internal
27 }
28}
29
/// Errors describing a schema that is malformed or cannot be reconciled.
#[derive(Debug, Error)]
pub enum SchemaError {
    /// No index configuration is present for the metadata key `key` holding a
    /// value of type `value_type`.
    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
    MissingIndexConfiguration { key: String, value_type: String },
    /// Reconciliation failed; `reason` carries the human-readable explanation
    /// (for example differing defaults or conflicting index configuration when
    /// merging two schemas).
    #[error("Schema reconciliation failed: {reason}")]
    InvalidSchema { reason: String },
}
37
/// Errors raised while validating a metadata filter against a schema.
#[derive(Debug, Error)]
pub enum FilterValidationError {
    /// The filter references metadata key `key` with a value of type
    /// `value_type`, but indexing is disabled for that combination.
    #[error(
        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
    )]
    IndexingDisabled {
        key: String,
        value_type: MetadataValueType,
    },
    /// A wrapped [`SchemaError`]; its message is displayed unchanged
    /// (`transparent`) and `?` converts into this variant via `From`.
    #[error(transparent)]
    Schema(#[from] SchemaError),
}
50
51impl ChromaError for FilterValidationError {
52 fn code(&self) -> ErrorCodes {
53 match self {
54 FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
55 FilterValidationError::Schema(_) => ErrorCodes::Internal,
56 }
57 }
58}
59
// Value type names. These strings equal the serde names of the fields of
// `ValueTypes` ("string", "int", "bool", "float", "float_list", "sparse_vector").
pub const STRING_VALUE_NAME: &str = "string";
pub const INT_VALUE_NAME: &str = "int";
pub const BOOL_VALUE_NAME: &str = "bool";
pub const FLOAT_VALUE_NAME: &str = "float";
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// Index names. These strings equal the serde names of the index fields inside
// the per-value-type structs (`StringValueType`, `FloatListValueType`, ...).
pub const FTS_INDEX_NAME: &str = "fts_index";
pub const VECTOR_INDEX_NAME: &str = "vector_index";
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

// Reserved keys under which `Schema::new_default` registers the built-in
// overrides for the document text and the embedding vector.
pub const DOCUMENT_KEY: &str = "#document";
pub const EMBEDDING_KEY: &str = "#embedding";
85
/// Index configuration for a collection: a set of per-value-type defaults plus
/// per-key overrides.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct Schema {
    /// Index settings per value type, stored separately from the per-key entries.
    pub defaults: ValueTypes,
    /// Index settings keyed by metadata key name (including the reserved
    /// `#document` and `#embedding` keys). Serialized as `keys`; the name
    /// `key_overrides` is also accepted when deserializing.
    #[serde(rename = "keys", alias = "key_overrides")]
    pub keys: HashMap<String, ValueTypes>,
}
103
104pub fn is_embedding_function_default(
105 embedding_function: &Option<EmbeddingFunctionConfiguration>,
106) -> bool {
107 match embedding_function {
108 None => true,
109 Some(embedding_function) => embedding_function.is_default(),
110 }
111}
112
113pub fn is_space_default(space: &Option<Space>) -> bool {
115 match space {
116 None => true, Some(s) => *s == default_space(), }
119}
120
121pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
123 hnsw_config.ef_construction == Some(default_construction_ef())
124 && hnsw_config.ef_search == Some(default_search_ef())
125 && hnsw_config.max_neighbors == Some(default_m())
126 && hnsw_config.num_threads == Some(default_num_threads())
127 && hnsw_config.batch_size == Some(default_batch_size())
128 && hnsw_config.sync_threshold == Some(default_sync_threshold())
129 && hnsw_config.resize_factor == Some(default_resize_factor())
130}
131
/// Index configuration grouped by value type.
///
/// Every field is optional; `Default` yields a value with all fields `None`.
/// Each field is serialized under its plain name and omitted when `None`; the
/// `#`-prefixed alias is additionally accepted when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct ValueTypes {
    /// Indexes for string values (full-text search and string inverted index).
    #[serde(
        rename = "string",
        alias = "#string",
        skip_serializing_if = "Option::is_none"
    )]
    pub string: Option<StringValueType>,

    /// Indexes for float-list values (vector index).
    #[serde(
        rename = "float_list",
        alias = "#float_list",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_list: Option<FloatListValueType>,

    /// Indexes for sparse-vector values.
    #[serde(
        rename = "sparse_vector",
        alias = "#sparse_vector",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector: Option<SparseVectorValueType>,

    /// Indexes for integer values.
    #[serde(
        rename = "int",
        alias = "#int",
        skip_serializing_if = "Option::is_none"
    )]
    pub int: Option<IntValueType>,

    /// Indexes for float values.
    #[serde(
        rename = "float",
        alias = "#float",
        skip_serializing_if = "Option::is_none"
    )]
    pub float: Option<FloatValueType>,

    /// Indexes for boolean values (serialized as `bool`).
    #[serde(
        rename = "bool",
        alias = "#bool",
        skip_serializing_if = "Option::is_none"
    )]
    pub boolean: Option<BoolValueType>,
}
185
/// Indexes that can be configured for string values.
///
/// Fields are omitted from serialized output when `None`; the `$`-prefixed
/// aliases are accepted when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringValueType {
    /// Full-text search index settings.
    #[serde(
        rename = "fts_index",
        alias = "$fts_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub fts_index: Option<FtsIndexType>,

    /// String inverted index settings.
    #[serde(
        rename = "string_inverted_index",
        alias = "$string_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub string_inverted_index: Option<StringInvertedIndexType>,
}
204
/// Indexes that can be configured for float-list values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatListValueType {
    /// Vector index settings; omitted from output when `None`, and also
    /// accepted as `$vector_index` on input.
    #[serde(
        rename = "vector_index",
        alias = "$vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub vector_index: Option<VectorIndexType>,
}
216
/// Indexes that can be configured for sparse-vector values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorValueType {
    /// Sparse vector index settings; omitted from output when `None`, and also
    /// accepted as `$sparse_vector_index` on input.
    #[serde(
        rename = "sparse_vector_index",
        alias = "$sparse_vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector_index: Option<SparseVectorIndexType>,
}
228
/// Indexes that can be configured for integer values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntValueType {
    /// Integer inverted index settings; omitted from output when `None`, and
    /// also accepted as `$int_inverted_index` on input.
    #[serde(
        rename = "int_inverted_index",
        alias = "$int_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub int_inverted_index: Option<IntInvertedIndexType>,
}
241
/// Indexes that can be configured for float values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatValueType {
    /// Float inverted index settings; omitted from output when `None`, and
    /// also accepted as `$float_inverted_index` on input.
    #[serde(
        rename = "float_inverted_index",
        alias = "$float_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_inverted_index: Option<FloatInvertedIndexType>,
}
253
/// Indexes that can be configured for boolean values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolValueType {
    /// Boolean inverted index settings; omitted from output when `None`, and
    /// also accepted as `$bool_inverted_index` on input.
    #[serde(
        rename = "bool_inverted_index",
        alias = "$bool_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub bool_inverted_index: Option<BoolInvertedIndexType>,
}
265
/// Full-text search index: an on/off switch plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FtsIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: FtsIndexConfig,
}
273
/// Vector index: an on/off switch plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct VectorIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Space, embedding function, source key and HNSW/SPANN parameters.
    pub config: VectorIndexConfig,
}
280
/// Sparse vector index: an on/off switch plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Embedding function, source key and BM25 flag.
    pub config: SparseVectorIndexConfig,
}
287
/// String inverted index: an on/off switch plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: StringInvertedIndexConfig,
}
294
/// Integer inverted index: an on/off switch plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: IntInvertedIndexConfig,
}
301
/// Float inverted index: an on/off switch plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: FloatInvertedIndexConfig,
}
308
/// Boolean inverted index: an on/off switch plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: BoolInvertedIndexConfig,
}
315
316impl Schema {
317 pub fn new_default(default_knn_index: KnnIndex) -> Self {
319 let vector_config = VectorIndexType {
321 enabled: false,
322 config: VectorIndexConfig {
323 space: Some(default_space()),
324 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
325 source_key: None,
326 hnsw: match default_knn_index {
327 KnnIndex::Hnsw => Some(HnswIndexConfig {
328 ef_construction: Some(default_construction_ef()),
329 max_neighbors: Some(default_m()),
330 ef_search: Some(default_search_ef()),
331 num_threads: Some(default_num_threads()),
332 batch_size: Some(default_batch_size()),
333 sync_threshold: Some(default_sync_threshold()),
334 resize_factor: Some(default_resize_factor()),
335 }),
336 KnnIndex::Spann => None,
337 },
338 spann: match default_knn_index {
339 KnnIndex::Hnsw => None,
340 KnnIndex::Spann => Some(SpannIndexConfig {
341 search_nprobe: Some(default_search_nprobe()),
342 search_rng_factor: Some(default_search_rng_factor()),
343 search_rng_epsilon: Some(default_search_rng_epsilon()),
344 nreplica_count: Some(default_nreplica_count()),
345 write_rng_factor: Some(default_write_rng_factor()),
346 write_rng_epsilon: Some(default_write_rng_epsilon()),
347 split_threshold: Some(default_split_threshold()),
348 num_samples_kmeans: Some(default_num_samples_kmeans()),
349 initial_lambda: Some(default_initial_lambda()),
350 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
351 merge_threshold: Some(default_merge_threshold()),
352 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
353 write_nprobe: Some(default_write_nprobe()),
354 ef_construction: Some(default_construction_ef_spann()),
355 ef_search: Some(default_search_ef_spann()),
356 max_neighbors: Some(default_m_spann()),
357 }),
358 },
359 },
360 };
361
362 let defaults = ValueTypes {
364 string: Some(StringValueType {
365 string_inverted_index: Some(StringInvertedIndexType {
366 enabled: true,
367 config: StringInvertedIndexConfig {},
368 }),
369 fts_index: Some(FtsIndexType {
370 enabled: false,
371 config: FtsIndexConfig {},
372 }),
373 }),
374 float: Some(FloatValueType {
375 float_inverted_index: Some(FloatInvertedIndexType {
376 enabled: true,
377 config: FloatInvertedIndexConfig {},
378 }),
379 }),
380 int: Some(IntValueType {
381 int_inverted_index: Some(IntInvertedIndexType {
382 enabled: true,
383 config: IntInvertedIndexConfig {},
384 }),
385 }),
386 boolean: Some(BoolValueType {
387 bool_inverted_index: Some(BoolInvertedIndexType {
388 enabled: true,
389 config: BoolInvertedIndexConfig {},
390 }),
391 }),
392 float_list: Some(FloatListValueType {
393 vector_index: Some(vector_config),
394 }),
395 sparse_vector: Some(SparseVectorValueType {
396 sparse_vector_index: Some(SparseVectorIndexType {
397 enabled: false,
398 config: SparseVectorIndexConfig {
399 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
400 source_key: None,
401 bm25: Some(false),
402 },
403 }),
404 }),
405 };
406
407 let mut keys = HashMap::new();
409
410 let embedding_defaults = ValueTypes {
412 float_list: Some(FloatListValueType {
413 vector_index: Some(VectorIndexType {
414 enabled: true,
415 config: VectorIndexConfig {
416 space: Some(default_space()),
417 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
418 source_key: Some(DOCUMENT_KEY.to_string()),
419 hnsw: match default_knn_index {
420 KnnIndex::Hnsw => Some(HnswIndexConfig {
421 ef_construction: Some(default_construction_ef()),
422 max_neighbors: Some(default_m()),
423 ef_search: Some(default_search_ef()),
424 num_threads: Some(default_num_threads()),
425 batch_size: Some(default_batch_size()),
426 sync_threshold: Some(default_sync_threshold()),
427 resize_factor: Some(default_resize_factor()),
428 }),
429 KnnIndex::Spann => None,
430 },
431 spann: match default_knn_index {
432 KnnIndex::Hnsw => None,
433 KnnIndex::Spann => Some(SpannIndexConfig {
434 search_nprobe: Some(default_search_nprobe()),
435 search_rng_factor: Some(default_search_rng_factor()),
436 search_rng_epsilon: Some(default_search_rng_epsilon()),
437 nreplica_count: Some(default_nreplica_count()),
438 write_rng_factor: Some(default_write_rng_factor()),
439 write_rng_epsilon: Some(default_write_rng_epsilon()),
440 split_threshold: Some(default_split_threshold()),
441 num_samples_kmeans: Some(default_num_samples_kmeans()),
442 initial_lambda: Some(default_initial_lambda()),
443 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
444 merge_threshold: Some(default_merge_threshold()),
445 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
446 write_nprobe: Some(default_write_nprobe()),
447 ef_construction: Some(default_construction_ef_spann()),
448 ef_search: Some(default_search_ef_spann()),
449 max_neighbors: Some(default_m_spann()),
450 }),
451 },
452 },
453 }),
454 }),
455 ..Default::default()
456 };
457 keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
458
459 let document_defaults = ValueTypes {
461 string: Some(StringValueType {
462 fts_index: Some(FtsIndexType {
463 enabled: true,
464 config: FtsIndexConfig {},
465 }),
466 string_inverted_index: Some(StringInvertedIndexType {
467 enabled: false,
468 config: StringInvertedIndexConfig {},
469 }),
470 }),
471 ..Default::default()
472 };
473 keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
474
475 Schema { defaults, keys }
476 }
477
478 pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
479 let to_internal = |vector_index: &VectorIndexType| {
480 let space = vector_index.config.space.clone();
481 vector_index
482 .config
483 .spann
484 .clone()
485 .map(|config| (space.as_ref(), &config).into())
486 };
487
488 self.keys
489 .get(EMBEDDING_KEY)
490 .and_then(|value_types| value_types.float_list.as_ref())
491 .and_then(|float_list| float_list.vector_index.as_ref())
492 .and_then(to_internal)
493 .or_else(|| {
494 self.defaults
495 .float_list
496 .as_ref()
497 .and_then(|float_list| float_list.vector_index.as_ref())
498 .and_then(to_internal)
499 })
500 }
501
502 pub fn reconcile_with_defaults(user_schema: Option<Schema>) -> Result<Self, String> {
509 let default_schema = Schema::new_default(KnnIndex::Spann);
510
511 match user_schema {
512 Some(user) => {
513 let merged_defaults =
515 Self::merge_value_types(&default_schema.defaults, &user.defaults)?;
516
517 let mut merged_keys = default_schema.keys.clone();
519 for (key, user_value_types) in user.keys {
520 if let Some(default_value_types) = merged_keys.get(&key) {
521 let merged_value_types =
523 Self::merge_value_types(default_value_types, &user_value_types)?;
524 merged_keys.insert(key, merged_value_types);
525 } else {
526 merged_keys.insert(key, user_value_types);
528 }
529 }
530
531 Ok(Schema {
532 defaults: merged_defaults,
533 keys: merged_keys,
534 })
535 }
536 None => Ok(default_schema),
537 }
538 }
539
540 pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
542 if self.defaults != other.defaults {
543 return Err(SchemaError::InvalidSchema {
544 reason: "Cannot merge schemas with differing defaults".to_string(),
545 });
546 }
547
548 let mut keys = self.keys.clone();
549
550 for (key, other_value_types) in &other.keys {
551 if let Some(existing) = keys.get(key).cloned() {
552 let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
553 keys.insert(key.clone(), merged);
554 } else {
555 keys.insert(key.clone(), other_value_types.clone());
556 }
557 }
558
559 Ok(Schema {
560 defaults: self.defaults.clone(),
561 keys,
562 })
563 }
564
565 fn merge_override_value_types(
566 key: &str,
567 left: &ValueTypes,
568 right: &ValueTypes,
569 ) -> Result<ValueTypes, SchemaError> {
570 Ok(ValueTypes {
571 string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
572 float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
573 int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
574 boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
575 float_list: Self::merge_float_list_override(
576 key,
577 left.float_list.as_ref(),
578 right.float_list.as_ref(),
579 )?,
580 sparse_vector: Self::merge_sparse_vector_override(
581 key,
582 left.sparse_vector.as_ref(),
583 right.sparse_vector.as_ref(),
584 )?,
585 })
586 }
587
588 fn merge_string_override(
589 key: &str,
590 left: Option<&StringValueType>,
591 right: Option<&StringValueType>,
592 ) -> Result<Option<StringValueType>, SchemaError> {
593 match (left, right) {
594 (Some(l), Some(r)) => Ok(Some(StringValueType {
595 string_inverted_index: Self::merge_index_or_error(
596 l.string_inverted_index.as_ref(),
597 r.string_inverted_index.as_ref(),
598 &format!("key '{key}' string.string_inverted_index"),
599 )?,
600 fts_index: Self::merge_index_or_error(
601 l.fts_index.as_ref(),
602 r.fts_index.as_ref(),
603 &format!("key '{key}' string.fts_index"),
604 )?,
605 })),
606 (Some(l), None) => Ok(Some(l.clone())),
607 (None, Some(r)) => Ok(Some(r.clone())),
608 (None, None) => Ok(None),
609 }
610 }
611
612 fn merge_float_override(
613 key: &str,
614 left: Option<&FloatValueType>,
615 right: Option<&FloatValueType>,
616 ) -> Result<Option<FloatValueType>, SchemaError> {
617 match (left, right) {
618 (Some(l), Some(r)) => Ok(Some(FloatValueType {
619 float_inverted_index: Self::merge_index_or_error(
620 l.float_inverted_index.as_ref(),
621 r.float_inverted_index.as_ref(),
622 &format!("key '{key}' float.float_inverted_index"),
623 )?,
624 })),
625 (Some(l), None) => Ok(Some(l.clone())),
626 (None, Some(r)) => Ok(Some(r.clone())),
627 (None, None) => Ok(None),
628 }
629 }
630
631 fn merge_int_override(
632 key: &str,
633 left: Option<&IntValueType>,
634 right: Option<&IntValueType>,
635 ) -> Result<Option<IntValueType>, SchemaError> {
636 match (left, right) {
637 (Some(l), Some(r)) => Ok(Some(IntValueType {
638 int_inverted_index: Self::merge_index_or_error(
639 l.int_inverted_index.as_ref(),
640 r.int_inverted_index.as_ref(),
641 &format!("key '{key}' int.int_inverted_index"),
642 )?,
643 })),
644 (Some(l), None) => Ok(Some(l.clone())),
645 (None, Some(r)) => Ok(Some(r.clone())),
646 (None, None) => Ok(None),
647 }
648 }
649
650 fn merge_bool_override(
651 key: &str,
652 left: Option<&BoolValueType>,
653 right: Option<&BoolValueType>,
654 ) -> Result<Option<BoolValueType>, SchemaError> {
655 match (left, right) {
656 (Some(l), Some(r)) => Ok(Some(BoolValueType {
657 bool_inverted_index: Self::merge_index_or_error(
658 l.bool_inverted_index.as_ref(),
659 r.bool_inverted_index.as_ref(),
660 &format!("key '{key}' bool.bool_inverted_index"),
661 )?,
662 })),
663 (Some(l), None) => Ok(Some(l.clone())),
664 (None, Some(r)) => Ok(Some(r.clone())),
665 (None, None) => Ok(None),
666 }
667 }
668
669 fn merge_float_list_override(
670 key: &str,
671 left: Option<&FloatListValueType>,
672 right: Option<&FloatListValueType>,
673 ) -> Result<Option<FloatListValueType>, SchemaError> {
674 match (left, right) {
675 (Some(l), Some(r)) => Ok(Some(FloatListValueType {
676 vector_index: Self::merge_index_or_error(
677 l.vector_index.as_ref(),
678 r.vector_index.as_ref(),
679 &format!("key '{key}' float_list.vector_index"),
680 )?,
681 })),
682 (Some(l), None) => Ok(Some(l.clone())),
683 (None, Some(r)) => Ok(Some(r.clone())),
684 (None, None) => Ok(None),
685 }
686 }
687
688 fn merge_sparse_vector_override(
689 key: &str,
690 left: Option<&SparseVectorValueType>,
691 right: Option<&SparseVectorValueType>,
692 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
693 match (left, right) {
694 (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
695 sparse_vector_index: Self::merge_index_or_error(
696 l.sparse_vector_index.as_ref(),
697 r.sparse_vector_index.as_ref(),
698 &format!("key '{key}' sparse_vector.sparse_vector_index"),
699 )?,
700 })),
701 (Some(l), None) => Ok(Some(l.clone())),
702 (None, Some(r)) => Ok(Some(r.clone())),
703 (None, None) => Ok(None),
704 }
705 }
706
707 fn merge_index_or_error<T: Clone + PartialEq>(
708 left: Option<&T>,
709 right: Option<&T>,
710 context: &str,
711 ) -> Result<Option<T>, SchemaError> {
712 match (left, right) {
713 (Some(l), Some(r)) => {
714 if l == r {
715 Ok(Some(l.clone()))
716 } else {
717 Err(SchemaError::InvalidSchema {
718 reason: format!("Conflicting configuration for {context}"),
719 })
720 }
721 }
722 (Some(l), None) => Ok(Some(l.clone())),
723 (None, Some(r)) => Ok(Some(r.clone())),
724 (None, None) => Ok(None),
725 }
726 }
727
728 fn merge_value_types(default: &ValueTypes, user: &ValueTypes) -> Result<ValueTypes, String> {
731 let float_list =
733 Self::merge_float_list_type(default.float_list.as_ref(), user.float_list.as_ref());
734
735 if let Some(ref fl) = float_list {
737 Self::validate_float_list_value_type(fl)?;
738 }
739
740 Ok(ValueTypes {
741 string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
742 float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
743 int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
744 boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
745 float_list,
746 sparse_vector: Self::merge_sparse_vector_type(
747 default.sparse_vector.as_ref(),
748 user.sparse_vector.as_ref(),
749 )?,
750 })
751 }
752
753 fn merge_string_type(
755 default: Option<&StringValueType>,
756 user: Option<&StringValueType>,
757 ) -> Result<Option<StringValueType>, String> {
758 match (default, user) {
759 (Some(default), Some(user)) => Ok(Some(StringValueType {
760 string_inverted_index: Self::merge_string_inverted_index_type(
761 default.string_inverted_index.as_ref(),
762 user.string_inverted_index.as_ref(),
763 )?,
764 fts_index: Self::merge_fts_index_type(
765 default.fts_index.as_ref(),
766 user.fts_index.as_ref(),
767 )?,
768 })),
769 (Some(default), None) => Ok(Some(default.clone())),
770 (None, Some(user)) => Ok(Some(user.clone())),
771 (None, None) => Ok(None),
772 }
773 }
774
775 fn merge_float_type(
777 default: Option<&FloatValueType>,
778 user: Option<&FloatValueType>,
779 ) -> Result<Option<FloatValueType>, String> {
780 match (default, user) {
781 (Some(default), Some(user)) => Ok(Some(FloatValueType {
782 float_inverted_index: Self::merge_float_inverted_index_type(
783 default.float_inverted_index.as_ref(),
784 user.float_inverted_index.as_ref(),
785 )?,
786 })),
787 (Some(default), None) => Ok(Some(default.clone())),
788 (None, Some(user)) => Ok(Some(user.clone())),
789 (None, None) => Ok(None),
790 }
791 }
792
793 fn merge_int_type(
795 default: Option<&IntValueType>,
796 user: Option<&IntValueType>,
797 ) -> Result<Option<IntValueType>, String> {
798 match (default, user) {
799 (Some(default), Some(user)) => Ok(Some(IntValueType {
800 int_inverted_index: Self::merge_int_inverted_index_type(
801 default.int_inverted_index.as_ref(),
802 user.int_inverted_index.as_ref(),
803 )?,
804 })),
805 (Some(default), None) => Ok(Some(default.clone())),
806 (None, Some(user)) => Ok(Some(user.clone())),
807 (None, None) => Ok(None),
808 }
809 }
810
811 fn merge_bool_type(
813 default: Option<&BoolValueType>,
814 user: Option<&BoolValueType>,
815 ) -> Result<Option<BoolValueType>, String> {
816 match (default, user) {
817 (Some(default), Some(user)) => Ok(Some(BoolValueType {
818 bool_inverted_index: Self::merge_bool_inverted_index_type(
819 default.bool_inverted_index.as_ref(),
820 user.bool_inverted_index.as_ref(),
821 )?,
822 })),
823 (Some(default), None) => Ok(Some(default.clone())),
824 (None, Some(user)) => Ok(Some(user.clone())),
825 (None, None) => Ok(None),
826 }
827 }
828
829 fn merge_float_list_type(
831 default: Option<&FloatListValueType>,
832 user: Option<&FloatListValueType>,
833 ) -> Option<FloatListValueType> {
834 match (default, user) {
835 (Some(default), Some(user)) => Some(FloatListValueType {
836 vector_index: Self::merge_vector_index_type(
837 default.vector_index.as_ref(),
838 user.vector_index.as_ref(),
839 ),
840 }),
841 (Some(default), None) => Some(default.clone()),
842 (None, Some(user)) => Some(user.clone()),
843 (None, None) => None,
844 }
845 }
846
847 fn merge_sparse_vector_type(
849 default: Option<&SparseVectorValueType>,
850 user: Option<&SparseVectorValueType>,
851 ) -> Result<Option<SparseVectorValueType>, String> {
852 match (default, user) {
853 (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
854 sparse_vector_index: Self::merge_sparse_vector_index_type(
855 default.sparse_vector_index.as_ref(),
856 user.sparse_vector_index.as_ref(),
857 )?,
858 })),
859 (Some(default), None) => Ok(Some(default.clone())),
860 (None, Some(user)) => Ok(Some(user.clone())),
861 (None, None) => Ok(None),
862 }
863 }
864
865 fn merge_string_inverted_index_type(
867 default: Option<&StringInvertedIndexType>,
868 user: Option<&StringInvertedIndexType>,
869 ) -> Result<Option<StringInvertedIndexType>, String> {
870 match (default, user) {
871 (Some(_default), Some(user)) => {
872 Ok(Some(StringInvertedIndexType {
873 enabled: user.enabled, config: user.config.clone(), }))
876 }
877 (Some(default), None) => Ok(Some(default.clone())),
878 (None, Some(user)) => Ok(Some(user.clone())),
879 (None, None) => Ok(None),
880 }
881 }
882
883 fn merge_fts_index_type(
884 default: Option<&FtsIndexType>,
885 user: Option<&FtsIndexType>,
886 ) -> Result<Option<FtsIndexType>, String> {
887 match (default, user) {
888 (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
889 enabled: user.enabled,
890 config: user.config.clone(),
891 })),
892 (Some(default), None) => Ok(Some(default.clone())),
893 (None, Some(user)) => Ok(Some(user.clone())),
894 (None, None) => Ok(None),
895 }
896 }
897
898 fn merge_float_inverted_index_type(
899 default: Option<&FloatInvertedIndexType>,
900 user: Option<&FloatInvertedIndexType>,
901 ) -> Result<Option<FloatInvertedIndexType>, String> {
902 match (default, user) {
903 (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
904 enabled: user.enabled,
905 config: user.config.clone(),
906 })),
907 (Some(default), None) => Ok(Some(default.clone())),
908 (None, Some(user)) => Ok(Some(user.clone())),
909 (None, None) => Ok(None),
910 }
911 }
912
913 fn merge_int_inverted_index_type(
914 default: Option<&IntInvertedIndexType>,
915 user: Option<&IntInvertedIndexType>,
916 ) -> Result<Option<IntInvertedIndexType>, String> {
917 match (default, user) {
918 (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
919 enabled: user.enabled,
920 config: user.config.clone(),
921 })),
922 (Some(default), None) => Ok(Some(default.clone())),
923 (None, Some(user)) => Ok(Some(user.clone())),
924 (None, None) => Ok(None),
925 }
926 }
927
928 fn merge_bool_inverted_index_type(
929 default: Option<&BoolInvertedIndexType>,
930 user: Option<&BoolInvertedIndexType>,
931 ) -> Result<Option<BoolInvertedIndexType>, String> {
932 match (default, user) {
933 (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
934 enabled: user.enabled,
935 config: user.config.clone(),
936 })),
937 (Some(default), None) => Ok(Some(default.clone())),
938 (None, Some(user)) => Ok(Some(user.clone())),
939 (None, None) => Ok(None),
940 }
941 }
942
943 fn merge_vector_index_type(
944 default: Option<&VectorIndexType>,
945 user: Option<&VectorIndexType>,
946 ) -> Option<VectorIndexType> {
947 match (default, user) {
948 (Some(default), Some(user)) => Some(VectorIndexType {
949 enabled: user.enabled,
950 config: Self::merge_vector_index_config(&default.config, &user.config),
951 }),
952 (Some(default), None) => Some(default.clone()),
953 (None, Some(user)) => Some(user.clone()),
954 (None, None) => None,
955 }
956 }
957
958 fn merge_sparse_vector_index_type(
959 default: Option<&SparseVectorIndexType>,
960 user: Option<&SparseVectorIndexType>,
961 ) -> Result<Option<SparseVectorIndexType>, String> {
962 match (default, user) {
963 (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
964 enabled: user.enabled,
965 config: Self::merge_sparse_vector_index_config(&default.config, &user.config)?,
966 })),
967 (Some(default), None) => Ok(Some(default.clone())),
968 (None, Some(user)) => Ok(Some(user.clone())),
969 (None, None) => Ok(None),
970 }
971 }
972
973 fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), String> {
976 if let Some(vector_index) = &float_list.vector_index {
977 if let Some(hnsw) = &vector_index.config.hnsw {
978 hnsw.validate()
979 .map_err(|e| format!("Invalid HNSW configuration: {}", e))?;
980 }
981 if let Some(spann) = &vector_index.config.spann {
982 spann
983 .validate()
984 .map_err(|e| format!("Invalid SPANN configuration: {}", e))?;
985 }
986 }
987 Ok(())
988 }
989
990 fn merge_vector_index_config(
992 default: &VectorIndexConfig,
993 user: &VectorIndexConfig,
994 ) -> VectorIndexConfig {
995 VectorIndexConfig {
996 space: user.space.clone().or(default.space.clone()),
997 embedding_function: user
998 .embedding_function
999 .clone()
1000 .or(default.embedding_function.clone()),
1001 source_key: user.source_key.clone().or(default.source_key.clone()),
1002 hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1003 spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
1004 }
1005 }
1006
1007 fn merge_sparse_vector_index_config(
1009 default: &SparseVectorIndexConfig,
1010 user: &SparseVectorIndexConfig,
1011 ) -> Result<SparseVectorIndexConfig, String> {
1012 Ok(SparseVectorIndexConfig {
1013 embedding_function: user
1014 .embedding_function
1015 .clone()
1016 .or(default.embedding_function.clone()),
1017 source_key: user.source_key.clone().or(default.source_key.clone()),
1018 bm25: user.bm25.or(default.bm25),
1019 })
1020 }
1021
1022 fn merge_hnsw_configs(
1024 default_hnsw: Option<&HnswIndexConfig>,
1025 user_hnsw: Option<&HnswIndexConfig>,
1026 ) -> Option<HnswIndexConfig> {
1027 match (default_hnsw, user_hnsw) {
1028 (Some(default), Some(user)) => Some(HnswIndexConfig {
1029 ef_construction: user.ef_construction.or(default.ef_construction),
1030 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1031 ef_search: user.ef_search.or(default.ef_search),
1032 num_threads: user.num_threads.or(default.num_threads),
1033 batch_size: user.batch_size.or(default.batch_size),
1034 sync_threshold: user.sync_threshold.or(default.sync_threshold),
1035 resize_factor: user.resize_factor.or(default.resize_factor),
1036 }),
1037 (Some(default), None) => Some(default.clone()),
1038 (None, Some(user)) => Some(user.clone()),
1039 (None, None) => None,
1040 }
1041 }
1042
1043 fn merge_spann_configs(
1045 default_spann: Option<&SpannIndexConfig>,
1046 user_spann: Option<&SpannIndexConfig>,
1047 ) -> Option<SpannIndexConfig> {
1048 match (default_spann, user_spann) {
1049 (Some(default), Some(user)) => Some(SpannIndexConfig {
1050 search_nprobe: user.search_nprobe.or(default.search_nprobe),
1051 search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1052 search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1053 nreplica_count: user.nreplica_count.or(default.nreplica_count),
1054 write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1055 write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1056 split_threshold: user.split_threshold.or(default.split_threshold),
1057 num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1058 initial_lambda: user.initial_lambda.or(default.initial_lambda),
1059 reassign_neighbor_count: user
1060 .reassign_neighbor_count
1061 .or(default.reassign_neighbor_count),
1062 merge_threshold: user.merge_threshold.or(default.merge_threshold),
1063 num_centers_to_merge_to: user
1064 .num_centers_to_merge_to
1065 .or(default.num_centers_to_merge_to),
1066 write_nprobe: user.write_nprobe.or(default.write_nprobe),
1067 ef_construction: user.ef_construction.or(default.ef_construction),
1068 ef_search: user.ef_search.or(default.ef_search),
1069 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1070 }),
1071 (Some(default), None) => Some(default.clone()),
1072 (None, Some(user)) => Some(user.clone()),
1073 (None, None) => None,
1074 }
1075 }
1076
1077 pub fn reconcile_with_collection_config(
1084 schema: Schema,
1085 collection_config: InternalCollectionConfiguration,
1086 ) -> Result<Schema, String> {
1087 if collection_config.is_default() {
1089 return Ok(schema);
1091 }
1092
1093 if !Self::is_schema_default(&schema) {
1095 return Err(
1097 "Cannot set both collection config and schema at the same time".to_string(),
1098 );
1099 }
1100
1101 Self::convert_collection_config_to_schema(collection_config)
1103 }
1104
1105 pub fn reconcile_schema_and_config(
1106 schema: Option<Schema>,
1107 configuration: Option<InternalCollectionConfiguration>,
1108 ) -> Result<Schema, String> {
1109 let reconciled_schema = Self::reconcile_with_defaults(schema)?;
1110 if let Some(config) = configuration {
1111 Self::reconcile_with_collection_config(reconciled_schema, config)
1112 } else {
1113 Ok(reconciled_schema)
1114 }
1115 }
1116
1117 fn is_schema_default(schema: &Schema) -> bool {
1119 let default_hnsw = Schema::new_default(KnnIndex::Hnsw);
1121 let default_spann = Schema::new_default(KnnIndex::Spann);
1122
1123 schema == &default_hnsw || schema == &default_spann
1124 }
1125
1126 fn convert_collection_config_to_schema(
1128 collection_config: InternalCollectionConfiguration,
1129 ) -> Result<Schema, String> {
1130 let mut schema = Schema::new_default(KnnIndex::Spann); let vector_config = match collection_config.vector_index {
1135 VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
1136 space: Some(hnsw_config.space),
1137 embedding_function: collection_config.embedding_function,
1138 source_key: Some(DOCUMENT_KEY.to_string()), hnsw: Some(HnswIndexConfig {
1140 ef_construction: Some(hnsw_config.ef_construction),
1141 max_neighbors: Some(hnsw_config.max_neighbors),
1142 ef_search: Some(hnsw_config.ef_search),
1143 num_threads: Some(hnsw_config.num_threads),
1144 batch_size: Some(hnsw_config.batch_size),
1145 sync_threshold: Some(hnsw_config.sync_threshold),
1146 resize_factor: Some(hnsw_config.resize_factor),
1147 }),
1148 spann: None,
1149 },
1150 VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
1151 space: Some(spann_config.space),
1152 embedding_function: collection_config.embedding_function,
1153 source_key: Some(DOCUMENT_KEY.to_string()), hnsw: None,
1155 spann: Some(SpannIndexConfig {
1156 search_nprobe: Some(spann_config.search_nprobe),
1157 search_rng_factor: Some(spann_config.search_rng_factor),
1158 search_rng_epsilon: Some(spann_config.search_rng_epsilon),
1159 nreplica_count: Some(spann_config.nreplica_count),
1160 write_rng_factor: Some(spann_config.write_rng_factor),
1161 write_rng_epsilon: Some(spann_config.write_rng_epsilon),
1162 split_threshold: Some(spann_config.split_threshold),
1163 num_samples_kmeans: Some(spann_config.num_samples_kmeans),
1164 initial_lambda: Some(spann_config.initial_lambda),
1165 reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
1166 merge_threshold: Some(spann_config.merge_threshold),
1167 num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
1168 write_nprobe: Some(spann_config.write_nprobe),
1169 ef_construction: Some(spann_config.ef_construction),
1170 ef_search: Some(spann_config.ef_search),
1171 max_neighbors: Some(spann_config.max_neighbors),
1172 }),
1173 },
1174 };
1175
1176 if let Some(float_list) = &mut schema.defaults.float_list {
1179 if let Some(vector_index) = &mut float_list.vector_index {
1180 vector_index.config = vector_config.clone();
1181 }
1182 }
1183
1184 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1187 if let Some(float_list) = &mut embedding_types.float_list {
1188 if let Some(vector_index) = &mut float_list.vector_index {
1189 vector_index.config = vector_config;
1190 }
1191 }
1192 }
1193
1194 Ok(schema)
1195 }
1196
1197 pub fn is_metadata_type_index_enabled(
1199 &self,
1200 key: &str,
1201 value_type: MetadataValueType,
1202 ) -> Result<bool, SchemaError> {
1203 let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1204
1205 match value_type {
1206 MetadataValueType::Bool => match &v_type.boolean {
1207 Some(bool_type) => match &bool_type.bool_inverted_index {
1208 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1209 None => Err(SchemaError::MissingIndexConfiguration {
1210 key: key.to_string(),
1211 value_type: "bool".to_string(),
1212 }),
1213 },
1214 None => match &self.defaults.boolean {
1215 Some(bool_type) => match &bool_type.bool_inverted_index {
1216 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1217 None => Err(SchemaError::MissingIndexConfiguration {
1218 key: key.to_string(),
1219 value_type: "bool".to_string(),
1220 }),
1221 },
1222 None => Err(SchemaError::MissingIndexConfiguration {
1223 key: key.to_string(),
1224 value_type: "bool".to_string(),
1225 }),
1226 },
1227 },
1228 MetadataValueType::Int => match &v_type.int {
1229 Some(int_type) => match &int_type.int_inverted_index {
1230 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1231 None => Err(SchemaError::MissingIndexConfiguration {
1232 key: key.to_string(),
1233 value_type: "int".to_string(),
1234 }),
1235 },
1236 None => match &self.defaults.int {
1237 Some(int_type) => match &int_type.int_inverted_index {
1238 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1239 None => Err(SchemaError::MissingIndexConfiguration {
1240 key: key.to_string(),
1241 value_type: "int".to_string(),
1242 }),
1243 },
1244 None => Err(SchemaError::MissingIndexConfiguration {
1245 key: key.to_string(),
1246 value_type: "int".to_string(),
1247 }),
1248 },
1249 },
1250 MetadataValueType::Float => match &v_type.float {
1251 Some(float_type) => match &float_type.float_inverted_index {
1252 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1253 None => Err(SchemaError::MissingIndexConfiguration {
1254 key: key.to_string(),
1255 value_type: "float".to_string(),
1256 }),
1257 },
1258 None => match &self.defaults.float {
1259 Some(float_type) => match &float_type.float_inverted_index {
1260 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1261 None => Err(SchemaError::MissingIndexConfiguration {
1262 key: key.to_string(),
1263 value_type: "float".to_string(),
1264 }),
1265 },
1266 None => Err(SchemaError::MissingIndexConfiguration {
1267 key: key.to_string(),
1268 value_type: "float".to_string(),
1269 }),
1270 },
1271 },
1272 MetadataValueType::Str => match &v_type.string {
1273 Some(string_type) => match &string_type.string_inverted_index {
1274 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1275 None => Err(SchemaError::MissingIndexConfiguration {
1276 key: key.to_string(),
1277 value_type: "string".to_string(),
1278 }),
1279 },
1280 None => match &self.defaults.string {
1281 Some(string_type) => match &string_type.string_inverted_index {
1282 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1283 None => Err(SchemaError::MissingIndexConfiguration {
1284 key: key.to_string(),
1285 value_type: "string".to_string(),
1286 }),
1287 },
1288 None => Err(SchemaError::MissingIndexConfiguration {
1289 key: key.to_string(),
1290 value_type: "string".to_string(),
1291 }),
1292 },
1293 },
1294 MetadataValueType::SparseVector => match &v_type.sparse_vector {
1295 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1296 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1297 None => Err(SchemaError::MissingIndexConfiguration {
1298 key: key.to_string(),
1299 value_type: "sparse_vector".to_string(),
1300 }),
1301 },
1302 None => match &self.defaults.sparse_vector {
1303 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1304 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1305 None => Err(SchemaError::MissingIndexConfiguration {
1306 key: key.to_string(),
1307 value_type: "sparse_vector".to_string(),
1308 }),
1309 },
1310 None => Err(SchemaError::MissingIndexConfiguration {
1311 key: key.to_string(),
1312 value_type: "sparse_vector".to_string(),
1313 }),
1314 },
1315 },
1316 }
1317 }
1318
1319 pub fn is_metadata_where_indexing_enabled(
1320 &self,
1321 where_clause: &Where,
1322 ) -> Result<(), FilterValidationError> {
1323 match where_clause {
1324 Where::Composite(composite) => {
1325 for child in &composite.children {
1326 self.is_metadata_where_indexing_enabled(child)?;
1327 }
1328 Ok(())
1329 }
1330 Where::Document(_) => Ok(()),
1331 Where::Metadata(expression) => {
1332 let value_type = match &expression.comparison {
1333 MetadataComparison::Primitive(_, value) => value.value_type(),
1334 MetadataComparison::Set(_, set_value) => set_value.value_type(),
1335 };
1336 let is_enabled = self
1337 .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1338 .map_err(FilterValidationError::Schema)?;
1339 if !is_enabled {
1340 return Err(FilterValidationError::IndexingDisabled {
1341 key: expression.key.clone(),
1342 value_type,
1343 });
1344 }
1345 Ok(())
1346 }
1347 }
1348 }
1349
1350 pub fn is_knn_key_indexing_enabled(
1351 &self,
1352 key: &str,
1353 query: &QueryVector,
1354 ) -> Result<(), FilterValidationError> {
1355 match query {
1356 QueryVector::Sparse(_) => {
1357 let is_enabled = self
1358 .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1359 .map_err(FilterValidationError::Schema)?;
1360 if !is_enabled {
1361 return Err(FilterValidationError::IndexingDisabled {
1362 key: key.to_string(),
1363 value_type: MetadataValueType::SparseVector,
1364 });
1365 }
1366 Ok(())
1367 }
1368 QueryVector::Dense(_) => {
1369 Ok(())
1372 }
1373 }
1374 }
1375
1376 pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
1377 let value_types = self.keys.entry(key.to_string()).or_default();
1378 match value_type {
1379 MetadataValueType::Bool => {
1380 if value_types.boolean.is_none() {
1381 value_types.boolean = self.defaults.boolean.clone();
1382 return true;
1383 }
1384 }
1385 MetadataValueType::Int => {
1386 if value_types.int.is_none() {
1387 value_types.int = self.defaults.int.clone();
1388 return true;
1389 }
1390 }
1391 MetadataValueType::Float => {
1392 if value_types.float.is_none() {
1393 value_types.float = self.defaults.float.clone();
1394 return true;
1395 }
1396 }
1397 MetadataValueType::Str => {
1398 if value_types.string.is_none() {
1399 value_types.string = self.defaults.string.clone();
1400 return true;
1401 }
1402 }
1403 MetadataValueType::SparseVector => {
1404 if value_types.sparse_vector.is_none() {
1405 value_types.sparse_vector = self.defaults.sparse_vector.clone();
1406 return true;
1407 }
1408 }
1409 }
1410 false
1411 }
1412}
1413
1414#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1419#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1420#[serde(deny_unknown_fields)]
1421pub struct VectorIndexConfig {
1422 #[serde(skip_serializing_if = "Option::is_none")]
1424 pub space: Option<Space>,
1425 #[serde(skip_serializing_if = "Option::is_none")]
1427 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
1428 #[serde(skip_serializing_if = "Option::is_none")]
1430 pub source_key: Option<String>,
1431 #[serde(skip_serializing_if = "Option::is_none")]
1433 pub hnsw: Option<HnswIndexConfig>,
1434 #[serde(skip_serializing_if = "Option::is_none")]
1436 pub spann: Option<SpannIndexConfig>,
1437}
1438
1439#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
1441#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1442#[serde(deny_unknown_fields)]
1443pub struct HnswIndexConfig {
1444 #[serde(skip_serializing_if = "Option::is_none")]
1445 pub ef_construction: Option<usize>,
1446 #[serde(skip_serializing_if = "Option::is_none")]
1447 pub max_neighbors: Option<usize>,
1448 #[serde(skip_serializing_if = "Option::is_none")]
1449 pub ef_search: Option<usize>,
1450 #[serde(skip_serializing_if = "Option::is_none")]
1451 pub num_threads: Option<usize>,
1452 #[serde(skip_serializing_if = "Option::is_none")]
1453 #[validate(range(min = 2))]
1454 pub batch_size: Option<usize>,
1455 #[serde(skip_serializing_if = "Option::is_none")]
1456 #[validate(range(min = 2))]
1457 pub sync_threshold: Option<usize>,
1458 #[serde(skip_serializing_if = "Option::is_none")]
1459 pub resize_factor: Option<f64>,
1460}
1461
1462#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
1464#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1465#[serde(deny_unknown_fields)]
1466pub struct SpannIndexConfig {
1467 #[serde(skip_serializing_if = "Option::is_none")]
1468 #[validate(range(max = 128))]
1469 pub search_nprobe: Option<u32>,
1470 #[serde(skip_serializing_if = "Option::is_none")]
1471 #[validate(range(min = 1.0, max = 1.0))]
1472 pub search_rng_factor: Option<f32>,
1473 #[serde(skip_serializing_if = "Option::is_none")]
1474 #[validate(range(min = 5.0, max = 10.0))]
1475 pub search_rng_epsilon: Option<f32>,
1476 #[serde(skip_serializing_if = "Option::is_none")]
1477 #[validate(range(max = 8))]
1478 pub nreplica_count: Option<u32>,
1479 #[serde(skip_serializing_if = "Option::is_none")]
1480 #[validate(range(min = 1.0, max = 1.0))]
1481 pub write_rng_factor: Option<f32>,
1482 #[serde(skip_serializing_if = "Option::is_none")]
1483 #[validate(range(min = 5.0, max = 10.0))]
1484 pub write_rng_epsilon: Option<f32>,
1485 #[serde(skip_serializing_if = "Option::is_none")]
1486 #[validate(range(min = 50, max = 200))]
1487 pub split_threshold: Option<u32>,
1488 #[serde(skip_serializing_if = "Option::is_none")]
1489 #[validate(range(max = 1000))]
1490 pub num_samples_kmeans: Option<usize>,
1491 #[serde(skip_serializing_if = "Option::is_none")]
1492 #[validate(range(min = 100.0, max = 100.0))]
1493 pub initial_lambda: Option<f32>,
1494 #[serde(skip_serializing_if = "Option::is_none")]
1495 #[validate(range(max = 64))]
1496 pub reassign_neighbor_count: Option<u32>,
1497 #[serde(skip_serializing_if = "Option::is_none")]
1498 #[validate(range(min = 25, max = 100))]
1499 pub merge_threshold: Option<u32>,
1500 #[serde(skip_serializing_if = "Option::is_none")]
1501 #[validate(range(max = 8))]
1502 pub num_centers_to_merge_to: Option<u32>,
1503 #[serde(skip_serializing_if = "Option::is_none")]
1504 #[validate(range(max = 64))]
1505 pub write_nprobe: Option<u32>,
1506 #[serde(skip_serializing_if = "Option::is_none")]
1507 #[validate(range(max = 200))]
1508 pub ef_construction: Option<usize>,
1509 #[serde(skip_serializing_if = "Option::is_none")]
1510 #[validate(range(max = 200))]
1511 pub ef_search: Option<usize>,
1512 #[serde(skip_serializing_if = "Option::is_none")]
1513 #[validate(range(max = 64))]
1514 pub max_neighbors: Option<usize>,
1515}
1516
1517#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1518#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1519#[serde(deny_unknown_fields)]
1520pub struct SparseVectorIndexConfig {
1521 #[serde(skip_serializing_if = "Option::is_none")]
1523 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
1524 #[serde(skip_serializing_if = "Option::is_none")]
1526 pub source_key: Option<String>,
1527 #[serde(skip_serializing_if = "Option::is_none")]
1529 pub bm25: Option<bool>,
1530}
1531
1532#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1533#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1534#[serde(deny_unknown_fields)]
1535pub struct FtsIndexConfig {
1536 }
1538
1539#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1540#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1541#[serde(deny_unknown_fields)]
1542pub struct StringInvertedIndexConfig {
1543 }
1545
1546#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1547#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1548#[serde(deny_unknown_fields)]
1549pub struct IntInvertedIndexConfig {
1550 }
1552
1553#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1554#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1555#[serde(deny_unknown_fields)]
1556pub struct FloatInvertedIndexConfig {
1557 }
1559
1560#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1561#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1562#[serde(deny_unknown_fields)]
1563pub struct BoolInvertedIndexConfig {
1564 }
1566
1567#[cfg(test)]
1568mod tests {
1569 use super::*;
1570 use crate::hnsw_configuration::Space;
1571 use crate::metadata::SparseVector;
1572 use crate::{
1573 EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
1574 };
1575 use serde_json::json;
1576
1577 #[test]
1578 fn test_reconcile_with_defaults_none_user_schema() {
1579 let result = Schema::reconcile_with_defaults(None).unwrap();
1581 let expected = Schema::new_default(KnnIndex::Spann);
1582 assert_eq!(result, expected);
1583 }
1584
1585 #[test]
1586 fn test_reconcile_with_defaults_empty_user_schema() {
1587 let user_schema = Schema {
1589 defaults: ValueTypes::default(),
1590 keys: HashMap::new(),
1591 };
1592
1593 let result = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();
1594 let expected = Schema::new_default(KnnIndex::Spann);
1595 assert_eq!(result, expected);
1596 }
1597
1598 #[test]
1599 fn test_reconcile_with_defaults_user_overrides_string_enabled() {
1600 let mut user_schema = Schema {
1602 defaults: ValueTypes::default(),
1603 keys: HashMap::new(),
1604 };
1605
1606 user_schema.defaults.string = Some(StringValueType {
1607 string_inverted_index: Some(StringInvertedIndexType {
1608 enabled: false, config: StringInvertedIndexConfig {},
1610 }),
1611 fts_index: None,
1612 });
1613
1614 let result = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();
1615
1616 assert!(
1618 !result
1619 .defaults
1620 .string
1621 .as_ref()
1622 .unwrap()
1623 .string_inverted_index
1624 .as_ref()
1625 .unwrap()
1626 .enabled
1627 );
1628 assert!(result.defaults.float.is_some());
1630 assert!(result.defaults.int.is_some());
1631 }
1632
1633 #[test]
1634 fn test_reconcile_with_defaults_user_overrides_vector_config() {
1635 let mut user_schema = Schema {
1637 defaults: ValueTypes::default(),
1638 keys: HashMap::new(),
1639 };
1640
1641 user_schema.defaults.float_list = Some(FloatListValueType {
1642 vector_index: Some(VectorIndexType {
1643 enabled: true, config: VectorIndexConfig {
1645 space: Some(Space::L2), embedding_function: None, source_key: Some("custom_key".to_string()), hnsw: Some(HnswIndexConfig {
1649 ef_construction: Some(500), max_neighbors: None, ef_search: None, num_threads: None,
1653 batch_size: None,
1654 sync_threshold: None,
1655 resize_factor: None,
1656 }),
1657 spann: None,
1658 },
1659 }),
1660 });
1661
1662 let result = {
1664 let default_schema = Schema::new_default(KnnIndex::Hnsw);
1665 let merged_defaults =
1666 Schema::merge_value_types(&default_schema.defaults, &user_schema.defaults).unwrap();
1667 let mut merged_keys = default_schema.keys.clone();
1668 for (key, user_value_types) in user_schema.keys {
1669 if let Some(default_value_types) = merged_keys.get(&key) {
1670 let merged_value_types =
1671 Schema::merge_value_types(default_value_types, &user_value_types).unwrap();
1672 merged_keys.insert(key, merged_value_types);
1673 } else {
1674 merged_keys.insert(key, user_value_types);
1675 }
1676 }
1677 Schema {
1678 defaults: merged_defaults,
1679 keys: merged_keys,
1680 }
1681 };
1682
1683 let vector_config = &result
1684 .defaults
1685 .float_list
1686 .as_ref()
1687 .unwrap()
1688 .vector_index
1689 .as_ref()
1690 .unwrap()
1691 .config;
1692
1693 assert_eq!(vector_config.space, Some(Space::L2));
1695 assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
1696 assert_eq!(
1697 vector_config.hnsw.as_ref().unwrap().ef_construction,
1698 Some(500)
1699 );
1700
1701 assert_eq!(
1703 vector_config.embedding_function,
1704 Some(EmbeddingFunctionConfiguration::Legacy)
1705 );
1706 assert_eq!(
1708 vector_config.hnsw.as_ref().unwrap().max_neighbors,
1709 Some(default_m())
1710 );
1711 }
1712
1713 #[test]
1714 fn test_reconcile_with_defaults_keys() {
1715 let mut user_schema = Schema {
1717 defaults: ValueTypes::default(),
1718 keys: HashMap::new(),
1719 };
1720
1721 let custom_key_types = ValueTypes {
1723 string: Some(StringValueType {
1724 fts_index: Some(FtsIndexType {
1725 enabled: true,
1726 config: FtsIndexConfig {},
1727 }),
1728 string_inverted_index: Some(StringInvertedIndexType {
1729 enabled: false,
1730 config: StringInvertedIndexConfig {},
1731 }),
1732 }),
1733 ..Default::default()
1734 };
1735 user_schema
1736 .keys
1737 .insert("custom_key".to_string(), custom_key_types);
1738
1739 let result = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();
1740
1741 assert!(result.keys.contains_key(EMBEDDING_KEY));
1743 assert!(result.keys.contains_key(DOCUMENT_KEY));
1744
1745 assert!(result.keys.contains_key("custom_key"));
1747 let custom_override = result.keys.get("custom_key").unwrap();
1748 assert!(
1749 custom_override
1750 .string
1751 .as_ref()
1752 .unwrap()
1753 .fts_index
1754 .as_ref()
1755 .unwrap()
1756 .enabled
1757 );
1758 }
1759
1760 #[test]
1761 fn test_reconcile_with_defaults_override_existing_key() {
1762 let mut user_schema = Schema {
1764 defaults: ValueTypes::default(),
1765 keys: HashMap::new(),
1766 };
1767
1768 let embedding_override = ValueTypes {
1770 float_list: Some(FloatListValueType {
1771 vector_index: Some(VectorIndexType {
1772 enabled: false, config: VectorIndexConfig {
1774 space: Some(Space::Ip), embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
1776 source_key: Some("custom_embedding_key".to_string()),
1777 hnsw: None,
1778 spann: None,
1779 },
1780 }),
1781 }),
1782 ..Default::default()
1783 };
1784 user_schema
1785 .keys
1786 .insert(EMBEDDING_KEY.to_string(), embedding_override);
1787
1788 let result = Schema::reconcile_with_defaults(Some(user_schema)).unwrap();
1789
1790 let embedding_config = result.keys.get(EMBEDDING_KEY).unwrap();
1791 let vector_config = &embedding_config
1792 .float_list
1793 .as_ref()
1794 .unwrap()
1795 .vector_index
1796 .as_ref()
1797 .unwrap();
1798
1799 assert!(!vector_config.enabled);
1801 assert_eq!(vector_config.config.space, Some(Space::Ip));
1802 assert_eq!(
1803 vector_config.config.source_key,
1804 Some("custom_embedding_key".to_string())
1805 );
1806 }
1807
1808 #[test]
1809 fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
1810 let collection_config = InternalCollectionConfiguration {
1811 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
1812 space: Space::Cosine,
1813 ef_construction: 128,
1814 ef_search: 96,
1815 max_neighbors: 42,
1816 num_threads: 8,
1817 resize_factor: 1.5,
1818 sync_threshold: 2_000,
1819 batch_size: 256,
1820 }),
1821 embedding_function: Some(EmbeddingFunctionConfiguration::Known(
1822 EmbeddingFunctionNewConfiguration {
1823 name: "custom".to_string(),
1824 config: json!({"alpha": 1}),
1825 },
1826 )),
1827 };
1828
1829 let schema =
1830 Schema::convert_collection_config_to_schema(collection_config.clone()).unwrap();
1831 let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
1832
1833 assert_eq!(reconstructed, collection_config);
1834 }
1835
1836 #[test]
1837 fn test_convert_schema_to_collection_config_spann_roundtrip() {
1838 let spann_config = InternalSpannConfiguration {
1839 space: Space::Cosine,
1840 search_nprobe: 11,
1841 search_rng_factor: 1.7,
1842 write_nprobe: 5,
1843 nreplica_count: 3,
1844 split_threshold: 150,
1845 merge_threshold: 80,
1846 ef_construction: 120,
1847 ef_search: 90,
1848 max_neighbors: 40,
1849 ..Default::default()
1850 };
1851
1852 let collection_config = InternalCollectionConfiguration {
1853 vector_index: VectorIndexConfiguration::Spann(spann_config.clone()),
1854 embedding_function: Some(EmbeddingFunctionConfiguration::Known(
1855 EmbeddingFunctionNewConfiguration {
1856 name: "custom".to_string(),
1857 config: json!({"beta": true}),
1858 },
1859 )),
1860 };
1861
1862 let schema =
1863 Schema::convert_collection_config_to_schema(collection_config.clone()).unwrap();
1864 let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
1865
1866 assert_eq!(reconstructed, collection_config);
1867 }
1868
1869 #[test]
1870 fn test_convert_schema_to_collection_config_rejects_mixed_index() {
1871 let mut schema = Schema::new_default(KnnIndex::Hnsw);
1872 if let Some(embedding) = schema.keys.get_mut(EMBEDDING_KEY) {
1873 if let Some(float_list) = &mut embedding.float_list {
1874 if let Some(vector_index) = &mut float_list.vector_index {
1875 vector_index.config.spann = Some(SpannIndexConfig {
1876 search_nprobe: Some(1),
1877 search_rng_factor: Some(1.0),
1878 search_rng_epsilon: Some(0.1),
1879 nreplica_count: Some(1),
1880 write_rng_factor: Some(1.0),
1881 write_rng_epsilon: Some(0.1),
1882 split_threshold: Some(100),
1883 num_samples_kmeans: Some(10),
1884 initial_lambda: Some(0.5),
1885 reassign_neighbor_count: Some(10),
1886 merge_threshold: Some(50),
1887 num_centers_to_merge_to: Some(3),
1888 write_nprobe: Some(1),
1889 ef_construction: Some(50),
1890 ef_search: Some(40),
1891 max_neighbors: Some(20),
1892 });
1893 }
1894 }
1895 }
1896
1897 let result = InternalCollectionConfiguration::try_from(&schema);
1898 assert!(result.is_err());
1899 }
1900
1901 #[test]
1902 fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
1903 let mut schema = Schema::new_default(KnnIndex::Hnsw);
1904 let before = schema.clone();
1905 let modified = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);
1906 assert!(!modified);
1907 assert_eq!(schema, before);
1908 }
1909
1910 #[test]
1911 fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
1912 let mut schema = Schema::new_default(KnnIndex::Hnsw);
1913 assert!(!schema.keys.contains_key("custom_field"));
1914
1915 let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
1916
1917 assert!(modified);
1918 let entry = schema
1919 .keys
1920 .get("custom_field")
1921 .expect("expected new key override to be inserted");
1922 assert_eq!(entry.boolean, schema.defaults.boolean);
1923 assert!(entry.string.is_none());
1924 assert!(entry.int.is_none());
1925 assert!(entry.float.is_none());
1926 assert!(entry.float_list.is_none());
1927 assert!(entry.sparse_vector.is_none());
1928 }
1929
1930 #[test]
1931 fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
1932 let mut schema = Schema::new_default(KnnIndex::Hnsw);
1933 let initial_len = schema.keys.len();
1934 schema.keys.insert(
1935 "custom_field".to_string(),
1936 ValueTypes {
1937 string: schema.defaults.string.clone(),
1938 ..Default::default()
1939 },
1940 );
1941
1942 let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
1943
1944 assert!(modified);
1945 assert_eq!(schema.keys.len(), initial_len + 1);
1946 let entry = schema
1947 .keys
1948 .get("custom_field")
1949 .expect("expected key override to exist after ensure call");
1950 assert!(entry.string.is_some());
1951 assert_eq!(entry.boolean, schema.defaults.boolean);
1952 }
1953
1954 #[test]
1955 fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
1956 let schema = Schema::new_default(KnnIndex::Spann);
1957 let result = schema.is_knn_key_indexing_enabled(
1958 "custom_sparse",
1959 &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32])),
1960 );
1961
1962 let err = result.expect_err("expected indexing disabled error");
1963 match err {
1964 FilterValidationError::IndexingDisabled { key, value_type } => {
1965 assert_eq!(key, "custom_sparse");
1966 assert_eq!(value_type, crate::metadata::MetadataValueType::SparseVector);
1967 }
1968 other => panic!("unexpected error variant: {other:?}"),
1969 }
1970 }
1971
1972 #[test]
1973 fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
1974 let mut schema = Schema::new_default(KnnIndex::Spann);
1975 schema.keys.insert(
1976 "sparse_enabled".to_string(),
1977 ValueTypes {
1978 sparse_vector: Some(SparseVectorValueType {
1979 sparse_vector_index: Some(SparseVectorIndexType {
1980 enabled: true,
1981 config: SparseVectorIndexConfig {
1982 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
1983 source_key: None,
1984 bm25: None,
1985 },
1986 }),
1987 }),
1988 ..Default::default()
1989 },
1990 );
1991
1992 let result = schema.is_knn_key_indexing_enabled(
1993 "sparse_enabled",
1994 &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32])),
1995 );
1996
1997 assert!(result.is_ok());
1998 }
1999
2000 #[test]
2001 fn test_is_knn_key_indexing_enabled_dense_succeeds() {
2002 let schema = Schema::new_default(KnnIndex::Spann);
2003 let result = schema.is_knn_key_indexing_enabled(
2004 EMBEDDING_KEY,
2005 &QueryVector::Dense(vec![0.1_f32, 0.2_f32]),
2006 );
2007
2008 assert!(result.is_ok());
2009 }
2010
2011 #[test]
2012 fn test_merge_hnsw_configs_field_level() {
2013 let default_hnsw = HnswIndexConfig {
2015 ef_construction: Some(200),
2016 max_neighbors: Some(16),
2017 ef_search: Some(10),
2018 num_threads: Some(4),
2019 batch_size: Some(100),
2020 sync_threshold: Some(1000),
2021 resize_factor: Some(1.2),
2022 };
2023
2024 let user_hnsw = HnswIndexConfig {
2025 ef_construction: Some(300), max_neighbors: None, ef_search: Some(20), num_threads: None, batch_size: None, sync_threshold: Some(2000), resize_factor: None, };
2033
2034 let result = Schema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();
2035
2036 assert_eq!(result.ef_construction, Some(300));
2038 assert_eq!(result.ef_search, Some(20));
2039 assert_eq!(result.sync_threshold, Some(2000));
2040
2041 assert_eq!(result.max_neighbors, Some(16));
2043 assert_eq!(result.num_threads, Some(4));
2044 assert_eq!(result.batch_size, Some(100));
2045 assert_eq!(result.resize_factor, Some(1.2));
2046 }
2047
/// Merging two SPANN configs is done per field: user-provided values override
/// the defaults, and unset user fields fall back to the default config.
#[test]
fn test_merge_spann_configs_field_level() {
    let base = SpannIndexConfig {
        search_nprobe: Some(10),
        search_rng_factor: Some(1.0),
        search_rng_epsilon: Some(7.0),
        nreplica_count: Some(3),
        write_rng_factor: Some(1.0),
        write_rng_epsilon: Some(6.0),
        split_threshold: Some(100),
        num_samples_kmeans: Some(100),
        initial_lambda: Some(100.0),
        reassign_neighbor_count: Some(50),
        merge_threshold: Some(50),
        num_centers_to_merge_to: Some(4),
        write_nprobe: Some(5),
        ef_construction: Some(100),
        ef_search: Some(10),
        max_neighbors: Some(16),
    };
    // The user only touches three fields; the rest are left unset.
    let overrides = SpannIndexConfig {
        search_nprobe: Some(20),
        search_rng_epsilon: Some(8.0),
        split_threshold: Some(150),
        ..Default::default()
    };

    let merged = Schema::merge_spann_configs(Some(&base), Some(&overrides)).unwrap();

    // Taken from the user config.
    assert_eq!(merged.search_nprobe, Some(20));
    assert_eq!(merged.search_rng_epsilon, Some(8.0));
    assert_eq!(merged.split_threshold, Some(150));

    // Taken from the default config.
    assert_eq!(merged.search_rng_factor, Some(1.0));
    assert_eq!(merged.nreplica_count, Some(3));
    assert_eq!(merged.initial_lambda, Some(100.0));
}
2101
/// Converting `(Option<&Space>, &SpannIndexConfig)` into an
/// `InternalSpannConfiguration` copies the fields that are set, uses the
/// crate's default helpers for the unset ones checked here, and uses
/// `default_space()` when no space is given.
#[test]
fn test_spann_index_config_into_internal_configuration() {
    // search_rng_epsilon, nreplica_count, write_rng_epsilon, num_samples_kmeans
    // and merge_threshold are intentionally left unset.
    let partial = SpannIndexConfig {
        search_nprobe: Some(33),
        search_rng_factor: Some(1.2),
        write_rng_factor: Some(1.5),
        split_threshold: Some(75),
        initial_lambda: Some(0.9),
        reassign_neighbor_count: Some(40),
        num_centers_to_merge_to: Some(4),
        write_nprobe: Some(60),
        ef_construction: Some(180),
        ef_search: Some(170),
        max_neighbors: Some(32),
        ..Default::default()
    };

    let cosine: InternalSpannConfiguration = (Some(&Space::Cosine), &partial).into();
    assert_eq!(cosine.space, Space::Cosine);
    assert_eq!(cosine.search_nprobe, 33);
    assert_eq!(cosine.search_rng_factor, 1.2);
    assert_eq!(cosine.search_rng_epsilon, default_search_rng_epsilon());
    assert_eq!(cosine.write_rng_factor, 1.5);
    assert_eq!(cosine.write_nprobe, 60);
    assert_eq!(cosine.ef_construction, 180);
    assert_eq!(cosine.ef_search, 170);
    assert_eq!(cosine.max_neighbors, 32);
    assert_eq!(cosine.merge_threshold, default_merge_threshold());

    let without_space: InternalSpannConfiguration = (None, &partial).into();
    assert_eq!(without_space.space, default_space());
}
2138
/// Exercises `Schema::merge_string_type` with every presence combination of
/// the default and user values: both, default only, user only, and neither.
#[test]
fn test_merge_string_type_combinations() {
    let base = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: true,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: Some(FtsIndexType {
            enabled: false,
            config: FtsIndexConfig {},
        }),
    };
    let overrides = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: None,
    };

    // Shorthand for reading the inverted-index flag off a merged value.
    let inverted_enabled =
        |merged: &StringValueType| merged.string_inverted_index.as_ref().unwrap().enabled;

    // Both present: the user's inverted index wins, the FTS index comes from the default.
    let both = Schema::merge_string_type(Some(&base), Some(&overrides))
        .unwrap()
        .unwrap();
    assert!(!inverted_enabled(&both));
    assert!(!both.fts_index.as_ref().unwrap().enabled);

    // Default only.
    let default_only = Schema::merge_string_type(Some(&base), None)
        .unwrap()
        .unwrap();
    assert!(inverted_enabled(&default_only));

    // User only.
    let user_only = Schema::merge_string_type(None, Some(&overrides))
        .unwrap()
        .unwrap();
    assert!(!inverted_enabled(&user_only));

    // Neither present: nothing to merge.
    let neither = Schema::merge_string_type(None, None).unwrap();
    assert!(neither.is_none());
}
2185
/// Merges a fully populated default vector index config with a sparse user
/// config and checks the top-level fields, the nested HNSW fields, and that a
/// user-only SPANN section is carried through.
#[test]
fn test_merge_vector_index_config_comprehensive() {
    let base = VectorIndexConfig {
        space: Some(Space::Cosine),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(200),
            max_neighbors: Some(16),
            ef_search: Some(10),
            num_threads: Some(4),
            batch_size: Some(100),
            sync_threshold: Some(1000),
            resize_factor: Some(1.2),
        }),
        spann: None,
    };
    let overrides = VectorIndexConfig {
        space: Some(Space::L2),
        embedding_function: None,
        source_key: Some("user_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(300),
            ..Default::default()
        }),
        spann: Some(SpannIndexConfig {
            search_nprobe: Some(15),
            ..Default::default()
        }),
    };

    let merged = Schema::merge_vector_index_config(&base, &overrides);

    // Top-level fields: user values win, an unset user value keeps the default.
    assert_eq!(merged.space, Some(Space::L2));
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(merged.source_key, Some("user_key".to_string()));

    // Nested HNSW fields are merged field by field.
    let merged_hnsw = merged.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(300));
    assert_eq!(merged_hnsw.max_neighbors, Some(16));

    // The SPANN section exists only on the user side.
    assert!(merged.spann.is_some());
    assert_eq!(merged.spann.as_ref().unwrap().search_nprobe, Some(15));
}
2256
/// Merging sparse vector index configs: the user's `source_key` replaces the
/// default one, while an unset user `embedding_function` keeps the default.
#[test]
fn test_merge_sparse_vector_index_config() {
    let base = SparseVectorIndexConfig {
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_sparse_key".to_string()),
        bm25: None,
    };
    let overrides = SparseVectorIndexConfig {
        embedding_function: None,
        source_key: Some("user_sparse_key".to_string()),
        bm25: None,
    };

    let merged = Schema::merge_sparse_vector_index_config(&base, &overrides).unwrap();

    let expected_source_key = Some("user_sparse_key".to_string());
    assert_eq!(merged.source_key, expected_source_key);

    let expected_embedding_function = Some(EmbeddingFunctionConfiguration::Legacy);
    assert_eq!(merged.embedding_function, expected_embedding_function);
}
2283
/// End-to-end merge of a user schema (custom string/float-list defaults plus
/// one per-key override) onto the default HNSW schema, done by hand with
/// `Schema::merge_value_types`, followed by checks on the merged defaults and
/// on the resulting key overrides.
#[test]
fn test_complex_nested_merging_scenario() {
    // User-level default for strings: inverted index off, FTS on.
    let string_defaults = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: Some(FtsIndexType {
            enabled: true,
            config: FtsIndexConfig {},
        }),
    };

    // User-level default for float lists: a partially specified HNSW vector index.
    let float_list_defaults = FloatListValueType {
        vector_index: Some(VectorIndexType {
            enabled: true,
            config: VectorIndexConfig {
                space: Some(Space::Ip),
                embedding_function: None,
                source_key: Some("custom_vector_key".to_string()),
                hnsw: Some(HnswIndexConfig {
                    ef_construction: Some(400),
                    max_neighbors: Some(32),
                    ..Default::default()
                }),
                spann: None,
            },
        }),
    };

    // Override for a key that the default schema does not contain.
    let custom_field_types = ValueTypes {
        string: Some(StringValueType {
            fts_index: Some(FtsIndexType {
                enabled: true,
                config: FtsIndexConfig {},
            }),
            string_inverted_index: None,
        }),
        ..Default::default()
    };

    let user_schema = Schema {
        defaults: ValueTypes {
            string: Some(string_defaults),
            float_list: Some(float_list_defaults),
            ..Default::default()
        },
        keys: HashMap::from([("custom_field".to_string(), custom_field_types)]),
    };

    // Manual merge: defaults first, then each user key onto the default keys.
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults =
        Schema::merge_value_types(&default_schema.defaults, &user_schema.defaults).unwrap();
    let mut merged_keys = default_schema.keys.clone();
    for (key, overrides) in user_schema.keys {
        let resolved = match merged_keys.get(&key) {
            Some(existing) => Schema::merge_value_types(existing, &overrides).unwrap(),
            // Keys unknown to the default schema are taken verbatim.
            None => overrides,
        };
        merged_keys.insert(key, resolved);
    }
    let result = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
    };

    // String defaults reflect the user's choices.
    let merged_string = result.defaults.string.as_ref().unwrap();
    assert!(!merged_string.string_inverted_index.as_ref().unwrap().enabled);
    let merged_string = result.defaults.string.as_ref().unwrap();
    assert!(merged_string.fts_index.as_ref().unwrap().enabled);

    // Vector index config: user values where given, defaults elsewhere.
    let vector_config = &result
        .defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    assert_eq!(vector_config.space, Some(Space::Ip));
    assert_eq!(
        vector_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(
        vector_config.source_key,
        Some("custom_vector_key".to_string())
    );
    let merged_hnsw = vector_config.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(400));
    assert_eq!(
        vector_config.hnsw.as_ref().unwrap().max_neighbors,
        Some(32)
    );
    assert_eq!(
        vector_config.hnsw.as_ref().unwrap().ef_search,
        Some(default_search_ef())
    );

    // The default keys survive and the custom key is added.
    assert!(result.keys.contains_key(EMBEDDING_KEY));
    assert!(result.keys.contains_key(DOCUMENT_KEY));
    assert!(result.keys.contains_key("custom_field"));

    // The custom key keeps exactly what the user supplied.
    let custom_string = result
        .keys
        .get("custom_field")
        .unwrap()
        .string
        .as_ref()
        .unwrap();
    assert!(custom_string.fts_index.as_ref().unwrap().enabled);
    assert!(custom_string.string_inverted_index.is_none());
}
2438
/// Reconciling the default HNSW schema with the default HNSW collection
/// configuration returns the schema unchanged.
#[test]
fn test_reconcile_with_collection_config_default_config() {
    let original = Schema::new_default(KnnIndex::Hnsw);
    let default_config = InternalCollectionConfiguration::default_hnsw();

    let reconciled =
        Schema::reconcile_with_collection_config(original.clone(), default_config).unwrap();

    assert_eq!(reconciled, original);
}
2449
/// When both the schema and the collection configuration differ from their
/// defaults, reconciliation fails with a fixed error message.
#[test]
fn test_reconcile_with_collection_config_both_non_default() {
    // Non-default schema: replace the string defaults.
    let mut custom_schema = Schema::new_default(KnnIndex::Hnsw);
    custom_schema.defaults.string = Some(StringValueType {
        fts_index: Some(FtsIndexType {
            enabled: true,
            config: FtsIndexConfig {},
        }),
        string_inverted_index: None,
    });

    // Non-default collection config: change one HNSW parameter.
    let mut custom_config = InternalCollectionConfiguration::default_hnsw();
    if let VectorIndexConfiguration::Hnsw(hnsw) = &mut custom_config.vector_index {
        hnsw.ef_construction = 500;
    }

    let outcome = Schema::reconcile_with_collection_config(custom_schema, custom_config);

    assert!(outcome.is_err());
    assert_eq!(
        outcome.unwrap_err(),
        "Cannot set both collection config and schema at the same time"
    );
}
2476
/// A non-default HNSW collection configuration reconciled with the default
/// schema ends up in the vector index stored under `EMBEDDING_KEY`.
#[test]
fn test_reconcile_with_collection_config_hnsw_override() {
    let default_schema = Schema::new_default(KnnIndex::Hnsw);

    let custom_hnsw = InternalHnswConfiguration {
        space: Space::L2,
        ef_construction: 300,
        max_neighbors: 32,
        ef_search: 50,
        num_threads: 8,
        batch_size: 200,
        sync_threshold: 2000,
        resize_factor: 1.5,
    };
    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(custom_hnsw),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };

    let reconciled =
        Schema::reconcile_with_collection_config(default_schema, custom_config).unwrap();

    let embedding_types = reconciled.keys.get(EMBEDDING_KEY).unwrap();
    let embedding_index = embedding_types
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    let index_config = &embedding_index.config;

    assert!(embedding_index.enabled);
    assert_eq!(index_config.space, Some(Space::L2));
    assert_eq!(
        index_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(index_config.source_key, Some(DOCUMENT_KEY.to_string()));

    // Every HNSW parameter from the collection config is carried over.
    let hnsw = index_config.hnsw.as_ref().unwrap();
    assert_eq!(hnsw.ef_construction, Some(300));
    assert_eq!(hnsw.max_neighbors, Some(32));
    assert_eq!(hnsw.ef_search, Some(50));
    assert_eq!(hnsw.num_threads, Some(8));
    assert_eq!(hnsw.batch_size, Some(200));
    assert_eq!(hnsw.sync_threshold, Some(2000));
    assert_eq!(hnsw.resize_factor, Some(1.5));

    assert!(index_config.spann.is_none());
}
2530
/// A non-default SPANN collection configuration reconciled with the default
/// SPANN schema ends up in the vector index stored under `EMBEDDING_KEY`.
#[test]
fn test_reconcile_with_collection_config_spann_override() {
    let default_schema = Schema::new_default(KnnIndex::Spann);

    let custom_spann = InternalSpannConfiguration {
        space: Space::Cosine,
        search_nprobe: 20,
        search_rng_factor: 3.0,
        search_rng_epsilon: 0.2,
        nreplica_count: 5,
        write_rng_factor: 2.0,
        write_rng_epsilon: 0.1,
        split_threshold: 2000,
        num_samples_kmeans: 200,
        initial_lambda: 0.8,
        reassign_neighbor_count: 100,
        merge_threshold: 800,
        num_centers_to_merge_to: 20,
        write_nprobe: 10,
        ef_construction: 400,
        ef_search: 60,
        max_neighbors: 24,
    };
    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(custom_spann),
        embedding_function: None,
    };

    let reconciled =
        Schema::reconcile_with_collection_config(default_schema, custom_config).unwrap();

    let embedding_types = reconciled.keys.get(EMBEDDING_KEY).unwrap();
    let embedding_index = embedding_types
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    let index_config = &embedding_index.config;

    assert!(embedding_index.enabled);
    assert_eq!(index_config.space, Some(Space::Cosine));
    assert_eq!(index_config.embedding_function, None);
    assert_eq!(index_config.source_key, Some(DOCUMENT_KEY.to_string()));

    assert!(index_config.hnsw.is_none());

    // Every SPANN parameter from the collection config is carried over.
    let spann = index_config.spann.as_ref().unwrap();
    assert_eq!(spann.search_nprobe, Some(20));
    assert_eq!(spann.search_rng_factor, Some(3.0));
    assert_eq!(spann.search_rng_epsilon, Some(0.2));
    assert_eq!(spann.nreplica_count, Some(5));
    assert_eq!(spann.write_rng_factor, Some(2.0));
    assert_eq!(spann.write_rng_epsilon, Some(0.1));
    assert_eq!(spann.split_threshold, Some(2000));
    assert_eq!(spann.num_samples_kmeans, Some(200));
    assert_eq!(spann.initial_lambda, Some(0.8));
    assert_eq!(spann.reassign_neighbor_count, Some(100));
    assert_eq!(spann.merge_threshold, Some(800));
    assert_eq!(spann.num_centers_to_merge_to, Some(20));
    assert_eq!(spann.write_nprobe, Some(10));
    assert_eq!(spann.ef_construction, Some(400));
    assert_eq!(spann.ef_search, Some(60));
    assert_eq!(spann.max_neighbors, Some(24));
}
2599
/// Reconciling a non-default HNSW collection configuration updates the vector
/// index in two places: the schema-wide float-list default (which stays
/// disabled) and the `EMBEDDING_KEY` override (which is enabled).
#[test]
fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
    let default_schema = Schema::new_default(KnnIndex::Hnsw);

    let custom_hnsw = InternalHnswConfiguration {
        space: Space::L2,
        ef_construction: 300,
        max_neighbors: 32,
        ef_search: 50,
        num_threads: 8,
        batch_size: 200,
        sync_threshold: 2000,
        resize_factor: 1.5,
    };
    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(custom_hnsw),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };

    let reconciled =
        Schema::reconcile_with_collection_config(default_schema, custom_config).unwrap();

    // 1) Schema-wide float-list default.
    let default_index = reconciled
        .defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert!(!default_index.enabled);
    assert_eq!(default_index.config.space, Some(Space::L2));
    assert_eq!(
        default_index.config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(
        default_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
    let default_hnsw = default_index.config.hnsw.as_ref().unwrap();
    assert_eq!(default_hnsw.ef_construction, Some(300));
    assert_eq!(default_hnsw.max_neighbors, Some(32));

    // 2) Per-key override for the embedding key.
    let embedding_types = reconciled.keys.get(EMBEDDING_KEY).unwrap();
    let embedding_index = embedding_types
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert!(embedding_index.enabled);
    assert_eq!(embedding_index.config.space, Some(Space::L2));
    assert_eq!(
        embedding_index.config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(
        embedding_index.config.source_key,
        Some(DOCUMENT_KEY.to_string())
    );
    let embedding_hnsw = embedding_index.config.hnsw.as_ref().unwrap();
    assert_eq!(embedding_hnsw.ef_construction, Some(300));
    assert_eq!(embedding_hnsw.max_neighbors, Some(32));
}
2674
/// `Schema::is_schema_default` accepts the stock HNSW and SPANN schemas and
/// rejects an empty schema, a schema with a changed default, and a schema
/// with an additional key override.
#[test]
fn test_is_schema_default() {
    // Both stock schemas count as default.
    let stock_hnsw = Schema::new_default(KnnIndex::Hnsw);
    assert!(Schema::is_schema_default(&stock_hnsw));

    let stock_spann = Schema::new_default(KnnIndex::Spann);
    assert!(Schema::is_schema_default(&stock_spann));

    // A completely empty schema is not the default.
    let blank = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::new(),
    };
    assert!(!Schema::is_schema_default(&blank));

    // Flipping a single default flag makes the schema non-default.
    let mut tweaked = Schema::new_default(KnnIndex::Hnsw);
    let inverted_index = tweaked
        .defaults
        .string
        .as_mut()
        .and_then(|string_type| string_type.string_inverted_index.as_mut());
    if let Some(inverted_index) = inverted_index {
        inverted_index.enabled = false;
    }
    assert!(!Schema::is_schema_default(&tweaked));

    // So does adding a key override, even an empty one.
    let mut with_extra_key = Schema::new_default(KnnIndex::Hnsw);
    with_extra_key
        .keys
        .insert("custom_key".to_string(), ValueTypes::default());
    assert!(!Schema::is_schema_default(&with_extra_key));
}
2708
/// Two schemas that override the same key for different value types (string
/// in one, float in the other) merge into a single override with both.
#[test]
fn test_add_merges_keys_by_value_type() {
    let mut left = Schema::new_default(KnnIndex::Hnsw);
    left.keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            string: Some(StringValueType {
                string_inverted_index: Some(StringInvertedIndexType {
                    enabled: true,
                    config: StringInvertedIndexConfig {},
                }),
                fts_index: None,
            }),
            ..Default::default()
        },
    );

    let mut right = Schema::new_default(KnnIndex::Hnsw);
    right.keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            float: Some(FloatValueType {
                float_inverted_index: Some(FloatInvertedIndexType {
                    enabled: true,
                    config: FloatInvertedIndexConfig {},
                }),
            }),
            ..Default::default()
        },
    );

    let combined = left.merge(&right).unwrap();
    let custom_field = combined.keys.get("custom_field").unwrap();

    // Both value types are present on the merged override.
    assert!(custom_field.string.is_some());
    assert!(custom_field.float.is_some());

    // Each one keeps its index enabled.
    let string_types = custom_field.string.as_ref().unwrap();
    assert!(string_types.string_inverted_index.as_ref().unwrap().enabled);
    let float_types = custom_field.float.as_ref().unwrap();
    assert!(float_types.float_inverted_index.as_ref().unwrap().enabled);
}
2767
/// Merging two schemas whose `defaults` differ fails with
/// `SchemaError::InvalidSchema` and a fixed reason string.
#[test]
fn test_add_rejects_different_defaults() {
    let left = Schema::new_default(KnnIndex::Hnsw);

    // Same schema, but with the string inverted index switched off.
    let mut right = Schema::new_default(KnnIndex::Hnsw);
    let inverted_index = right
        .defaults
        .string
        .as_mut()
        .and_then(|string_type| string_type.string_inverted_index.as_mut());
    if let Some(inverted_index) = inverted_index {
        inverted_index.enabled = false;
    }

    let failure = left.merge(&right).unwrap_err();
    if let SchemaError::InvalidSchema { reason } = failure {
        assert_eq!(reason, "Cannot merge schemas with differing defaults")
    } else {
        panic!("Expected InvalidSchema error")
    }
}
2787
/// Two schemas that override the same key and value type with contradictory
/// settings (index enabled vs. disabled) cannot be merged.
#[test]
fn test_add_detects_conflicting_value_type_configuration() {
    // Builds a string override whose inverted index has the given state.
    let string_override = |enabled: bool| ValueTypes {
        string: Some(StringValueType {
            string_inverted_index: Some(StringInvertedIndexType {
                enabled,
                config: StringInvertedIndexConfig {},
            }),
            fts_index: None,
        }),
        ..Default::default()
    };

    let mut left = Schema::new_default(KnnIndex::Hnsw);
    left.keys
        .insert("custom_field".to_string(), string_override(true));

    let mut right = Schema::new_default(KnnIndex::Hnsw);
    right
        .keys
        .insert("custom_field".to_string(), string_override(false));

    let failure = left.merge(&right).unwrap_err();
    if let SchemaError::InvalidSchema { reason } = failure {
        assert!(reason.contains("Conflicting configuration"));
    } else {
        panic!("Expected InvalidSchema error")
    }
}
2829
/// A schema written with the legacy JSON names (`#string`, `$fts_index`,
/// `key_overrides`, ...) deserializes to the same value as one written with
/// the current names, and serializing it again emits only the current names.
#[test]
fn test_backward_compatibility_aliases() {
    // Legacy spelling: `#`-prefixed value types, `$`-prefixed indexes, `key_overrides`.
    let legacy_json = r###"{
        "defaults": {
            "#string": {
                "$fts_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "#int": {
                "$int_inverted_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "#float_list": {
                "$vector_index": {
                    "enabled": true,
                    "config": {
                        "spann": {
                            "search_nprobe": 10
                        }
                    }
                }
            }
        },
        "key_overrides": {
            "#document": {
                "#string": {
                    "$fts_index": {
                        "enabled": false,
                        "config": {}
                    }
                }
            }
        }
    }"###;
    let from_legacy: Schema = serde_json::from_str(legacy_json).unwrap();

    // Current spelling of the very same schema.
    let current_json = r###"{
        "defaults": {
            "string": {
                "fts_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "int": {
                "int_inverted_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "float_list": {
                "vector_index": {
                    "enabled": true,
                    "config": {
                        "spann": {
                            "search_nprobe": 10
                        }
                    }
                }
            }
        },
        "keys": {
            "#document": {
                "string": {
                    "fts_index": {
                        "enabled": false,
                        "config": {}
                    }
                }
            }
        }
    }"###;
    let from_current: Schema = serde_json::from_str(current_json).unwrap();

    assert_eq!(from_legacy, from_current);

    // Spot-check the content parsed from the legacy document.
    let defaults = &from_legacy.defaults;

    assert!(defaults.string.is_some());
    assert!(defaults.string.as_ref().unwrap().fts_index.is_some());
    assert!(
        defaults
            .string
            .as_ref()
            .unwrap()
            .fts_index
            .as_ref()
            .unwrap()
            .enabled
    );

    assert!(defaults.int.is_some());
    assert!(defaults.int.as_ref().unwrap().int_inverted_index.is_some());

    assert!(defaults.float_list.is_some());
    assert!(defaults.float_list.as_ref().unwrap().vector_index.is_some());

    assert!(from_legacy.keys.contains_key(DOCUMENT_KEY));
    let document_types = from_legacy.keys.get(DOCUMENT_KEY).unwrap();
    assert!(document_types.string.is_some());
    let document_fts = document_types.string.as_ref().unwrap().fts_index.as_ref();
    assert!(!document_fts.unwrap().enabled);

    // Serialization must use the current names only.
    let serialized = serde_json::to_string(&from_legacy).unwrap();

    let current_names = [
        r#""keys":"#,
        r#""string":"#,
        r#""fts_index":"#,
        r#""int_inverted_index":"#,
        r#""vector_index":"#,
    ];
    for name in current_names {
        assert!(serialized.contains(name));
    }

    let legacy_names = [
        r#""key_overrides":"#,
        r###""#string":"###,
        r###""$fts_index":"###,
        r###""$int_inverted_index":"###,
        r###""$vector_index":"###,
    ];
    for name in legacy_names {
        assert!(!serialized.contains(name));
    }
}
2986
/// Validation of `HnswIndexConfig`: `batch_size` and `sync_threshold` are
/// rejected at 1 and accepted at 2, while unset fields and the other fields
/// tried here pass validation.
#[test]
fn test_hnsw_index_config_validation() {
    use validator::Validate;

    let accepts = |config: HnswIndexConfig| config.validate().is_ok();
    let rejects = |config: HnswIndexConfig| config.validate().is_err();

    // A typical configuration passes.
    assert!(accepts(HnswIndexConfig {
        batch_size: Some(10),
        sync_threshold: Some(100),
        ef_construction: Some(100),
        max_neighbors: Some(16),
        ..Default::default()
    }));

    // batch_size of 1 is rejected.
    assert!(rejects(HnswIndexConfig {
        batch_size: Some(1),
        ..Default::default()
    }));

    // sync_threshold of 1 is rejected.
    assert!(rejects(HnswIndexConfig {
        sync_threshold: Some(1),
        ..Default::default()
    }));

    // A value of 2 is accepted for both.
    assert!(accepts(HnswIndexConfig {
        batch_size: Some(2),
        sync_threshold: Some(2),
        ..Default::default()
    }));

    // Nothing set at all.
    assert!(accepts(HnswIndexConfig::default()));

    // Small values on the remaining fields still pass.
    assert!(accepts(HnswIndexConfig {
        ef_construction: Some(1),
        max_neighbors: Some(1),
        ef_search: Some(1),
        num_threads: Some(1),
        resize_factor: Some(0.1),
        ..Default::default()
    }));
}
3040
/// Validation of `SpannIndexConfig`: one fully valid configuration, then each
/// field in turn with values expected to fail (and, for the float fields and
/// `num_samples_kmeans`, a value expected to pass), and finally an empty
/// configuration.
#[test]
fn test_spann_index_config_validation() {
    use validator::Validate;

    let accepts = |config: SpannIndexConfig| config.validate().is_ok();
    let rejects = |config: SpannIndexConfig| config.validate().is_err();

    // A configuration where every set field is valid.
    assert!(accepts(SpannIndexConfig {
        write_nprobe: Some(32),
        nreplica_count: Some(4),
        split_threshold: Some(100),
        merge_threshold: Some(50),
        reassign_neighbor_count: Some(32),
        num_centers_to_merge_to: Some(4),
        ef_construction: Some(100),
        ef_search: Some(100),
        max_neighbors: Some(32),
        search_rng_factor: Some(1.0),
        write_rng_factor: Some(1.0),
        search_rng_epsilon: Some(7.5),
        write_rng_epsilon: Some(7.5),
        ..Default::default()
    }));

    // write_nprobe
    assert!(rejects(SpannIndexConfig {
        write_nprobe: Some(200),
        ..Default::default()
    }));

    // split_threshold: too low, then too high
    assert!(rejects(SpannIndexConfig {
        split_threshold: Some(10),
        ..Default::default()
    }));
    assert!(rejects(SpannIndexConfig {
        split_threshold: Some(250),
        ..Default::default()
    }));

    // nreplica_count
    assert!(rejects(SpannIndexConfig {
        nreplica_count: Some(10),
        ..Default::default()
    }));

    // reassign_neighbor_count
    assert!(rejects(SpannIndexConfig {
        reassign_neighbor_count: Some(100),
        ..Default::default()
    }));

    // merge_threshold: too low, then too high
    assert!(rejects(SpannIndexConfig {
        merge_threshold: Some(5),
        ..Default::default()
    }));
    assert!(rejects(SpannIndexConfig {
        merge_threshold: Some(150),
        ..Default::default()
    }));

    // num_centers_to_merge_to
    assert!(rejects(SpannIndexConfig {
        num_centers_to_merge_to: Some(10),
        ..Default::default()
    }));

    // ef_construction
    assert!(rejects(SpannIndexConfig {
        ef_construction: Some(300),
        ..Default::default()
    }));

    // ef_search
    assert!(rejects(SpannIndexConfig {
        ef_search: Some(300),
        ..Default::default()
    }));

    // max_neighbors
    assert!(rejects(SpannIndexConfig {
        max_neighbors: Some(100),
        ..Default::default()
    }));

    // search_nprobe
    assert!(rejects(SpannIndexConfig {
        search_nprobe: Some(200),
        ..Default::default()
    }));

    // search_rng_factor: below, above, then exactly 1.0
    assert!(rejects(SpannIndexConfig {
        search_rng_factor: Some(0.9),
        ..Default::default()
    }));
    assert!(rejects(SpannIndexConfig {
        search_rng_factor: Some(1.1),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        search_rng_factor: Some(1.0),
        ..Default::default()
    }));

    // search_rng_epsilon: below, above, then an accepted value
    assert!(rejects(SpannIndexConfig {
        search_rng_epsilon: Some(4.0),
        ..Default::default()
    }));
    assert!(rejects(SpannIndexConfig {
        search_rng_epsilon: Some(11.0),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        search_rng_epsilon: Some(7.5),
        ..Default::default()
    }));

    // write_rng_factor: below, above, then exactly 1.0
    assert!(rejects(SpannIndexConfig {
        write_rng_factor: Some(0.9),
        ..Default::default()
    }));
    assert!(rejects(SpannIndexConfig {
        write_rng_factor: Some(1.1),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        write_rng_factor: Some(1.0),
        ..Default::default()
    }));

    // write_rng_epsilon: below, above, then an accepted value
    assert!(rejects(SpannIndexConfig {
        write_rng_epsilon: Some(4.0),
        ..Default::default()
    }));
    assert!(rejects(SpannIndexConfig {
        write_rng_epsilon: Some(11.0),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        write_rng_epsilon: Some(7.5),
        ..Default::default()
    }));

    // num_samples_kmeans: rejected value, then an accepted one
    assert!(rejects(SpannIndexConfig {
        num_samples_kmeans: Some(1500),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        num_samples_kmeans: Some(500),
        ..Default::default()
    }));

    // initial_lambda: above, below, then exactly 100.0
    assert!(rejects(SpannIndexConfig {
        initial_lambda: Some(150.0),
        ..Default::default()
    }));
    assert!(rejects(SpannIndexConfig {
        initial_lambda: Some(50.0),
        ..Default::default()
    }));
    assert!(accepts(SpannIndexConfig {
        initial_lambda: Some(100.0),
        ..Default::default()
    }));

    // Nothing set at all.
    assert!(accepts(SpannIndexConfig::default()));
}
3267}