1use chroma_error::{ChromaError, ErrorCodes};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use thiserror::Error;
5
6use crate::collection_configuration::{
7 EmbeddingFunctionConfiguration, InternalCollectionConfiguration, VectorIndexConfiguration,
8};
9use crate::hnsw_configuration::Space;
10use crate::metadata::{MetadataComparison, MetadataValueType, Where};
11use crate::operator::QueryVector;
12use crate::{
13 default_batch_size, default_construction_ef, default_construction_ef_spann,
14 default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
15 default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
16 default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
17 default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
18 default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
19 default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor,
20 InternalSpannConfiguration, KnnIndex,
21};
22
23impl ChromaError for SchemaError {
24 fn code(&self) -> ErrorCodes {
25 ErrorCodes::Internal
26 }
27}
28
29#[derive(Debug, Error)]
30pub enum SchemaError {
31 #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
32 MissingIndexConfiguration { key: String, value_type: String },
33 #[error("Schema reconciliation failed: {reason}")]
34 InvalidSchema { reason: String },
35}
36
37#[derive(Debug, Error)]
38pub enum FilterValidationError {
39 #[error(
40 "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
41 )]
42 IndexingDisabled {
43 key: String,
44 value_type: MetadataValueType,
45 },
46 #[error(transparent)]
47 Schema(#[from] SchemaError),
48}
49
50impl ChromaError for FilterValidationError {
51 fn code(&self) -> ErrorCodes {
52 match self {
53 FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
54 FilterValidationError::Schema(_) => ErrorCodes::Internal,
55 }
56 }
57}
58
// Names of the supported value types. Each one equals the serialized field name of the
// corresponding `ValueTypes` member.

/// Name of the string value type.
pub const STRING_VALUE_NAME: &str = "string";
/// Name of the integer value type.
pub const INT_VALUE_NAME: &str = "int";
/// Name of the boolean value type.
pub const BOOL_VALUE_NAME: &str = "bool";
/// Name of the float value type.
pub const FLOAT_VALUE_NAME: &str = "float";
/// Name of the float-list value type.
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
/// Name of the sparse-vector value type.
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";
71
// Names of the supported index kinds. Each one equals the serialized field name of the
// corresponding index entry inside its value-type struct.

/// Name of the full-text-search index.
pub const FTS_INDEX_NAME: &str = "fts_index";
/// Name of the dense vector index.
pub const VECTOR_INDEX_NAME: &str = "vector_index";
/// Name of the sparse vector index.
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
/// Name of the inverted index over string values.
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
/// Name of the inverted index over integer values.
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
/// Name of the inverted index over float values.
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
/// Name of the inverted index over boolean values.
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";
80
/// Reserved schema key under which the default schema stores the document settings; the
/// default embedding vector index also names it as its source key.
pub const DOCUMENT_KEY: &str = "#document";
/// Reserved schema key under which the default schema stores the embedding settings.
pub const EMBEDDING_KEY: &str = "#embedding";
84
85#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
93#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
94pub struct InternalSchema {
95 pub defaults: ValueTypes,
97 #[serde(rename = "keys", alias = "key_overrides")]
100 pub keys: HashMap<String, ValueTypes>,
101}
102
103pub fn is_embedding_function_default(
104 embedding_function: &Option<EmbeddingFunctionConfiguration>,
105) -> bool {
106 match embedding_function {
107 None => true,
108 Some(embedding_function) => embedding_function.is_default(),
109 }
110}
111
112pub fn is_space_default(space: &Option<Space>) -> bool {
114 match space {
115 None => true, Some(s) => *s == default_space(), }
118}
119
120pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
122 hnsw_config.ef_construction == Some(default_construction_ef())
123 && hnsw_config.ef_search == Some(default_search_ef())
124 && hnsw_config.max_neighbors == Some(default_m())
125 && hnsw_config.num_threads == Some(default_num_threads())
126 && hnsw_config.batch_size == Some(default_batch_size())
127 && hnsw_config.sync_threshold == Some(default_sync_threshold())
128 && hnsw_config.resize_factor == Some(default_resize_factor())
129}
130
131#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
138#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
139pub struct ValueTypes {
140 #[serde(
141 rename = "string",
142 alias = "#string",
143 skip_serializing_if = "Option::is_none"
144 )] pub string: Option<StringValueType>,
146
147 #[serde(
148 rename = "float_list",
149 alias = "#float_list",
150 skip_serializing_if = "Option::is_none"
151 )]
152 pub float_list: Option<FloatListValueType>,
154
155 #[serde(
156 rename = "sparse_vector",
157 alias = "#sparse_vector",
158 skip_serializing_if = "Option::is_none"
159 )]
160 pub sparse_vector: Option<SparseVectorValueType>,
162
163 #[serde(
164 rename = "int",
165 alias = "#int",
166 skip_serializing_if = "Option::is_none"
167 )] pub int: Option<IntValueType>,
169
170 #[serde(
171 rename = "float",
172 alias = "#float",
173 skip_serializing_if = "Option::is_none"
174 )] pub float: Option<FloatValueType>,
176
177 #[serde(
178 rename = "bool",
179 alias = "#bool",
180 skip_serializing_if = "Option::is_none"
181 )] pub boolean: Option<BoolValueType>,
183}
184
185#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
187#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
188pub struct StringValueType {
189 #[serde(
190 rename = "fts_index",
191 alias = "$fts_index",
192 skip_serializing_if = "Option::is_none"
193 )] pub fts_index: Option<FtsIndexType>,
195
196 #[serde(
197 rename = "string_inverted_index", alias = "$string_inverted_index",
199 skip_serializing_if = "Option::is_none"
200 )]
201 pub string_inverted_index: Option<StringInvertedIndexType>,
202}
203
204#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
206#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
207pub struct FloatListValueType {
208 #[serde(
209 rename = "vector_index",
210 alias = "$vector_index",
211 skip_serializing_if = "Option::is_none"
212 )] pub vector_index: Option<VectorIndexType>,
214}
215
216#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
218#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
219pub struct SparseVectorValueType {
220 #[serde(
221 rename = "sparse_vector_index", alias = "$sparse_vector_index",
223 skip_serializing_if = "Option::is_none"
224 )]
225 pub sparse_vector_index: Option<SparseVectorIndexType>,
226}
227
228#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
230#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
231pub struct IntValueType {
232 #[serde(
233 rename = "int_inverted_index",
234 alias = "$int_inverted_index",
235 skip_serializing_if = "Option::is_none"
236 )]
237 pub int_inverted_index: Option<IntInvertedIndexType>,
239}
240
241#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
243#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
244pub struct FloatValueType {
245 #[serde(
246 rename = "float_inverted_index", alias = "$float_inverted_index",
248 skip_serializing_if = "Option::is_none"
249 )]
250 pub float_inverted_index: Option<FloatInvertedIndexType>,
251}
252
253#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
255#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
256pub struct BoolValueType {
257 #[serde(
258 rename = "bool_inverted_index", alias = "$bool_inverted_index",
260 skip_serializing_if = "Option::is_none"
261 )]
262 pub bool_inverted_index: Option<BoolInvertedIndexType>,
263}
264
265#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
267#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
268pub struct FtsIndexType {
269 pub enabled: bool,
270 pub config: FtsIndexConfig,
271}
272
273#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
274#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
275pub struct VectorIndexType {
276 pub enabled: bool,
277 pub config: VectorIndexConfig,
278}
279
280#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
281#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
282pub struct SparseVectorIndexType {
283 pub enabled: bool,
284 pub config: SparseVectorIndexConfig,
285}
286
287#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
288#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
289pub struct StringInvertedIndexType {
290 pub enabled: bool,
291 pub config: StringInvertedIndexConfig,
292}
293
294#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
295#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
296pub struct IntInvertedIndexType {
297 pub enabled: bool,
298 pub config: IntInvertedIndexConfig,
299}
300
301#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
302#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
303pub struct FloatInvertedIndexType {
304 pub enabled: bool,
305 pub config: FloatInvertedIndexConfig,
306}
307
308#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
309#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
310pub struct BoolInvertedIndexType {
311 pub enabled: bool,
312 pub config: BoolInvertedIndexConfig,
313}
314
315impl InternalSchema {
316 pub fn new_default(default_knn_index: KnnIndex) -> Self {
318 let vector_config = VectorIndexType {
320 enabled: false,
321 config: VectorIndexConfig {
322 space: Some(default_space()),
323 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
324 source_key: None,
325 hnsw: match default_knn_index {
326 KnnIndex::Hnsw => Some(HnswIndexConfig {
327 ef_construction: Some(default_construction_ef()),
328 max_neighbors: Some(default_m()),
329 ef_search: Some(default_search_ef()),
330 num_threads: Some(default_num_threads()),
331 batch_size: Some(default_batch_size()),
332 sync_threshold: Some(default_sync_threshold()),
333 resize_factor: Some(default_resize_factor()),
334 }),
335 KnnIndex::Spann => None,
336 },
337 spann: match default_knn_index {
338 KnnIndex::Hnsw => None,
339 KnnIndex::Spann => Some(SpannIndexConfig {
340 search_nprobe: Some(default_search_nprobe()),
341 search_rng_factor: Some(default_search_rng_factor()),
342 search_rng_epsilon: Some(default_search_rng_epsilon()),
343 nreplica_count: Some(default_nreplica_count()),
344 write_rng_factor: Some(default_write_rng_factor()),
345 write_rng_epsilon: Some(default_write_rng_epsilon()),
346 split_threshold: Some(default_split_threshold()),
347 num_samples_kmeans: Some(default_num_samples_kmeans()),
348 initial_lambda: Some(default_initial_lambda()),
349 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
350 merge_threshold: Some(default_merge_threshold()),
351 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
352 write_nprobe: Some(default_write_nprobe()),
353 ef_construction: Some(default_construction_ef_spann()),
354 ef_search: Some(default_search_ef_spann()),
355 max_neighbors: Some(default_m_spann()),
356 }),
357 },
358 },
359 };
360
361 let defaults = ValueTypes {
363 string: Some(StringValueType {
364 string_inverted_index: Some(StringInvertedIndexType {
365 enabled: true,
366 config: StringInvertedIndexConfig {},
367 }),
368 fts_index: Some(FtsIndexType {
369 enabled: false,
370 config: FtsIndexConfig {},
371 }),
372 }),
373 float: Some(FloatValueType {
374 float_inverted_index: Some(FloatInvertedIndexType {
375 enabled: true,
376 config: FloatInvertedIndexConfig {},
377 }),
378 }),
379 int: Some(IntValueType {
380 int_inverted_index: Some(IntInvertedIndexType {
381 enabled: true,
382 config: IntInvertedIndexConfig {},
383 }),
384 }),
385 boolean: Some(BoolValueType {
386 bool_inverted_index: Some(BoolInvertedIndexType {
387 enabled: true,
388 config: BoolInvertedIndexConfig {},
389 }),
390 }),
391 float_list: Some(FloatListValueType {
392 vector_index: Some(vector_config),
393 }),
394 sparse_vector: Some(SparseVectorValueType {
395 sparse_vector_index: Some(SparseVectorIndexType {
396 enabled: false,
397 config: SparseVectorIndexConfig {
398 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
399 source_key: None,
400 bm25: Some(false),
401 },
402 }),
403 }),
404 };
405
406 let mut keys = HashMap::new();
408
409 let embedding_defaults = ValueTypes {
411 float_list: Some(FloatListValueType {
412 vector_index: Some(VectorIndexType {
413 enabled: true,
414 config: VectorIndexConfig {
415 space: Some(default_space()),
416 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
417 source_key: Some(DOCUMENT_KEY.to_string()),
418 hnsw: match default_knn_index {
419 KnnIndex::Hnsw => Some(HnswIndexConfig {
420 ef_construction: Some(default_construction_ef()),
421 max_neighbors: Some(default_m()),
422 ef_search: Some(default_search_ef()),
423 num_threads: Some(default_num_threads()),
424 batch_size: Some(default_batch_size()),
425 sync_threshold: Some(default_sync_threshold()),
426 resize_factor: Some(default_resize_factor()),
427 }),
428 KnnIndex::Spann => None,
429 },
430 spann: match default_knn_index {
431 KnnIndex::Hnsw => None,
432 KnnIndex::Spann => Some(SpannIndexConfig {
433 search_nprobe: Some(default_search_nprobe()),
434 search_rng_factor: Some(default_search_rng_factor()),
435 search_rng_epsilon: Some(default_search_rng_epsilon()),
436 nreplica_count: Some(default_nreplica_count()),
437 write_rng_factor: Some(default_write_rng_factor()),
438 write_rng_epsilon: Some(default_write_rng_epsilon()),
439 split_threshold: Some(default_split_threshold()),
440 num_samples_kmeans: Some(default_num_samples_kmeans()),
441 initial_lambda: Some(default_initial_lambda()),
442 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
443 merge_threshold: Some(default_merge_threshold()),
444 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
445 write_nprobe: Some(default_write_nprobe()),
446 ef_construction: Some(default_construction_ef_spann()),
447 ef_search: Some(default_search_ef_spann()),
448 max_neighbors: Some(default_m_spann()),
449 }),
450 },
451 },
452 }),
453 }),
454 ..Default::default()
455 };
456 keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
457
458 let document_defaults = ValueTypes {
460 string: Some(StringValueType {
461 fts_index: Some(FtsIndexType {
462 enabled: true,
463 config: FtsIndexConfig {},
464 }),
465 string_inverted_index: Some(StringInvertedIndexType {
466 enabled: false,
467 config: StringInvertedIndexConfig {},
468 }),
469 }),
470 ..Default::default()
471 };
472 keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
473
474 InternalSchema { defaults, keys }
475 }
476
477 pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
478 let to_internal = |vector_index: &VectorIndexType| {
479 let space = vector_index.config.space.clone();
480 vector_index
481 .config
482 .spann
483 .clone()
484 .map(|config| config.into_internal_configuration(space))
485 };
486
487 self.keys
488 .get(EMBEDDING_KEY)
489 .and_then(|value_types| value_types.float_list.as_ref())
490 .and_then(|float_list| float_list.vector_index.as_ref())
491 .and_then(to_internal)
492 .or_else(|| {
493 self.defaults
494 .float_list
495 .as_ref()
496 .and_then(|float_list| float_list.vector_index.as_ref())
497 .and_then(to_internal)
498 })
499 }
500
501 pub fn reconcile_with_defaults(user_schema: Option<InternalSchema>) -> Result<Self, String> {
508 let default_schema = InternalSchema::new_default(KnnIndex::Spann);
509
510 match user_schema {
511 Some(user) => {
512 let merged_defaults =
514 Self::merge_value_types(&default_schema.defaults, &user.defaults)?;
515
516 let mut merged_keys = default_schema.keys.clone();
518 for (key, user_value_types) in user.keys {
519 if let Some(default_value_types) = merged_keys.get(&key) {
520 let merged_value_types =
522 Self::merge_value_types(default_value_types, &user_value_types)?;
523 merged_keys.insert(key, merged_value_types);
524 } else {
525 merged_keys.insert(key, user_value_types);
527 }
528 }
529
530 Ok(InternalSchema {
531 defaults: merged_defaults,
532 keys: merged_keys,
533 })
534 }
535 None => Ok(default_schema),
536 }
537 }
538
539 pub fn merge(&self, other: &InternalSchema) -> Result<InternalSchema, SchemaError> {
541 if self.defaults != other.defaults {
542 return Err(SchemaError::InvalidSchema {
543 reason: "Cannot merge schemas with differing defaults".to_string(),
544 });
545 }
546
547 let mut keys = self.keys.clone();
548
549 for (key, other_value_types) in &other.keys {
550 if let Some(existing) = keys.get(key).cloned() {
551 let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
552 keys.insert(key.clone(), merged);
553 } else {
554 keys.insert(key.clone(), other_value_types.clone());
555 }
556 }
557
558 Ok(InternalSchema {
559 defaults: self.defaults.clone(),
560 keys,
561 })
562 }
563
564 fn merge_override_value_types(
565 key: &str,
566 left: &ValueTypes,
567 right: &ValueTypes,
568 ) -> Result<ValueTypes, SchemaError> {
569 Ok(ValueTypes {
570 string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
571 float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
572 int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
573 boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
574 float_list: Self::merge_float_list_override(
575 key,
576 left.float_list.as_ref(),
577 right.float_list.as_ref(),
578 )?,
579 sparse_vector: Self::merge_sparse_vector_override(
580 key,
581 left.sparse_vector.as_ref(),
582 right.sparse_vector.as_ref(),
583 )?,
584 })
585 }
586
587 fn merge_string_override(
588 key: &str,
589 left: Option<&StringValueType>,
590 right: Option<&StringValueType>,
591 ) -> Result<Option<StringValueType>, SchemaError> {
592 match (left, right) {
593 (Some(l), Some(r)) => Ok(Some(StringValueType {
594 string_inverted_index: Self::merge_index_or_error(
595 l.string_inverted_index.as_ref(),
596 r.string_inverted_index.as_ref(),
597 &format!("key '{key}' string.string_inverted_index"),
598 )?,
599 fts_index: Self::merge_index_or_error(
600 l.fts_index.as_ref(),
601 r.fts_index.as_ref(),
602 &format!("key '{key}' string.fts_index"),
603 )?,
604 })),
605 (Some(l), None) => Ok(Some(l.clone())),
606 (None, Some(r)) => Ok(Some(r.clone())),
607 (None, None) => Ok(None),
608 }
609 }
610
611 fn merge_float_override(
612 key: &str,
613 left: Option<&FloatValueType>,
614 right: Option<&FloatValueType>,
615 ) -> Result<Option<FloatValueType>, SchemaError> {
616 match (left, right) {
617 (Some(l), Some(r)) => Ok(Some(FloatValueType {
618 float_inverted_index: Self::merge_index_or_error(
619 l.float_inverted_index.as_ref(),
620 r.float_inverted_index.as_ref(),
621 &format!("key '{key}' float.float_inverted_index"),
622 )?,
623 })),
624 (Some(l), None) => Ok(Some(l.clone())),
625 (None, Some(r)) => Ok(Some(r.clone())),
626 (None, None) => Ok(None),
627 }
628 }
629
630 fn merge_int_override(
631 key: &str,
632 left: Option<&IntValueType>,
633 right: Option<&IntValueType>,
634 ) -> Result<Option<IntValueType>, SchemaError> {
635 match (left, right) {
636 (Some(l), Some(r)) => Ok(Some(IntValueType {
637 int_inverted_index: Self::merge_index_or_error(
638 l.int_inverted_index.as_ref(),
639 r.int_inverted_index.as_ref(),
640 &format!("key '{key}' int.int_inverted_index"),
641 )?,
642 })),
643 (Some(l), None) => Ok(Some(l.clone())),
644 (None, Some(r)) => Ok(Some(r.clone())),
645 (None, None) => Ok(None),
646 }
647 }
648
649 fn merge_bool_override(
650 key: &str,
651 left: Option<&BoolValueType>,
652 right: Option<&BoolValueType>,
653 ) -> Result<Option<BoolValueType>, SchemaError> {
654 match (left, right) {
655 (Some(l), Some(r)) => Ok(Some(BoolValueType {
656 bool_inverted_index: Self::merge_index_or_error(
657 l.bool_inverted_index.as_ref(),
658 r.bool_inverted_index.as_ref(),
659 &format!("key '{key}' bool.bool_inverted_index"),
660 )?,
661 })),
662 (Some(l), None) => Ok(Some(l.clone())),
663 (None, Some(r)) => Ok(Some(r.clone())),
664 (None, None) => Ok(None),
665 }
666 }
667
668 fn merge_float_list_override(
669 key: &str,
670 left: Option<&FloatListValueType>,
671 right: Option<&FloatListValueType>,
672 ) -> Result<Option<FloatListValueType>, SchemaError> {
673 match (left, right) {
674 (Some(l), Some(r)) => Ok(Some(FloatListValueType {
675 vector_index: Self::merge_index_or_error(
676 l.vector_index.as_ref(),
677 r.vector_index.as_ref(),
678 &format!("key '{key}' float_list.vector_index"),
679 )?,
680 })),
681 (Some(l), None) => Ok(Some(l.clone())),
682 (None, Some(r)) => Ok(Some(r.clone())),
683 (None, None) => Ok(None),
684 }
685 }
686
687 fn merge_sparse_vector_override(
688 key: &str,
689 left: Option<&SparseVectorValueType>,
690 right: Option<&SparseVectorValueType>,
691 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
692 match (left, right) {
693 (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
694 sparse_vector_index: Self::merge_index_or_error(
695 l.sparse_vector_index.as_ref(),
696 r.sparse_vector_index.as_ref(),
697 &format!("key '{key}' sparse_vector.sparse_vector_index"),
698 )?,
699 })),
700 (Some(l), None) => Ok(Some(l.clone())),
701 (None, Some(r)) => Ok(Some(r.clone())),
702 (None, None) => Ok(None),
703 }
704 }
705
706 fn merge_index_or_error<T: Clone + PartialEq>(
707 left: Option<&T>,
708 right: Option<&T>,
709 context: &str,
710 ) -> Result<Option<T>, SchemaError> {
711 match (left, right) {
712 (Some(l), Some(r)) => {
713 if l == r {
714 Ok(Some(l.clone()))
715 } else {
716 Err(SchemaError::InvalidSchema {
717 reason: format!("Conflicting configuration for {context}"),
718 })
719 }
720 }
721 (Some(l), None) => Ok(Some(l.clone())),
722 (None, Some(r)) => Ok(Some(r.clone())),
723 (None, None) => Ok(None),
724 }
725 }
726
727 fn merge_value_types(default: &ValueTypes, user: &ValueTypes) -> Result<ValueTypes, String> {
730 Ok(ValueTypes {
731 string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
732 float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
733 int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
734 boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
735 float_list: Self::merge_float_list_type(
736 default.float_list.as_ref(),
737 user.float_list.as_ref(),
738 )?,
739 sparse_vector: Self::merge_sparse_vector_type(
740 default.sparse_vector.as_ref(),
741 user.sparse_vector.as_ref(),
742 )?,
743 })
744 }
745
746 fn merge_string_type(
748 default: Option<&StringValueType>,
749 user: Option<&StringValueType>,
750 ) -> Result<Option<StringValueType>, String> {
751 match (default, user) {
752 (Some(default), Some(user)) => Ok(Some(StringValueType {
753 string_inverted_index: Self::merge_string_inverted_index_type(
754 default.string_inverted_index.as_ref(),
755 user.string_inverted_index.as_ref(),
756 )?,
757 fts_index: Self::merge_fts_index_type(
758 default.fts_index.as_ref(),
759 user.fts_index.as_ref(),
760 )?,
761 })),
762 (Some(default), None) => Ok(Some(default.clone())),
763 (None, Some(user)) => Ok(Some(user.clone())),
764 (None, None) => Ok(None),
765 }
766 }
767
768 fn merge_float_type(
770 default: Option<&FloatValueType>,
771 user: Option<&FloatValueType>,
772 ) -> Result<Option<FloatValueType>, String> {
773 match (default, user) {
774 (Some(default), Some(user)) => Ok(Some(FloatValueType {
775 float_inverted_index: Self::merge_float_inverted_index_type(
776 default.float_inverted_index.as_ref(),
777 user.float_inverted_index.as_ref(),
778 )?,
779 })),
780 (Some(default), None) => Ok(Some(default.clone())),
781 (None, Some(user)) => Ok(Some(user.clone())),
782 (None, None) => Ok(None),
783 }
784 }
785
786 fn merge_int_type(
788 default: Option<&IntValueType>,
789 user: Option<&IntValueType>,
790 ) -> Result<Option<IntValueType>, String> {
791 match (default, user) {
792 (Some(default), Some(user)) => Ok(Some(IntValueType {
793 int_inverted_index: Self::merge_int_inverted_index_type(
794 default.int_inverted_index.as_ref(),
795 user.int_inverted_index.as_ref(),
796 )?,
797 })),
798 (Some(default), None) => Ok(Some(default.clone())),
799 (None, Some(user)) => Ok(Some(user.clone())),
800 (None, None) => Ok(None),
801 }
802 }
803
804 fn merge_bool_type(
806 default: Option<&BoolValueType>,
807 user: Option<&BoolValueType>,
808 ) -> Result<Option<BoolValueType>, String> {
809 match (default, user) {
810 (Some(default), Some(user)) => Ok(Some(BoolValueType {
811 bool_inverted_index: Self::merge_bool_inverted_index_type(
812 default.bool_inverted_index.as_ref(),
813 user.bool_inverted_index.as_ref(),
814 )?,
815 })),
816 (Some(default), None) => Ok(Some(default.clone())),
817 (None, Some(user)) => Ok(Some(user.clone())),
818 (None, None) => Ok(None),
819 }
820 }
821
822 fn merge_float_list_type(
824 default: Option<&FloatListValueType>,
825 user: Option<&FloatListValueType>,
826 ) -> Result<Option<FloatListValueType>, String> {
827 match (default, user) {
828 (Some(default), Some(user)) => Ok(Some(FloatListValueType {
829 vector_index: Self::merge_vector_index_type(
830 default.vector_index.as_ref(),
831 user.vector_index.as_ref(),
832 )?,
833 })),
834 (Some(default), None) => Ok(Some(default.clone())),
835 (None, Some(user)) => Ok(Some(user.clone())),
836 (None, None) => Ok(None),
837 }
838 }
839
840 fn merge_sparse_vector_type(
842 default: Option<&SparseVectorValueType>,
843 user: Option<&SparseVectorValueType>,
844 ) -> Result<Option<SparseVectorValueType>, String> {
845 match (default, user) {
846 (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
847 sparse_vector_index: Self::merge_sparse_vector_index_type(
848 default.sparse_vector_index.as_ref(),
849 user.sparse_vector_index.as_ref(),
850 )?,
851 })),
852 (Some(default), None) => Ok(Some(default.clone())),
853 (None, Some(user)) => Ok(Some(user.clone())),
854 (None, None) => Ok(None),
855 }
856 }
857
858 fn merge_string_inverted_index_type(
860 default: Option<&StringInvertedIndexType>,
861 user: Option<&StringInvertedIndexType>,
862 ) -> Result<Option<StringInvertedIndexType>, String> {
863 match (default, user) {
864 (Some(_default), Some(user)) => {
865 Ok(Some(StringInvertedIndexType {
866 enabled: user.enabled, config: user.config.clone(), }))
869 }
870 (Some(default), None) => Ok(Some(default.clone())),
871 (None, Some(user)) => Ok(Some(user.clone())),
872 (None, None) => Ok(None),
873 }
874 }
875
876 fn merge_fts_index_type(
877 default: Option<&FtsIndexType>,
878 user: Option<&FtsIndexType>,
879 ) -> Result<Option<FtsIndexType>, String> {
880 match (default, user) {
881 (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
882 enabled: user.enabled,
883 config: user.config.clone(),
884 })),
885 (Some(default), None) => Ok(Some(default.clone())),
886 (None, Some(user)) => Ok(Some(user.clone())),
887 (None, None) => Ok(None),
888 }
889 }
890
891 fn merge_float_inverted_index_type(
892 default: Option<&FloatInvertedIndexType>,
893 user: Option<&FloatInvertedIndexType>,
894 ) -> Result<Option<FloatInvertedIndexType>, String> {
895 match (default, user) {
896 (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
897 enabled: user.enabled,
898 config: user.config.clone(),
899 })),
900 (Some(default), None) => Ok(Some(default.clone())),
901 (None, Some(user)) => Ok(Some(user.clone())),
902 (None, None) => Ok(None),
903 }
904 }
905
906 fn merge_int_inverted_index_type(
907 default: Option<&IntInvertedIndexType>,
908 user: Option<&IntInvertedIndexType>,
909 ) -> Result<Option<IntInvertedIndexType>, String> {
910 match (default, user) {
911 (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
912 enabled: user.enabled,
913 config: user.config.clone(),
914 })),
915 (Some(default), None) => Ok(Some(default.clone())),
916 (None, Some(user)) => Ok(Some(user.clone())),
917 (None, None) => Ok(None),
918 }
919 }
920
921 fn merge_bool_inverted_index_type(
922 default: Option<&BoolInvertedIndexType>,
923 user: Option<&BoolInvertedIndexType>,
924 ) -> Result<Option<BoolInvertedIndexType>, String> {
925 match (default, user) {
926 (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
927 enabled: user.enabled,
928 config: user.config.clone(),
929 })),
930 (Some(default), None) => Ok(Some(default.clone())),
931 (None, Some(user)) => Ok(Some(user.clone())),
932 (None, None) => Ok(None),
933 }
934 }
935
936 fn merge_vector_index_type(
937 default: Option<&VectorIndexType>,
938 user: Option<&VectorIndexType>,
939 ) -> Result<Option<VectorIndexType>, String> {
940 match (default, user) {
941 (Some(default), Some(user)) => {
942 Ok(Some(VectorIndexType {
943 enabled: user.enabled, config: Self::merge_vector_index_config(&default.config, &user.config)?,
945 }))
946 }
947 (Some(default), None) => Ok(Some(default.clone())),
948 (None, Some(user)) => Ok(Some(user.clone())),
949 (None, None) => Ok(None),
950 }
951 }
952
953 fn merge_sparse_vector_index_type(
954 default: Option<&SparseVectorIndexType>,
955 user: Option<&SparseVectorIndexType>,
956 ) -> Result<Option<SparseVectorIndexType>, String> {
957 match (default, user) {
958 (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
959 enabled: user.enabled,
960 config: Self::merge_sparse_vector_index_config(&default.config, &user.config)?,
961 })),
962 (Some(default), None) => Ok(Some(default.clone())),
963 (None, Some(user)) => Ok(Some(user.clone())),
964 (None, None) => Ok(None),
965 }
966 }
967
968 fn merge_vector_index_config(
970 default: &VectorIndexConfig,
971 user: &VectorIndexConfig,
972 ) -> Result<VectorIndexConfig, String> {
973 Ok(VectorIndexConfig {
974 space: user.space.clone().or(default.space.clone()),
975 embedding_function: user
976 .embedding_function
977 .clone()
978 .or(default.embedding_function.clone()),
979 source_key: user.source_key.clone().or(default.source_key.clone()),
980 hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
981 spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
982 })
983 }
984
985 fn merge_sparse_vector_index_config(
987 default: &SparseVectorIndexConfig,
988 user: &SparseVectorIndexConfig,
989 ) -> Result<SparseVectorIndexConfig, String> {
990 Ok(SparseVectorIndexConfig {
991 embedding_function: user
992 .embedding_function
993 .clone()
994 .or(default.embedding_function.clone()),
995 source_key: user.source_key.clone().or(default.source_key.clone()),
996 bm25: user.bm25.or(default.bm25),
997 })
998 }
999
1000 fn merge_hnsw_configs(
1002 default_hnsw: Option<&HnswIndexConfig>,
1003 user_hnsw: Option<&HnswIndexConfig>,
1004 ) -> Option<HnswIndexConfig> {
1005 match (default_hnsw, user_hnsw) {
1006 (Some(default), Some(user)) => Some(HnswIndexConfig {
1007 ef_construction: user.ef_construction.or(default.ef_construction),
1008 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1009 ef_search: user.ef_search.or(default.ef_search),
1010 num_threads: user.num_threads.or(default.num_threads),
1011 batch_size: user.batch_size.or(default.batch_size),
1012 sync_threshold: user.sync_threshold.or(default.sync_threshold),
1013 resize_factor: user.resize_factor.or(default.resize_factor),
1014 }),
1015 (Some(default), None) => Some(default.clone()),
1016 (None, Some(user)) => Some(user.clone()),
1017 (None, None) => None,
1018 }
1019 }
1020
1021 fn merge_spann_configs(
1023 default_spann: Option<&SpannIndexConfig>,
1024 user_spann: Option<&SpannIndexConfig>,
1025 ) -> Option<SpannIndexConfig> {
1026 match (default_spann, user_spann) {
1027 (Some(default), Some(user)) => Some(SpannIndexConfig {
1028 search_nprobe: user.search_nprobe.or(default.search_nprobe),
1029 search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1030 search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1031 nreplica_count: user.nreplica_count.or(default.nreplica_count),
1032 write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1033 write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1034 split_threshold: user.split_threshold.or(default.split_threshold),
1035 num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1036 initial_lambda: user.initial_lambda.or(default.initial_lambda),
1037 reassign_neighbor_count: user
1038 .reassign_neighbor_count
1039 .or(default.reassign_neighbor_count),
1040 merge_threshold: user.merge_threshold.or(default.merge_threshold),
1041 num_centers_to_merge_to: user
1042 .num_centers_to_merge_to
1043 .or(default.num_centers_to_merge_to),
1044 write_nprobe: user.write_nprobe.or(default.write_nprobe),
1045 ef_construction: user.ef_construction.or(default.ef_construction),
1046 ef_search: user.ef_search.or(default.ef_search),
1047 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1048 }),
1049 (Some(default), None) => Some(default.clone()),
1050 (None, Some(user)) => Some(user.clone()),
1051 (None, None) => None,
1052 }
1053 }
1054
1055 pub fn reconcile_with_collection_config(
1062 schema: InternalSchema,
1063 collection_config: InternalCollectionConfiguration,
1064 ) -> Result<InternalSchema, String> {
1065 if collection_config.is_default() {
1067 return Ok(schema);
1069 }
1070
1071 if !Self::is_schema_default(&schema) {
1073 return Err(
1075 "Cannot set both collection config and schema at the same time".to_string(),
1076 );
1077 }
1078
1079 Self::convert_collection_config_to_schema(collection_config)
1081 }
1082
1083 pub fn reconcile_schema_and_config(
1084 schema: Option<InternalSchema>,
1085 configuration: Option<InternalCollectionConfiguration>,
1086 ) -> Result<InternalSchema, String> {
1087 let reconciled_schema = Self::reconcile_with_defaults(schema)?;
1088 if let Some(config) = configuration {
1089 Self::reconcile_with_collection_config(reconciled_schema, config)
1090 } else {
1091 Ok(reconciled_schema)
1092 }
1093 }
1094
1095 fn is_schema_default(schema: &InternalSchema) -> bool {
1097 let default_hnsw = InternalSchema::new_default(KnnIndex::Hnsw);
1099 let default_spann = InternalSchema::new_default(KnnIndex::Spann);
1100
1101 schema == &default_hnsw || schema == &default_spann
1102 }
1103
1104 fn convert_collection_config_to_schema(
1106 collection_config: InternalCollectionConfiguration,
1107 ) -> Result<InternalSchema, String> {
1108 let mut schema = InternalSchema::new_default(KnnIndex::Spann); let vector_config = match collection_config.vector_index {
1113 VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
1114 space: Some(hnsw_config.space),
1115 embedding_function: collection_config.embedding_function,
1116 source_key: Some(DOCUMENT_KEY.to_string()), hnsw: Some(HnswIndexConfig {
1118 ef_construction: Some(hnsw_config.ef_construction),
1119 max_neighbors: Some(hnsw_config.max_neighbors),
1120 ef_search: Some(hnsw_config.ef_search),
1121 num_threads: Some(hnsw_config.num_threads),
1122 batch_size: Some(hnsw_config.batch_size),
1123 sync_threshold: Some(hnsw_config.sync_threshold),
1124 resize_factor: Some(hnsw_config.resize_factor),
1125 }),
1126 spann: None,
1127 },
1128 VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
1129 space: Some(spann_config.space),
1130 embedding_function: collection_config.embedding_function,
1131 source_key: Some(DOCUMENT_KEY.to_string()), hnsw: None,
1133 spann: Some(SpannIndexConfig {
1134 search_nprobe: Some(spann_config.search_nprobe),
1135 search_rng_factor: Some(spann_config.search_rng_factor),
1136 search_rng_epsilon: Some(spann_config.search_rng_epsilon),
1137 nreplica_count: Some(spann_config.nreplica_count),
1138 write_rng_factor: Some(spann_config.write_rng_factor),
1139 write_rng_epsilon: Some(spann_config.write_rng_epsilon),
1140 split_threshold: Some(spann_config.split_threshold),
1141 num_samples_kmeans: Some(spann_config.num_samples_kmeans),
1142 initial_lambda: Some(spann_config.initial_lambda),
1143 reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
1144 merge_threshold: Some(spann_config.merge_threshold),
1145 num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
1146 write_nprobe: Some(spann_config.write_nprobe),
1147 ef_construction: Some(spann_config.ef_construction),
1148 ef_search: Some(spann_config.ef_search),
1149 max_neighbors: Some(spann_config.max_neighbors),
1150 }),
1151 },
1152 };
1153
1154 if let Some(float_list) = &mut schema.defaults.float_list {
1157 if let Some(vector_index) = &mut float_list.vector_index {
1158 vector_index.config = vector_config.clone();
1159 }
1160 }
1161
1162 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1165 if let Some(float_list) = &mut embedding_types.float_list {
1166 if let Some(vector_index) = &mut float_list.vector_index {
1167 vector_index.config = vector_config;
1168 }
1169 }
1170 }
1171
1172 Ok(schema)
1173 }
1174
1175 pub fn is_metadata_type_index_enabled(
1177 &self,
1178 key: &str,
1179 value_type: MetadataValueType,
1180 ) -> Result<bool, SchemaError> {
1181 let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1182
1183 match value_type {
1184 MetadataValueType::Bool => match &v_type.boolean {
1185 Some(bool_type) => match &bool_type.bool_inverted_index {
1186 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1187 None => Err(SchemaError::MissingIndexConfiguration {
1188 key: key.to_string(),
1189 value_type: "bool".to_string(),
1190 }),
1191 },
1192 None => match &self.defaults.boolean {
1193 Some(bool_type) => match &bool_type.bool_inverted_index {
1194 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1195 None => Err(SchemaError::MissingIndexConfiguration {
1196 key: key.to_string(),
1197 value_type: "bool".to_string(),
1198 }),
1199 },
1200 None => Err(SchemaError::MissingIndexConfiguration {
1201 key: key.to_string(),
1202 value_type: "bool".to_string(),
1203 }),
1204 },
1205 },
1206 MetadataValueType::Int => match &v_type.int {
1207 Some(int_type) => match &int_type.int_inverted_index {
1208 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1209 None => Err(SchemaError::MissingIndexConfiguration {
1210 key: key.to_string(),
1211 value_type: "int".to_string(),
1212 }),
1213 },
1214 None => match &self.defaults.int {
1215 Some(int_type) => match &int_type.int_inverted_index {
1216 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1217 None => Err(SchemaError::MissingIndexConfiguration {
1218 key: key.to_string(),
1219 value_type: "int".to_string(),
1220 }),
1221 },
1222 None => Err(SchemaError::MissingIndexConfiguration {
1223 key: key.to_string(),
1224 value_type: "int".to_string(),
1225 }),
1226 },
1227 },
1228 MetadataValueType::Float => match &v_type.float {
1229 Some(float_type) => match &float_type.float_inverted_index {
1230 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1231 None => Err(SchemaError::MissingIndexConfiguration {
1232 key: key.to_string(),
1233 value_type: "float".to_string(),
1234 }),
1235 },
1236 None => match &self.defaults.float {
1237 Some(float_type) => match &float_type.float_inverted_index {
1238 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1239 None => Err(SchemaError::MissingIndexConfiguration {
1240 key: key.to_string(),
1241 value_type: "float".to_string(),
1242 }),
1243 },
1244 None => Err(SchemaError::MissingIndexConfiguration {
1245 key: key.to_string(),
1246 value_type: "float".to_string(),
1247 }),
1248 },
1249 },
1250 MetadataValueType::Str => match &v_type.string {
1251 Some(string_type) => match &string_type.string_inverted_index {
1252 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1253 None => Err(SchemaError::MissingIndexConfiguration {
1254 key: key.to_string(),
1255 value_type: "string".to_string(),
1256 }),
1257 },
1258 None => match &self.defaults.string {
1259 Some(string_type) => match &string_type.string_inverted_index {
1260 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1261 None => Err(SchemaError::MissingIndexConfiguration {
1262 key: key.to_string(),
1263 value_type: "string".to_string(),
1264 }),
1265 },
1266 None => Err(SchemaError::MissingIndexConfiguration {
1267 key: key.to_string(),
1268 value_type: "string".to_string(),
1269 }),
1270 },
1271 },
1272 MetadataValueType::SparseVector => match &v_type.sparse_vector {
1273 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1274 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1275 None => Err(SchemaError::MissingIndexConfiguration {
1276 key: key.to_string(),
1277 value_type: "sparse_vector".to_string(),
1278 }),
1279 },
1280 None => match &self.defaults.sparse_vector {
1281 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1282 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1283 None => Err(SchemaError::MissingIndexConfiguration {
1284 key: key.to_string(),
1285 value_type: "sparse_vector".to_string(),
1286 }),
1287 },
1288 None => Err(SchemaError::MissingIndexConfiguration {
1289 key: key.to_string(),
1290 value_type: "sparse_vector".to_string(),
1291 }),
1292 },
1293 },
1294 }
1295 }
1296
1297 pub fn is_metadata_where_indexing_enabled(
1298 &self,
1299 where_clause: &Where,
1300 ) -> Result<(), FilterValidationError> {
1301 match where_clause {
1302 Where::Composite(composite) => {
1303 for child in &composite.children {
1304 self.is_metadata_where_indexing_enabled(child)?;
1305 }
1306 Ok(())
1307 }
1308 Where::Document(_) => Ok(()),
1309 Where::Metadata(expression) => {
1310 let value_type = match &expression.comparison {
1311 MetadataComparison::Primitive(_, value) => value.value_type(),
1312 MetadataComparison::Set(_, set_value) => set_value.value_type(),
1313 };
1314 let is_enabled = self
1315 .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1316 .map_err(FilterValidationError::Schema)?;
1317 if !is_enabled {
1318 return Err(FilterValidationError::IndexingDisabled {
1319 key: expression.key.clone(),
1320 value_type,
1321 });
1322 }
1323 Ok(())
1324 }
1325 }
1326 }
1327
1328 pub fn is_knn_key_indexing_enabled(
1329 &self,
1330 key: &str,
1331 query: &QueryVector,
1332 ) -> Result<(), FilterValidationError> {
1333 match query {
1334 QueryVector::Sparse(_) => {
1335 let is_enabled = self
1336 .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1337 .map_err(FilterValidationError::Schema)?;
1338 if !is_enabled {
1339 return Err(FilterValidationError::IndexingDisabled {
1340 key: key.to_string(),
1341 value_type: MetadataValueType::SparseVector,
1342 });
1343 }
1344 Ok(())
1345 }
1346 QueryVector::Dense(_) => {
1347 Ok(())
1350 }
1351 }
1352 }
1353
1354 pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
1355 let value_types = self.keys.entry(key.to_string()).or_default();
1356 match value_type {
1357 MetadataValueType::Bool => {
1358 if value_types.boolean.is_none() {
1359 value_types.boolean = self.defaults.boolean.clone();
1360 return true;
1361 }
1362 }
1363 MetadataValueType::Int => {
1364 if value_types.int.is_none() {
1365 value_types.int = self.defaults.int.clone();
1366 return true;
1367 }
1368 }
1369 MetadataValueType::Float => {
1370 if value_types.float.is_none() {
1371 value_types.float = self.defaults.float.clone();
1372 return true;
1373 }
1374 }
1375 MetadataValueType::Str => {
1376 if value_types.string.is_none() {
1377 value_types.string = self.defaults.string.clone();
1378 return true;
1379 }
1380 }
1381 MetadataValueType::SparseVector => {
1382 if value_types.sparse_vector.is_none() {
1383 value_types.sparse_vector = self.defaults.sparse_vector.clone();
1384 return true;
1385 }
1386 }
1387 }
1388 false
1389 }
1390}
1391
1392#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1397#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1398#[serde(deny_unknown_fields)]
1399pub struct VectorIndexConfig {
1400 #[serde(skip_serializing_if = "Option::is_none")]
1402 pub space: Option<Space>,
1403 #[serde(skip_serializing_if = "Option::is_none")]
1405 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
1406 #[serde(skip_serializing_if = "Option::is_none")]
1408 pub source_key: Option<String>,
1409 #[serde(skip_serializing_if = "Option::is_none")]
1411 pub hnsw: Option<HnswIndexConfig>,
1412 #[serde(skip_serializing_if = "Option::is_none")]
1414 pub spann: Option<SpannIndexConfig>,
1415}
1416
1417#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1419#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1420#[serde(deny_unknown_fields)]
1421pub struct HnswIndexConfig {
1422 #[serde(skip_serializing_if = "Option::is_none")]
1423 pub ef_construction: Option<usize>,
1424 #[serde(skip_serializing_if = "Option::is_none")]
1425 pub max_neighbors: Option<usize>,
1426 #[serde(skip_serializing_if = "Option::is_none")]
1427 pub ef_search: Option<usize>,
1428 #[serde(skip_serializing_if = "Option::is_none")]
1429 pub num_threads: Option<usize>,
1430 #[serde(skip_serializing_if = "Option::is_none")]
1431 pub batch_size: Option<usize>,
1432 #[serde(skip_serializing_if = "Option::is_none")]
1433 pub sync_threshold: Option<usize>,
1434 #[serde(skip_serializing_if = "Option::is_none")]
1435 pub resize_factor: Option<f64>,
1436}
1437
1438#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1440#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1441#[serde(deny_unknown_fields)]
1442pub struct SpannIndexConfig {
1443 #[serde(skip_serializing_if = "Option::is_none")]
1444 pub search_nprobe: Option<u32>,
1445 #[serde(skip_serializing_if = "Option::is_none")]
1446 pub search_rng_factor: Option<f32>,
1447 #[serde(skip_serializing_if = "Option::is_none")]
1448 pub search_rng_epsilon: Option<f32>,
1449 #[serde(skip_serializing_if = "Option::is_none")]
1450 pub nreplica_count: Option<u32>,
1451 #[serde(skip_serializing_if = "Option::is_none")]
1452 pub write_rng_factor: Option<f32>,
1453 #[serde(skip_serializing_if = "Option::is_none")]
1454 pub write_rng_epsilon: Option<f32>,
1455 #[serde(skip_serializing_if = "Option::is_none")]
1456 pub split_threshold: Option<u32>,
1457 #[serde(skip_serializing_if = "Option::is_none")]
1458 pub num_samples_kmeans: Option<usize>,
1459 #[serde(skip_serializing_if = "Option::is_none")]
1460 pub initial_lambda: Option<f32>,
1461 #[serde(skip_serializing_if = "Option::is_none")]
1462 pub reassign_neighbor_count: Option<u32>,
1463 #[serde(skip_serializing_if = "Option::is_none")]
1464 pub merge_threshold: Option<u32>,
1465 #[serde(skip_serializing_if = "Option::is_none")]
1466 pub num_centers_to_merge_to: Option<u32>,
1467 #[serde(skip_serializing_if = "Option::is_none")]
1468 pub write_nprobe: Option<u32>,
1469 #[serde(skip_serializing_if = "Option::is_none")]
1470 pub ef_construction: Option<usize>,
1471 #[serde(skip_serializing_if = "Option::is_none")]
1472 pub ef_search: Option<usize>,
1473 #[serde(skip_serializing_if = "Option::is_none")]
1474 pub max_neighbors: Option<usize>,
1475}
1476
1477impl SpannIndexConfig {
1478 pub fn into_internal_configuration(
1479 self,
1480 vector_space: Option<Space>,
1481 ) -> InternalSpannConfiguration {
1482 InternalSpannConfiguration {
1483 search_nprobe: self.search_nprobe.unwrap_or(default_search_nprobe()),
1484 search_rng_factor: self
1485 .search_rng_factor
1486 .unwrap_or(default_search_rng_factor()),
1487 search_rng_epsilon: self
1488 .search_rng_epsilon
1489 .unwrap_or(default_search_rng_epsilon()),
1490 nreplica_count: self.nreplica_count.unwrap_or(default_nreplica_count()),
1491 write_rng_factor: self.write_rng_factor.unwrap_or(default_write_rng_factor()),
1492 write_rng_epsilon: self
1493 .write_rng_epsilon
1494 .unwrap_or(default_write_rng_epsilon()),
1495 split_threshold: self.split_threshold.unwrap_or(default_split_threshold()),
1496 num_samples_kmeans: self
1497 .num_samples_kmeans
1498 .unwrap_or(default_num_samples_kmeans()),
1499 initial_lambda: self.initial_lambda.unwrap_or(default_initial_lambda()),
1500 reassign_neighbor_count: self
1501 .reassign_neighbor_count
1502 .unwrap_or(default_reassign_neighbor_count()),
1503 merge_threshold: self.merge_threshold.unwrap_or(default_merge_threshold()),
1504 num_centers_to_merge_to: self
1505 .num_centers_to_merge_to
1506 .unwrap_or(default_num_centers_to_merge_to()),
1507 write_nprobe: self.write_nprobe.unwrap_or(default_write_nprobe()),
1508 ef_construction: self
1509 .ef_construction
1510 .unwrap_or(default_construction_ef_spann()),
1511 ef_search: self.ef_search.unwrap_or(default_search_ef_spann()),
1512 max_neighbors: self.max_neighbors.unwrap_or(default_m_spann()),
1513 space: vector_space.unwrap_or(default_space()),
1514 }
1515 }
1516}
1517
1518#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1519#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1520#[serde(deny_unknown_fields)]
1521pub struct SparseVectorIndexConfig {
1522 #[serde(skip_serializing_if = "Option::is_none")]
1524 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
1525 #[serde(skip_serializing_if = "Option::is_none")]
1527 pub source_key: Option<String>,
1528 #[serde(skip_serializing_if = "Option::is_none")]
1530 pub bm25: Option<bool>,
1531}
1532
1533#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1534#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1535#[serde(deny_unknown_fields)]
1536pub struct FtsIndexConfig {
1537 }
1539
1540#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1541#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1542#[serde(deny_unknown_fields)]
1543pub struct StringInvertedIndexConfig {
1544 }
1546
1547#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1548#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1549#[serde(deny_unknown_fields)]
1550pub struct IntInvertedIndexConfig {
1551 }
1553
1554#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1555#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1556#[serde(deny_unknown_fields)]
1557pub struct FloatInvertedIndexConfig {
1558 }
1560
1561#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1562#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
1563#[serde(deny_unknown_fields)]
1564pub struct BoolInvertedIndexConfig {
1565 }
1567
1568#[cfg(test)]
1569mod tests {
1570 use super::*;
1571 use crate::hnsw_configuration::Space;
1572 use crate::metadata::SparseVector;
1573 use crate::{InternalHnswConfiguration, InternalSpannConfiguration};
1574
1575 #[test]
1576 fn test_reconcile_with_defaults_none_user_schema() {
1577 let result = InternalSchema::reconcile_with_defaults(None).unwrap();
1579 let expected = InternalSchema::new_default(KnnIndex::Spann);
1580 assert_eq!(result, expected);
1581 }
1582
1583 #[test]
1584 fn test_reconcile_with_defaults_empty_user_schema() {
1585 let user_schema = InternalSchema {
1587 defaults: ValueTypes::default(),
1588 keys: HashMap::new(),
1589 };
1590
1591 let result = InternalSchema::reconcile_with_defaults(Some(user_schema)).unwrap();
1592 let expected = InternalSchema::new_default(KnnIndex::Spann);
1593 assert_eq!(result, expected);
1594 }
1595
1596 #[test]
1597 fn test_reconcile_with_defaults_user_overrides_string_enabled() {
1598 let mut user_schema = InternalSchema {
1600 defaults: ValueTypes::default(),
1601 keys: HashMap::new(),
1602 };
1603
1604 user_schema.defaults.string = Some(StringValueType {
1605 string_inverted_index: Some(StringInvertedIndexType {
1606 enabled: false, config: StringInvertedIndexConfig {},
1608 }),
1609 fts_index: None,
1610 });
1611
1612 let result = InternalSchema::reconcile_with_defaults(Some(user_schema)).unwrap();
1613
1614 assert!(
1616 !result
1617 .defaults
1618 .string
1619 .as_ref()
1620 .unwrap()
1621 .string_inverted_index
1622 .as_ref()
1623 .unwrap()
1624 .enabled
1625 );
1626 assert!(result.defaults.float.is_some());
1628 assert!(result.defaults.int.is_some());
1629 }
1630
1631 #[test]
1632 fn test_reconcile_with_defaults_user_overrides_vector_config() {
1633 let mut user_schema = InternalSchema {
1635 defaults: ValueTypes::default(),
1636 keys: HashMap::new(),
1637 };
1638
1639 user_schema.defaults.float_list = Some(FloatListValueType {
1640 vector_index: Some(VectorIndexType {
1641 enabled: true, config: VectorIndexConfig {
1643 space: Some(Space::L2), embedding_function: None, source_key: Some("custom_key".to_string()), hnsw: Some(HnswIndexConfig {
1647 ef_construction: Some(500), max_neighbors: None, ef_search: None, num_threads: None,
1651 batch_size: None,
1652 sync_threshold: None,
1653 resize_factor: None,
1654 }),
1655 spann: None,
1656 },
1657 }),
1658 });
1659
1660 let result = {
1662 let default_schema = InternalSchema::new_default(KnnIndex::Hnsw);
1663 let merged_defaults =
1664 InternalSchema::merge_value_types(&default_schema.defaults, &user_schema.defaults)
1665 .unwrap();
1666 let mut merged_keys = default_schema.keys.clone();
1667 for (key, user_value_types) in user_schema.keys {
1668 if let Some(default_value_types) = merged_keys.get(&key) {
1669 let merged_value_types =
1670 InternalSchema::merge_value_types(default_value_types, &user_value_types)
1671 .unwrap();
1672 merged_keys.insert(key, merged_value_types);
1673 } else {
1674 merged_keys.insert(key, user_value_types);
1675 }
1676 }
1677 InternalSchema {
1678 defaults: merged_defaults,
1679 keys: merged_keys,
1680 }
1681 };
1682
1683 let vector_config = &result
1684 .defaults
1685 .float_list
1686 .as_ref()
1687 .unwrap()
1688 .vector_index
1689 .as_ref()
1690 .unwrap()
1691 .config;
1692
1693 assert_eq!(vector_config.space, Some(Space::L2));
1695 assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
1696 assert_eq!(
1697 vector_config.hnsw.as_ref().unwrap().ef_construction,
1698 Some(500)
1699 );
1700
1701 assert_eq!(
1703 vector_config.embedding_function,
1704 Some(EmbeddingFunctionConfiguration::Legacy)
1705 );
1706 assert_eq!(
1708 vector_config.hnsw.as_ref().unwrap().max_neighbors,
1709 Some(default_m())
1710 );
1711 }
1712
1713 #[test]
1714 fn test_reconcile_with_defaults_keys() {
1715 let mut user_schema = InternalSchema {
1717 defaults: ValueTypes::default(),
1718 keys: HashMap::new(),
1719 };
1720
1721 let custom_key_types = ValueTypes {
1723 string: Some(StringValueType {
1724 fts_index: Some(FtsIndexType {
1725 enabled: true,
1726 config: FtsIndexConfig {},
1727 }),
1728 string_inverted_index: Some(StringInvertedIndexType {
1729 enabled: false,
1730 config: StringInvertedIndexConfig {},
1731 }),
1732 }),
1733 ..Default::default()
1734 };
1735 user_schema
1736 .keys
1737 .insert("custom_key".to_string(), custom_key_types);
1738
1739 let result = InternalSchema::reconcile_with_defaults(Some(user_schema)).unwrap();
1740
1741 assert!(result.keys.contains_key(EMBEDDING_KEY));
1743 assert!(result.keys.contains_key(DOCUMENT_KEY));
1744
1745 assert!(result.keys.contains_key("custom_key"));
1747 let custom_override = result.keys.get("custom_key").unwrap();
1748 assert!(
1749 custom_override
1750 .string
1751 .as_ref()
1752 .unwrap()
1753 .fts_index
1754 .as_ref()
1755 .unwrap()
1756 .enabled
1757 );
1758 }
1759
1760 #[test]
1761 fn test_reconcile_with_defaults_override_existing_key() {
1762 let mut user_schema = InternalSchema {
1764 defaults: ValueTypes::default(),
1765 keys: HashMap::new(),
1766 };
1767
1768 let embedding_override = ValueTypes {
1770 float_list: Some(FloatListValueType {
1771 vector_index: Some(VectorIndexType {
1772 enabled: false, config: VectorIndexConfig {
1774 space: Some(Space::Ip), embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
1776 source_key: Some("custom_embedding_key".to_string()),
1777 hnsw: None,
1778 spann: None,
1779 },
1780 }),
1781 }),
1782 ..Default::default()
1783 };
1784 user_schema
1785 .keys
1786 .insert(EMBEDDING_KEY.to_string(), embedding_override);
1787
1788 let result = InternalSchema::reconcile_with_defaults(Some(user_schema)).unwrap();
1789
1790 let embedding_config = result.keys.get(EMBEDDING_KEY).unwrap();
1791 let vector_config = &embedding_config
1792 .float_list
1793 .as_ref()
1794 .unwrap()
1795 .vector_index
1796 .as_ref()
1797 .unwrap();
1798
1799 assert!(!vector_config.enabled);
1801 assert_eq!(vector_config.config.space, Some(Space::Ip));
1802 assert_eq!(
1803 vector_config.config.source_key,
1804 Some("custom_embedding_key".to_string())
1805 );
1806 }
1807
1808 #[test]
1809 fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
1810 let mut schema = InternalSchema::new_default(KnnIndex::Hnsw);
1811 let before = schema.clone();
1812 let modified = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);
1813 assert!(!modified);
1814 assert_eq!(schema, before);
1815 }
1816
1817 #[test]
1818 fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
1819 let mut schema = InternalSchema::new_default(KnnIndex::Hnsw);
1820 assert!(!schema.keys.contains_key("custom_field"));
1821
1822 let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
1823
1824 assert!(modified);
1825 let entry = schema
1826 .keys
1827 .get("custom_field")
1828 .expect("expected new key override to be inserted");
1829 assert_eq!(entry.boolean, schema.defaults.boolean);
1830 assert!(entry.string.is_none());
1831 assert!(entry.int.is_none());
1832 assert!(entry.float.is_none());
1833 assert!(entry.float_list.is_none());
1834 assert!(entry.sparse_vector.is_none());
1835 }
1836
1837 #[test]
1838 fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
1839 let mut schema = InternalSchema::new_default(KnnIndex::Hnsw);
1840 let initial_len = schema.keys.len();
1841 schema.keys.insert(
1842 "custom_field".to_string(),
1843 ValueTypes {
1844 string: schema.defaults.string.clone(),
1845 ..Default::default()
1846 },
1847 );
1848
1849 let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
1850
1851 assert!(modified);
1852 assert_eq!(schema.keys.len(), initial_len + 1);
1853 let entry = schema
1854 .keys
1855 .get("custom_field")
1856 .expect("expected key override to exist after ensure call");
1857 assert!(entry.string.is_some());
1858 assert_eq!(entry.boolean, schema.defaults.boolean);
1859 }
1860
1861 #[test]
1862 fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
1863 let schema = InternalSchema::new_default(KnnIndex::Spann);
1864 let result = schema.is_knn_key_indexing_enabled(
1865 "custom_sparse",
1866 &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32])),
1867 );
1868
1869 let err = result.expect_err("expected indexing disabled error");
1870 match err {
1871 FilterValidationError::IndexingDisabled { key, value_type } => {
1872 assert_eq!(key, "custom_sparse");
1873 assert_eq!(value_type, crate::metadata::MetadataValueType::SparseVector);
1874 }
1875 other => panic!("unexpected error variant: {other:?}"),
1876 }
1877 }
1878
1879 #[test]
1880 fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
1881 let mut schema = InternalSchema::new_default(KnnIndex::Spann);
1882 schema.keys.insert(
1883 "sparse_enabled".to_string(),
1884 ValueTypes {
1885 sparse_vector: Some(SparseVectorValueType {
1886 sparse_vector_index: Some(SparseVectorIndexType {
1887 enabled: true,
1888 config: SparseVectorIndexConfig {
1889 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
1890 source_key: None,
1891 bm25: None,
1892 },
1893 }),
1894 }),
1895 ..Default::default()
1896 },
1897 );
1898
1899 let result = schema.is_knn_key_indexing_enabled(
1900 "sparse_enabled",
1901 &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32])),
1902 );
1903
1904 assert!(result.is_ok());
1905 }
1906
1907 #[test]
1908 fn test_is_knn_key_indexing_enabled_dense_succeeds() {
1909 let schema = InternalSchema::new_default(KnnIndex::Spann);
1910 let result = schema.is_knn_key_indexing_enabled(
1911 EMBEDDING_KEY,
1912 &QueryVector::Dense(vec![0.1_f32, 0.2_f32]),
1913 );
1914
1915 assert!(result.is_ok());
1916 }
1917
1918 #[test]
1919 fn test_merge_hnsw_configs_field_level() {
1920 let default_hnsw = HnswIndexConfig {
1922 ef_construction: Some(200),
1923 max_neighbors: Some(16),
1924 ef_search: Some(10),
1925 num_threads: Some(4),
1926 batch_size: Some(100),
1927 sync_threshold: Some(1000),
1928 resize_factor: Some(1.2),
1929 };
1930
1931 let user_hnsw = HnswIndexConfig {
1932 ef_construction: Some(300), max_neighbors: None, ef_search: Some(20), num_threads: None, batch_size: None, sync_threshold: Some(2000), resize_factor: None, };
1940
1941 let result =
1942 InternalSchema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();
1943
1944 assert_eq!(result.ef_construction, Some(300));
1946 assert_eq!(result.ef_search, Some(20));
1947 assert_eq!(result.sync_threshold, Some(2000));
1948
1949 assert_eq!(result.max_neighbors, Some(16));
1951 assert_eq!(result.num_threads, Some(4));
1952 assert_eq!(result.batch_size, Some(100));
1953 assert_eq!(result.resize_factor, Some(1.2));
1954 }
1955
1956 #[test]
1957 fn test_merge_spann_configs_field_level() {
1958 let default_spann = SpannIndexConfig {
1960 search_nprobe: Some(10),
1961 search_rng_factor: Some(2.0),
1962 search_rng_epsilon: Some(0.1),
1963 nreplica_count: Some(3),
1964 write_rng_factor: Some(1.5),
1965 write_rng_epsilon: Some(0.05),
1966 split_threshold: Some(1000),
1967 num_samples_kmeans: Some(100),
1968 initial_lambda: Some(0.5),
1969 reassign_neighbor_count: Some(50),
1970 merge_threshold: Some(500),
1971 num_centers_to_merge_to: Some(10),
1972 write_nprobe: Some(5),
1973 ef_construction: Some(200),
1974 ef_search: Some(10),
1975 max_neighbors: Some(16),
1976 };
1977
1978 let user_spann = SpannIndexConfig {
1979 search_nprobe: Some(20), search_rng_factor: None, search_rng_epsilon: Some(0.2), nreplica_count: None, write_rng_factor: None,
1984 write_rng_epsilon: None,
1985 split_threshold: Some(2000), num_samples_kmeans: None,
1987 initial_lambda: None,
1988 reassign_neighbor_count: None,
1989 merge_threshold: None,
1990 num_centers_to_merge_to: None,
1991 write_nprobe: None,
1992 ef_construction: None,
1993 ef_search: None,
1994 max_neighbors: None,
1995 };
1996
1997 let result =
1998 InternalSchema::merge_spann_configs(Some(&default_spann), Some(&user_spann)).unwrap();
1999
2000 assert_eq!(result.search_nprobe, Some(20));
2002 assert_eq!(result.search_rng_epsilon, Some(0.2));
2003 assert_eq!(result.split_threshold, Some(2000));
2004
2005 assert_eq!(result.search_rng_factor, Some(2.0));
2007 assert_eq!(result.nreplica_count, Some(3));
2008 assert_eq!(result.initial_lambda, Some(0.5));
2009 }
2010
2011 #[test]
2012 fn test_spann_index_config_into_internal_configuration() {
2013 let config = SpannIndexConfig {
2014 search_nprobe: Some(33),
2015 search_rng_factor: Some(1.2),
2016 search_rng_epsilon: None,
2017 nreplica_count: None,
2018 write_rng_factor: Some(1.5),
2019 write_rng_epsilon: None,
2020 split_threshold: Some(75),
2021 num_samples_kmeans: None,
2022 initial_lambda: Some(0.9),
2023 reassign_neighbor_count: Some(40),
2024 merge_threshold: None,
2025 num_centers_to_merge_to: Some(4),
2026 write_nprobe: Some(60),
2027 ef_construction: Some(180),
2028 ef_search: Some(170),
2029 max_neighbors: Some(32),
2030 };
2031
2032 let with_space = config
2033 .clone()
2034 .into_internal_configuration(Some(Space::Cosine));
2035 assert_eq!(with_space.space, Space::Cosine);
2036 assert_eq!(with_space.search_nprobe, 33);
2037 assert_eq!(with_space.search_rng_factor, 1.2);
2038 assert_eq!(with_space.search_rng_epsilon, default_search_rng_epsilon());
2039 assert_eq!(with_space.write_rng_factor, 1.5);
2040 assert_eq!(with_space.write_nprobe, 60);
2041 assert_eq!(with_space.ef_construction, 180);
2042 assert_eq!(with_space.ef_search, 170);
2043 assert_eq!(with_space.max_neighbors, 32);
2044 assert_eq!(with_space.merge_threshold, default_merge_threshold());
2045
2046 let default_space_config = config.into_internal_configuration(None);
2047 assert_eq!(default_space_config.space, default_space());
2048 }
2049
/// Exercises `InternalSchema::merge_string_type` for every combination of present/absent
/// default and user string value types.
#[test]
fn test_merge_string_type_combinations() {
    let base = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: true,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: Some(FtsIndexType {
            enabled: false,
            config: FtsIndexConfig {},
        }),
    };
    let overrides = StringValueType {
        string_inverted_index: Some(StringInvertedIndexType {
            enabled: false,
            config: StringInvertedIndexConfig {},
        }),
        fts_index: None,
    };

    // Shorthand for reading the inverted-index flag of a merged value type.
    let inverted_enabled =
        |merged: &StringValueType| merged.string_inverted_index.as_ref().unwrap().enabled;

    // Both sides present: the user's inverted-index flag is expected, and the FTS index
    // (absent on the user side) keeps the default's disabled flag.
    let both = InternalSchema::merge_string_type(Some(&base), Some(&overrides))
        .unwrap()
        .unwrap();
    assert!(!inverted_enabled(&both));
    assert!(!both.fts_index.as_ref().unwrap().enabled);

    // Only the default present.
    let default_only = InternalSchema::merge_string_type(Some(&base), None)
        .unwrap()
        .unwrap();
    assert!(inverted_enabled(&default_only));

    // Only the user value present.
    let user_only = InternalSchema::merge_string_type(None, Some(&overrides))
        .unwrap()
        .unwrap();
    assert!(!inverted_enabled(&user_only));

    // Neither present: nothing to merge.
    let neither = InternalSchema::merge_string_type(None, None).unwrap();
    assert!(neither.is_none());
}
2096
/// Merges a fully populated default vector index config with a sparse user config and checks
/// which side each resulting field comes from.
#[test]
fn test_merge_vector_index_config_comprehensive() {
    let base_hnsw = HnswIndexConfig {
        ef_construction: Some(200),
        max_neighbors: Some(16),
        ef_search: Some(10),
        num_threads: Some(4),
        batch_size: Some(100),
        sync_threshold: Some(1000),
        resize_factor: Some(1.2),
    };
    let base = VectorIndexConfig {
        space: Some(Space::Cosine),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_key".to_string()),
        hnsw: Some(base_hnsw),
        spann: None,
    };

    // The user only overrides `ef_construction` within HNSW ...
    let user_hnsw = HnswIndexConfig {
        ef_construction: Some(300),
        max_neighbors: None,
        ef_search: None,
        num_threads: None,
        batch_size: None,
        sync_threshold: None,
        resize_factor: None,
    };
    // ... and only `search_nprobe` within SPANN.
    let user_spann = SpannIndexConfig {
        search_nprobe: Some(15),
        search_rng_factor: None,
        search_rng_epsilon: None,
        nreplica_count: None,
        write_rng_factor: None,
        write_rng_epsilon: None,
        split_threshold: None,
        num_samples_kmeans: None,
        initial_lambda: None,
        reassign_neighbor_count: None,
        merge_threshold: None,
        num_centers_to_merge_to: None,
        write_nprobe: None,
        ef_construction: None,
        ef_search: None,
        max_neighbors: None,
    };
    let user = VectorIndexConfig {
        space: Some(Space::L2),
        embedding_function: None,
        source_key: Some("user_key".to_string()),
        hnsw: Some(user_hnsw),
        spann: Some(user_spann),
    };

    let merged = InternalSchema::merge_vector_index_config(&base, &user).unwrap();

    // User-provided values are expected to win.
    assert_eq!(merged.space, Some(Space::L2));
    // A missing user embedding function keeps the default one.
    assert_eq!(
        merged.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(merged.source_key, Some("user_key".to_string()));

    // HNSW is merged field by field.
    let merged_hnsw = merged.hnsw.as_ref().unwrap();
    assert_eq!(merged_hnsw.ef_construction, Some(300));
    assert_eq!(merged_hnsw.max_neighbors, Some(16));

    // SPANN only exists on the user side and is carried through.
    assert!(merged.spann.is_some());
    let merged_spann = merged.spann.as_ref().unwrap();
    assert_eq!(merged_spann.search_nprobe, Some(15));
}
2168
/// Merging sparse vector index configs: the user's `source_key` is expected in the result,
/// while an unset user embedding function keeps the default's.
#[test]
fn test_merge_sparse_vector_index_config() {
    let base = SparseVectorIndexConfig {
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
        source_key: Some("default_sparse_key".to_string()),
        bm25: None,
    };
    let overrides = SparseVectorIndexConfig {
        embedding_function: None,
        source_key: Some("user_sparse_key".to_string()),
        bm25: None,
    };

    let merged = InternalSchema::merge_sparse_vector_index_config(&base, &overrides).unwrap();

    let expected_source_key = Some("user_sparse_key".to_string());
    assert_eq!(merged.source_key, expected_source_key);

    let expected_embedding_function = Some(EmbeddingFunctionConfiguration::Legacy);
    assert_eq!(merged.embedding_function, expected_embedding_function);
}
2196
/// Merges a user schema (custom string/float-list defaults plus one key override) onto the
/// default HNSW schema and checks the nested results.
#[test]
fn test_complex_nested_merging_scenario() {
    // User defaults: inverted index off, FTS on, and a partially specified HNSW vector index.
    let user_defaults = ValueTypes {
        string: Some(StringValueType {
            string_inverted_index: Some(StringInvertedIndexType {
                enabled: false,
                config: StringInvertedIndexConfig {},
            }),
            fts_index: Some(FtsIndexType {
                enabled: true,
                config: FtsIndexConfig {},
            }),
        }),
        float_list: Some(FloatListValueType {
            vector_index: Some(VectorIndexType {
                enabled: true,
                config: VectorIndexConfig {
                    space: Some(Space::Ip),
                    embedding_function: None,
                    source_key: Some("custom_vector_key".to_string()),
                    hnsw: Some(HnswIndexConfig {
                        ef_construction: Some(400),
                        max_neighbors: Some(32),
                        ef_search: None,
                        num_threads: None,
                        batch_size: None,
                        sync_threshold: None,
                        resize_factor: None,
                    }),
                    spann: None,
                },
            }),
        }),
        ..ValueTypes::default()
    };

    // A single key override that only configures the FTS index.
    let mut user_keys = HashMap::new();
    user_keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            string: Some(StringValueType {
                fts_index: Some(FtsIndexType {
                    enabled: true,
                    config: FtsIndexConfig {},
                }),
                string_inverted_index: None,
            }),
            ..Default::default()
        },
    );

    let user_schema = InternalSchema {
        defaults: user_defaults,
        keys: user_keys,
    };

    // Hand-rolled merge: defaults are merged; keys are merged when the default schema already
    // has the key, otherwise the user's entry is inserted verbatim.
    let merged_schema = {
        let base_schema = InternalSchema::new_default(KnnIndex::Hnsw);
        let defaults =
            InternalSchema::merge_value_types(&base_schema.defaults, &user_schema.defaults)
                .unwrap();
        let mut keys = base_schema.keys.clone();
        for (name, user_types) in user_schema.keys {
            let entry = match keys.get(&name) {
                Some(base_types) => {
                    InternalSchema::merge_value_types(base_types, &user_types).unwrap()
                }
                None => user_types,
            };
            keys.insert(name, entry);
        }
        InternalSchema { defaults, keys }
    };

    // String defaults follow the user schema.
    let merged_string = merged_schema.defaults.string.as_ref().unwrap();
    assert!(!merged_string.string_inverted_index.as_ref().unwrap().enabled);
    assert!(merged_string.fts_index.as_ref().unwrap().enabled);

    // Vector index config: user values where given, default values elsewhere.
    let merged_vector = &merged_schema
        .defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap()
        .config;
    assert_eq!(merged_vector.space, Some(Space::Ip));
    assert_eq!(
        merged_vector.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(
        merged_vector.source_key,
        Some("custom_vector_key".to_string())
    );
    assert_eq!(
        merged_vector.hnsw.as_ref().unwrap().ef_construction,
        Some(400)
    );
    assert_eq!(merged_vector.hnsw.as_ref().unwrap().max_neighbors, Some(32));
    assert_eq!(
        merged_vector.hnsw.as_ref().unwrap().ef_search,
        Some(default_search_ef())
    );

    // Default key overrides survive and the custom one is added.
    assert!(merged_schema.keys.contains_key(EMBEDDING_KEY));
    assert!(merged_schema.keys.contains_key(DOCUMENT_KEY));
    assert!(merged_schema.keys.contains_key("custom_field"));

    // The custom key has no default counterpart, so it is kept exactly as supplied.
    let custom_string = merged_schema
        .keys
        .get("custom_field")
        .unwrap()
        .string
        .as_ref()
        .unwrap();
    assert!(custom_string.fts_index.as_ref().unwrap().enabled);
    assert!(custom_string.string_inverted_index.is_none());
}
2353
/// Reconciling the default HNSW schema with the default HNSW collection configuration is
/// expected to return the schema unchanged.
#[test]
fn test_reconcile_with_collection_config_default_config() {
    let default_schema = InternalSchema::new_default(KnnIndex::Hnsw);
    let default_config = InternalCollectionConfiguration::default_hnsw();

    let reconciled =
        InternalSchema::reconcile_with_collection_config(default_schema.clone(), default_config)
            .unwrap();

    assert_eq!(reconciled, default_schema);
}
2365
/// Reconciliation is expected to fail when both the schema and the collection configuration
/// have been changed from their defaults.
#[test]
fn test_reconcile_with_collection_config_both_non_default() {
    // Change the schema: replace the default string value type.
    let mut custom_schema = InternalSchema::new_default(KnnIndex::Hnsw);
    custom_schema.defaults.string = Some(StringValueType {
        fts_index: Some(FtsIndexType {
            enabled: true,
            config: FtsIndexConfig {},
        }),
        string_inverted_index: None,
    });

    // Change the collection configuration: use a different `ef_construction`.
    let mut custom_config = InternalCollectionConfiguration::default_hnsw();
    if let VectorIndexConfiguration::Hnsw(hnsw) = &mut custom_config.vector_index {
        hnsw.ef_construction = 500;
    }

    let outcome = InternalSchema::reconcile_with_collection_config(custom_schema, custom_config);
    assert!(outcome.is_err());
    assert_eq!(
        outcome.unwrap_err(),
        "Cannot set both collection config and schema at the same time"
    );
}
2392
/// A non-default HNSW collection configuration reconciled with the default schema is expected
/// to show up in the vector index of the embedding key override.
#[test]
fn test_reconcile_with_collection_config_hnsw_override() {
    let default_schema = InternalSchema::new_default(KnnIndex::Hnsw);

    let hnsw_settings = InternalHnswConfiguration {
        ef_construction: 300,
        max_neighbors: 32,
        ef_search: 50,
        num_threads: 8,
        batch_size: 200,
        sync_threshold: 2000,
        resize_factor: 1.5,
        space: Space::L2,
    };
    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(hnsw_settings),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };

    let reconciled =
        InternalSchema::reconcile_with_collection_config(default_schema, custom_config).unwrap();

    let embedding_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();

    // Top-level vector index settings.
    assert!(embedding_index.enabled);
    let index_config = &embedding_index.config;
    assert_eq!(index_config.space, Some(Space::L2));
    assert_eq!(
        index_config.embedding_function,
        Some(EmbeddingFunctionConfiguration::Legacy)
    );
    assert_eq!(index_config.source_key, Some(DOCUMENT_KEY.to_string()));

    // Every HNSW value from the collection configuration is carried over.
    let reconciled_hnsw = index_config.hnsw.as_ref().unwrap();
    assert_eq!(reconciled_hnsw.ef_construction, Some(300));
    assert_eq!(reconciled_hnsw.max_neighbors, Some(32));
    assert_eq!(reconciled_hnsw.ef_search, Some(50));
    assert_eq!(reconciled_hnsw.num_threads, Some(8));
    assert_eq!(reconciled_hnsw.batch_size, Some(200));
    assert_eq!(reconciled_hnsw.sync_threshold, Some(2000));
    assert_eq!(reconciled_hnsw.resize_factor, Some(1.5));

    // No SPANN section is expected for an HNSW configuration.
    assert!(index_config.spann.is_none());
}
2447
/// A non-default SPANN collection configuration reconciled with the default SPANN schema is
/// expected to show up in the vector index of the embedding key override.
#[test]
fn test_reconcile_with_collection_config_spann_override() {
    let default_schema = InternalSchema::new_default(KnnIndex::Spann);

    let spann_settings = InternalSpannConfiguration {
        search_nprobe: 20,
        search_rng_factor: 3.0,
        search_rng_epsilon: 0.2,
        nreplica_count: 5,
        write_rng_factor: 2.0,
        write_rng_epsilon: 0.1,
        split_threshold: 2000,
        num_samples_kmeans: 200,
        initial_lambda: 0.8,
        reassign_neighbor_count: 100,
        merge_threshold: 800,
        num_centers_to_merge_to: 20,
        write_nprobe: 10,
        ef_construction: 400,
        ef_search: 60,
        max_neighbors: 24,
        space: Space::Cosine,
    };
    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Spann(spann_settings),
        embedding_function: None,
    };

    let reconciled =
        InternalSchema::reconcile_with_collection_config(default_schema, custom_config).unwrap();

    let embedding_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();

    // Top-level vector index settings.
    assert!(embedding_index.enabled);
    let index_config = &embedding_index.config;
    assert_eq!(index_config.space, Some(Space::Cosine));
    assert_eq!(index_config.embedding_function, None);
    assert_eq!(index_config.source_key, Some(DOCUMENT_KEY.to_string()));

    // No HNSW section is expected for a SPANN configuration.
    assert!(index_config.hnsw.is_none());

    // Every SPANN value from the collection configuration is carried over.
    let reconciled_spann = index_config.spann.as_ref().unwrap();
    assert_eq!(reconciled_spann.search_nprobe, Some(20));
    assert_eq!(reconciled_spann.search_rng_factor, Some(3.0));
    assert_eq!(reconciled_spann.search_rng_epsilon, Some(0.2));
    assert_eq!(reconciled_spann.nreplica_count, Some(5));
    assert_eq!(reconciled_spann.write_rng_factor, Some(2.0));
    assert_eq!(reconciled_spann.write_rng_epsilon, Some(0.1));
    assert_eq!(reconciled_spann.split_threshold, Some(2000));
    assert_eq!(reconciled_spann.num_samples_kmeans, Some(200));
    assert_eq!(reconciled_spann.initial_lambda, Some(0.8));
    assert_eq!(reconciled_spann.reassign_neighbor_count, Some(100));
    assert_eq!(reconciled_spann.merge_threshold, Some(800));
    assert_eq!(reconciled_spann.num_centers_to_merge_to, Some(20));
    assert_eq!(reconciled_spann.write_nprobe, Some(10));
    assert_eq!(reconciled_spann.ef_construction, Some(400));
    assert_eq!(reconciled_spann.ef_search, Some(60));
    assert_eq!(reconciled_spann.max_neighbors, Some(24));
}
2517
/// Reconciling a non-default HNSW collection configuration is expected to update both the
/// schema-wide float-list defaults and the embedding key override.
#[test]
fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
    let default_schema = InternalSchema::new_default(KnnIndex::Hnsw);

    let custom_config = InternalCollectionConfiguration {
        vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            ef_construction: 300,
            max_neighbors: 32,
            ef_search: 50,
            num_threads: 8,
            batch_size: 200,
            sync_threshold: 2000,
            resize_factor: 1.5,
            space: Space::L2,
        }),
        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
    };

    let reconciled =
        InternalSchema::reconcile_with_collection_config(default_schema, custom_config).unwrap();

    // Both vector indexes must carry the same configuration; only `enabled` differs.
    let assert_reconciled_index = |index: &VectorIndexType, expect_enabled: bool| {
        assert_eq!(index.enabled, expect_enabled);
        assert_eq!(index.config.space, Some(Space::L2));
        assert_eq!(
            index.config.embedding_function,
            Some(EmbeddingFunctionConfiguration::Legacy)
        );
        assert_eq!(index.config.source_key, Some(DOCUMENT_KEY.to_string()));
        let hnsw = index.config.hnsw.as_ref().unwrap();
        assert_eq!(hnsw.ef_construction, Some(300));
        assert_eq!(hnsw.max_neighbors, Some(32));
    };

    // Schema-wide defaults: configured but disabled.
    let defaults_index = reconciled
        .defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert_reconciled_index(defaults_index, false);

    // Embedding key override: configured and enabled.
    let embedding_index = reconciled
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert_reconciled_index(embedding_index, true);
}
2593
/// `InternalSchema::is_schema_default` is expected to accept the default HNSW and SPANN
/// schemas and to reject empty, modified, or extended schemas.
#[test]
fn test_is_schema_default() {
    // Freshly built defaults for either index kind count as default.
    let hnsw_default = InternalSchema::new_default(KnnIndex::Hnsw);
    assert!(InternalSchema::is_schema_default(&hnsw_default));

    let spann_default = InternalSchema::new_default(KnnIndex::Spann);
    assert!(InternalSchema::is_schema_default(&spann_default));

    // A schema with no defaults and no keys is not the default schema.
    let blank = InternalSchema {
        defaults: ValueTypes::default(),
        keys: HashMap::new(),
    };
    assert!(!InternalSchema::is_schema_default(&blank));

    // Flipping one flag in the defaults makes the schema non-default.
    let mut tweaked = InternalSchema::new_default(KnnIndex::Hnsw);
    if let Some(inverted_index) = tweaked
        .defaults
        .string
        .as_mut()
        .and_then(|string_type| string_type.string_inverted_index.as_mut())
    {
        inverted_index.enabled = false;
    }
    assert!(!InternalSchema::is_schema_default(&tweaked));

    // So does an additional key override.
    let mut extended = InternalSchema::new_default(KnnIndex::Hnsw);
    extended
        .keys
        .insert("custom_key".to_string(), ValueTypes::default());
    assert!(!InternalSchema::is_schema_default(&extended));
}
2629
/// Merging two schemas that override the same key with different value types is expected to
/// keep both value types on the merged key.
#[test]
fn test_add_merges_keys_by_value_type() {
    // Left-hand schema: string override for `custom_field`.
    let mut left = InternalSchema::new_default(KnnIndex::Hnsw);
    left.keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            string: Some(StringValueType {
                string_inverted_index: Some(StringInvertedIndexType {
                    enabled: true,
                    config: StringInvertedIndexConfig {},
                }),
                fts_index: None,
            }),
            ..Default::default()
        },
    );

    // Right-hand schema: float override for the same key.
    let mut right = InternalSchema::new_default(KnnIndex::Hnsw);
    right.keys.insert(
        "custom_field".to_string(),
        ValueTypes {
            float: Some(FloatValueType {
                float_inverted_index: Some(FloatInvertedIndexType {
                    enabled: true,
                    config: FloatInvertedIndexConfig {},
                }),
            }),
            ..Default::default()
        },
    );

    let combined = left.merge(&right).unwrap();
    let combined_field = combined.keys.get("custom_field").unwrap();

    assert!(combined_field.string.is_some());
    assert!(combined_field.float.is_some());

    let string_index = combined_field
        .string
        .as_ref()
        .unwrap()
        .string_inverted_index
        .as_ref()
        .unwrap();
    assert!(string_index.enabled);

    let float_index = combined_field
        .float
        .as_ref()
        .unwrap()
        .float_inverted_index
        .as_ref()
        .unwrap();
    assert!(float_index.enabled);
}
2688
/// Merging schemas whose defaults differ is expected to fail with `SchemaError::InvalidSchema`.
#[test]
fn test_add_rejects_different_defaults() {
    let left = InternalSchema::new_default(KnnIndex::Hnsw);

    // Make the right-hand defaults diverge by disabling the string inverted index.
    let mut right = InternalSchema::new_default(KnnIndex::Hnsw);
    if let Some(inverted_index) = right
        .defaults
        .string
        .as_mut()
        .and_then(|string_type| string_type.string_inverted_index.as_mut())
    {
        inverted_index.enabled = false;
    }

    let failure = left.merge(&right).unwrap_err();
    if let SchemaError::InvalidSchema { reason } = failure {
        assert_eq!(reason, "Cannot merge schemas with differing defaults")
    } else {
        panic!("Expected InvalidSchema error")
    }
}
2708
/// Merging schemas that configure the same value type of the same key differently is expected
/// to fail with an `InvalidSchema` error mentioning the conflict.
#[test]
fn test_add_detects_conflicting_value_type_configuration() {
    // Builds a string override whose inverted index has the requested `enabled` flag.
    let string_override = |enabled: bool| ValueTypes {
        string: Some(StringValueType {
            string_inverted_index: Some(StringInvertedIndexType {
                enabled,
                config: StringInvertedIndexConfig {},
            }),
            fts_index: None,
        }),
        ..Default::default()
    };

    let mut left = InternalSchema::new_default(KnnIndex::Hnsw);
    left.keys
        .insert("custom_field".to_string(), string_override(true));

    let mut right = InternalSchema::new_default(KnnIndex::Hnsw);
    right
        .keys
        .insert("custom_field".to_string(), string_override(false));

    let failure = left.merge(&right).unwrap_err();
    if let SchemaError::InvalidSchema { reason } = failure {
        assert!(reason.contains("Conflicting configuration"));
    } else {
        panic!("Expected InvalidSchema error")
    }
}
2750
/// Deserializing the legacy spellings (`key_overrides`, `#string`, `$fts_index`, ...) is
/// expected to yield the same schema as the current spellings, and serialization is expected
/// to emit only the current spellings.
#[test]
fn test_backward_compatibility_aliases() {
    // Legacy format: `#`-prefixed value types, `$`-prefixed index names, `key_overrides`.
    let legacy_json = r###"{
        "defaults": {
            "#string": {
                "$fts_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "#int": {
                "$int_inverted_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "#float_list": {
                "$vector_index": {
                    "enabled": true,
                    "config": {
                        "spann": {
                            "search_nprobe": 10
                        }
                    }
                }
            }
        },
        "key_overrides": {
            "#document": {
                "#string": {
                    "$fts_index": {
                        "enabled": false,
                        "config": {}
                    }
                }
            }
        }
    }"###;
    let legacy_schema: InternalSchema = serde_json::from_str(legacy_json).unwrap();

    // Current format: plain names and `keys`.
    let current_json = r###"{
        "defaults": {
            "string": {
                "fts_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "int": {
                "int_inverted_index": {
                    "enabled": true,
                    "config": {}
                }
            },
            "float_list": {
                "vector_index": {
                    "enabled": true,
                    "config": {
                        "spann": {
                            "search_nprobe": 10
                        }
                    }
                }
            }
        },
        "keys": {
            "#document": {
                "string": {
                    "fts_index": {
                        "enabled": false,
                        "config": {}
                    }
                }
            }
        }
    }"###;
    let current_schema: InternalSchema = serde_json::from_str(current_json).unwrap();

    // Both spellings must describe the same schema.
    assert_eq!(legacy_schema, current_schema);

    // Spot-check the defaults parsed from the legacy document.
    let parsed_defaults = &legacy_schema.defaults;

    assert!(parsed_defaults.string.is_some());
    let default_string = parsed_defaults.string.as_ref().unwrap();
    assert!(default_string.fts_index.is_some());
    assert!(default_string.fts_index.as_ref().unwrap().enabled);

    assert!(parsed_defaults.int.is_some());
    let default_int = parsed_defaults.int.as_ref().unwrap();
    assert!(default_int.int_inverted_index.is_some());

    assert!(parsed_defaults.float_list.is_some());
    let default_float_list = parsed_defaults.float_list.as_ref().unwrap();
    assert!(default_float_list.vector_index.is_some());

    // Spot-check the document key override parsed from `key_overrides`.
    assert!(legacy_schema.keys.contains_key(DOCUMENT_KEY));
    let document_override = legacy_schema.keys.get(DOCUMENT_KEY).unwrap();
    assert!(document_override.string.is_some());
    let document_string = document_override.string.as_ref().unwrap();
    assert!(!document_string.fts_index.as_ref().unwrap().enabled);

    let serialized = serde_json::to_string(&legacy_schema).unwrap();

    // Current spellings must appear in the output ...
    let current_names = [
        r#""keys":"#,
        r#""string":"#,
        r#""fts_index":"#,
        r#""int_inverted_index":"#,
        r#""vector_index":"#,
    ];
    for name in &current_names {
        assert!(serialized.contains(*name));
    }

    // ... and legacy spellings must not.
    let legacy_names = [
        r#""key_overrides":"#,
        r###""#string":"###,
        r###""$fts_index":"###,
        r###""$int_inverted_index":"###,
        r###""$vector_index":"###,
    ];
    for name in &legacy_names {
        assert!(!serialized.contains(*name));
    }
}
2907}