1use chroma_error::{ChromaError, ErrorCodes};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::sync::{Arc, LazyLock};
6use thiserror::Error;
7use validator::Validate;
8
9use crate::chroma_proto;
10use crate::collection_configuration::{
11 EmbeddingFunctionConfiguration, InternalCollectionConfiguration,
12 UpdateVectorIndexConfiguration, VectorIndexConfiguration,
13};
14use crate::hnsw_configuration::Space;
15use crate::metadata::{MetadataComparison, MetadataValueType, Where};
16use crate::operator::QueryVector;
17use crate::{
18 default_batch_size, default_center_drift_threshold, default_construction_ef,
19 default_construction_ef_spann, default_initial_lambda, default_m, default_m_spann,
20 default_merge_threshold, default_nreplica_count, default_num_centers_to_merge_to,
21 default_num_samples_kmeans, default_num_threads, default_reassign_neighbor_count,
22 default_resize_factor, default_search_ef, default_search_ef_spann, default_search_nprobe,
23 default_search_rng_epsilon, default_search_rng_factor, default_space, default_split_threshold,
24 default_sync_threshold, default_write_nprobe, default_write_rng_epsilon,
25 default_write_rng_factor, ConversionError, HnswParametersFromSegmentError,
26 InternalHnswConfiguration, InternalSpannConfiguration, InternalUpdateCollectionConfiguration,
27 KnnIndex, Segment, UpdateCollectionConfiguration, CHROMA_KEY,
28};
29
30impl ChromaError for SchemaError {
31 fn code(&self) -> ErrorCodes {
32 match self {
33 SchemaError::MissingIndexConfiguration { .. } => ErrorCodes::Internal,
36 SchemaError::InvalidSchema { .. } => ErrorCodes::Internal,
37 SchemaError::DefaultsMismatch => ErrorCodes::Internal,
40 SchemaError::ConfigurationConflict { .. } => ErrorCodes::Internal,
41 SchemaError::InvalidConfigurationUpdate { .. } => ErrorCodes::Internal,
42
43 SchemaError::InvalidUserInput { .. } => ErrorCodes::InvalidArgument,
46 SchemaError::ConfigAndSchemaConflict => ErrorCodes::InvalidArgument,
47 SchemaError::InvalidHnswConfig(_) => ErrorCodes::InvalidArgument,
48 SchemaError::InvalidSpannConfig(_) => ErrorCodes::InvalidArgument,
49 SchemaError::Builder(e) => e.code(),
50 }
51 }
52}
53
/// Errors produced while validating, reconciling, merging or updating a [`Schema`].
///
/// The `ChromaError` implementation for this type decides which variants are
/// reported as `Internal` and which as `InvalidArgument`.
#[derive(Debug, Error)]
pub enum SchemaError {
    /// A metadata key / value-type pair has no index configuration in the schema.
    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
    MissingIndexConfiguration { key: String, value_type: String },
    /// Schema reconciliation failed; `reason` carries the details.
    #[error("Schema reconciliation failed: {reason}")]
    InvalidSchema { reason: String },
    /// Both a collection configuration and a schema were supplied at once.
    #[error("Cannot set both collection config and schema simultaneously")]
    ConfigAndSchemaConflict,
    /// Two schemas being merged have different `defaults`.
    #[error("Cannot merge schemas with differing defaults")]
    DefaultsMismatch,
    /// Conflicting configuration was found; `context` names where.
    #[error("Conflicting configuration for {context}")]
    ConfigurationConflict { context: String },
    /// HNSW configuration failed `validator` validation.
    #[error("Invalid HNSW configuration: {0}")]
    InvalidHnswConfig(validator::ValidationErrors),
    /// SPANN configuration failed `validator` validation.
    #[error("Invalid SPANN configuration: {0}")]
    InvalidSpannConfig(validator::ValidationErrors),
    /// Schema input was rejected; `reason` carries the details.
    #[error("Invalid schema input: {reason}")]
    InvalidUserInput { reason: String },
    /// A configuration update could not be applied to the schema.
    #[error("Invalid configuration update: {message}")]
    InvalidConfigurationUpdate { message: String },
    /// An error raised by the schema builder, displayed transparently.
    #[error(transparent)]
    Builder(#[from] SchemaBuilderError),
}
77
/// Errors raised by the schema builder when an index is created or deleted
/// in a way the builder does not allow.
///
/// Every variant is reported as `InvalidArgument` by this type's
/// `ChromaError` implementation.
#[derive(Debug, Error)]
pub enum SchemaBuilderError {
    /// A vector index was requested on a specific key instead of globally.
    #[error("Vector index must be configured globally using create_index(None, config), not on specific key '{key}'")]
    VectorIndexMustBeGlobal { key: String },
    /// An attempt was made to modify a system-managed key.
    #[error("Cannot modify special key '{key}' - it is managed automatically by the system.")]
    SpecialKeyModificationNotAllowed { key: String },
    /// A sparse vector index was requested without naming a key.
    #[error("Sparse vector index requires a specific key. Use create_index(Some(\"key_name\"), config) instead of create_index(None, config)")]
    SparseVectorRequiresKey,
    /// A second sparse vector index was requested; `existing_key` already has one.
    #[error("Only one sparse vector index allowed per collection. Key '{existing_key}' already has a sparse vector index. Remove it first or use that key.")]
    MultipleSparseVectorIndexes { existing_key: String },
    /// Deleting the vector index was requested.
    #[error("Vector index deletion not supported. The vector index is always enabled on #embedding. To disable vector search, disable the collection instead.")]
    VectorIndexDeletionNotSupported,
    /// Deleting a sparse vector index was requested.
    #[error("Sparse vector index deletion not supported yet. Sparse vector indexes cannot be removed once created.")]
    SparseVectorIndexDeletionNotSupported,
    /// A user-supplied key starts with the reserved `#` prefix.
    #[error(
        "Key '{key}' cannot begin with '#'. Keys starting with '#' are reserved for system use."
    )]
    ReservedKeyPrefix { key: String },
    /// FTS index deletion was requested on a key other than `#document`.
    #[error("FTS index deletion is only supported on #document key.")]
    FtsIndexDeletionOnlyOnDocument,
    /// FTS index creation was requested on a key other than `#document`.
    #[error("FTS index can only be enabled on #document key. Use create_index(Some(\"#document\"), FtsIndexConfig) to enable FTS.")]
    FtsIndexOnlyOnDocument,
}
101
/// Errors raised when a filter cannot be served by the indexes a schema enables.
#[derive(Debug, Error)]
pub enum FilterValidationError {
    /// The filter targets a metadata key / value type whose indexing is disabled.
    #[error(
        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
    )]
    IndexingDisabled {
        key: String,
        value_type: MetadataValueType,
    },
    /// The filter uses full-text search while FTS indexing is disabled.
    #[error("Cannot filter using full-text search because FTS indexing is disabled")]
    FtsDisabled,
    /// A schema error encountered during validation, displayed transparently.
    #[error(transparent)]
    Schema(#[from] SchemaError),
}
116
117impl ChromaError for SchemaBuilderError {
118 fn code(&self) -> ErrorCodes {
119 ErrorCodes::InvalidArgument
120 }
121}
122
123impl ChromaError for FilterValidationError {
124 fn code(&self) -> ErrorCodes {
125 match self {
126 FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
127 FilterValidationError::FtsDisabled => ErrorCodes::InvalidArgument,
128 FilterValidationError::Schema(_) => ErrorCodes::Internal,
129 }
130 }
131}
132
// String names of the value types a schema can describe.
pub const STRING_VALUE_NAME: &str = "string";
pub const INT_VALUE_NAME: &str = "int";
pub const BOOL_VALUE_NAME: &str = "bool";
pub const FLOAT_VALUE_NAME: &str = "float";
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// String names of the index kinds that can be attached to a value type.
pub const FTS_INDEX_NAME: &str = "fts_index";
pub const VECTOR_INDEX_NAME: &str = "vector_index";
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

// Keys used in `Schema::keys` for the document and embedding entries.
pub const DOCUMENT_KEY: &str = "#document";
pub const EMBEDDING_KEY: &str = "#embedding";
158
/// Lazily compiled pattern used by `Cmek::validate_pattern` for GCP resources.
///
/// It accepts strings of the form
/// `projects/<..>/locations/<..>/keyRings/<..>/cryptoKeys/<..>`. Each segment
/// is matched with `.+`, so a segment only needs to be non-empty and may
/// itself contain `/`.
static CMEK_GCP_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^projects/.+/locations/.+/keyRings/.+/cryptoKeys/.+$")
        .expect("The CMEK pattern for GCP should be valid")
});
164
/// Customer-managed encryption key (CMEK) reference, tagged by provider.
///
/// Variant names are serialized in `snake_case` (e.g. `gcp`).
#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case")]
pub enum Cmek {
    /// GCP key, identified by its resource string held behind an `Arc`.
    Gcp(Arc<String>),
}
177
178impl Cmek {
179 pub fn gcp(resource: String) -> Self {
189 Cmek::Gcp(Arc::new(resource))
190 }
191
192 pub fn validate_pattern(&self) -> bool {
198 match self {
199 Cmek::Gcp(resource) => CMEK_GCP_RE.is_match(resource),
200 }
201 }
202}
203
204impl TryFrom<chroma_proto::Cmek> for Cmek {
205 type Error = ConversionError;
206
207 fn try_from(proto: chroma_proto::Cmek) -> Result<Self, Self::Error> {
208 match proto.provider {
209 Some(chroma_proto::cmek::Provider::Gcp(resource)) => Ok(Cmek::gcp(resource)),
210 None => Err(ConversionError::DecodeError),
211 }
212 }
213}
214
215impl From<Cmek> for chroma_proto::Cmek {
216 fn from(cmek: Cmek) -> Self {
217 match cmek {
218 Cmek::Gcp(resource) => chroma_proto::Cmek {
219 provider: Some(chroma_proto::cmek::Provider::Gcp((*resource).clone())),
220 },
221 }
222 }
223}
224
/// Index configuration for a collection: per-value-type defaults plus
/// per-key overrides.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct Schema {
    /// Index settings for each value type when no entry in `keys` applies.
    pub defaults: ValueTypes,
    /// Per-key index settings. Serialized as `keys`; `key_overrides` is also
    /// accepted when deserializing.
    #[serde(rename = "keys", alias = "key_overrides")]
    pub keys: HashMap<String, ValueTypes>,
    /// Optional customer-managed encryption key; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[cfg_attr(feature = "utoipa", schema(value_type = Option<Object>))]
    pub cmek: Option<Cmek>,
    /// Optional attached-function identifier; omitted from output when `None`.
    // NOTE(review): the exact meaning of this id is not visible in this file.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_attached_function_id: Option<String>,
}
250
251impl Schema {
252 pub fn update(&mut self, configuration: &InternalUpdateCollectionConfiguration) {
253 if let Some(vector_update) = &configuration.vector_index {
254 if let Some(default_vector_index) = self.defaults_vector_index_mut() {
255 Self::apply_vector_index_update(default_vector_index, vector_update);
256 }
257 if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
258 Self::apply_vector_index_update(embedding_vector_index, vector_update);
259 }
260 }
261
262 if let Some(embedding_function) = configuration.embedding_function.as_ref() {
263 if let Some(default_vector_index) = self.defaults_vector_index_mut() {
264 default_vector_index.config.embedding_function = Some(embedding_function.clone());
265 }
266 if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
267 embedding_vector_index.config.embedding_function = Some(embedding_function.clone());
268 }
269 }
270 }
271
272 pub fn apply_update_configuration(
282 &mut self,
283 config: &UpdateCollectionConfiguration,
284 ) -> Result<(), SchemaError> {
285 if config.hnsw.is_some() {
287 return Err(SchemaError::InvalidConfigurationUpdate {
288 message: "HNSW configuration updates are not supported".to_string(),
289 });
290 }
291
292 if let Some(ref spann_update) = config.spann {
294 let defaults_spann = self
295 .defaults_vector_index_mut()
296 .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
297 message: "schema missing defaults.float_list.vector_index".to_string(),
298 })?
299 .config
300 .spann
301 .as_mut()
302 .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
303 message: "schema missing defaults spann config".to_string(),
304 })?;
305
306 if let Some(search_nprobe) = spann_update.search_nprobe {
307 defaults_spann.search_nprobe = Some(search_nprobe);
308 }
309 if let Some(ef_search) = spann_update.ef_search {
310 defaults_spann.ef_search = Some(ef_search);
311 }
312
313 let embedding_spann = self
314 .embedding_vector_index_mut()
315 .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
316 message: "schema missing keys[#embedding].float_list.vector_index".to_string(),
317 })?
318 .config
319 .spann
320 .as_mut()
321 .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
322 message: "schema missing #embedding spann config".to_string(),
323 })?;
324
325 if let Some(search_nprobe) = spann_update.search_nprobe {
326 embedding_spann.search_nprobe = Some(search_nprobe);
327 }
328 if let Some(ef_search) = spann_update.ef_search {
329 embedding_spann.ef_search = Some(ef_search);
330 }
331 }
332
333 if let Some(ref ef) = config.embedding_function {
335 self.defaults_vector_index_mut()
336 .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
337 message: "schema missing defaults.float_list.vector_index".to_string(),
338 })?
339 .config
340 .embedding_function = Some(ef.clone());
341
342 self.embedding_vector_index_mut()
343 .ok_or_else(|| SchemaError::InvalidConfigurationUpdate {
344 message: "schema missing keys[#embedding].float_list.vector_index".to_string(),
345 })?
346 .config
347 .embedding_function = Some(ef.clone());
348 }
349
350 Ok(())
351 }
352
353 fn defaults_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
354 self.defaults
355 .float_list
356 .as_mut()
357 .and_then(|float_list| float_list.vector_index.as_mut())
358 }
359
360 fn embedding_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
361 self.keys
362 .get_mut(EMBEDDING_KEY)
363 .and_then(|value_types| value_types.float_list.as_mut())
364 .and_then(|float_list| float_list.vector_index.as_mut())
365 }
366
367 fn apply_vector_index_update(
368 vector_index: &mut VectorIndexType,
369 update: &UpdateVectorIndexConfiguration,
370 ) {
371 match update {
372 UpdateVectorIndexConfiguration::Hnsw(Some(hnsw_update)) => {
373 if let Some(hnsw_config) = vector_index.config.hnsw.as_mut() {
374 if let Some(ef_search) = hnsw_update.ef_search {
375 hnsw_config.ef_search = Some(ef_search);
376 }
377 if let Some(max_neighbors) = hnsw_update.max_neighbors {
378 hnsw_config.max_neighbors = Some(max_neighbors);
379 }
380 if let Some(num_threads) = hnsw_update.num_threads {
381 hnsw_config.num_threads = Some(num_threads);
382 }
383 if let Some(resize_factor) = hnsw_update.resize_factor {
384 hnsw_config.resize_factor = Some(resize_factor);
385 }
386 if let Some(sync_threshold) = hnsw_update.sync_threshold {
387 hnsw_config.sync_threshold = Some(sync_threshold);
388 }
389 if let Some(batch_size) = hnsw_update.batch_size {
390 hnsw_config.batch_size = Some(batch_size);
391 }
392 }
393 }
394 UpdateVectorIndexConfiguration::Hnsw(None) => {}
395 UpdateVectorIndexConfiguration::Spann(Some(spann_update)) => {
396 if let Some(spann_config) = vector_index.config.spann.as_mut() {
397 if let Some(search_nprobe) = spann_update.search_nprobe {
398 spann_config.search_nprobe = Some(search_nprobe);
399 }
400 if let Some(ef_search) = spann_update.ef_search {
401 spann_config.ef_search = Some(ef_search);
402 }
403 }
404 }
405 UpdateVectorIndexConfiguration::Spann(None) => {}
406 }
407 }
408
409 pub fn is_sparse_index_enabled(&self) -> bool {
410 let defaults_enabled = self
411 .defaults
412 .sparse_vector
413 .as_ref()
414 .and_then(|sv| sv.sparse_vector_index.as_ref())
415 .is_some_and(|idx| idx.enabled);
416 let key_enabled = self.keys.values().any(|value_types| {
417 value_types
418 .sparse_vector
419 .as_ref()
420 .and_then(|sv| sv.sparse_vector_index.as_ref())
421 .is_some_and(|idx| idx.enabled)
422 });
423 defaults_enabled || key_enabled
424 }
425
426 pub fn is_fts_enabled(&self) -> bool {
427 self.keys
429 .get(DOCUMENT_KEY)
430 .and_then(|vt| vt.string.as_ref())
431 .and_then(|s| s.fts_index.as_ref())
432 .or_else(|| {
433 self.defaults
434 .string
435 .as_ref()
436 .and_then(|s| s.fts_index.as_ref())
437 })
438 .is_none_or(|idx| idx.enabled)
439 }
440}
441
442impl Default for Schema {
443 fn default() -> Self {
460 let defaults = ValueTypes {
462 string: Some(StringValueType {
463 fts_index: Some(FtsIndexType {
464 enabled: false,
465 config: FtsIndexConfig {},
466 }),
467 string_inverted_index: Some(StringInvertedIndexType {
468 enabled: true,
469 config: StringInvertedIndexConfig {},
470 }),
471 }),
472 float_list: Some(FloatListValueType {
473 vector_index: Some(VectorIndexType {
474 enabled: false,
475 config: VectorIndexConfig {
476 space: None, embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
478 source_key: None,
479 hnsw: None, spann: None, },
482 }),
483 }),
484 sparse_vector: Some(SparseVectorValueType {
485 sparse_vector_index: Some(SparseVectorIndexType {
486 enabled: false,
487 config: SparseVectorIndexConfig {
488 embedding_function: None,
489 source_key: None,
490 bm25: None,
491 },
492 }),
493 }),
494 int: Some(IntValueType {
495 int_inverted_index: Some(IntInvertedIndexType {
496 enabled: true,
497 config: IntInvertedIndexConfig {},
498 }),
499 }),
500 float: Some(FloatValueType {
501 float_inverted_index: Some(FloatInvertedIndexType {
502 enabled: true,
503 config: FloatInvertedIndexConfig {},
504 }),
505 }),
506 boolean: Some(BoolValueType {
507 bool_inverted_index: Some(BoolInvertedIndexType {
508 enabled: true,
509 config: BoolInvertedIndexConfig {},
510 }),
511 }),
512 };
513
514 let mut keys = HashMap::new();
516
517 keys.insert(
519 DOCUMENT_KEY.to_string(),
520 ValueTypes {
521 string: Some(StringValueType {
522 fts_index: Some(FtsIndexType {
523 enabled: true,
524 config: FtsIndexConfig {},
525 }),
526 string_inverted_index: Some(StringInvertedIndexType {
527 enabled: false,
528 config: StringInvertedIndexConfig {},
529 }),
530 }),
531 ..Default::default()
532 },
533 );
534
535 keys.insert(
537 EMBEDDING_KEY.to_string(),
538 ValueTypes {
539 float_list: Some(FloatListValueType {
540 vector_index: Some(VectorIndexType {
541 enabled: true,
542 config: VectorIndexConfig {
543 space: None, embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
545 source_key: Some(DOCUMENT_KEY.to_string()),
546 hnsw: None, spann: None, },
549 }),
550 }),
551 ..Default::default()
552 },
553 );
554
555 Schema {
556 defaults,
557 keys,
558 cmek: None,
559 source_attached_function_id: None,
560 }
561 }
562}
563
564pub fn is_embedding_function_default(
565 embedding_function: &Option<EmbeddingFunctionConfiguration>,
566) -> bool {
567 match embedding_function {
568 None => true,
569 Some(embedding_function) => embedding_function.is_default(),
570 }
571}
572
573pub fn is_space_default(space: &Option<Space>) -> bool {
575 match space {
576 None => true, Some(s) => *s == default_space(), }
579}
580
581pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
583 hnsw_config.ef_construction == Some(default_construction_ef())
584 && hnsw_config.ef_search == Some(default_search_ef())
585 && hnsw_config.max_neighbors == Some(default_m())
586 && hnsw_config.num_threads == Some(default_num_threads())
587 && hnsw_config.batch_size == Some(default_batch_size())
588 && hnsw_config.sync_threshold == Some(default_sync_threshold())
589 && hnsw_config.resize_factor == Some(default_resize_factor())
590}
591
/// Index settings grouped by value type.
///
/// Every field is optional and omitted from serialized output when `None`.
/// When deserializing, each field also accepts its name prefixed with `#`
/// (for example `#string`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct ValueTypes {
    /// Indexes for string values.
    #[serde(
        rename = "string",
        alias = "#string",
        skip_serializing_if = "Option::is_none"
    )]
    pub string: Option<StringValueType>,

    /// Indexes for float-list values (the vector index).
    #[serde(
        rename = "float_list",
        alias = "#float_list",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_list: Option<FloatListValueType>,

    /// Indexes for sparse-vector values.
    #[serde(
        rename = "sparse_vector",
        alias = "#sparse_vector",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector: Option<SparseVectorValueType>,

    /// Indexes for integer values.
    #[serde(
        rename = "int",
        alias = "#int",
        skip_serializing_if = "Option::is_none"
    )]
    pub int: Option<IntValueType>,

    /// Indexes for float values.
    #[serde(
        rename = "float",
        alias = "#float",
        skip_serializing_if = "Option::is_none"
    )]
    pub float: Option<FloatValueType>,

    /// Indexes for boolean values; serialized under the name `bool`.
    #[serde(
        rename = "bool",
        alias = "#bool",
        skip_serializing_if = "Option::is_none"
    )]
    pub boolean: Option<BoolValueType>,
}
645
/// Indexes available for string values.
///
/// Each field is omitted from serialized output when `None` and also
/// accepts its name prefixed with `$` when deserializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringValueType {
    /// Full-text search index settings.
    #[serde(
        rename = "fts_index",
        alias = "$fts_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub fts_index: Option<FtsIndexType>,

    /// String inverted index settings.
    #[serde(
        rename = "string_inverted_index", alias = "$string_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub string_inverted_index: Option<StringInvertedIndexType>,
}
664
/// Indexes available for float-list values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatListValueType {
    /// Vector index settings; omitted from output when `None`, and also
    /// accepted as `$vector_index` when deserializing.
    #[serde(
        rename = "vector_index",
        alias = "$vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub vector_index: Option<VectorIndexType>,
}
676
/// Indexes available for sparse-vector values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorValueType {
    /// Sparse vector index settings; omitted from output when `None`, and
    /// also accepted as `$sparse_vector_index` when deserializing.
    #[serde(
        rename = "sparse_vector_index", alias = "$sparse_vector_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub sparse_vector_index: Option<SparseVectorIndexType>,
}
688
/// Indexes available for integer values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntValueType {
    /// Integer inverted index settings; omitted from output when `None`,
    /// and also accepted as `$int_inverted_index` when deserializing.
    #[serde(
        rename = "int_inverted_index",
        alias = "$int_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub int_inverted_index: Option<IntInvertedIndexType>,
}
701
/// Indexes available for float values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatValueType {
    /// Float inverted index settings; omitted from output when `None`, and
    /// also accepted as `$float_inverted_index` when deserializing.
    #[serde(
        rename = "float_inverted_index", alias = "$float_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub float_inverted_index: Option<FloatInvertedIndexType>,
}
713
/// Indexes available for boolean values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolValueType {
    /// Boolean inverted index settings; omitted from output when `None`,
    /// and also accepted as `$bool_inverted_index` when deserializing.
    #[serde(
        rename = "bool_inverted_index", alias = "$bool_inverted_index",
        skip_serializing_if = "Option::is_none"
    )]
    pub bool_inverted_index: Option<BoolInvertedIndexType>,
}
725
/// Full-text search index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FtsIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: FtsIndexConfig,
}
733
/// Vector index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct VectorIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration (space, embedding function, source key,
    /// HNSW / SPANN parameters).
    pub config: VectorIndexConfig,
}
740
/// Sparse vector index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct SparseVectorIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration (embedding function, source key, BM25 flag).
    pub config: SparseVectorIndexConfig,
}
747
/// String inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct StringInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: StringInvertedIndexConfig,
}
754
/// Integer inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct IntInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: IntInvertedIndexConfig,
}
761
/// Float inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct FloatInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: FloatInvertedIndexConfig,
}
768
/// Boolean inverted index entry: an on/off flag plus its configuration.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct BoolInvertedIndexType {
    /// Whether the index is enabled.
    pub enabled: bool,
    /// Index-specific configuration.
    pub config: BoolInvertedIndexConfig,
}
775
776impl Schema {
777 pub fn new_default(default_knn_index: KnnIndex) -> Self {
779 let vector_config = VectorIndexType {
781 enabled: false,
782 config: VectorIndexConfig {
783 space: Some(default_space()),
784 embedding_function: None,
785 source_key: None,
786 hnsw: match default_knn_index {
787 KnnIndex::Hnsw => Some(HnswIndexConfig {
788 ef_construction: Some(default_construction_ef()),
789 max_neighbors: Some(default_m()),
790 ef_search: Some(default_search_ef()),
791 num_threads: Some(default_num_threads()),
792 batch_size: Some(default_batch_size()),
793 sync_threshold: Some(default_sync_threshold()),
794 resize_factor: Some(default_resize_factor()),
795 }),
796 KnnIndex::Spann => None,
797 },
798 spann: match default_knn_index {
799 KnnIndex::Hnsw => None,
800 KnnIndex::Spann => Some(SpannIndexConfig {
801 search_nprobe: Some(default_search_nprobe()),
802 search_rng_factor: Some(default_search_rng_factor()),
803 search_rng_epsilon: Some(default_search_rng_epsilon()),
804 nreplica_count: Some(default_nreplica_count()),
805 write_rng_factor: Some(default_write_rng_factor()),
806 write_rng_epsilon: Some(default_write_rng_epsilon()),
807 split_threshold: Some(default_split_threshold()),
808 num_samples_kmeans: Some(default_num_samples_kmeans()),
809 initial_lambda: Some(default_initial_lambda()),
810 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
811 merge_threshold: Some(default_merge_threshold()),
812 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
813 write_nprobe: Some(default_write_nprobe()),
814 ef_construction: Some(default_construction_ef_spann()),
815 ef_search: Some(default_search_ef_spann()),
816 max_neighbors: Some(default_m_spann()),
817 center_drift_threshold: None,
818 quantize: Quantization::None,
819 }),
820 },
821 },
822 };
823
824 let defaults = ValueTypes {
826 string: Some(StringValueType {
827 string_inverted_index: Some(StringInvertedIndexType {
828 enabled: true,
829 config: StringInvertedIndexConfig {},
830 }),
831 fts_index: Some(FtsIndexType {
832 enabled: false,
833 config: FtsIndexConfig {},
834 }),
835 }),
836 float: Some(FloatValueType {
837 float_inverted_index: Some(FloatInvertedIndexType {
838 enabled: true,
839 config: FloatInvertedIndexConfig {},
840 }),
841 }),
842 int: Some(IntValueType {
843 int_inverted_index: Some(IntInvertedIndexType {
844 enabled: true,
845 config: IntInvertedIndexConfig {},
846 }),
847 }),
848 boolean: Some(BoolValueType {
849 bool_inverted_index: Some(BoolInvertedIndexType {
850 enabled: true,
851 config: BoolInvertedIndexConfig {},
852 }),
853 }),
854 float_list: Some(FloatListValueType {
855 vector_index: Some(vector_config),
856 }),
857 sparse_vector: Some(SparseVectorValueType {
858 sparse_vector_index: Some(SparseVectorIndexType {
859 enabled: false,
860 config: SparseVectorIndexConfig {
861 embedding_function: Some(EmbeddingFunctionConfiguration::Unknown),
862 source_key: None,
863 bm25: Some(false),
864 },
865 }),
866 }),
867 };
868
869 let mut keys = HashMap::new();
871
872 let embedding_defaults = ValueTypes {
874 float_list: Some(FloatListValueType {
875 vector_index: Some(VectorIndexType {
876 enabled: true,
877 config: VectorIndexConfig {
878 space: Some(default_space()),
879 embedding_function: None,
880 source_key: Some(DOCUMENT_KEY.to_string()),
881 hnsw: match default_knn_index {
882 KnnIndex::Hnsw => Some(HnswIndexConfig {
883 ef_construction: Some(default_construction_ef()),
884 max_neighbors: Some(default_m()),
885 ef_search: Some(default_search_ef()),
886 num_threads: Some(default_num_threads()),
887 batch_size: Some(default_batch_size()),
888 sync_threshold: Some(default_sync_threshold()),
889 resize_factor: Some(default_resize_factor()),
890 }),
891 KnnIndex::Spann => None,
892 },
893 spann: match default_knn_index {
894 KnnIndex::Hnsw => None,
895 KnnIndex::Spann => Some(SpannIndexConfig {
896 search_nprobe: Some(default_search_nprobe()),
897 search_rng_factor: Some(default_search_rng_factor()),
898 search_rng_epsilon: Some(default_search_rng_epsilon()),
899 nreplica_count: Some(default_nreplica_count()),
900 write_rng_factor: Some(default_write_rng_factor()),
901 write_rng_epsilon: Some(default_write_rng_epsilon()),
902 split_threshold: Some(default_split_threshold()),
903 num_samples_kmeans: Some(default_num_samples_kmeans()),
904 initial_lambda: Some(default_initial_lambda()),
905 reassign_neighbor_count: Some(default_reassign_neighbor_count()),
906 merge_threshold: Some(default_merge_threshold()),
907 num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
908 write_nprobe: Some(default_write_nprobe()),
909 ef_construction: Some(default_construction_ef_spann()),
910 ef_search: Some(default_search_ef_spann()),
911 max_neighbors: Some(default_m_spann()),
912 center_drift_threshold: None,
913 quantize: Quantization::None,
914 }),
915 },
916 },
917 }),
918 }),
919 ..Default::default()
920 };
921 keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
922
923 let document_defaults = ValueTypes {
925 string: Some(StringValueType {
926 fts_index: Some(FtsIndexType {
927 enabled: true,
928 config: FtsIndexConfig {},
929 }),
930 string_inverted_index: Some(StringInvertedIndexType {
931 enabled: false,
932 config: StringInvertedIndexConfig {},
933 }),
934 }),
935 ..Default::default()
936 };
937 keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
938
939 Schema {
940 defaults,
941 keys,
942 cmek: None,
943 source_attached_function_id: None,
944 }
945 }
946
947 pub fn get_spann_config(&self) -> Option<(SpannIndexConfig, Space)> {
948 let extract = |vector_index: &VectorIndexType| {
949 let space = vector_index.config.space.clone().unwrap_or_default();
950 vector_index
951 .config
952 .spann
953 .clone()
954 .map(|config| (config, space))
955 };
956
957 self.keys
958 .get(EMBEDDING_KEY)
959 .and_then(|value_types| value_types.float_list.as_ref())
960 .and_then(|float_list| float_list.vector_index.as_ref())
961 .and_then(extract)
962 .or_else(|| {
963 self.defaults
964 .float_list
965 .as_ref()
966 .and_then(|float_list| float_list.vector_index.as_ref())
967 .and_then(extract)
968 })
969 }
970
971 pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
972 let to_internal = |vector_index: &VectorIndexType| {
973 let space = vector_index.config.space.clone();
974 vector_index
975 .config
976 .spann
977 .clone()
978 .map(|config| (space.as_ref(), &config).into())
979 };
980
981 self.keys
982 .get(EMBEDDING_KEY)
983 .and_then(|value_types| value_types.float_list.as_ref())
984 .and_then(|float_list| float_list.vector_index.as_ref())
985 .and_then(to_internal)
986 .or_else(|| {
987 self.defaults
988 .float_list
989 .as_ref()
990 .and_then(|float_list| float_list.vector_index.as_ref())
991 .and_then(to_internal)
992 })
993 }
994
995 pub fn is_quantization_enabled(&self) -> bool {
997 let check_spann = |vector_index: &VectorIndexType| {
998 vector_index
999 .config
1000 .spann
1001 .as_ref()
1002 .is_some_and(|config| !matches!(config.quantize, Quantization::None))
1003 };
1004
1005 self.keys
1006 .get(EMBEDDING_KEY)
1007 .and_then(|value_types| value_types.float_list.as_ref())
1008 .and_then(|float_list| float_list.vector_index.as_ref())
1009 .map(check_spann)
1010 .unwrap_or_else(|| {
1011 self.defaults
1012 .float_list
1013 .as_ref()
1014 .and_then(|float_list| float_list.vector_index.as_ref())
1015 .map(check_spann)
1016 .unwrap_or(false)
1017 })
1018 }
1019
1020 pub fn get_spann_config_mut(&mut self) -> Option<&mut SpannIndexConfig> {
1023 if let Some(value_types) = self.keys.get_mut(EMBEDDING_KEY) {
1025 if let Some(float_list) = &mut value_types.float_list {
1026 if let Some(vector_index) = &mut float_list.vector_index {
1027 if let Some(spann_config) = &mut vector_index.config.spann {
1028 return Some(spann_config);
1029 }
1030 }
1031 }
1032 }
1033
1034 if let Some(float_list) = &mut self.defaults.float_list {
1036 if let Some(vector_index) = &mut float_list.vector_index {
1037 if let Some(spann_config) = &mut vector_index.config.spann {
1038 return Some(spann_config);
1039 }
1040 }
1041 }
1042
1043 None
1044 }
1045
1046 pub fn quantize(&mut self, variant: Quantization) {
1052 if let Some(spann_config) = self.get_spann_config_mut() {
1053 *spann_config = match variant {
1054 Quantization::None => SpannIndexConfig {
1055 quantize: variant,
1056 ..*spann_config
1057 },
1058 Quantization::FourBitRabitQWithUSearch => SpannIndexConfig {
1059 search_nprobe: Some(64),
1060 nreplica_count: Some(2),
1061 write_rng_factor: Some(4.0),
1062 write_rng_epsilon: Some(8.0),
1063 split_threshold: Some(512),
1064 reassign_neighbor_count: Some(32),
1065 merge_threshold: Some(128),
1066 write_nprobe: Some(64),
1067 ef_construction: Some(256),
1068 ef_search: Some(128),
1069 max_neighbors: Some(24),
1070 center_drift_threshold: Some(0.125),
1071 quantize: variant,
1072 ..*spann_config
1073 },
1074 };
1075 }
1076 }
1077
1078 pub fn get_internal_hnsw_config(&self) -> Option<InternalHnswConfiguration> {
1079 let to_internal = |vector_index: &VectorIndexType| {
1080 if vector_index.config.spann.is_some() {
1081 return None;
1082 }
1083 let space = vector_index.config.space.as_ref();
1084 let hnsw_config = vector_index.config.hnsw.as_ref();
1085 Some((space, hnsw_config).into())
1086 };
1087
1088 self.keys
1089 .get(EMBEDDING_KEY)
1090 .and_then(|value_types| value_types.float_list.as_ref())
1091 .and_then(|float_list| float_list.vector_index.as_ref())
1092 .and_then(to_internal)
1093 .or_else(|| {
1094 self.defaults
1095 .float_list
1096 .as_ref()
1097 .and_then(|float_list| float_list.vector_index.as_ref())
1098 .and_then(to_internal)
1099 })
1100 }
1101
1102 pub fn get_internal_hnsw_config_with_legacy_fallback(
1103 &self,
1104 segment: &Segment,
1105 ) -> Result<Option<InternalHnswConfiguration>, HnswParametersFromSegmentError> {
1106 if let Some(config) = self.get_internal_hnsw_config() {
1107 let config_from_metadata =
1108 InternalHnswConfiguration::from_legacy_segment_metadata(&segment.metadata)?;
1109
1110 if config == InternalHnswConfiguration::default() && config != config_from_metadata {
1111 return Ok(Some(config_from_metadata));
1112 }
1113
1114 return Ok(Some(config));
1115 }
1116
1117 Ok(None)
1118 }
1119
1120 pub fn reconcile_with_defaults(
1127 user_schema: Option<&Schema>,
1128 knn_index: KnnIndex,
1129 ) -> Result<Self, SchemaError> {
1130 let default_schema = Schema::new_default(knn_index);
1131
1132 match user_schema {
1133 Some(user) => {
1134 let merged_defaults =
1136 Self::merge_value_types(&default_schema.defaults, &user.defaults, knn_index)?;
1137
1138 let mut merged_keys = default_schema.keys.clone();
1140 for (key, user_value_types) in &user.keys {
1141 if let Some(default_value_types) = merged_keys.get(key) {
1142 let merged_value_types = Self::merge_value_types(
1144 default_value_types,
1145 user_value_types,
1146 knn_index,
1147 )?;
1148 merged_keys.insert(key.clone(), merged_value_types);
1149 } else {
1150 merged_keys.insert(key.clone(), user_value_types.clone());
1152 }
1153 }
1154
1155 Ok(Schema {
1156 defaults: merged_defaults,
1157 keys: merged_keys,
1158 cmek: user.cmek.clone().or(default_schema.cmek.clone()),
1159 source_attached_function_id: user
1160 .source_attached_function_id
1161 .clone()
1162 .or(default_schema.source_attached_function_id.clone()),
1163 })
1164 }
1165 None => Ok(default_schema),
1166 }
1167 }
1168
1169 pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
1171 if self.defaults != other.defaults {
1172 return Err(SchemaError::DefaultsMismatch);
1173 }
1174
1175 let mut keys = self.keys.clone();
1176
1177 for (key, other_value_types) in &other.keys {
1178 if let Some(existing) = keys.get(key).cloned() {
1179 let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
1180 keys.insert(key.clone(), merged);
1181 } else {
1182 keys.insert(key.clone(), other_value_types.clone());
1183 }
1184 }
1185
1186 Ok(Schema {
1187 defaults: self.defaults.clone(),
1188 keys,
1189 cmek: other.cmek.clone().or(self.cmek.clone()),
1190 source_attached_function_id: other
1191 .source_attached_function_id
1192 .clone()
1193 .or(self.source_attached_function_id.clone()),
1194 })
1195 }
1196
1197 fn merge_override_value_types(
1198 key: &str,
1199 left: &ValueTypes,
1200 right: &ValueTypes,
1201 ) -> Result<ValueTypes, SchemaError> {
1202 Ok(ValueTypes {
1203 string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
1204 float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
1205 int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
1206 boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
1207 float_list: Self::merge_float_list_override(
1208 key,
1209 left.float_list.as_ref(),
1210 right.float_list.as_ref(),
1211 )?,
1212 sparse_vector: Self::merge_sparse_vector_override(
1213 key,
1214 left.sparse_vector.as_ref(),
1215 right.sparse_vector.as_ref(),
1216 )?,
1217 })
1218 }
1219
1220 fn merge_string_override(
1221 key: &str,
1222 left: Option<&StringValueType>,
1223 right: Option<&StringValueType>,
1224 ) -> Result<Option<StringValueType>, SchemaError> {
1225 match (left, right) {
1226 (Some(l), Some(r)) => Ok(Some(StringValueType {
1227 string_inverted_index: Self::merge_index_or_error(
1228 l.string_inverted_index.as_ref(),
1229 r.string_inverted_index.as_ref(),
1230 &format!("key '{key}' string.string_inverted_index"),
1231 )?,
1232 fts_index: Self::merge_index_or_error(
1233 l.fts_index.as_ref(),
1234 r.fts_index.as_ref(),
1235 &format!("key '{key}' string.fts_index"),
1236 )?,
1237 })),
1238 (Some(l), None) => Ok(Some(l.clone())),
1239 (None, Some(r)) => Ok(Some(r.clone())),
1240 (None, None) => Ok(None),
1241 }
1242 }
1243
1244 fn merge_float_override(
1245 key: &str,
1246 left: Option<&FloatValueType>,
1247 right: Option<&FloatValueType>,
1248 ) -> Result<Option<FloatValueType>, SchemaError> {
1249 match (left, right) {
1250 (Some(l), Some(r)) => Ok(Some(FloatValueType {
1251 float_inverted_index: Self::merge_index_or_error(
1252 l.float_inverted_index.as_ref(),
1253 r.float_inverted_index.as_ref(),
1254 &format!("key '{key}' float.float_inverted_index"),
1255 )?,
1256 })),
1257 (Some(l), None) => Ok(Some(l.clone())),
1258 (None, Some(r)) => Ok(Some(r.clone())),
1259 (None, None) => Ok(None),
1260 }
1261 }
1262
1263 fn merge_int_override(
1264 key: &str,
1265 left: Option<&IntValueType>,
1266 right: Option<&IntValueType>,
1267 ) -> Result<Option<IntValueType>, SchemaError> {
1268 match (left, right) {
1269 (Some(l), Some(r)) => Ok(Some(IntValueType {
1270 int_inverted_index: Self::merge_index_or_error(
1271 l.int_inverted_index.as_ref(),
1272 r.int_inverted_index.as_ref(),
1273 &format!("key '{key}' int.int_inverted_index"),
1274 )?,
1275 })),
1276 (Some(l), None) => Ok(Some(l.clone())),
1277 (None, Some(r)) => Ok(Some(r.clone())),
1278 (None, None) => Ok(None),
1279 }
1280 }
1281
1282 fn merge_bool_override(
1283 key: &str,
1284 left: Option<&BoolValueType>,
1285 right: Option<&BoolValueType>,
1286 ) -> Result<Option<BoolValueType>, SchemaError> {
1287 match (left, right) {
1288 (Some(l), Some(r)) => Ok(Some(BoolValueType {
1289 bool_inverted_index: Self::merge_index_or_error(
1290 l.bool_inverted_index.as_ref(),
1291 r.bool_inverted_index.as_ref(),
1292 &format!("key '{key}' bool.bool_inverted_index"),
1293 )?,
1294 })),
1295 (Some(l), None) => Ok(Some(l.clone())),
1296 (None, Some(r)) => Ok(Some(r.clone())),
1297 (None, None) => Ok(None),
1298 }
1299 }
1300
1301 fn merge_float_list_override(
1302 key: &str,
1303 left: Option<&FloatListValueType>,
1304 right: Option<&FloatListValueType>,
1305 ) -> Result<Option<FloatListValueType>, SchemaError> {
1306 match (left, right) {
1307 (Some(l), Some(r)) => Ok(Some(FloatListValueType {
1308 vector_index: Self::merge_index_or_error(
1309 l.vector_index.as_ref(),
1310 r.vector_index.as_ref(),
1311 &format!("key '{key}' float_list.vector_index"),
1312 )?,
1313 })),
1314 (Some(l), None) => Ok(Some(l.clone())),
1315 (None, Some(r)) => Ok(Some(r.clone())),
1316 (None, None) => Ok(None),
1317 }
1318 }
1319
1320 fn merge_sparse_vector_override(
1321 key: &str,
1322 left: Option<&SparseVectorValueType>,
1323 right: Option<&SparseVectorValueType>,
1324 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1325 match (left, right) {
1326 (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
1327 sparse_vector_index: Self::merge_index_or_error(
1328 l.sparse_vector_index.as_ref(),
1329 r.sparse_vector_index.as_ref(),
1330 &format!("key '{key}' sparse_vector.sparse_vector_index"),
1331 )?,
1332 })),
1333 (Some(l), None) => Ok(Some(l.clone())),
1334 (None, Some(r)) => Ok(Some(r.clone())),
1335 (None, None) => Ok(None),
1336 }
1337 }
1338
1339 fn merge_index_or_error<T: Clone + PartialEq>(
1340 left: Option<&T>,
1341 right: Option<&T>,
1342 context: &str,
1343 ) -> Result<Option<T>, SchemaError> {
1344 match (left, right) {
1345 (Some(l), Some(r)) => {
1346 if l == r {
1347 Ok(Some(l.clone()))
1348 } else {
1349 Err(SchemaError::ConfigurationConflict {
1350 context: context.to_string(),
1351 })
1352 }
1353 }
1354 (Some(l), None) => Ok(Some(l.clone())),
1355 (None, Some(r)) => Ok(Some(r.clone())),
1356 (None, None) => Ok(None),
1357 }
1358 }
1359
1360 fn merge_value_types(
1363 default: &ValueTypes,
1364 user: &ValueTypes,
1365 knn_index: KnnIndex,
1366 ) -> Result<ValueTypes, SchemaError> {
1367 let float_list = Self::merge_float_list_type(
1369 default.float_list.as_ref(),
1370 user.float_list.as_ref(),
1371 knn_index,
1372 )?;
1373
1374 if let Some(ref fl) = float_list {
1376 Self::validate_float_list_value_type(fl)?;
1377 }
1378
1379 Ok(ValueTypes {
1380 string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
1381 float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
1382 int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
1383 boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
1384 float_list,
1385 sparse_vector: Self::merge_sparse_vector_type(
1386 default.sparse_vector.as_ref(),
1387 user.sparse_vector.as_ref(),
1388 )?,
1389 })
1390 }
1391
1392 fn merge_string_type(
1394 default: Option<&StringValueType>,
1395 user: Option<&StringValueType>,
1396 ) -> Result<Option<StringValueType>, SchemaError> {
1397 match (default, user) {
1398 (Some(default), Some(user)) => Ok(Some(StringValueType {
1399 string_inverted_index: Self::merge_string_inverted_index_type(
1400 default.string_inverted_index.as_ref(),
1401 user.string_inverted_index.as_ref(),
1402 )?,
1403 fts_index: Self::merge_fts_index_type(
1404 default.fts_index.as_ref(),
1405 user.fts_index.as_ref(),
1406 )?,
1407 })),
1408 (Some(default), None) => Ok(Some(default.clone())),
1409 (None, Some(user)) => Ok(Some(user.clone())),
1410 (None, None) => Ok(None),
1411 }
1412 }
1413
1414 fn merge_float_type(
1416 default: Option<&FloatValueType>,
1417 user: Option<&FloatValueType>,
1418 ) -> Result<Option<FloatValueType>, SchemaError> {
1419 match (default, user) {
1420 (Some(default), Some(user)) => Ok(Some(FloatValueType {
1421 float_inverted_index: Self::merge_float_inverted_index_type(
1422 default.float_inverted_index.as_ref(),
1423 user.float_inverted_index.as_ref(),
1424 )?,
1425 })),
1426 (Some(default), None) => Ok(Some(default.clone())),
1427 (None, Some(user)) => Ok(Some(user.clone())),
1428 (None, None) => Ok(None),
1429 }
1430 }
1431
1432 fn merge_int_type(
1434 default: Option<&IntValueType>,
1435 user: Option<&IntValueType>,
1436 ) -> Result<Option<IntValueType>, SchemaError> {
1437 match (default, user) {
1438 (Some(default), Some(user)) => Ok(Some(IntValueType {
1439 int_inverted_index: Self::merge_int_inverted_index_type(
1440 default.int_inverted_index.as_ref(),
1441 user.int_inverted_index.as_ref(),
1442 )?,
1443 })),
1444 (Some(default), None) => Ok(Some(default.clone())),
1445 (None, Some(user)) => Ok(Some(user.clone())),
1446 (None, None) => Ok(None),
1447 }
1448 }
1449
1450 fn merge_bool_type(
1452 default: Option<&BoolValueType>,
1453 user: Option<&BoolValueType>,
1454 ) -> Result<Option<BoolValueType>, SchemaError> {
1455 match (default, user) {
1456 (Some(default), Some(user)) => Ok(Some(BoolValueType {
1457 bool_inverted_index: Self::merge_bool_inverted_index_type(
1458 default.bool_inverted_index.as_ref(),
1459 user.bool_inverted_index.as_ref(),
1460 )?,
1461 })),
1462 (Some(default), None) => Ok(Some(default.clone())),
1463 (None, Some(user)) => Ok(Some(user.clone())),
1464 (None, None) => Ok(None),
1465 }
1466 }
1467
1468 fn merge_float_list_type(
1470 default: Option<&FloatListValueType>,
1471 user: Option<&FloatListValueType>,
1472 knn_index: KnnIndex,
1473 ) -> Result<Option<FloatListValueType>, SchemaError> {
1474 match (default, user) {
1475 (Some(default), Some(user)) => Ok(Some(FloatListValueType {
1476 vector_index: Self::merge_vector_index_type(
1477 default.vector_index.as_ref(),
1478 user.vector_index.as_ref(),
1479 knn_index,
1480 )?,
1481 })),
1482 (Some(default), None) => Ok(Some(default.clone())),
1483 (None, Some(user)) => Ok(Some(user.clone())),
1484 (None, None) => Ok(None),
1485 }
1486 }
1487
1488 fn merge_sparse_vector_type(
1490 default: Option<&SparseVectorValueType>,
1491 user: Option<&SparseVectorValueType>,
1492 ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1493 match (default, user) {
1494 (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
1495 sparse_vector_index: Self::merge_sparse_vector_index_type(
1496 default.sparse_vector_index.as_ref(),
1497 user.sparse_vector_index.as_ref(),
1498 )?,
1499 })),
1500 (Some(default), None) => Ok(Some(default.clone())),
1501 (None, Some(user)) => Ok(Some(user.clone())),
1502 (None, None) => Ok(None),
1503 }
1504 }
1505
1506 fn merge_string_inverted_index_type(
1508 default: Option<&StringInvertedIndexType>,
1509 user: Option<&StringInvertedIndexType>,
1510 ) -> Result<Option<StringInvertedIndexType>, SchemaError> {
1511 match (default, user) {
1512 (Some(_default), Some(user)) => {
1513 Ok(Some(StringInvertedIndexType {
1514 enabled: user.enabled, config: user.config.clone(), }))
1517 }
1518 (Some(default), None) => Ok(Some(default.clone())),
1519 (None, Some(user)) => Ok(Some(user.clone())),
1520 (None, None) => Ok(None),
1521 }
1522 }
1523
1524 fn merge_fts_index_type(
1525 default: Option<&FtsIndexType>,
1526 user: Option<&FtsIndexType>,
1527 ) -> Result<Option<FtsIndexType>, SchemaError> {
1528 match (default, user) {
1529 (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
1530 enabled: user.enabled,
1531 config: user.config.clone(),
1532 })),
1533 (Some(default), None) => Ok(Some(default.clone())),
1534 (None, Some(user)) => Ok(Some(user.clone())),
1535 (None, None) => Ok(None),
1536 }
1537 }
1538
1539 fn merge_float_inverted_index_type(
1540 default: Option<&FloatInvertedIndexType>,
1541 user: Option<&FloatInvertedIndexType>,
1542 ) -> Result<Option<FloatInvertedIndexType>, SchemaError> {
1543 match (default, user) {
1544 (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
1545 enabled: user.enabled,
1546 config: user.config.clone(),
1547 })),
1548 (Some(default), None) => Ok(Some(default.clone())),
1549 (None, Some(user)) => Ok(Some(user.clone())),
1550 (None, None) => Ok(None),
1551 }
1552 }
1553
1554 fn merge_int_inverted_index_type(
1555 default: Option<&IntInvertedIndexType>,
1556 user: Option<&IntInvertedIndexType>,
1557 ) -> Result<Option<IntInvertedIndexType>, SchemaError> {
1558 match (default, user) {
1559 (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
1560 enabled: user.enabled,
1561 config: user.config.clone(),
1562 })),
1563 (Some(default), None) => Ok(Some(default.clone())),
1564 (None, Some(user)) => Ok(Some(user.clone())),
1565 (None, None) => Ok(None),
1566 }
1567 }
1568
1569 fn merge_bool_inverted_index_type(
1570 default: Option<&BoolInvertedIndexType>,
1571 user: Option<&BoolInvertedIndexType>,
1572 ) -> Result<Option<BoolInvertedIndexType>, SchemaError> {
1573 match (default, user) {
1574 (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
1575 enabled: user.enabled,
1576 config: user.config.clone(),
1577 })),
1578 (Some(default), None) => Ok(Some(default.clone())),
1579 (None, Some(user)) => Ok(Some(user.clone())),
1580 (None, None) => Ok(None),
1581 }
1582 }
1583
1584 fn merge_vector_index_type(
1585 default: Option<&VectorIndexType>,
1586 user: Option<&VectorIndexType>,
1587 knn_index: KnnIndex,
1588 ) -> Result<Option<VectorIndexType>, SchemaError> {
1589 match (default, user) {
1590 (Some(default), Some(user)) => Ok(Some(VectorIndexType {
1591 enabled: user.enabled,
1592 config: Self::merge_vector_index_config(&default.config, &user.config, knn_index)?,
1593 })),
1594 (Some(default), None) => Ok(Some(default.clone())),
1595 (None, Some(user)) => Ok(Some(user.clone())),
1596 (None, None) => Ok(None),
1597 }
1598 }
1599
1600 fn merge_sparse_vector_index_type(
1601 default: Option<&SparseVectorIndexType>,
1602 user: Option<&SparseVectorIndexType>,
1603 ) -> Result<Option<SparseVectorIndexType>, SchemaError> {
1604 match (default, user) {
1605 (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
1606 enabled: user.enabled,
1607 config: Self::merge_sparse_vector_index_config(&default.config, &user.config),
1608 })),
1609 (Some(default), None) => Ok(Some(default.clone())),
1610 (None, Some(user)) => Ok(Some(user.clone())),
1611 (None, None) => Ok(None),
1612 }
1613 }
1614
1615 fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), SchemaError> {
1618 if let Some(vector_index) = &float_list.vector_index {
1619 if let Some(hnsw) = &vector_index.config.hnsw {
1620 hnsw.validate().map_err(SchemaError::InvalidHnswConfig)?;
1621 }
1622 if let Some(spann) = &vector_index.config.spann {
1623 spann.validate().map_err(SchemaError::InvalidSpannConfig)?;
1624 }
1625 }
1626 Ok(())
1627 }
1628
1629 fn merge_vector_index_config(
1631 default: &VectorIndexConfig,
1632 user: &VectorIndexConfig,
1633 knn_index: KnnIndex,
1634 ) -> Result<VectorIndexConfig, SchemaError> {
1635 match knn_index {
1636 KnnIndex::Hnsw => Ok(VectorIndexConfig {
1637 space: user.space.clone().or(default.space.clone()),
1638 embedding_function: user
1639 .embedding_function
1640 .clone()
1641 .or(default.embedding_function.clone()),
1642 source_key: user.source_key.clone().or(default.source_key.clone()),
1643 hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1644 spann: None,
1645 }),
1646 KnnIndex::Spann => Ok(VectorIndexConfig {
1647 space: user.space.clone().or(default.space.clone()),
1648 embedding_function: user
1649 .embedding_function
1650 .clone()
1651 .or(default.embedding_function.clone()),
1652 source_key: user.source_key.clone().or(default.source_key.clone()),
1653 hnsw: None,
1654 spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref())?,
1655 }),
1656 }
1657 }
1658
1659 fn merge_sparse_vector_index_config(
1661 default: &SparseVectorIndexConfig,
1662 user: &SparseVectorIndexConfig,
1663 ) -> SparseVectorIndexConfig {
1664 SparseVectorIndexConfig {
1665 embedding_function: user
1666 .embedding_function
1667 .clone()
1668 .or(default.embedding_function.clone()),
1669 source_key: user.source_key.clone().or(default.source_key.clone()),
1670 bm25: user.bm25.or(default.bm25),
1671 }
1672 }
1673
1674 fn merge_hnsw_configs(
1676 default_hnsw: Option<&HnswIndexConfig>,
1677 user_hnsw: Option<&HnswIndexConfig>,
1678 ) -> Option<HnswIndexConfig> {
1679 match (default_hnsw, user_hnsw) {
1680 (Some(default), Some(user)) => Some(HnswIndexConfig {
1681 ef_construction: user.ef_construction.or(default.ef_construction),
1682 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1683 ef_search: user.ef_search.or(default.ef_search),
1684 num_threads: user.num_threads.or(default.num_threads),
1685 batch_size: user.batch_size.or(default.batch_size),
1686 sync_threshold: user.sync_threshold.or(default.sync_threshold),
1687 resize_factor: user.resize_factor.or(default.resize_factor),
1688 }),
1689 (Some(default), None) => Some(default.clone()),
1690 (None, Some(user)) => Some(user.clone()),
1691 (None, None) => None,
1692 }
1693 }
1694
1695 fn merge_spann_configs(
1697 default_spann: Option<&SpannIndexConfig>,
1698 user_spann: Option<&SpannIndexConfig>,
1699 ) -> Result<Option<SpannIndexConfig>, SchemaError> {
1700 match (default_spann, user_spann) {
1701 (Some(default), Some(user)) => {
1702 if !matches!(user.quantize, Quantization::None)
1704 || !matches!(default.quantize, Quantization::None)
1705 {
1706 return Err(SchemaError::InvalidUserInput {
1707 reason: "quantize field cannot be set in user schema. Quantization can only be enabled via frontend configuration.".to_string(),
1708 });
1709 }
1710 Ok(Some(SpannIndexConfig {
1711 search_nprobe: user.search_nprobe.or(default.search_nprobe),
1712 search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1713 search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1714 nreplica_count: user.nreplica_count.or(default.nreplica_count),
1715 write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1716 write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1717 split_threshold: user.split_threshold.or(default.split_threshold),
1718 num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1719 initial_lambda: user.initial_lambda.or(default.initial_lambda),
1720 reassign_neighbor_count: user
1721 .reassign_neighbor_count
1722 .or(default.reassign_neighbor_count),
1723 merge_threshold: user.merge_threshold.or(default.merge_threshold),
1724 num_centers_to_merge_to: user
1725 .num_centers_to_merge_to
1726 .or(default.num_centers_to_merge_to),
1727 write_nprobe: user.write_nprobe.or(default.write_nprobe),
1728 ef_construction: user.ef_construction.or(default.ef_construction),
1729 ef_search: user.ef_search.or(default.ef_search),
1730 max_neighbors: user.max_neighbors.or(default.max_neighbors),
1731 center_drift_threshold: user
1732 .center_drift_threshold
1733 .or(default.center_drift_threshold),
1734 quantize: Quantization::None, }))
1736 }
1737 (Some(default), None) => {
1738 if !matches!(default.quantize, Quantization::None) {
1740 return Err(SchemaError::InvalidUserInput {
1741 reason: "quantize field cannot be set in default schema. Quantization can only be enabled via frontend configuration.".to_string(),
1742 });
1743 }
1744 Ok(Some(default.clone()))
1745 }
1746 (None, Some(user)) => {
1747 if !matches!(user.quantize, Quantization::None) {
1749 return Err(SchemaError::InvalidUserInput {
1750 reason: "quantize field cannot be set in user schema. Quantization can only be enabled via frontend configuration.".to_string(),
1751 });
1752 }
1753 Ok(Some(user.clone()))
1754 }
1755 (None, None) => Ok(None),
1756 }
1757 }
1758
1759 pub fn reconcile_with_collection_config(
1767 schema: &Schema,
1768 collection_config: &InternalCollectionConfiguration,
1769 default_knn_index: KnnIndex,
1770 ) -> Result<Schema, SchemaError> {
1771 if collection_config.is_default() {
1773 if schema.is_default() {
1774 let mut new_schema = Schema::new_default(default_knn_index);
1777
1778 if collection_config.embedding_function.is_some() {
1779 if let Some(float_list) = &mut new_schema.defaults.float_list {
1780 if let Some(vector_index) = &mut float_list.vector_index {
1781 vector_index.config.embedding_function =
1782 collection_config.embedding_function.clone();
1783 }
1784 }
1785 if let Some(embedding_types) = new_schema.keys.get_mut(EMBEDDING_KEY) {
1786 if let Some(float_list) = &mut embedding_types.float_list {
1787 if let Some(vector_index) = &mut float_list.vector_index {
1788 vector_index.config.embedding_function =
1789 collection_config.embedding_function.clone();
1790 }
1791 }
1792 }
1793 }
1794 return Ok(new_schema);
1795 } else {
1796 return Ok(schema.clone());
1798 }
1799 }
1800
1801 Self::try_from(collection_config)
1804 }
1805
1806 pub fn reconcile_schema_and_config(
1807 schema: Option<&Schema>,
1808 configuration: Option<&InternalCollectionConfiguration>,
1809 knn_index: KnnIndex,
1810 ) -> Result<Schema, SchemaError> {
1811 if let (Some(user_schema), Some(config)) = (schema, configuration) {
1813 if !user_schema.is_default() && !config.is_default() {
1814 return Err(SchemaError::ConfigAndSchemaConflict);
1815 }
1816 }
1817
1818 let reconciled_schema = Self::reconcile_with_defaults(schema, knn_index)?;
1819 if let Some(config) = configuration {
1820 Self::reconcile_with_collection_config(&reconciled_schema, config, knn_index)
1821 } else {
1822 Ok(reconciled_schema)
1823 }
1824 }
1825
1826 pub fn default_with_embedding_function(
1827 embedding_function: EmbeddingFunctionConfiguration,
1828 ) -> Schema {
1829 let mut schema = Schema::new_default(KnnIndex::Spann);
1830 if let Some(float_list) = &mut schema.defaults.float_list {
1831 if let Some(vector_index) = &mut float_list.vector_index {
1832 vector_index.config.embedding_function = Some(embedding_function.clone());
1833 }
1834 }
1835 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1836 if let Some(float_list) = &mut embedding_types.float_list {
1837 if let Some(vector_index) = &mut float_list.vector_index {
1838 vector_index.config.embedding_function = Some(embedding_function);
1839 }
1840 }
1841 }
1842 schema
1843 }
1844
1845 pub fn is_default(&self) -> bool {
1847 if !Self::is_value_types_default(&self.defaults) {
1849 return false;
1850 }
1851
1852 for key in self.keys.keys() {
1853 if key != EMBEDDING_KEY && key != DOCUMENT_KEY {
1854 return false;
1855 }
1856 }
1857
1858 if let Some(embedding_value) = self.keys.get(EMBEDDING_KEY) {
1860 if !Self::is_embedding_value_types_default(embedding_value) {
1861 return false;
1862 }
1863 }
1864
1865 if let Some(document_value) = self.keys.get(DOCUMENT_KEY) {
1867 if !Self::is_document_value_types_default(document_value) {
1868 return false;
1869 }
1870 }
1871
1872 if self.cmek.is_some() {
1874 return false;
1875 }
1876
1877 true
1878 }
1879
1880 fn is_value_types_default(value_types: &ValueTypes) -> bool {
1882 if let Some(string) = &value_types.string {
1884 if let Some(string_inverted) = &string.string_inverted_index {
1885 if !string_inverted.enabled {
1886 return false;
1887 }
1888 }
1890 if let Some(fts) = &string.fts_index {
1891 if fts.enabled {
1892 return false;
1893 }
1894 }
1896 }
1897
1898 if let Some(float) = &value_types.float {
1900 if let Some(float_inverted) = &float.float_inverted_index {
1901 if !float_inverted.enabled {
1902 return false;
1903 }
1904 }
1906 }
1907
1908 if let Some(int) = &value_types.int {
1910 if let Some(int_inverted) = &int.int_inverted_index {
1911 if !int_inverted.enabled {
1912 return false;
1913 }
1914 }
1916 }
1917
1918 if let Some(boolean) = &value_types.boolean {
1920 if let Some(bool_inverted) = &boolean.bool_inverted_index {
1921 if !bool_inverted.enabled {
1922 return false;
1923 }
1924 }
1926 }
1927
1928 if let Some(float_list) = &value_types.float_list {
1930 if let Some(vector_index) = &float_list.vector_index {
1931 if vector_index.enabled {
1932 return false;
1933 }
1934 if !is_embedding_function_default(&vector_index.config.embedding_function) {
1935 return false;
1936 }
1937 if !is_space_default(&vector_index.config.space) {
1938 return false;
1939 }
1940 if vector_index.config.source_key.is_some() {
1942 return false;
1943 }
1944 match (&vector_index.config.hnsw, &vector_index.config.spann) {
1947 (Some(hnsw_config), None) => {
1948 if !hnsw_config.is_default() {
1949 return false;
1950 }
1951 }
1952 (None, Some(spann_config)) => {
1953 if !spann_config.is_default() {
1954 return false;
1955 }
1956 }
1957 (Some(_), Some(_)) => return false, (None, None) => {}
1959 }
1960 }
1961 }
1962
1963 if let Some(sparse_vector) = &value_types.sparse_vector {
1965 if let Some(sparse_index) = &sparse_vector.sparse_vector_index {
1966 if sparse_index.enabled {
1967 return false;
1968 }
1969 if !is_embedding_function_default(&sparse_index.config.embedding_function) {
1971 return false;
1972 }
1973 if sparse_index.config.source_key.is_some() {
1974 return false;
1975 }
1976 if let Some(bm25) = &sparse_index.config.bm25 {
1977 if bm25 != &false {
1978 return false;
1979 }
1980 }
1981 }
1982 }
1983
1984 true
1985 }
1986
1987 fn is_embedding_value_types_default(value_types: &ValueTypes) -> bool {
1989 if value_types.string.is_some()
1991 || value_types.float.is_some()
1992 || value_types.int.is_some()
1993 || value_types.boolean.is_some()
1994 || value_types.sparse_vector.is_some()
1995 {
1996 return false;
1997 }
1998
1999 if let Some(float_list) = &value_types.float_list {
2001 if let Some(vector_index) = &float_list.vector_index {
2002 if !vector_index.enabled {
2003 return false;
2004 }
2005 if !is_space_default(&vector_index.config.space) {
2006 return false;
2007 }
2008 if !is_embedding_function_default(&vector_index.config.embedding_function) {
2010 return false;
2011 }
2012 if vector_index.config.source_key.as_deref() != Some(DOCUMENT_KEY) {
2014 return false;
2015 }
2016 match (&vector_index.config.hnsw, &vector_index.config.spann) {
2019 (Some(hnsw_config), None) => {
2020 if !hnsw_config.is_default() {
2021 return false;
2022 }
2023 }
2024 (None, Some(spann_config)) => {
2025 if !spann_config.is_default() {
2026 return false;
2027 }
2028 }
2029 (Some(_), Some(_)) => return false, (None, None) => {}
2031 }
2032 }
2033 }
2034
2035 true
2036 }
2037
2038 fn is_document_value_types_default(value_types: &ValueTypes) -> bool {
2040 if value_types.float_list.is_some()
2042 || value_types.float.is_some()
2043 || value_types.int.is_some()
2044 || value_types.boolean.is_some()
2045 || value_types.sparse_vector.is_some()
2046 {
2047 return false;
2048 }
2049
2050 if let Some(string) = &value_types.string {
2052 if let Some(fts) = &string.fts_index {
2053 if !fts.enabled {
2054 return false;
2055 }
2056 }
2058 if let Some(string_inverted) = &string.string_inverted_index {
2059 if string_inverted.enabled {
2060 return false;
2061 }
2062 }
2064 }
2065
2066 true
2067 }
2068
2069 pub fn is_metadata_type_index_enabled(
2071 &self,
2072 key: &str,
2073 value_type: MetadataValueType,
2074 ) -> Result<bool, SchemaError> {
2075 let v_type = self.keys.get(key).unwrap_or(&self.defaults);
2076
2077 match value_type {
2078 MetadataValueType::Bool => match &v_type.boolean {
2079 Some(bool_type) => match &bool_type.bool_inverted_index {
2080 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
2081 None => Err(SchemaError::MissingIndexConfiguration {
2082 key: key.to_string(),
2083 value_type: "bool".to_string(),
2084 }),
2085 },
2086 None => match &self.defaults.boolean {
2087 Some(bool_type) => match &bool_type.bool_inverted_index {
2088 Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
2089 None => Err(SchemaError::MissingIndexConfiguration {
2090 key: key.to_string(),
2091 value_type: "bool".to_string(),
2092 }),
2093 },
2094 None => Err(SchemaError::MissingIndexConfiguration {
2095 key: key.to_string(),
2096 value_type: "bool".to_string(),
2097 }),
2098 },
2099 },
2100 MetadataValueType::Int => match &v_type.int {
2101 Some(int_type) => match &int_type.int_inverted_index {
2102 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
2103 None => Err(SchemaError::MissingIndexConfiguration {
2104 key: key.to_string(),
2105 value_type: "int".to_string(),
2106 }),
2107 },
2108 None => match &self.defaults.int {
2109 Some(int_type) => match &int_type.int_inverted_index {
2110 Some(int_inverted_index) => Ok(int_inverted_index.enabled),
2111 None => Err(SchemaError::MissingIndexConfiguration {
2112 key: key.to_string(),
2113 value_type: "int".to_string(),
2114 }),
2115 },
2116 None => Err(SchemaError::MissingIndexConfiguration {
2117 key: key.to_string(),
2118 value_type: "int".to_string(),
2119 }),
2120 },
2121 },
2122 MetadataValueType::Float => match &v_type.float {
2123 Some(float_type) => match &float_type.float_inverted_index {
2124 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
2125 None => Err(SchemaError::MissingIndexConfiguration {
2126 key: key.to_string(),
2127 value_type: "float".to_string(),
2128 }),
2129 },
2130 None => match &self.defaults.float {
2131 Some(float_type) => match &float_type.float_inverted_index {
2132 Some(float_inverted_index) => Ok(float_inverted_index.enabled),
2133 None => Err(SchemaError::MissingIndexConfiguration {
2134 key: key.to_string(),
2135 value_type: "float".to_string(),
2136 }),
2137 },
2138 None => Err(SchemaError::MissingIndexConfiguration {
2139 key: key.to_string(),
2140 value_type: "float".to_string(),
2141 }),
2142 },
2143 },
2144 MetadataValueType::Str => match &v_type.string {
2145 Some(string_type) => match &string_type.string_inverted_index {
2146 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
2147 None => Err(SchemaError::MissingIndexConfiguration {
2148 key: key.to_string(),
2149 value_type: "string".to_string(),
2150 }),
2151 },
2152 None => match &self.defaults.string {
2153 Some(string_type) => match &string_type.string_inverted_index {
2154 Some(string_inverted_index) => Ok(string_inverted_index.enabled),
2155 None => Err(SchemaError::MissingIndexConfiguration {
2156 key: key.to_string(),
2157 value_type: "string".to_string(),
2158 }),
2159 },
2160 None => Err(SchemaError::MissingIndexConfiguration {
2161 key: key.to_string(),
2162 value_type: "string".to_string(),
2163 }),
2164 },
2165 },
2166 MetadataValueType::SparseVector => match &v_type.sparse_vector {
2167 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
2168 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
2169 None => Err(SchemaError::MissingIndexConfiguration {
2170 key: key.to_string(),
2171 value_type: "sparse_vector".to_string(),
2172 }),
2173 },
2174 None => match &self.defaults.sparse_vector {
2175 Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
2176 Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
2177 None => Err(SchemaError::MissingIndexConfiguration {
2178 key: key.to_string(),
2179 value_type: "sparse_vector".to_string(),
2180 }),
2181 },
2182 None => Err(SchemaError::MissingIndexConfiguration {
2183 key: key.to_string(),
2184 value_type: "sparse_vector".to_string(),
2185 }),
2186 },
2187 },
2188 MetadataValueType::BoolArray => {
2190 self.is_metadata_type_index_enabled(key, MetadataValueType::Bool)
2191 }
2192 MetadataValueType::IntArray => {
2193 self.is_metadata_type_index_enabled(key, MetadataValueType::Int)
2194 }
2195 MetadataValueType::FloatArray => {
2196 self.is_metadata_type_index_enabled(key, MetadataValueType::Float)
2197 }
2198 MetadataValueType::StringArray => {
2199 self.is_metadata_type_index_enabled(key, MetadataValueType::Str)
2200 }
2201 }
2202 }
2203
2204 pub fn is_metadata_where_indexing_enabled(
2205 &self,
2206 where_clause: &Where,
2207 ) -> Result<(), FilterValidationError> {
2208 match where_clause {
2209 Where::Composite(composite) => {
2210 for child in &composite.children {
2211 self.is_metadata_where_indexing_enabled(child)?;
2212 }
2213 Ok(())
2214 }
2215 Where::Document(_) => {
2216 if !self.is_fts_enabled() {
2217 return Err(FilterValidationError::FtsDisabled);
2218 }
2219 Ok(())
2220 }
2221 Where::Metadata(expression) => {
2222 let value_type = match &expression.comparison {
2223 MetadataComparison::Primitive(_, value) => value.value_type(),
2224 MetadataComparison::Set(_, set_value) => set_value.value_type(),
2225 MetadataComparison::ArrayContains(_, value) => value.value_type(),
2226 };
2227 let is_enabled = self
2228 .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
2229 .map_err(FilterValidationError::Schema)?;
2230 if !is_enabled {
2231 return Err(FilterValidationError::IndexingDisabled {
2232 key: expression.key.clone(),
2233 value_type,
2234 });
2235 }
2236 Ok(())
2237 }
2238 }
2239 }
2240
2241 pub fn is_knn_key_indexing_enabled(
2242 &self,
2243 key: &str,
2244 query: &QueryVector,
2245 ) -> Result<(), FilterValidationError> {
2246 match query {
2247 QueryVector::Sparse(_) => {
2248 let is_enabled = self
2249 .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
2250 .map_err(FilterValidationError::Schema)?;
2251 if !is_enabled {
2252 return Err(FilterValidationError::IndexingDisabled {
2253 key: key.to_string(),
2254 value_type: MetadataValueType::SparseVector,
2255 });
2256 }
2257 Ok(())
2258 }
2259 QueryVector::Dense(_) => {
2260 Ok(())
2263 }
2264 }
2265 }
2266
2267 pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
2268 if key.starts_with(CHROMA_KEY) {
2269 return false;
2270 }
2271 let value_types = self.keys.entry(key.to_string()).or_default();
2272 match value_type {
2273 MetadataValueType::Bool => {
2274 if value_types.boolean.is_none() {
2275 value_types.boolean = self.defaults.boolean.clone();
2276 return true;
2277 }
2278 }
2279 MetadataValueType::Int => {
2280 if value_types.int.is_none() {
2281 value_types.int = self.defaults.int.clone();
2282 return true;
2283 }
2284 }
2285 MetadataValueType::Float => {
2286 if value_types.float.is_none() {
2287 value_types.float = self.defaults.float.clone();
2288 return true;
2289 }
2290 }
2291 MetadataValueType::Str => {
2292 if value_types.string.is_none() {
2293 value_types.string = self.defaults.string.clone();
2294 return true;
2295 }
2296 }
2297 MetadataValueType::SparseVector => {
2298 if value_types.sparse_vector.is_none() {
2299 value_types.sparse_vector = self.defaults.sparse_vector.clone();
2300 return true;
2301 }
2302 }
2303 MetadataValueType::BoolArray => {
2305 if value_types.boolean.is_none() {
2306 value_types.boolean = self.defaults.boolean.clone();
2307 return true;
2308 }
2309 }
2310 MetadataValueType::IntArray => {
2311 if value_types.int.is_none() {
2312 value_types.int = self.defaults.int.clone();
2313 return true;
2314 }
2315 }
2316 MetadataValueType::FloatArray => {
2317 if value_types.float.is_none() {
2318 value_types.float = self.defaults.float.clone();
2319 return true;
2320 }
2321 }
2322 MetadataValueType::StringArray => {
2323 if value_types.string.is_none() {
2324 value_types.string = self.defaults.string.clone();
2325 return true;
2326 }
2327 }
2328 }
2329 false
2330 }
2331
2332 pub fn create_index(
2372 mut self,
2373 key: Option<&str>,
2374 config: IndexConfig,
2375 ) -> Result<Self, SchemaBuilderError> {
2376 match &config {
2378 IndexConfig::Vector(cfg) => {
2379 if let Some(k) = key {
2381 return Err(SchemaBuilderError::VectorIndexMustBeGlobal { key: k.to_string() });
2382 }
2383 self._set_vector_index_config_builder(cfg.clone());
2384 return Ok(self);
2385 }
2386 IndexConfig::Fts(_) => {
2387 if key != Some(DOCUMENT_KEY) {
2389 return Err(SchemaBuilderError::FtsIndexOnlyOnDocument);
2390 }
2391 }
2393 IndexConfig::SparseVector(_) => {
2394 if key.is_none() {
2396 return Err(SchemaBuilderError::SparseVectorRequiresKey);
2397 }
2398 }
2400 _ => {}
2401 }
2402
2403 if let Some(k) = key {
2405 if k == EMBEDDING_KEY {
2406 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2407 key: k.to_string(),
2408 });
2409 }
2410 if k == DOCUMENT_KEY && !matches!(config, IndexConfig::Fts(_)) {
2411 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2412 key: k.to_string(),
2413 });
2414 }
2415 if k.starts_with('#') && k != DOCUMENT_KEY {
2416 return Err(SchemaBuilderError::ReservedKeyPrefix { key: k.to_string() });
2417 }
2418 }
2419
2420 match key {
2422 Some(k) => self._set_index_for_key_builder(k, config, true)?,
2423 None => self._set_index_in_defaults_builder(config, true)?,
2424 }
2425
2426 Ok(self)
2427 }
2428
2429 pub fn delete_index(
2457 mut self,
2458 key: Option<&str>,
2459 config: IndexConfig,
2460 ) -> Result<Self, SchemaBuilderError> {
2461 match &config {
2463 IndexConfig::Vector(_) => {
2464 return Err(SchemaBuilderError::VectorIndexDeletionNotSupported);
2466 }
2467 IndexConfig::Fts(_) => {
2468 if key != Some(DOCUMENT_KEY) {
2470 return Err(SchemaBuilderError::FtsIndexDeletionOnlyOnDocument);
2471 }
2472 }
2474 IndexConfig::SparseVector(_) => {
2475 return Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported);
2477 }
2478 _ => {}
2479 }
2480
2481 if let Some(k) = key {
2483 if k == EMBEDDING_KEY {
2484 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2485 key: k.to_string(),
2486 });
2487 }
2488 if k == DOCUMENT_KEY && !matches!(config, IndexConfig::Fts(_)) {
2489 return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2490 key: k.to_string(),
2491 });
2492 }
2493 if k.starts_with('#') && k != DOCUMENT_KEY {
2494 return Err(SchemaBuilderError::ReservedKeyPrefix { key: k.to_string() });
2495 }
2496 }
2497
2498 match key {
2500 Some(k) => self._set_index_for_key_builder(k, config, false)?,
2501 None => self._set_index_in_defaults_builder(config, false)?,
2502 }
2503
2504 Ok(self)
2505 }
2506
2507 pub fn with_cmek(mut self, cmek: Cmek) -> Self {
2525 self.cmek = Some(cmek);
2526 self
2527 }
2528
2529 fn _set_vector_index_config_builder(&mut self, config: VectorIndexConfig) {
2531 if let Some(float_list) = &mut self.defaults.float_list {
2533 if let Some(vector_index) = &mut float_list.vector_index {
2534 vector_index.config = config.clone();
2535 }
2536 }
2537
2538 if let Some(embedding_types) = self.keys.get_mut(EMBEDDING_KEY) {
2540 if let Some(float_list) = &mut embedding_types.float_list {
2541 if let Some(vector_index) = &mut float_list.vector_index {
2542 let mut updated_config = config;
2543 updated_config.source_key = Some(DOCUMENT_KEY.to_string());
2545 vector_index.config = updated_config;
2546 }
2547 }
2548 }
2549 }
2550
2551 fn _set_fts_index_config_builder(&mut self, config: FtsIndexConfig) {
2553 if let Some(string) = &mut self.defaults.string {
2555 if let Some(fts_index) = &mut string.fts_index {
2556 fts_index.config = config.clone();
2557 }
2558 }
2559
2560 if let Some(document_types) = self.keys.get_mut(DOCUMENT_KEY) {
2562 if let Some(string) = &mut document_types.string {
2563 if let Some(fts_index) = &mut string.fts_index {
2564 fts_index.config = config;
2565 }
2566 }
2567 }
2568 }
2569
2570 fn _set_index_for_key_builder(
2572 &mut self,
2573 key: &str,
2574 config: IndexConfig,
2575 enabled: bool,
2576 ) -> Result<(), SchemaBuilderError> {
2577 if enabled && matches!(config, IndexConfig::SparseVector(_)) {
2579 let existing_key = self
2581 .keys
2582 .iter()
2583 .find(|(k, v)| {
2584 k.as_str() != key
2585 && v.sparse_vector
2586 .as_ref()
2587 .and_then(|sv| sv.sparse_vector_index.as_ref())
2588 .map(|idx| idx.enabled)
2589 .unwrap_or(false)
2590 })
2591 .map(|(k, _)| k.clone());
2592
2593 if let Some(existing_key) = existing_key {
2594 return Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key });
2595 }
2596 }
2597
2598 let value_types = self.keys.entry(key.to_string()).or_default();
2600
2601 match config {
2603 IndexConfig::Vector(_) => {
2604 return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2605 key: key.to_string(),
2606 });
2607 }
2608 IndexConfig::Fts(cfg) => {
2609 if let Some(string) = value_types.string.as_mut() {
2611 if let Some(fts_index) = string.fts_index.as_mut() {
2612 fts_index.enabled = enabled;
2613 fts_index.config = cfg;
2614 }
2615 }
2616 }
2617 IndexConfig::SparseVector(cfg) => {
2618 value_types.sparse_vector = Some(SparseVectorValueType {
2619 sparse_vector_index: Some(SparseVectorIndexType {
2620 enabled,
2621 config: cfg,
2622 }),
2623 });
2624 }
2625 IndexConfig::StringInverted(cfg) => {
2626 if value_types.string.is_none() {
2627 value_types.string = Some(StringValueType {
2628 fts_index: None,
2629 string_inverted_index: None,
2630 });
2631 }
2632 if let Some(string) = &mut value_types.string {
2633 string.string_inverted_index = Some(StringInvertedIndexType {
2634 enabled,
2635 config: cfg,
2636 });
2637 }
2638 }
2639 IndexConfig::IntInverted(cfg) => {
2640 value_types.int = Some(IntValueType {
2641 int_inverted_index: Some(IntInvertedIndexType {
2642 enabled,
2643 config: cfg,
2644 }),
2645 });
2646 }
2647 IndexConfig::FloatInverted(cfg) => {
2648 value_types.float = Some(FloatValueType {
2649 float_inverted_index: Some(FloatInvertedIndexType {
2650 enabled,
2651 config: cfg,
2652 }),
2653 });
2654 }
2655 IndexConfig::BoolInverted(cfg) => {
2656 value_types.boolean = Some(BoolValueType {
2657 bool_inverted_index: Some(BoolInvertedIndexType {
2658 enabled,
2659 config: cfg,
2660 }),
2661 });
2662 }
2663 }
2664
2665 Ok(())
2666 }
2667
2668 fn _set_index_in_defaults_builder(
2670 &mut self,
2671 config: IndexConfig,
2672 enabled: bool,
2673 ) -> Result<(), SchemaBuilderError> {
2674 match config {
2675 IndexConfig::Vector(_) => {
2676 return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2677 key: "defaults".to_string(),
2678 });
2679 }
2680 IndexConfig::Fts(_) => {
2681 return Err(SchemaBuilderError::FtsIndexOnlyOnDocument);
2683 }
2684 IndexConfig::SparseVector(cfg) => {
2685 self.defaults.sparse_vector = Some(SparseVectorValueType {
2686 sparse_vector_index: Some(SparseVectorIndexType {
2687 enabled,
2688 config: cfg,
2689 }),
2690 });
2691 }
2692 IndexConfig::StringInverted(cfg) => {
2693 if self.defaults.string.is_none() {
2694 self.defaults.string = Some(StringValueType {
2695 fts_index: None,
2696 string_inverted_index: None,
2697 });
2698 }
2699 if let Some(string) = &mut self.defaults.string {
2700 string.string_inverted_index = Some(StringInvertedIndexType {
2701 enabled,
2702 config: cfg,
2703 });
2704 }
2705 }
2706 IndexConfig::IntInverted(cfg) => {
2707 self.defaults.int = Some(IntValueType {
2708 int_inverted_index: Some(IntInvertedIndexType {
2709 enabled,
2710 config: cfg,
2711 }),
2712 });
2713 }
2714 IndexConfig::FloatInverted(cfg) => {
2715 self.defaults.float = Some(FloatValueType {
2716 float_inverted_index: Some(FloatInvertedIndexType {
2717 enabled,
2718 config: cfg,
2719 }),
2720 });
2721 }
2722 IndexConfig::BoolInverted(cfg) => {
2723 self.defaults.boolean = Some(BoolValueType {
2724 bool_inverted_index: Some(BoolInvertedIndexType {
2725 enabled,
2726 config: cfg,
2727 }),
2728 });
2729 }
2730 }
2731
2732 Ok(())
2733 }
2734}
2735
2736#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2741#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2742#[serde(deny_unknown_fields)]
2743pub struct VectorIndexConfig {
2744 #[serde(skip_serializing_if = "Option::is_none")]
2746 pub space: Option<Space>,
2747 #[serde(skip_serializing_if = "Option::is_none")]
2749 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
2750 #[serde(skip_serializing_if = "Option::is_none")]
2752 pub source_key: Option<String>,
2753 #[serde(skip_serializing_if = "Option::is_none")]
2755 pub hnsw: Option<HnswIndexConfig>,
2756 #[serde(skip_serializing_if = "Option::is_none")]
2758 pub spann: Option<SpannIndexConfig>,
2759}
2760
2761#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
2763#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2764#[serde(deny_unknown_fields)]
2765pub struct HnswIndexConfig {
2766 #[serde(skip_serializing_if = "Option::is_none")]
2767 pub ef_construction: Option<usize>,
2768 #[serde(skip_serializing_if = "Option::is_none")]
2769 pub max_neighbors: Option<usize>,
2770 #[serde(skip_serializing_if = "Option::is_none")]
2771 pub ef_search: Option<usize>,
2772 #[serde(skip_serializing_if = "Option::is_none")]
2773 pub num_threads: Option<usize>,
2774 #[serde(skip_serializing_if = "Option::is_none")]
2775 #[validate(range(min = 2))]
2776 pub batch_size: Option<usize>,
2777 #[serde(skip_serializing_if = "Option::is_none")]
2778 #[validate(range(min = 2))]
2779 pub sync_threshold: Option<usize>,
2780 #[serde(skip_serializing_if = "Option::is_none")]
2781 pub resize_factor: Option<f64>,
2782}
2783
2784impl HnswIndexConfig {
2785 pub fn is_default(&self) -> bool {
2789 if let Some(ef_construction) = self.ef_construction {
2790 if ef_construction != default_construction_ef() {
2791 return false;
2792 }
2793 }
2794 if let Some(max_neighbors) = self.max_neighbors {
2795 if max_neighbors != default_m() {
2796 return false;
2797 }
2798 }
2799 if let Some(ef_search) = self.ef_search {
2800 if ef_search != default_search_ef() {
2801 return false;
2802 }
2803 }
2804 if let Some(batch_size) = self.batch_size {
2805 if batch_size != default_batch_size() {
2806 return false;
2807 }
2808 }
2809 if let Some(sync_threshold) = self.sync_threshold {
2810 if sync_threshold != default_sync_threshold() {
2811 return false;
2812 }
2813 }
2814 if let Some(resize_factor) = self.resize_factor {
2815 if resize_factor != default_resize_factor() {
2816 return false;
2817 }
2818 }
2819 true
2821 }
2822}
2823
2824#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
2826#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2827#[serde(rename_all = "snake_case")]
2828pub enum Quantization {
2829 #[default]
2830 None,
2831 FourBitRabitQWithUSearch,
2832}
2833
2834fn is_default_quantization(v: &Quantization) -> bool {
2835 matches!(v, Quantization::None)
2836}
2837
2838#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
2840#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2841#[serde(deny_unknown_fields)]
2842pub struct SpannIndexConfig {
2843 #[serde(skip_serializing_if = "Option::is_none")]
2844 #[validate(range(max = 128))]
2845 pub search_nprobe: Option<u32>,
2846 #[serde(skip_serializing_if = "Option::is_none")]
2847 #[validate(range(min = 1.0, max = 1.0))]
2848 pub search_rng_factor: Option<f32>,
2849 #[serde(skip_serializing_if = "Option::is_none")]
2850 #[validate(range(min = 5.0, max = 10.0))]
2851 pub search_rng_epsilon: Option<f32>,
2852 #[serde(skip_serializing_if = "Option::is_none")]
2853 #[validate(range(max = 8))]
2854 pub nreplica_count: Option<u32>,
2855 #[serde(skip_serializing_if = "Option::is_none")]
2856 #[validate(range(min = 1.0, max = 1.0))]
2857 pub write_rng_factor: Option<f32>,
2858 #[serde(skip_serializing_if = "Option::is_none")]
2859 #[validate(range(min = 5.0, max = 10.0))]
2860 pub write_rng_epsilon: Option<f32>,
2861 #[serde(skip_serializing_if = "Option::is_none")]
2862 #[validate(range(min = 50, max = 200))]
2863 pub split_threshold: Option<u32>,
2864 #[serde(skip_serializing_if = "Option::is_none")]
2865 #[validate(range(max = 1000))]
2866 pub num_samples_kmeans: Option<usize>,
2867 #[serde(skip_serializing_if = "Option::is_none")]
2868 #[validate(range(min = 100.0, max = 100.0))]
2869 pub initial_lambda: Option<f32>,
2870 #[serde(skip_serializing_if = "Option::is_none")]
2871 #[validate(range(max = 64))]
2872 pub reassign_neighbor_count: Option<u32>,
2873 #[serde(skip_serializing_if = "Option::is_none")]
2874 #[validate(range(min = 25, max = 100))]
2875 pub merge_threshold: Option<u32>,
2876 #[serde(skip_serializing_if = "Option::is_none")]
2877 #[validate(range(max = 8))]
2878 pub num_centers_to_merge_to: Option<u32>,
2879 #[serde(skip_serializing_if = "Option::is_none")]
2880 #[validate(range(max = 64))]
2881 pub write_nprobe: Option<u32>,
2882 #[serde(skip_serializing_if = "Option::is_none")]
2883 #[validate(range(max = 200))]
2884 pub ef_construction: Option<usize>,
2885 #[serde(skip_serializing_if = "Option::is_none")]
2886 #[validate(range(max = 200))]
2887 pub ef_search: Option<usize>,
2888 #[serde(skip_serializing_if = "Option::is_none")]
2889 #[validate(range(max = 64))]
2890 pub max_neighbors: Option<usize>,
2891 #[serde(skip_serializing_if = "Option::is_none")]
2892 #[validate(range(min = 0.1, max = 1.0))]
2893 pub center_drift_threshold: Option<f32>,
2894 #[serde(default, skip_serializing_if = "is_default_quantization")]
2896 pub quantize: Quantization,
2897}
2898
2899impl SpannIndexConfig {
2900 pub fn is_default(&self) -> bool {
2903 if let Some(search_nprobe) = self.search_nprobe {
2904 if search_nprobe != default_search_nprobe() {
2905 return false;
2906 }
2907 }
2908 if let Some(search_rng_factor) = self.search_rng_factor {
2909 if search_rng_factor != default_search_rng_factor() {
2910 return false;
2911 }
2912 }
2913 if let Some(search_rng_epsilon) = self.search_rng_epsilon {
2914 if search_rng_epsilon != default_search_rng_epsilon() {
2915 return false;
2916 }
2917 }
2918 if let Some(nreplica_count) = self.nreplica_count {
2919 if nreplica_count != default_nreplica_count() {
2920 return false;
2921 }
2922 }
2923 if let Some(write_rng_factor) = self.write_rng_factor {
2924 if write_rng_factor != default_write_rng_factor() {
2925 return false;
2926 }
2927 }
2928 if let Some(write_rng_epsilon) = self.write_rng_epsilon {
2929 if write_rng_epsilon != default_write_rng_epsilon() {
2930 return false;
2931 }
2932 }
2933 if let Some(split_threshold) = self.split_threshold {
2934 if split_threshold != default_split_threshold() {
2935 return false;
2936 }
2937 }
2938 if let Some(num_samples_kmeans) = self.num_samples_kmeans {
2939 if num_samples_kmeans != default_num_samples_kmeans() {
2940 return false;
2941 }
2942 }
2943 if let Some(initial_lambda) = self.initial_lambda {
2944 if initial_lambda != default_initial_lambda() {
2945 return false;
2946 }
2947 }
2948 if let Some(reassign_neighbor_count) = self.reassign_neighbor_count {
2949 if reassign_neighbor_count != default_reassign_neighbor_count() {
2950 return false;
2951 }
2952 }
2953 if let Some(merge_threshold) = self.merge_threshold {
2954 if merge_threshold != default_merge_threshold() {
2955 return false;
2956 }
2957 }
2958 if let Some(num_centers_to_merge_to) = self.num_centers_to_merge_to {
2959 if num_centers_to_merge_to != default_num_centers_to_merge_to() {
2960 return false;
2961 }
2962 }
2963 if let Some(write_nprobe) = self.write_nprobe {
2964 if write_nprobe != default_write_nprobe() {
2965 return false;
2966 }
2967 }
2968 if let Some(ef_construction) = self.ef_construction {
2969 if ef_construction != default_construction_ef_spann() {
2970 return false;
2971 }
2972 }
2973 if let Some(ef_search) = self.ef_search {
2974 if ef_search != default_search_ef_spann() {
2975 return false;
2976 }
2977 }
2978 if let Some(max_neighbors) = self.max_neighbors {
2979 if max_neighbors != default_m_spann() {
2980 return false;
2981 }
2982 }
2983 if let Some(center_drift_threshold) = self.center_drift_threshold {
2984 if center_drift_threshold != default_center_drift_threshold() {
2985 return false;
2986 }
2987 }
2988 if !matches!(self.quantize, Quantization::None) {
2989 return false;
2990 }
2991 true
2992 }
2993}
2994
2995#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
2996#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
2997#[serde(deny_unknown_fields)]
2998pub struct SparseVectorIndexConfig {
2999 #[serde(skip_serializing_if = "Option::is_none")]
3001 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
3002 #[serde(skip_serializing_if = "Option::is_none")]
3004 pub source_key: Option<String>,
3005 #[serde(skip_serializing_if = "Option::is_none")]
3007 pub bm25: Option<bool>,
3008}
3009
3010#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
3011#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
3012#[serde(deny_unknown_fields)]
3013pub struct FtsIndexConfig {
3014 }
3016
3017#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
3018#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
3019#[serde(deny_unknown_fields)]
3020pub struct StringInvertedIndexConfig {
3021 }
3023
3024#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
3025#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
3026#[serde(deny_unknown_fields)]
3027pub struct IntInvertedIndexConfig {
3028 }
3030
3031#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
3032#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
3033#[serde(deny_unknown_fields)]
3034pub struct FloatInvertedIndexConfig {
3035 }
3037
3038#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
3039#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
3040#[serde(deny_unknown_fields)]
3041pub struct BoolInvertedIndexConfig {
3042 }
3044
3045#[derive(Clone, Debug)]
3051#[allow(clippy::large_enum_variant)]
3052pub enum IndexConfig {
3053 Vector(VectorIndexConfig),
3054 SparseVector(SparseVectorIndexConfig),
3055 Fts(FtsIndexConfig),
3056 StringInverted(StringInvertedIndexConfig),
3057 IntInverted(IntInvertedIndexConfig),
3058 FloatInverted(FloatInvertedIndexConfig),
3059 BoolInverted(BoolInvertedIndexConfig),
3060}
3061
3062impl From<VectorIndexConfig> for IndexConfig {
3064 fn from(config: VectorIndexConfig) -> Self {
3065 IndexConfig::Vector(config)
3066 }
3067}
3068
3069impl From<SparseVectorIndexConfig> for IndexConfig {
3070 fn from(config: SparseVectorIndexConfig) -> Self {
3071 IndexConfig::SparseVector(config)
3072 }
3073}
3074
3075impl From<FtsIndexConfig> for IndexConfig {
3076 fn from(config: FtsIndexConfig) -> Self {
3077 IndexConfig::Fts(config)
3078 }
3079}
3080
3081impl From<StringInvertedIndexConfig> for IndexConfig {
3082 fn from(config: StringInvertedIndexConfig) -> Self {
3083 IndexConfig::StringInverted(config)
3084 }
3085}
3086
3087impl From<IntInvertedIndexConfig> for IndexConfig {
3088 fn from(config: IntInvertedIndexConfig) -> Self {
3089 IndexConfig::IntInverted(config)
3090 }
3091}
3092
3093impl From<FloatInvertedIndexConfig> for IndexConfig {
3094 fn from(config: FloatInvertedIndexConfig) -> Self {
3095 IndexConfig::FloatInverted(config)
3096 }
3097}
3098
3099impl From<BoolInvertedIndexConfig> for IndexConfig {
3100 fn from(config: BoolInvertedIndexConfig) -> Self {
3101 IndexConfig::BoolInverted(config)
3102 }
3103}
3104
3105impl TryFrom<&InternalCollectionConfiguration> for Schema {
3106 type Error = SchemaError;
3107
3108 fn try_from(config: &InternalCollectionConfiguration) -> Result<Self, Self::Error> {
3109 let mut schema = match &config.vector_index {
3111 VectorIndexConfiguration::Hnsw(_) => Schema::new_default(KnnIndex::Hnsw),
3112 VectorIndexConfiguration::Spann(_) => Schema::new_default(KnnIndex::Spann),
3113 };
3114 let vector_config = match &config.vector_index {
3116 VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
3117 space: Some(hnsw_config.space.clone()),
3118 embedding_function: config.embedding_function.clone(),
3119 source_key: None,
3120 hnsw: Some(HnswIndexConfig {
3121 ef_construction: Some(hnsw_config.ef_construction),
3122 max_neighbors: Some(hnsw_config.max_neighbors),
3123 ef_search: Some(hnsw_config.ef_search),
3124 num_threads: Some(hnsw_config.num_threads),
3125 batch_size: Some(hnsw_config.batch_size),
3126 sync_threshold: Some(hnsw_config.sync_threshold),
3127 resize_factor: Some(hnsw_config.resize_factor),
3128 }),
3129 spann: None,
3130 },
3131 VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
3132 space: Some(spann_config.space.clone()),
3133 embedding_function: config.embedding_function.clone(),
3134 source_key: None,
3135 hnsw: None,
3136 spann: Some(SpannIndexConfig {
3137 search_nprobe: Some(spann_config.search_nprobe),
3138 search_rng_factor: Some(spann_config.search_rng_factor),
3139 search_rng_epsilon: Some(spann_config.search_rng_epsilon),
3140 nreplica_count: Some(spann_config.nreplica_count),
3141 write_rng_factor: Some(spann_config.write_rng_factor),
3142 write_rng_epsilon: Some(spann_config.write_rng_epsilon),
3143 split_threshold: Some(spann_config.split_threshold),
3144 num_samples_kmeans: Some(spann_config.num_samples_kmeans),
3145 initial_lambda: Some(spann_config.initial_lambda),
3146 reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
3147 merge_threshold: Some(spann_config.merge_threshold),
3148 num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
3149 write_nprobe: Some(spann_config.write_nprobe),
3150 ef_construction: Some(spann_config.ef_construction),
3151 ef_search: Some(spann_config.ef_search),
3152 max_neighbors: Some(spann_config.max_neighbors),
3153 center_drift_threshold: None,
3154 quantize: Quantization::None,
3155 }),
3156 },
3157 };
3158
3159 if let Some(float_list) = &mut schema.defaults.float_list {
3162 if let Some(vector_index) = &mut float_list.vector_index {
3163 vector_index.config = vector_config.clone();
3164 }
3165 }
3166
3167 if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
3171 if let Some(float_list) = &mut embedding_types.float_list {
3172 if let Some(vector_index) = &mut float_list.vector_index {
3173 let mut vector_config = vector_config;
3174 vector_config.source_key = Some(DOCUMENT_KEY.to_string());
3175 vector_index.config = vector_config;
3176 }
3177 }
3178 }
3179
3180 Ok(schema)
3181 }
3182}
3183
3184#[cfg(test)]
3185mod tests {
3186 use super::*;
3187 use crate::hnsw_configuration::Space;
3188 use crate::metadata::SparseVector;
3189 use crate::{
3190 EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
3191 };
3192 use serde_json::json;
3193
#[test]
fn test_reconcile_with_defaults_none_user_schema() {
    // Without a user schema, reconciliation must produce the stock SPANN schema.
    let reconciled = Schema::reconcile_with_defaults(None, KnnIndex::Spann).unwrap();
    assert_eq!(reconciled, Schema::new_default(KnnIndex::Spann));
}
3201
#[test]
fn test_reconcile_with_defaults_empty_user_schema() {
    // A user schema that overrides nothing must reconcile to the stock SPANN schema.
    let empty_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::new(),
        cmek: None,
        source_attached_function_id: None,
    };

    let reconciled =
        Schema::reconcile_with_defaults(Some(&empty_schema), KnnIndex::Spann).unwrap();

    assert_eq!(reconciled, Schema::new_default(KnnIndex::Spann));
}
3216
#[test]
fn test_reconcile_with_defaults_user_overrides_string_enabled() {
    // The user disables the default string inverted index and sets nothing else.
    let user_schema = Schema {
        defaults: ValueTypes {
            string: Some(StringValueType {
                string_inverted_index: Some(StringInvertedIndexType {
                    enabled: false,
                    config: StringInvertedIndexConfig {},
                }),
                fts_index: None,
            }),
            ..Default::default()
        },
        keys: HashMap::new(),
        cmek: None,
        source_attached_function_id: None,
    };

    let reconciled =
        Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();

    // The override survives reconciliation...
    let string_defaults = reconciled.defaults.string.as_ref().unwrap();
    let string_inverted = string_defaults.string_inverted_index.as_ref().unwrap();
    assert!(!string_inverted.enabled);

    // ...and value types the user left alone are still populated.
    assert!(reconciled.defaults.float.is_some());
    assert!(reconciled.defaults.int.is_some());
}
3253
#[test]
fn test_reconcile_with_defaults_user_overrides_vector_config() {
    // User-provided vector defaults: only some fields are set, the rest stay `None`.
    let user_vector_config = VectorIndexConfig {
        space: Some(Space::L2),
        embedding_function: None,
        source_key: Some("custom_key".to_string()),
        hnsw: Some(HnswIndexConfig {
            ef_construction: Some(500),
            max_neighbors: None,
            ef_search: None,
            num_threads: None,
            batch_size: None,
            sync_threshold: None,
            resize_factor: None,
        }),
        spann: None,
    };
    let user_schema = Schema {
        defaults: ValueTypes {
            float_list: Some(FloatListValueType {
                vector_index: Some(VectorIndexType {
                    enabled: true,
                    config: user_vector_config,
                }),
            }),
            ..Default::default()
        },
        keys: HashMap::new(),
        cmek: None,
        source_attached_function_id: None,
    };

    // Merge by hand against the HNSW defaults instead of going through
    // `reconcile_with_defaults`.
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let merged_defaults = Schema::merge_value_types(
        &default_schema.defaults,
        &user_schema.defaults,
        KnnIndex::Hnsw,
    )
    .unwrap();
    let mut merged_keys = default_schema.keys.clone();
    for (key, user_types) in user_schema.keys {
        let resolved = match merged_keys.get(&key) {
            Some(default_types) => {
                Schema::merge_value_types(default_types, &user_types, KnnIndex::Hnsw).unwrap()
            }
            None => user_types,
        };
        merged_keys.insert(key, resolved);
    }
    let result = Schema {
        defaults: merged_defaults,
        keys: merged_keys,
        cmek: None,
        source_attached_function_id: None,
    };

    let float_list = result.defaults.float_list.as_ref().unwrap();
    let vector_config = &float_list.vector_index.as_ref().unwrap().config;
    let hnsw = vector_config.hnsw.as_ref().unwrap();

    // Fields set by the user win.
    assert_eq!(vector_config.space, Some(Space::L2));
    assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
    assert_eq!(hnsw.ef_construction, Some(500));

    // Fields the user left unset: the embedding function stays `None`, while
    // `max_neighbors` is filled with the crate default.
    assert_eq!(vector_config.embedding_function, None);
    assert_eq!(hnsw.max_neighbors, Some(default_m()));
}
3342
#[test]
fn test_reconcile_with_defaults_keys() {
    // A user-defined key with FTS enabled and the string inverted index disabled.
    let custom_key_types = ValueTypes {
        string: Some(StringValueType {
            fts_index: Some(FtsIndexType {
                enabled: true,
                config: FtsIndexConfig {},
            }),
            string_inverted_index: Some(StringInvertedIndexType {
                enabled: false,
                config: StringInvertedIndexConfig {},
            }),
        }),
        ..Default::default()
    };
    let user_schema = Schema {
        defaults: ValueTypes::default(),
        keys: HashMap::from([("custom_key".to_string(), custom_key_types)]),
        cmek: None,
        source_attached_function_id: None,
    };

    let reconciled =
        Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();

    // The built-in keys are still present.
    assert!(reconciled.keys.contains_key(EMBEDDING_KEY));
    assert!(reconciled.keys.contains_key(DOCUMENT_KEY));

    // The user's key is kept together with its FTS setting.
    assert!(reconciled.keys.contains_key("custom_key"));
    let custom_types = reconciled.keys.get("custom_key").unwrap();
    let custom_string = custom_types.string.as_ref().unwrap();
    assert!(custom_string.fts_index.as_ref().unwrap().enabled);
}
3391
3392 #[test]
3393 fn test_reconcile_with_defaults_override_existing_key() {
3394 let mut user_schema = Schema {
3396 defaults: ValueTypes::default(),
3397 keys: HashMap::new(),
3398 cmek: None,
3399 source_attached_function_id: None,
3400 };
3401
3402 let embedding_override = ValueTypes {
3404 float_list: Some(FloatListValueType {
3405 vector_index: Some(VectorIndexType {
3406 enabled: false, config: VectorIndexConfig {
3408 space: Some(Space::Ip), embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3410 source_key: Some("custom_embedding_key".to_string()),
3411 hnsw: None,
3412 spann: None,
3413 },
3414 }),
3415 }),
3416 ..Default::default()
3417 };
3418 user_schema
3419 .keys
3420 .insert(EMBEDDING_KEY.to_string(), embedding_override);
3421
3422 let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3423
3424 let embedding_config = result.keys.get(EMBEDDING_KEY).unwrap();
3425 let vector_config = &embedding_config
3426 .float_list
3427 .as_ref()
3428 .unwrap()
3429 .vector_index
3430 .as_ref()
3431 .unwrap();
3432
3433 assert!(!vector_config.enabled);
3435 assert_eq!(vector_config.config.space, Some(Space::Ip));
3436 assert_eq!(
3437 vector_config.config.source_key,
3438 Some("custom_embedding_key".to_string())
3439 );
3440 }
3441
3442 #[test]
3443 fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
3444 let collection_config = InternalCollectionConfiguration {
3445 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
3446 space: Space::Cosine,
3447 ef_construction: 128,
3448 ef_search: 96,
3449 max_neighbors: 42,
3450 num_threads: 8,
3451 resize_factor: 1.5,
3452 sync_threshold: 2_000,
3453 batch_size: 256,
3454 }),
3455 embedding_function: Some(EmbeddingFunctionConfiguration::Known(
3456 EmbeddingFunctionNewConfiguration {
3457 name: "custom".to_string(),
3458 config: json!({"alpha": 1}),
3459 },
3460 )),
3461 };
3462
3463 let schema = Schema::try_from(&collection_config).unwrap();
3464 let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
3465
3466 assert_eq!(reconstructed, collection_config);
3467 }
3468
3469 #[test]
3470 fn test_convert_schema_to_collection_config_spann_roundtrip() {
3471 let spann_config = InternalSpannConfiguration {
3472 space: Space::Cosine,
3473 search_nprobe: 11,
3474 search_rng_factor: 1.7,
3475 write_nprobe: 5,
3476 nreplica_count: 3,
3477 split_threshold: 150,
3478 merge_threshold: 80,
3479 ef_construction: 120,
3480 ef_search: 90,
3481 max_neighbors: 40,
3482 ..Default::default()
3483 };
3484
3485 let collection_config = InternalCollectionConfiguration {
3486 vector_index: VectorIndexConfiguration::Spann(spann_config.clone()),
3487 embedding_function: Some(EmbeddingFunctionConfiguration::Known(
3488 EmbeddingFunctionNewConfiguration {
3489 name: "custom".to_string(),
3490 config: json!({"beta": true}),
3491 },
3492 )),
3493 };
3494
3495 let schema = Schema::try_from(&collection_config).unwrap();
3496 let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
3497
3498 assert_eq!(reconstructed, collection_config);
3499 }
3500
3501 #[test]
3502 fn test_convert_schema_to_collection_config_rejects_mixed_index() {
3503 let mut schema = Schema::new_default(KnnIndex::Hnsw);
3504 if let Some(embedding) = schema.keys.get_mut(EMBEDDING_KEY) {
3505 if let Some(float_list) = &mut embedding.float_list {
3506 if let Some(vector_index) = &mut float_list.vector_index {
3507 vector_index.config.spann = Some(SpannIndexConfig {
3508 search_nprobe: Some(1),
3509 search_rng_factor: Some(1.0),
3510 search_rng_epsilon: Some(0.1),
3511 nreplica_count: Some(1),
3512 write_rng_factor: Some(1.0),
3513 write_rng_epsilon: Some(0.1),
3514 split_threshold: Some(100),
3515 num_samples_kmeans: Some(10),
3516 initial_lambda: Some(0.5),
3517 reassign_neighbor_count: Some(10),
3518 merge_threshold: Some(50),
3519 num_centers_to_merge_to: Some(3),
3520 write_nprobe: Some(1),
3521 ef_construction: Some(50),
3522 ef_search: Some(40),
3523 max_neighbors: Some(20),
3524 center_drift_threshold: None,
3525 quantize: Quantization::None,
3526 });
3527 }
3528 }
3529 }
3530
3531 let result = InternalCollectionConfiguration::try_from(&schema);
3532 assert!(result.is_err());
3533 }
3534
3535 #[test]
3536 fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
3537 let mut schema = Schema::new_default(KnnIndex::Hnsw);
3538 let before = schema.clone();
3539 let modified = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);
3540 assert!(!modified);
3541 assert_eq!(schema, before);
3542 }
3543
3544 #[test]
3545 fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
3546 let mut schema = Schema::new_default(KnnIndex::Hnsw);
3547 assert!(!schema.keys.contains_key("custom_field"));
3548
3549 let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3550
3551 assert!(modified);
3552 let entry = schema
3553 .keys
3554 .get("custom_field")
3555 .expect("expected new key override to be inserted");
3556 assert_eq!(entry.boolean, schema.defaults.boolean);
3557 assert!(entry.string.is_none());
3558 assert!(entry.int.is_none());
3559 assert!(entry.float.is_none());
3560 assert!(entry.float_list.is_none());
3561 assert!(entry.sparse_vector.is_none());
3562 }
3563
3564 #[test]
3565 fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
3566 let mut schema = Schema::new_default(KnnIndex::Hnsw);
3567 let initial_len = schema.keys.len();
3568 schema.keys.insert(
3569 "custom_field".to_string(),
3570 ValueTypes {
3571 string: schema.defaults.string.clone(),
3572 ..Default::default()
3573 },
3574 );
3575
3576 let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3577
3578 assert!(modified);
3579 assert_eq!(schema.keys.len(), initial_len + 1);
3580 let entry = schema
3581 .keys
3582 .get("custom_field")
3583 .expect("expected key override to exist after ensure call");
3584 assert!(entry.string.is_some());
3585 assert_eq!(entry.boolean, schema.defaults.boolean);
3586 }
3587
3588 #[test]
3589 fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
3590 let schema = Schema::new_default(KnnIndex::Spann);
3591 let result = schema.is_knn_key_indexing_enabled(
3592 "custom_sparse",
3593 &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap()),
3594 );
3595
3596 let err = result.expect_err("expected indexing disabled error");
3597 match err {
3598 FilterValidationError::IndexingDisabled { key, value_type } => {
3599 assert_eq!(key, "custom_sparse");
3600 assert_eq!(value_type, crate::metadata::MetadataValueType::SparseVector);
3601 }
3602 other => panic!("unexpected error variant: {other:?}"),
3603 }
3604 }
3605
3606 #[test]
3607 fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
3608 let mut schema = Schema::new_default(KnnIndex::Spann);
3609 schema.keys.insert(
3610 "sparse_enabled".to_string(),
3611 ValueTypes {
3612 sparse_vector: Some(SparseVectorValueType {
3613 sparse_vector_index: Some(SparseVectorIndexType {
3614 enabled: true,
3615 config: SparseVectorIndexConfig {
3616 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3617 source_key: None,
3618 bm25: None,
3619 },
3620 }),
3621 }),
3622 ..Default::default()
3623 },
3624 );
3625
3626 let result = schema.is_knn_key_indexing_enabled(
3627 "sparse_enabled",
3628 &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap()),
3629 );
3630
3631 assert!(result.is_ok());
3632 }
3633
3634 #[test]
3635 fn test_is_knn_key_indexing_enabled_dense_succeeds() {
3636 let schema = Schema::new_default(KnnIndex::Spann);
3637 let result = schema.is_knn_key_indexing_enabled(
3638 EMBEDDING_KEY,
3639 &QueryVector::Dense(vec![0.1_f32, 0.2_f32]),
3640 );
3641
3642 assert!(result.is_ok());
3643 }
3644
3645 #[test]
3646 fn test_merge_hnsw_configs_field_level() {
3647 let default_hnsw = HnswIndexConfig {
3649 ef_construction: Some(200),
3650 max_neighbors: Some(16),
3651 ef_search: Some(10),
3652 num_threads: Some(4),
3653 batch_size: Some(100),
3654 sync_threshold: Some(1000),
3655 resize_factor: Some(1.2),
3656 };
3657
3658 let user_hnsw = HnswIndexConfig {
3659 ef_construction: Some(300), max_neighbors: None, ef_search: Some(20), num_threads: None, batch_size: None, sync_threshold: Some(2000), resize_factor: None, };
3667
3668 let result = Schema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();
3669
3670 assert_eq!(result.ef_construction, Some(300));
3672 assert_eq!(result.ef_search, Some(20));
3673 assert_eq!(result.sync_threshold, Some(2000));
3674
3675 assert_eq!(result.max_neighbors, Some(16));
3677 assert_eq!(result.num_threads, Some(4));
3678 assert_eq!(result.batch_size, Some(100));
3679 assert_eq!(result.resize_factor, Some(1.2));
3680 }
3681
3682 #[test]
3683 fn test_merge_spann_configs_field_level() {
3684 let default_spann = SpannIndexConfig {
3686 search_nprobe: Some(10),
3687 search_rng_factor: Some(1.0), search_rng_epsilon: Some(7.0), nreplica_count: Some(3),
3690 write_rng_factor: Some(1.0), write_rng_epsilon: Some(6.0), split_threshold: Some(100), num_samples_kmeans: Some(100),
3694 initial_lambda: Some(100.0), reassign_neighbor_count: Some(50),
3696 merge_threshold: Some(50), num_centers_to_merge_to: Some(4), write_nprobe: Some(5),
3699 ef_construction: Some(100),
3700 ef_search: Some(10),
3701 max_neighbors: Some(16),
3702 center_drift_threshold: None,
3703 quantize: Quantization::None,
3704 };
3705
3706 let user_spann = SpannIndexConfig {
3707 search_nprobe: Some(20), search_rng_factor: None, search_rng_epsilon: Some(8.0), nreplica_count: None, write_rng_factor: None,
3712 write_rng_epsilon: None,
3713 split_threshold: Some(150), num_samples_kmeans: None,
3715 initial_lambda: None,
3716 reassign_neighbor_count: None,
3717 merge_threshold: None,
3718 num_centers_to_merge_to: None,
3719 write_nprobe: None,
3720 ef_construction: None,
3721 ef_search: None,
3722 max_neighbors: None,
3723 center_drift_threshold: None,
3724 quantize: Quantization::None,
3725 };
3726
3727 let result = Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann))
3728 .unwrap()
3729 .unwrap();
3730
3731 assert_eq!(result.search_nprobe, Some(20));
3733 assert_eq!(result.search_rng_epsilon, Some(8.0));
3734 assert_eq!(result.split_threshold, Some(150));
3735
3736 assert_eq!(result.search_rng_factor, Some(1.0));
3738 assert_eq!(result.nreplica_count, Some(3));
3739 assert_eq!(result.initial_lambda, Some(100.0));
3740 }
3741
3742 #[test]
3743 fn test_merge_spann_configs_rejects_quantize_true() {
3744 let default_spann = SpannIndexConfig {
3746 search_nprobe: Some(10),
3747 search_rng_factor: Some(1.0),
3748 search_rng_epsilon: Some(7.0),
3749 nreplica_count: Some(3),
3750 write_rng_factor: Some(1.0),
3751 write_rng_epsilon: Some(6.0),
3752 split_threshold: Some(100),
3753 num_samples_kmeans: Some(100),
3754 initial_lambda: Some(100.0),
3755 reassign_neighbor_count: Some(50),
3756 merge_threshold: Some(50),
3757 num_centers_to_merge_to: Some(4),
3758 write_nprobe: Some(5),
3759 ef_construction: Some(100),
3760 ef_search: Some(10),
3761 max_neighbors: Some(16),
3762 center_drift_threshold: None,
3763 quantize: Quantization::None,
3764 };
3765
3766 let user_spann_with_quantize = SpannIndexConfig {
3767 search_nprobe: Some(20),
3768 search_rng_factor: None,
3769 search_rng_epsilon: Some(8.0),
3770 nreplica_count: None,
3771 write_rng_factor: None,
3772 write_rng_epsilon: None,
3773 split_threshold: Some(150),
3774 num_samples_kmeans: None,
3775 initial_lambda: None,
3776 reassign_neighbor_count: None,
3777 merge_threshold: None,
3778 num_centers_to_merge_to: None,
3779 write_nprobe: None,
3780 ef_construction: None,
3781 ef_search: None,
3782 max_neighbors: None,
3783 center_drift_threshold: None,
3784 quantize: Quantization::FourBitRabitQWithUSearch, };
3786
3787 let result =
3789 Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann_with_quantize));
3790 assert!(result.is_err());
3791 match result {
3792 Err(SchemaError::InvalidUserInput { reason }) => {
3793 assert!(reason.contains("quantize field cannot be set"));
3794 }
3795 _ => panic!("Expected InvalidUserInput error"),
3796 }
3797
3798 let default_spann_with_quantize = SpannIndexConfig {
3800 search_nprobe: Some(10),
3801 search_rng_factor: Some(1.0),
3802 search_rng_epsilon: Some(7.0),
3803 nreplica_count: Some(3),
3804 write_rng_factor: Some(1.0),
3805 write_rng_epsilon: Some(6.0),
3806 split_threshold: Some(100),
3807 num_samples_kmeans: Some(100),
3808 initial_lambda: Some(100.0),
3809 reassign_neighbor_count: Some(50),
3810 merge_threshold: Some(50),
3811 num_centers_to_merge_to: Some(4),
3812 write_nprobe: Some(5),
3813 ef_construction: Some(100),
3814 ef_search: Some(10),
3815 max_neighbors: Some(16),
3816 center_drift_threshold: None,
3817 quantize: Quantization::FourBitRabitQWithUSearch, };
3819
3820 let result = Schema::merge_spann_configs(Some(&default_spann_with_quantize), None);
3821 assert!(result.is_err());
3822 match result {
3823 Err(SchemaError::InvalidUserInput { reason }) => {
3824 assert!(reason.contains("quantize field cannot be set"));
3825 }
3826 _ => panic!("Expected InvalidUserInput error"),
3827 }
3828
3829 let result = Schema::merge_spann_configs(None, Some(&user_spann_with_quantize));
3831 assert!(result.is_err());
3832 match result {
3833 Err(SchemaError::InvalidUserInput { reason }) => {
3834 assert!(reason.contains("quantize field cannot be set"));
3835 }
3836 _ => panic!("Expected InvalidUserInput error"),
3837 }
3838 }
3839
3840 #[test]
3841 fn test_spann_index_config_into_internal_configuration() {
3842 let config = SpannIndexConfig {
3843 search_nprobe: Some(33),
3844 search_rng_factor: Some(1.2),
3845 search_rng_epsilon: None,
3846 nreplica_count: None,
3847 write_rng_factor: Some(1.5),
3848 write_rng_epsilon: None,
3849 split_threshold: Some(75),
3850 num_samples_kmeans: None,
3851 initial_lambda: Some(0.9),
3852 reassign_neighbor_count: Some(40),
3853 merge_threshold: None,
3854 num_centers_to_merge_to: Some(4),
3855 write_nprobe: Some(60),
3856 ef_construction: Some(180),
3857 ef_search: Some(170),
3858 max_neighbors: Some(32),
3859 center_drift_threshold: None,
3860 quantize: Quantization::None,
3861 };
3862
3863 let with_space: InternalSpannConfiguration = (Some(&Space::Cosine), &config).into();
3864 assert_eq!(with_space.space, Space::Cosine);
3865 assert_eq!(with_space.search_nprobe, 33);
3866 assert_eq!(with_space.search_rng_factor, 1.2);
3867 assert_eq!(with_space.search_rng_epsilon, default_search_rng_epsilon());
3868 assert_eq!(with_space.write_rng_factor, 1.5);
3869 assert_eq!(with_space.write_nprobe, 60);
3870 assert_eq!(with_space.ef_construction, 180);
3871 assert_eq!(with_space.ef_search, 170);
3872 assert_eq!(with_space.max_neighbors, 32);
3873 assert_eq!(with_space.merge_threshold, default_merge_threshold());
3874
3875 let default_space_config: InternalSpannConfiguration = (None, &config).into();
3876 assert_eq!(default_space_config.space, default_space());
3877 }
3878
3879 #[test]
3880 fn test_merge_string_type_combinations() {
3881 let default = StringValueType {
3885 string_inverted_index: Some(StringInvertedIndexType {
3886 enabled: true,
3887 config: StringInvertedIndexConfig {},
3888 }),
3889 fts_index: Some(FtsIndexType {
3890 enabled: false,
3891 config: FtsIndexConfig {},
3892 }),
3893 };
3894
3895 let user = StringValueType {
3896 string_inverted_index: Some(StringInvertedIndexType {
3897 enabled: false, config: StringInvertedIndexConfig {},
3899 }),
3900 fts_index: None, };
3902
3903 let result = Schema::merge_string_type(Some(&default), Some(&user))
3904 .unwrap()
3905 .unwrap();
3906 assert!(!result.string_inverted_index.as_ref().unwrap().enabled); assert!(!result.fts_index.as_ref().unwrap().enabled); let result = Schema::merge_string_type(Some(&default), None)
3911 .unwrap()
3912 .unwrap();
3913 assert!(result.string_inverted_index.as_ref().unwrap().enabled);
3914
3915 let result = Schema::merge_string_type(None, Some(&user))
3917 .unwrap()
3918 .unwrap();
3919 assert!(!result.string_inverted_index.as_ref().unwrap().enabled);
3920
3921 let result = Schema::merge_string_type(None, None).unwrap();
3923 assert!(result.is_none());
3924 }
3925
3926 #[test]
3927 fn test_merge_vector_index_config_comprehensive() {
3928 let default_config = VectorIndexConfig {
3930 space: Some(Space::Cosine),
3931 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3932 source_key: Some("default_key".to_string()),
3933 hnsw: Some(HnswIndexConfig {
3934 ef_construction: Some(200),
3935 max_neighbors: Some(16),
3936 ef_search: Some(10),
3937 num_threads: Some(4),
3938 batch_size: Some(100),
3939 sync_threshold: Some(1000),
3940 resize_factor: Some(1.2),
3941 }),
3942 spann: None,
3943 };
3944
3945 let user_config = VectorIndexConfig {
3946 space: Some(Space::L2), embedding_function: None, source_key: Some("user_key".to_string()), hnsw: Some(HnswIndexConfig {
3950 ef_construction: Some(300), max_neighbors: None, ef_search: None, num_threads: None,
3954 batch_size: None,
3955 sync_threshold: None,
3956 resize_factor: None,
3957 }),
3958 spann: Some(SpannIndexConfig {
3959 search_nprobe: Some(15),
3960 search_rng_factor: None,
3961 search_rng_epsilon: None,
3962 nreplica_count: None,
3963 write_rng_factor: None,
3964 write_rng_epsilon: None,
3965 split_threshold: None,
3966 num_samples_kmeans: None,
3967 initial_lambda: None,
3968 reassign_neighbor_count: None,
3969 merge_threshold: None,
3970 num_centers_to_merge_to: None,
3971 write_nprobe: None,
3972 ef_construction: None,
3973 ef_search: None,
3974 max_neighbors: None,
3975 center_drift_threshold: None,
3976 quantize: Quantization::None,
3977 }), };
3979
3980 let result =
3981 Schema::merge_vector_index_config(&default_config, &user_config, KnnIndex::Hnsw)
3982 .expect("merge should succeed");
3983
3984 assert_eq!(result.space, Some(Space::L2)); assert_eq!(
3987 result.embedding_function,
3988 Some(EmbeddingFunctionConfiguration::Legacy)
3989 ); assert_eq!(result.source_key, Some("user_key".to_string())); assert_eq!(result.hnsw.as_ref().unwrap().ef_construction, Some(300)); assert_eq!(result.hnsw.as_ref().unwrap().max_neighbors, Some(16)); assert!(result.spann.is_none());
3998 }
3999
4000 #[test]
4001 fn test_merge_sparse_vector_index_config() {
4002 let default_config = SparseVectorIndexConfig {
4004 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4005 source_key: Some("default_sparse_key".to_string()),
4006 bm25: None,
4007 };
4008
4009 let user_config = SparseVectorIndexConfig {
4010 embedding_function: None, source_key: Some("user_sparse_key".to_string()), bm25: None,
4013 };
4014
4015 let result = Schema::merge_sparse_vector_index_config(&default_config, &user_config);
4016
4017 assert_eq!(result.source_key, Some("user_sparse_key".to_string()));
4019 assert_eq!(
4021 result.embedding_function,
4022 Some(EmbeddingFunctionConfiguration::Legacy)
4023 );
4024 }
4025
4026 #[test]
4027 fn test_complex_nested_merging_scenario() {
4028 let mut user_schema = Schema {
4030 defaults: ValueTypes::default(),
4031 keys: HashMap::new(),
4032 cmek: None,
4033 source_attached_function_id: None,
4034 };
4035
4036 user_schema.defaults.string = Some(StringValueType {
4038 string_inverted_index: Some(StringInvertedIndexType {
4039 enabled: false,
4040 config: StringInvertedIndexConfig {},
4041 }),
4042 fts_index: Some(FtsIndexType {
4043 enabled: true,
4044 config: FtsIndexConfig {},
4045 }),
4046 });
4047
4048 user_schema.defaults.float_list = Some(FloatListValueType {
4049 vector_index: Some(VectorIndexType {
4050 enabled: true,
4051 config: VectorIndexConfig {
4052 space: Some(Space::Ip),
4053 embedding_function: None, source_key: Some("custom_vector_key".to_string()),
4055 hnsw: Some(HnswIndexConfig {
4056 ef_construction: Some(400),
4057 max_neighbors: Some(32),
4058 ef_search: None, num_threads: None,
4060 batch_size: None,
4061 sync_threshold: None,
4062 resize_factor: None,
4063 }),
4064 spann: None,
4065 },
4066 }),
4067 });
4068
4069 let custom_key_override = ValueTypes {
4071 string: Some(StringValueType {
4072 fts_index: Some(FtsIndexType {
4073 enabled: true,
4074 config: FtsIndexConfig {},
4075 }),
4076 string_inverted_index: None,
4077 }),
4078 ..Default::default()
4079 };
4080 user_schema
4081 .keys
4082 .insert("custom_field".to_string(), custom_key_override);
4083
4084 let result = {
4086 let default_schema = Schema::new_default(KnnIndex::Hnsw);
4087 let merged_defaults = Schema::merge_value_types(
4088 &default_schema.defaults,
4089 &user_schema.defaults,
4090 KnnIndex::Hnsw,
4091 )
4092 .unwrap();
4093 let mut merged_keys = default_schema.keys.clone();
4094 for (key, user_value_types) in user_schema.keys {
4095 if let Some(default_value_types) = merged_keys.get(&key) {
4096 let merged_value_types = Schema::merge_value_types(
4097 default_value_types,
4098 &user_value_types,
4099 KnnIndex::Hnsw,
4100 )
4101 .unwrap();
4102 merged_keys.insert(key, merged_value_types);
4103 } else {
4104 merged_keys.insert(key, user_value_types);
4105 }
4106 }
4107 Schema {
4108 defaults: merged_defaults,
4109 keys: merged_keys,
4110 cmek: None,
4111 source_attached_function_id: None,
4112 }
4113 };
4114
4115 assert!(
4119 !result
4120 .defaults
4121 .string
4122 .as_ref()
4123 .unwrap()
4124 .string_inverted_index
4125 .as_ref()
4126 .unwrap()
4127 .enabled
4128 );
4129 assert!(
4130 result
4131 .defaults
4132 .string
4133 .as_ref()
4134 .unwrap()
4135 .fts_index
4136 .as_ref()
4137 .unwrap()
4138 .enabled
4139 );
4140
4141 let vector_config = &result
4142 .defaults
4143 .float_list
4144 .as_ref()
4145 .unwrap()
4146 .vector_index
4147 .as_ref()
4148 .unwrap()
4149 .config;
4150 assert_eq!(vector_config.space, Some(Space::Ip));
4151 assert_eq!(vector_config.embedding_function, None); assert_eq!(
4153 vector_config.source_key,
4154 Some("custom_vector_key".to_string())
4155 );
4156 assert_eq!(
4157 vector_config.hnsw.as_ref().unwrap().ef_construction,
4158 Some(400)
4159 );
4160 assert_eq!(vector_config.hnsw.as_ref().unwrap().max_neighbors, Some(32));
4161 assert_eq!(
4162 vector_config.hnsw.as_ref().unwrap().ef_search,
4163 Some(default_search_ef())
4164 ); assert!(result.keys.contains_key(EMBEDDING_KEY)); assert!(result.keys.contains_key(DOCUMENT_KEY)); assert!(result.keys.contains_key("custom_field")); let custom_override = result.keys.get("custom_field").unwrap();
4172 assert!(
4173 custom_override
4174 .string
4175 .as_ref()
4176 .unwrap()
4177 .fts_index
4178 .as_ref()
4179 .unwrap()
4180 .enabled
4181 );
4182 assert!(custom_override
4183 .string
4184 .as_ref()
4185 .unwrap()
4186 .string_inverted_index
4187 .is_none());
4188 }
4189
4190 #[test]
4191 fn test_reconcile_with_collection_config_default_config() {
4192 let collection_config = InternalCollectionConfiguration::default_hnsw();
4194 let schema = Schema::try_from(&collection_config).unwrap();
4195
4196 let result =
4197 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4198 .unwrap();
4199 assert_eq!(result, schema);
4200 }
4201
4202 #[test]
4204 fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_hnsw() {
4205 let collection_config = InternalCollectionConfiguration::default_hnsw();
4206 let schema = Schema::new_default(KnnIndex::Hnsw);
4207 let result =
4208 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4209 .unwrap();
4210
4211 assert!(result.defaults.float_list.is_some());
4213 assert!(result
4214 .defaults
4215 .float_list
4216 .as_ref()
4217 .unwrap()
4218 .vector_index
4219 .as_ref()
4220 .unwrap()
4221 .config
4222 .hnsw
4223 .is_some());
4224 assert!(result
4225 .defaults
4226 .float_list
4227 .as_ref()
4228 .unwrap()
4229 .vector_index
4230 .as_ref()
4231 .unwrap()
4232 .config
4233 .spann
4234 .is_none());
4235 }
4236
4237 #[test]
4238 fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_spann() {
4239 let collection_config = InternalCollectionConfiguration::default_hnsw();
4240 let schema = Schema::new_default(KnnIndex::Hnsw);
4241 let result =
4242 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4243 .unwrap();
4244
4245 assert!(result.defaults.float_list.is_some());
4247 assert!(result
4248 .defaults
4249 .float_list
4250 .as_ref()
4251 .unwrap()
4252 .vector_index
4253 .as_ref()
4254 .unwrap()
4255 .config
4256 .spann
4257 .is_some());
4258 assert!(result
4259 .defaults
4260 .float_list
4261 .as_ref()
4262 .unwrap()
4263 .vector_index
4264 .as_ref()
4265 .unwrap()
4266 .config
4267 .hnsw
4268 .is_none());
4269 }
4270
4271 #[test]
4272 fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_hnsw() {
4273 let collection_config = InternalCollectionConfiguration::default_hnsw();
4274 let schema = Schema::new_default(KnnIndex::Spann);
4275 let result =
4276 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4277 .unwrap();
4278
4279 assert!(result.defaults.float_list.is_some());
4281 assert!(result
4282 .defaults
4283 .float_list
4284 .as_ref()
4285 .unwrap()
4286 .vector_index
4287 .as_ref()
4288 .unwrap()
4289 .config
4290 .hnsw
4291 .is_some());
4292 assert!(result
4293 .defaults
4294 .float_list
4295 .as_ref()
4296 .unwrap()
4297 .vector_index
4298 .as_ref()
4299 .unwrap()
4300 .config
4301 .spann
4302 .is_none());
4303 }
4304
4305 #[test]
4306 fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_spann() {
4307 let collection_config = InternalCollectionConfiguration::default_hnsw();
4308 let schema = Schema::new_default(KnnIndex::Spann);
4309 let result =
4310 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4311 .unwrap();
4312
4313 assert!(result.defaults.float_list.is_some());
4315 assert!(result
4316 .defaults
4317 .float_list
4318 .as_ref()
4319 .unwrap()
4320 .vector_index
4321 .as_ref()
4322 .unwrap()
4323 .config
4324 .spann
4325 .is_some());
4326 assert!(result
4327 .defaults
4328 .float_list
4329 .as_ref()
4330 .unwrap()
4331 .vector_index
4332 .as_ref()
4333 .unwrap()
4334 .config
4335 .hnsw
4336 .is_none());
4337 }
4338
4339 #[test]
4340 fn test_reconcile_double_default_spann_config_spann_schema_default_knn_hnsw() {
4341 let collection_config = InternalCollectionConfiguration::default_spann();
4342 let schema = Schema::new_default(KnnIndex::Spann);
4343 let result =
4344 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4345 .unwrap();
4346
4347 assert!(result.defaults.float_list.is_some());
4349 assert!(result
4350 .defaults
4351 .float_list
4352 .as_ref()
4353 .unwrap()
4354 .vector_index
4355 .as_ref()
4356 .unwrap()
4357 .config
4358 .hnsw
4359 .is_some());
4360 assert!(result
4361 .defaults
4362 .float_list
4363 .as_ref()
4364 .unwrap()
4365 .vector_index
4366 .as_ref()
4367 .unwrap()
4368 .config
4369 .spann
4370 .is_none());
4371 }
4372
4373 #[test]
4374 fn test_reconcile_double_default_spann_config_spann_schema_default_knn_spann() {
4375 let collection_config = InternalCollectionConfiguration::default_spann();
4376 let schema = Schema::new_default(KnnIndex::Spann);
4377 let result =
4378 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4379 .unwrap();
4380
4381 assert!(result.defaults.float_list.is_some());
4383 assert!(result
4384 .defaults
4385 .float_list
4386 .as_ref()
4387 .unwrap()
4388 .vector_index
4389 .as_ref()
4390 .unwrap()
4391 .config
4392 .spann
4393 .is_some());
4394 assert!(result
4395 .defaults
4396 .float_list
4397 .as_ref()
4398 .unwrap()
4399 .vector_index
4400 .as_ref()
4401 .unwrap()
4402 .config
4403 .hnsw
4404 .is_none());
4405 assert_eq!(
4407 result
4408 .defaults
4409 .float_list
4410 .as_ref()
4411 .unwrap()
4412 .vector_index
4413 .as_ref()
4414 .unwrap()
4415 .config
4416 .source_key,
4417 None
4418 );
4419 }
4420
4421 #[test]
4422 fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_hnsw() {
4423 let collection_config = InternalCollectionConfiguration::default_spann();
4424 let schema = Schema::new_default(KnnIndex::Hnsw);
4425 let result =
4426 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4427 .unwrap();
4428
4429 assert!(result.defaults.float_list.is_some());
4431 assert!(result
4432 .defaults
4433 .float_list
4434 .as_ref()
4435 .unwrap()
4436 .vector_index
4437 .as_ref()
4438 .unwrap()
4439 .config
4440 .hnsw
4441 .is_some());
4442 assert!(result
4443 .defaults
4444 .float_list
4445 .as_ref()
4446 .unwrap()
4447 .vector_index
4448 .as_ref()
4449 .unwrap()
4450 .config
4451 .spann
4452 .is_none());
4453 }
4454
4455 #[test]
4456 fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_spann() {
4457 let collection_config = InternalCollectionConfiguration::default_spann();
4458 let schema = Schema::new_default(KnnIndex::Hnsw);
4459 let result =
4460 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4461 .unwrap();
4462
4463 assert!(result.defaults.float_list.is_some());
4465 assert!(result
4466 .defaults
4467 .float_list
4468 .as_ref()
4469 .unwrap()
4470 .vector_index
4471 .as_ref()
4472 .unwrap()
4473 .config
4474 .spann
4475 .is_some());
4476 assert!(result
4477 .defaults
4478 .float_list
4479 .as_ref()
4480 .unwrap()
4481 .vector_index
4482 .as_ref()
4483 .unwrap()
4484 .config
4485 .hnsw
4486 .is_none());
4487 }
4488
4489 #[test]
4490 fn test_defaults_source_key_not_document() {
4491 let schema_hnsw = Schema::new_default(KnnIndex::Hnsw);
4493 let schema_spann = Schema::new_default(KnnIndex::Spann);
4494
4495 let defaults_hnsw = schema_hnsw
4497 .defaults
4498 .float_list
4499 .as_ref()
4500 .unwrap()
4501 .vector_index
4502 .as_ref()
4503 .unwrap();
4504 assert_eq!(defaults_hnsw.config.source_key, None);
4505
4506 let defaults_spann = schema_spann
4508 .defaults
4509 .float_list
4510 .as_ref()
4511 .unwrap()
4512 .vector_index
4513 .as_ref()
4514 .unwrap();
4515 assert_eq!(defaults_spann.config.source_key, None);
4516
4517 let collection_config_hnsw = InternalCollectionConfiguration {
4520 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4521 ef_construction: 300,
4522 max_neighbors: 32,
4523 ef_search: 50,
4524 num_threads: 8,
4525 batch_size: 200,
4526 sync_threshold: 2000,
4527 resize_factor: 1.5,
4528 space: Space::L2,
4529 }),
4530 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4531 };
4532 let result_hnsw = Schema::reconcile_with_collection_config(
4533 &schema_hnsw,
4534 &collection_config_hnsw,
4535 KnnIndex::Hnsw,
4536 )
4537 .unwrap();
4538 let reconciled_defaults_hnsw = result_hnsw
4539 .defaults
4540 .float_list
4541 .as_ref()
4542 .unwrap()
4543 .vector_index
4544 .as_ref()
4545 .unwrap();
4546 assert_eq!(reconciled_defaults_hnsw.config.source_key, None);
4547
4548 let collection_config_spann = InternalCollectionConfiguration {
4549 vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4550 search_nprobe: 20,
4551 search_rng_factor: 3.0,
4552 search_rng_epsilon: 0.2,
4553 nreplica_count: 5,
4554 write_rng_factor: 2.0,
4555 write_rng_epsilon: 0.1,
4556 split_threshold: 2000,
4557 num_samples_kmeans: 200,
4558 initial_lambda: 0.8,
4559 reassign_neighbor_count: 100,
4560 merge_threshold: 800,
4561 num_centers_to_merge_to: 20,
4562 write_nprobe: 10,
4563 ef_construction: 400,
4564 ef_search: 60,
4565 max_neighbors: 24,
4566 space: Space::Cosine,
4567 }),
4568 embedding_function: None,
4569 };
4570 let result_spann = Schema::reconcile_with_collection_config(
4571 &schema_spann,
4572 &collection_config_spann,
4573 KnnIndex::Spann,
4574 )
4575 .unwrap();
4576 let reconciled_defaults_spann = result_spann
4577 .defaults
4578 .float_list
4579 .as_ref()
4580 .unwrap()
4581 .vector_index
4582 .as_ref()
4583 .unwrap();
4584 assert_eq!(reconciled_defaults_spann.config.source_key, None);
4585
4586 let embedding_hnsw = result_hnsw.keys.get(EMBEDDING_KEY).unwrap();
4588 let embedding_vector_index_hnsw = embedding_hnsw
4589 .float_list
4590 .as_ref()
4591 .unwrap()
4592 .vector_index
4593 .as_ref()
4594 .unwrap();
4595 assert_eq!(
4596 embedding_vector_index_hnsw.config.source_key,
4597 Some(DOCUMENT_KEY.to_string())
4598 );
4599
4600 let embedding_spann = result_spann.keys.get(EMBEDDING_KEY).unwrap();
4601 let embedding_vector_index_spann = embedding_spann
4602 .float_list
4603 .as_ref()
4604 .unwrap()
4605 .vector_index
4606 .as_ref()
4607 .unwrap();
4608 assert_eq!(
4609 embedding_vector_index_spann.config.source_key,
4610 Some(DOCUMENT_KEY.to_string())
4611 );
4612 }
4613
4614 #[test]
4615 fn test_try_from_source_key() {
4616 let collection_config_hnsw = InternalCollectionConfiguration {
4621 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4622 ef_construction: 300,
4623 max_neighbors: 32,
4624 ef_search: 50,
4625 num_threads: 8,
4626 batch_size: 200,
4627 sync_threshold: 2000,
4628 resize_factor: 1.5,
4629 space: Space::L2,
4630 }),
4631 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4632 };
4633 let schema_hnsw = Schema::try_from(&collection_config_hnsw).unwrap();
4634
4635 let defaults_hnsw = schema_hnsw
4637 .defaults
4638 .float_list
4639 .as_ref()
4640 .unwrap()
4641 .vector_index
4642 .as_ref()
4643 .unwrap();
4644 assert_eq!(defaults_hnsw.config.source_key, None);
4645
4646 let embedding_hnsw = schema_hnsw.keys.get(EMBEDDING_KEY).unwrap();
4648 let embedding_vector_index_hnsw = embedding_hnsw
4649 .float_list
4650 .as_ref()
4651 .unwrap()
4652 .vector_index
4653 .as_ref()
4654 .unwrap();
4655 assert_eq!(
4656 embedding_vector_index_hnsw.config.source_key,
4657 Some(DOCUMENT_KEY.to_string())
4658 );
4659
4660 let collection_config_spann = InternalCollectionConfiguration {
4662 vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4663 search_nprobe: 20,
4664 search_rng_factor: 3.0,
4665 search_rng_epsilon: 0.2,
4666 nreplica_count: 5,
4667 write_rng_factor: 2.0,
4668 write_rng_epsilon: 0.1,
4669 split_threshold: 2000,
4670 num_samples_kmeans: 200,
4671 initial_lambda: 0.8,
4672 reassign_neighbor_count: 100,
4673 merge_threshold: 800,
4674 num_centers_to_merge_to: 20,
4675 write_nprobe: 10,
4676 ef_construction: 400,
4677 ef_search: 60,
4678 max_neighbors: 24,
4679 space: Space::Cosine,
4680 }),
4681 embedding_function: None,
4682 };
4683 let schema_spann = Schema::try_from(&collection_config_spann).unwrap();
4684
4685 let defaults_spann = schema_spann
4687 .defaults
4688 .float_list
4689 .as_ref()
4690 .unwrap()
4691 .vector_index
4692 .as_ref()
4693 .unwrap();
4694 assert_eq!(defaults_spann.config.source_key, None);
4695
4696 let embedding_spann = schema_spann.keys.get(EMBEDDING_KEY).unwrap();
4698 let embedding_vector_index_spann = embedding_spann
4699 .float_list
4700 .as_ref()
4701 .unwrap()
4702 .vector_index
4703 .as_ref()
4704 .unwrap();
4705 assert_eq!(
4706 embedding_vector_index_spann.config.source_key,
4707 Some(DOCUMENT_KEY.to_string())
4708 );
4709 }
4710
4711 #[test]
4712 fn test_default_hnsw_with_default_embedding_function() {
4713 use crate::collection_configuration::EmbeddingFunctionNewConfiguration;
4717
4718 let collection_config = InternalCollectionConfiguration {
4719 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default()),
4720 embedding_function: Some(EmbeddingFunctionConfiguration::Known(
4721 EmbeddingFunctionNewConfiguration {
4722 name: "default".to_string(),
4723 config: serde_json::json!({}),
4724 },
4725 )),
4726 };
4727
4728 assert!(collection_config.is_default());
4730
4731 let schema = Schema::new_default(KnnIndex::Hnsw);
4732 let result =
4733 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4734 .unwrap();
4735
4736 let defaults = result
4738 .defaults
4739 .float_list
4740 .as_ref()
4741 .unwrap()
4742 .vector_index
4743 .as_ref()
4744 .unwrap();
4745 assert_eq!(defaults.config.source_key, None);
4746
4747 let embedding = result.keys.get(EMBEDDING_KEY).unwrap();
4749 let embedding_vector_index = embedding
4750 .float_list
4751 .as_ref()
4752 .unwrap()
4753 .vector_index
4754 .as_ref()
4755 .unwrap();
4756 assert_eq!(
4757 embedding_vector_index.config.source_key,
4758 Some(DOCUMENT_KEY.to_string())
4759 );
4760
4761 let vector_index_config = defaults.config.clone();
4763 assert!(vector_index_config.spann.is_some());
4764 assert!(vector_index_config.hnsw.is_none());
4765
4766 assert_eq!(
4768 embedding_vector_index.config.embedding_function,
4769 Some(EmbeddingFunctionConfiguration::Known(
4770 EmbeddingFunctionNewConfiguration {
4771 name: "default".to_string(),
4772 config: serde_json::json!({}),
4773 },
4774 ))
4775 );
4776 assert_eq!(
4777 defaults.config.embedding_function,
4778 Some(EmbeddingFunctionConfiguration::Known(
4779 EmbeddingFunctionNewConfiguration {
4780 name: "default".to_string(),
4781 config: serde_json::json!({}),
4782 },
4783 ))
4784 );
4785 }
4786
4787 #[test]
4788 fn test_reconcile_with_collection_config_both_non_default() {
4789 let mut schema = Schema::new_default(KnnIndex::Hnsw);
4791 schema.defaults.string = Some(StringValueType {
4792 fts_index: Some(FtsIndexType {
4793 enabled: true,
4794 config: FtsIndexConfig {},
4795 }),
4796 string_inverted_index: None,
4797 });
4798
4799 let mut collection_config = InternalCollectionConfiguration::default_hnsw();
4800 if let VectorIndexConfiguration::Hnsw(ref mut hnsw_config) = collection_config.vector_index
4802 {
4803 hnsw_config.ef_construction = 500; }
4805
4806 let result = Schema::reconcile_schema_and_config(
4808 Some(&schema),
4809 Some(&collection_config),
4810 KnnIndex::Spann,
4811 );
4812 assert!(result.is_err());
4813 assert!(matches!(
4814 result.unwrap_err(),
4815 SchemaError::ConfigAndSchemaConflict
4816 ));
4817 }
4818
4819 #[test]
4820 fn test_reconcile_with_collection_config_hnsw_override() {
4821 let schema = Schema::new_default(KnnIndex::Hnsw); let collection_config = InternalCollectionConfiguration {
4825 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4826 ef_construction: 300,
4827 max_neighbors: 32,
4828 ef_search: 50,
4829 num_threads: 8,
4830 batch_size: 200,
4831 sync_threshold: 2000,
4832 resize_factor: 1.5,
4833 space: Space::L2,
4834 }),
4835 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4836 };
4837
4838 let result =
4839 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4840 .unwrap();
4841
4842 let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4844 let vector_index = embedding_override
4845 .float_list
4846 .as_ref()
4847 .unwrap()
4848 .vector_index
4849 .as_ref()
4850 .unwrap();
4851
4852 assert!(vector_index.enabled);
4853 assert_eq!(vector_index.config.space, Some(Space::L2));
4854 assert_eq!(
4855 vector_index.config.embedding_function,
4856 Some(EmbeddingFunctionConfiguration::Legacy)
4857 );
4858 assert_eq!(
4859 vector_index.config.source_key,
4860 Some(DOCUMENT_KEY.to_string())
4861 );
4862
4863 let hnsw_config = vector_index.config.hnsw.as_ref().unwrap();
4864 assert_eq!(hnsw_config.ef_construction, Some(300));
4865 assert_eq!(hnsw_config.max_neighbors, Some(32));
4866 assert_eq!(hnsw_config.ef_search, Some(50));
4867 assert_eq!(hnsw_config.num_threads, Some(8));
4868 assert_eq!(hnsw_config.batch_size, Some(200));
4869 assert_eq!(hnsw_config.sync_threshold, Some(2000));
4870 assert_eq!(hnsw_config.resize_factor, Some(1.5));
4871
4872 assert!(vector_index.config.spann.is_none());
4873 }
4874
4875 #[test]
4876 fn test_reconcile_with_collection_config_spann_override() {
4877 let schema = Schema::new_default(KnnIndex::Spann); let collection_config = InternalCollectionConfiguration {
4881 vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4882 search_nprobe: 20,
4883 search_rng_factor: 3.0,
4884 search_rng_epsilon: 0.2,
4885 nreplica_count: 5,
4886 write_rng_factor: 2.0,
4887 write_rng_epsilon: 0.1,
4888 split_threshold: 2000,
4889 num_samples_kmeans: 200,
4890 initial_lambda: 0.8,
4891 reassign_neighbor_count: 100,
4892 merge_threshold: 800,
4893 num_centers_to_merge_to: 20,
4894 write_nprobe: 10,
4895 ef_construction: 400,
4896 ef_search: 60,
4897 max_neighbors: 24,
4898 space: Space::Cosine,
4899 }),
4900 embedding_function: None,
4901 };
4902
4903 let result =
4904 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4905 .unwrap();
4906
4907 let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4909 let vector_index = embedding_override
4910 .float_list
4911 .as_ref()
4912 .unwrap()
4913 .vector_index
4914 .as_ref()
4915 .unwrap();
4916
4917 assert!(vector_index.enabled);
4918 assert_eq!(vector_index.config.space, Some(Space::Cosine));
4919 assert_eq!(vector_index.config.embedding_function, None);
4920 assert_eq!(
4921 vector_index.config.source_key,
4922 Some(DOCUMENT_KEY.to_string())
4923 );
4924
4925 assert!(vector_index.config.hnsw.is_none());
4926
4927 let spann_config = vector_index.config.spann.as_ref().unwrap();
4928 assert_eq!(spann_config.search_nprobe, Some(20));
4929 assert_eq!(spann_config.search_rng_factor, Some(3.0));
4930 assert_eq!(spann_config.search_rng_epsilon, Some(0.2));
4931 assert_eq!(spann_config.nreplica_count, Some(5));
4932 assert_eq!(spann_config.write_rng_factor, Some(2.0));
4933 assert_eq!(spann_config.write_rng_epsilon, Some(0.1));
4934 assert_eq!(spann_config.split_threshold, Some(2000));
4935 assert_eq!(spann_config.num_samples_kmeans, Some(200));
4936 assert_eq!(spann_config.initial_lambda, Some(0.8));
4937 assert_eq!(spann_config.reassign_neighbor_count, Some(100));
4938 assert_eq!(spann_config.merge_threshold, Some(800));
4939 assert_eq!(spann_config.num_centers_to_merge_to, Some(20));
4940 assert_eq!(spann_config.write_nprobe, Some(10));
4941 assert_eq!(spann_config.ef_construction, Some(400));
4942 assert_eq!(spann_config.ef_search, Some(60));
4943 assert_eq!(spann_config.max_neighbors, Some(24));
4944 }
4945
4946 #[test]
4947 fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
4948 let schema = Schema::new_default(KnnIndex::Hnsw);
4951
4952 let collection_config = InternalCollectionConfiguration {
4953 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4954 ef_construction: 300,
4955 max_neighbors: 32,
4956 ef_search: 50,
4957 num_threads: 8,
4958 batch_size: 200,
4959 sync_threshold: 2000,
4960 resize_factor: 1.5,
4961 space: Space::L2,
4962 }),
4963 embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4964 };
4965
4966 let result =
4967 Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4968 .unwrap();
4969
4970 let defaults_vector_index = result
4972 .defaults
4973 .float_list
4974 .as_ref()
4975 .unwrap()
4976 .vector_index
4977 .as_ref()
4978 .unwrap();
4979
4980 assert!(!defaults_vector_index.enabled);
4982 assert_eq!(defaults_vector_index.config.space, Some(Space::L2));
4984 assert_eq!(
4985 defaults_vector_index.config.embedding_function,
4986 Some(EmbeddingFunctionConfiguration::Legacy)
4987 );
4988 assert_eq!(defaults_vector_index.config.source_key, None);
4989 let defaults_hnsw = defaults_vector_index.config.hnsw.as_ref().unwrap();
4990 assert_eq!(defaults_hnsw.ef_construction, Some(300));
4991 assert_eq!(defaults_hnsw.max_neighbors, Some(32));
4992
4993 let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4995 let embedding_vector_index = embedding_override
4996 .float_list
4997 .as_ref()
4998 .unwrap()
4999 .vector_index
5000 .as_ref()
5001 .unwrap();
5002
5003 assert!(embedding_vector_index.enabled);
5005 assert_eq!(embedding_vector_index.config.space, Some(Space::L2));
5007 assert_eq!(
5008 embedding_vector_index.config.embedding_function,
5009 Some(EmbeddingFunctionConfiguration::Legacy)
5010 );
5011 assert_eq!(
5012 embedding_vector_index.config.source_key,
5013 Some(DOCUMENT_KEY.to_string())
5014 );
5015 let embedding_hnsw = embedding_vector_index.config.hnsw.as_ref().unwrap();
5016 assert_eq!(embedding_hnsw.ef_construction, Some(300));
5017 assert_eq!(embedding_hnsw.max_neighbors, Some(32));
5018 }
5019
5020 #[test]
5021 fn test_is_schema_default() {
5022 let default_hnsw_schema = Schema::new_default(KnnIndex::Hnsw);
5024 assert!(default_hnsw_schema.is_default());
5025
5026 let default_spann_schema = Schema::new_default(KnnIndex::Spann);
5027 assert!(default_spann_schema.is_default());
5028
5029 let mut modified_schema = Schema::new_default(KnnIndex::Hnsw);
5031 if let Some(ref mut string_type) = modified_schema.defaults.string {
5033 if let Some(ref mut string_inverted) = string_type.string_inverted_index {
5034 string_inverted.enabled = false; }
5036 }
5037 assert!(!modified_schema.is_default());
5038
5039 let mut schema_with_extra_overrides = Schema::new_default(KnnIndex::Hnsw);
5041 schema_with_extra_overrides
5042 .keys
5043 .insert("custom_key".to_string(), ValueTypes::default());
5044 assert!(!schema_with_extra_overrides.is_default());
5045 }
5046
5047 #[test]
5048 fn test_is_schema_default_with_space() {
5049 let schema = Schema::new_default(KnnIndex::Hnsw);
5050 assert!(schema.is_default());
5051
5052 let mut schema_with_space = Schema::new_default(KnnIndex::Hnsw);
5053 if let Some(ref mut float_list) = schema_with_space.defaults.float_list {
5054 if let Some(ref mut vector_index) = float_list.vector_index {
5055 vector_index.config.space = Some(Space::Cosine);
5056 }
5057 }
5058 assert!(!schema_with_space.is_default());
5059
5060 let mut schema_with_space_in_embedding_key = Schema::new_default(KnnIndex::Spann);
5061 if let Some(ref mut embedding_key) = schema_with_space_in_embedding_key
5062 .keys
5063 .get_mut(EMBEDDING_KEY)
5064 {
5065 if let Some(ref mut float_list) = embedding_key.float_list {
5066 if let Some(ref mut vector_index) = float_list.vector_index {
5067 vector_index.config.space = Some(Space::Cosine);
5068 }
5069 }
5070 }
5071 assert!(!schema_with_space_in_embedding_key.is_default());
5072 }
5073
5074 #[test]
5075 fn test_is_schema_default_with_embedding_function() {
5076 let schema = Schema::new_default(KnnIndex::Hnsw);
5077 assert!(schema.is_default());
5078
5079 let mut schema_with_embedding_function = Schema::new_default(KnnIndex::Hnsw);
5080 if let Some(ref mut float_list) = schema_with_embedding_function.defaults.float_list {
5081 if let Some(ref mut vector_index) = float_list.vector_index {
5082 vector_index.config.embedding_function =
5083 Some(EmbeddingFunctionConfiguration::Legacy);
5084 }
5085 }
5086 assert!(!schema_with_embedding_function.is_default());
5087
5088 let mut schema_with_embedding_function_in_embedding_key =
5089 Schema::new_default(KnnIndex::Spann);
5090 if let Some(ref mut embedding_key) = schema_with_embedding_function_in_embedding_key
5091 .keys
5092 .get_mut(EMBEDDING_KEY)
5093 {
5094 if let Some(ref mut float_list) = embedding_key.float_list {
5095 if let Some(ref mut vector_index) = float_list.vector_index {
5096 vector_index.config.embedding_function =
5097 Some(EmbeddingFunctionConfiguration::Legacy);
5098 }
5099 }
5100 }
5101 assert!(!schema_with_embedding_function_in_embedding_key.is_default());
5102 }
5103
5104 #[test]
5105 fn test_add_merges_keys_by_value_type() {
5106 let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
5107 let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
5108
5109 let string_override = ValueTypes {
5110 string: Some(StringValueType {
5111 string_inverted_index: Some(StringInvertedIndexType {
5112 enabled: true,
5113 config: StringInvertedIndexConfig {},
5114 }),
5115 fts_index: None,
5116 }),
5117 ..Default::default()
5118 };
5119 schema_a
5120 .keys
5121 .insert("custom_field".to_string(), string_override);
5122
5123 let float_override = ValueTypes {
5124 float: Some(FloatValueType {
5125 float_inverted_index: Some(FloatInvertedIndexType {
5126 enabled: true,
5127 config: FloatInvertedIndexConfig {},
5128 }),
5129 }),
5130 ..Default::default()
5131 };
5132 schema_b
5133 .keys
5134 .insert("custom_field".to_string(), float_override);
5135
5136 let merged = schema_a.merge(&schema_b).unwrap();
5137 let merged_override = merged.keys.get("custom_field").unwrap();
5138
5139 assert!(merged_override.string.is_some());
5140 assert!(merged_override.float.is_some());
5141 assert!(
5142 merged_override
5143 .string
5144 .as_ref()
5145 .unwrap()
5146 .string_inverted_index
5147 .as_ref()
5148 .unwrap()
5149 .enabled
5150 );
5151 assert!(
5152 merged_override
5153 .float
5154 .as_ref()
5155 .unwrap()
5156 .float_inverted_index
5157 .as_ref()
5158 .unwrap()
5159 .enabled
5160 );
5161 }
5162
5163 #[test]
5164 fn test_add_rejects_different_defaults() {
5165 let schema_a = Schema::new_default(KnnIndex::Hnsw);
5166 let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
5167
5168 if let Some(string_type) = schema_b.defaults.string.as_mut() {
5169 if let Some(string_index) = string_type.string_inverted_index.as_mut() {
5170 string_index.enabled = false;
5171 }
5172 }
5173
5174 let err = schema_a.merge(&schema_b).unwrap_err();
5175 assert!(matches!(err, SchemaError::DefaultsMismatch));
5176 }
5177
5178 #[test]
5179 fn test_add_detects_conflicting_value_type_configuration() {
5180 let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
5181 let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
5182
5183 let string_override_enabled = ValueTypes {
5184 string: Some(StringValueType {
5185 string_inverted_index: Some(StringInvertedIndexType {
5186 enabled: true,
5187 config: StringInvertedIndexConfig {},
5188 }),
5189 fts_index: None,
5190 }),
5191 ..Default::default()
5192 };
5193 schema_a
5194 .keys
5195 .insert("custom_field".to_string(), string_override_enabled);
5196
5197 let string_override_disabled = ValueTypes {
5198 string: Some(StringValueType {
5199 string_inverted_index: Some(StringInvertedIndexType {
5200 enabled: false,
5201 config: StringInvertedIndexConfig {},
5202 }),
5203 fts_index: None,
5204 }),
5205 ..Default::default()
5206 };
5207 schema_b
5208 .keys
5209 .insert("custom_field".to_string(), string_override_disabled);
5210
5211 let err = schema_a.merge(&schema_b).unwrap_err();
5212 assert!(matches!(err, SchemaError::ConfigurationConflict { .. }));
5213 }
5214
5215 #[test]
5217 fn test_backward_compatibility_aliases() {
5218 let old_format_json = r###"{
5220 "defaults": {
5221 "#string": {
5222 "$fts_index": {
5223 "enabled": true,
5224 "config": {}
5225 }
5226 },
5227 "#int": {
5228 "$int_inverted_index": {
5229 "enabled": true,
5230 "config": {}
5231 }
5232 },
5233 "#float_list": {
5234 "$vector_index": {
5235 "enabled": true,
5236 "config": {
5237 "spann": {
5238 "search_nprobe": 10
5239 }
5240 }
5241 }
5242 }
5243 },
5244 "key_overrides": {
5245 "#document": {
5246 "#string": {
5247 "$fts_index": {
5248 "enabled": false,
5249 "config": {}
5250 }
5251 }
5252 }
5253 }
5254 }"###;
5255
5256 let schema_from_old: Schema = serde_json::from_str(old_format_json).unwrap();
5257
5258 let new_format_json = r###"{
5260 "defaults": {
5261 "string": {
5262 "fts_index": {
5263 "enabled": true,
5264 "config": {}
5265 }
5266 },
5267 "int": {
5268 "int_inverted_index": {
5269 "enabled": true,
5270 "config": {}
5271 }
5272 },
5273 "float_list": {
5274 "vector_index": {
5275 "enabled": true,
5276 "config": {
5277 "spann": {
5278 "search_nprobe": 10
5279 }
5280 }
5281 }
5282 }
5283 },
5284 "keys": {
5285 "#document": {
5286 "string": {
5287 "fts_index": {
5288 "enabled": false,
5289 "config": {}
5290 }
5291 }
5292 }
5293 }
5294 }"###;
5295
5296 let schema_from_new: Schema = serde_json::from_str(new_format_json).unwrap();
5297
5298 assert_eq!(schema_from_old, schema_from_new);
5300
5301 assert!(schema_from_old.defaults.string.is_some());
5303 assert!(schema_from_old
5304 .defaults
5305 .string
5306 .as_ref()
5307 .unwrap()
5308 .fts_index
5309 .is_some());
5310 assert!(
5311 schema_from_old
5312 .defaults
5313 .string
5314 .as_ref()
5315 .unwrap()
5316 .fts_index
5317 .as_ref()
5318 .unwrap()
5319 .enabled
5320 );
5321
5322 assert!(schema_from_old.defaults.int.is_some());
5323 assert!(schema_from_old
5324 .defaults
5325 .int
5326 .as_ref()
5327 .unwrap()
5328 .int_inverted_index
5329 .is_some());
5330
5331 assert!(schema_from_old.defaults.float_list.is_some());
5332 assert!(schema_from_old
5333 .defaults
5334 .float_list
5335 .as_ref()
5336 .unwrap()
5337 .vector_index
5338 .is_some());
5339
5340 assert!(schema_from_old.keys.contains_key(DOCUMENT_KEY));
5341 let doc_override = schema_from_old.keys.get(DOCUMENT_KEY).unwrap();
5342 assert!(doc_override.string.is_some());
5343 assert!(
5344 !doc_override
5345 .string
5346 .as_ref()
5347 .unwrap()
5348 .fts_index
5349 .as_ref()
5350 .unwrap()
5351 .enabled
5352 );
5353
5354 let serialized = serde_json::to_string(&schema_from_old).unwrap();
5356
5357 assert!(serialized.contains(r#""keys":"#));
5359 assert!(serialized.contains(r#""string":"#));
5360 assert!(serialized.contains(r#""fts_index":"#));
5361 assert!(serialized.contains(r#""int_inverted_index":"#));
5362 assert!(serialized.contains(r#""vector_index":"#));
5363
5364 assert!(!serialized.contains(r#""key_overrides":"#));
5366 assert!(!serialized.contains(r###""#string":"###));
5367 assert!(!serialized.contains(r###""$fts_index":"###));
5368 assert!(!serialized.contains(r###""$int_inverted_index":"###));
5369 assert!(!serialized.contains(r###""$vector_index":"###));
5370 }
5371
5372 #[test]
5373 fn test_hnsw_index_config_validation() {
5374 use validator::Validate;
5375
5376 let valid_config = HnswIndexConfig {
5378 batch_size: Some(10),
5379 sync_threshold: Some(100),
5380 ef_construction: Some(100),
5381 max_neighbors: Some(16),
5382 ..Default::default()
5383 };
5384 assert!(valid_config.validate().is_ok());
5385
5386 let invalid_batch_size = HnswIndexConfig {
5388 batch_size: Some(1),
5389 ..Default::default()
5390 };
5391 assert!(invalid_batch_size.validate().is_err());
5392
5393 let invalid_sync_threshold = HnswIndexConfig {
5395 sync_threshold: Some(1),
5396 ..Default::default()
5397 };
5398 assert!(invalid_sync_threshold.validate().is_err());
5399
5400 let boundary_config = HnswIndexConfig {
5402 batch_size: Some(2),
5403 sync_threshold: Some(2),
5404 ..Default::default()
5405 };
5406 assert!(boundary_config.validate().is_ok());
5407
5408 let all_none_config = HnswIndexConfig {
5410 ..Default::default()
5411 };
5412 assert!(all_none_config.validate().is_ok());
5413
5414 let other_fields_config = HnswIndexConfig {
5416 ef_construction: Some(1),
5417 max_neighbors: Some(1),
5418 ef_search: Some(1),
5419 num_threads: Some(1),
5420 resize_factor: Some(0.1),
5421 ..Default::default()
5422 };
5423 assert!(other_fields_config.validate().is_ok());
5424 }
5425
5426 #[test]
5427 fn test_spann_index_config_validation() {
5428 use validator::Validate;
5429
5430 let valid_config = SpannIndexConfig {
5432 write_nprobe: Some(32),
5433 nreplica_count: Some(4),
5434 split_threshold: Some(100),
5435 merge_threshold: Some(50),
5436 reassign_neighbor_count: Some(32),
5437 num_centers_to_merge_to: Some(4),
5438 ef_construction: Some(100),
5439 ef_search: Some(100),
5440 max_neighbors: Some(32),
5441 search_rng_factor: Some(1.0),
5442 write_rng_factor: Some(1.0),
5443 search_rng_epsilon: Some(7.5),
5444 write_rng_epsilon: Some(7.5),
5445 ..Default::default()
5446 };
5447 assert!(valid_config.validate().is_ok());
5448
5449 let invalid_write_nprobe = SpannIndexConfig {
5451 write_nprobe: Some(200),
5452 ..Default::default()
5453 };
5454 assert!(invalid_write_nprobe.validate().is_err());
5455
5456 let invalid_split_threshold = SpannIndexConfig {
5458 split_threshold: Some(10),
5459 ..Default::default()
5460 };
5461 assert!(invalid_split_threshold.validate().is_err());
5462
5463 let invalid_split_threshold_high = SpannIndexConfig {
5465 split_threshold: Some(250),
5466 ..Default::default()
5467 };
5468 assert!(invalid_split_threshold_high.validate().is_err());
5469
5470 let invalid_nreplica = SpannIndexConfig {
5472 nreplica_count: Some(10),
5473 ..Default::default()
5474 };
5475 assert!(invalid_nreplica.validate().is_err());
5476
5477 let invalid_reassign = SpannIndexConfig {
5479 reassign_neighbor_count: Some(100),
5480 ..Default::default()
5481 };
5482 assert!(invalid_reassign.validate().is_err());
5483
5484 let invalid_merge_threshold_low = SpannIndexConfig {
5486 merge_threshold: Some(5),
5487 ..Default::default()
5488 };
5489 assert!(invalid_merge_threshold_low.validate().is_err());
5490
5491 let invalid_merge_threshold_high = SpannIndexConfig {
5492 merge_threshold: Some(150),
5493 ..Default::default()
5494 };
5495 assert!(invalid_merge_threshold_high.validate().is_err());
5496
5497 let invalid_num_centers = SpannIndexConfig {
5499 num_centers_to_merge_to: Some(10),
5500 ..Default::default()
5501 };
5502 assert!(invalid_num_centers.validate().is_err());
5503
5504 let invalid_ef_construction = SpannIndexConfig {
5506 ef_construction: Some(300),
5507 ..Default::default()
5508 };
5509 assert!(invalid_ef_construction.validate().is_err());
5510
5511 let invalid_ef_search = SpannIndexConfig {
5513 ef_search: Some(300),
5514 ..Default::default()
5515 };
5516 assert!(invalid_ef_search.validate().is_err());
5517
5518 let invalid_max_neighbors = SpannIndexConfig {
5520 max_neighbors: Some(100),
5521 ..Default::default()
5522 };
5523 assert!(invalid_max_neighbors.validate().is_err());
5524
5525 let invalid_search_nprobe = SpannIndexConfig {
5527 search_nprobe: Some(200),
5528 ..Default::default()
5529 };
5530 assert!(invalid_search_nprobe.validate().is_err());
5531
5532 let invalid_search_rng_factor_low = SpannIndexConfig {
5534 search_rng_factor: Some(0.9),
5535 ..Default::default()
5536 };
5537 assert!(invalid_search_rng_factor_low.validate().is_err());
5538
5539 let invalid_search_rng_factor_high = SpannIndexConfig {
5540 search_rng_factor: Some(1.1),
5541 ..Default::default()
5542 };
5543 assert!(invalid_search_rng_factor_high.validate().is_err());
5544
5545 let valid_search_rng_factor = SpannIndexConfig {
5547 search_rng_factor: Some(1.0),
5548 ..Default::default()
5549 };
5550 assert!(valid_search_rng_factor.validate().is_ok());
5551
5552 let invalid_search_rng_epsilon_low = SpannIndexConfig {
5554 search_rng_epsilon: Some(4.0),
5555 ..Default::default()
5556 };
5557 assert!(invalid_search_rng_epsilon_low.validate().is_err());
5558
5559 let invalid_search_rng_epsilon_high = SpannIndexConfig {
5560 search_rng_epsilon: Some(11.0),
5561 ..Default::default()
5562 };
5563 assert!(invalid_search_rng_epsilon_high.validate().is_err());
5564
5565 let valid_search_rng_epsilon = SpannIndexConfig {
5567 search_rng_epsilon: Some(7.5),
5568 ..Default::default()
5569 };
5570 assert!(valid_search_rng_epsilon.validate().is_ok());
5571
5572 let invalid_write_rng_factor_low = SpannIndexConfig {
5574 write_rng_factor: Some(0.9),
5575 ..Default::default()
5576 };
5577 assert!(invalid_write_rng_factor_low.validate().is_err());
5578
5579 let invalid_write_rng_factor_high = SpannIndexConfig {
5580 write_rng_factor: Some(1.1),
5581 ..Default::default()
5582 };
5583 assert!(invalid_write_rng_factor_high.validate().is_err());
5584
5585 let valid_write_rng_factor = SpannIndexConfig {
5587 write_rng_factor: Some(1.0),
5588 ..Default::default()
5589 };
5590 assert!(valid_write_rng_factor.validate().is_ok());
5591
5592 let invalid_write_rng_epsilon_low = SpannIndexConfig {
5594 write_rng_epsilon: Some(4.0),
5595 ..Default::default()
5596 };
5597 assert!(invalid_write_rng_epsilon_low.validate().is_err());
5598
5599 let invalid_write_rng_epsilon_high = SpannIndexConfig {
5600 write_rng_epsilon: Some(11.0),
5601 ..Default::default()
5602 };
5603 assert!(invalid_write_rng_epsilon_high.validate().is_err());
5604
5605 let valid_write_rng_epsilon = SpannIndexConfig {
5607 write_rng_epsilon: Some(7.5),
5608 ..Default::default()
5609 };
5610 assert!(valid_write_rng_epsilon.validate().is_ok());
5611
5612 let invalid_num_samples_kmeans = SpannIndexConfig {
5614 num_samples_kmeans: Some(1500),
5615 ..Default::default()
5616 };
5617 assert!(invalid_num_samples_kmeans.validate().is_err());
5618
5619 let valid_num_samples_kmeans = SpannIndexConfig {
5621 num_samples_kmeans: Some(500),
5622 ..Default::default()
5623 };
5624 assert!(valid_num_samples_kmeans.validate().is_ok());
5625
5626 let invalid_initial_lambda_high = SpannIndexConfig {
5628 initial_lambda: Some(150.0),
5629 ..Default::default()
5630 };
5631 assert!(invalid_initial_lambda_high.validate().is_err());
5632
5633 let invalid_initial_lambda_low = SpannIndexConfig {
5634 initial_lambda: Some(50.0),
5635 ..Default::default()
5636 };
5637 assert!(invalid_initial_lambda_low.validate().is_err());
5638
5639 let valid_initial_lambda = SpannIndexConfig {
5641 initial_lambda: Some(100.0),
5642 ..Default::default()
5643 };
5644 assert!(valid_initial_lambda.validate().is_ok());
5645
5646 let all_none_config = SpannIndexConfig {
5648 ..Default::default()
5649 };
5650 assert!(all_none_config.validate().is_ok());
5651 }
5652
5653 #[test]
5654 fn test_builder_pattern_crud_workflow() {
5655 let schema = Schema::new_default(KnnIndex::Hnsw)
5659 .create_index(
5660 None,
5661 IndexConfig::Vector(VectorIndexConfig {
5662 space: Some(Space::Cosine),
5663 embedding_function: None,
5664 source_key: None,
5665 hnsw: Some(HnswIndexConfig {
5666 ef_construction: Some(200),
5667 max_neighbors: Some(32),
5668 ef_search: Some(50),
5669 num_threads: None,
5670 batch_size: None,
5671 sync_threshold: None,
5672 resize_factor: None,
5673 }),
5674 spann: None,
5675 }),
5676 )
5677 .expect("vector config should succeed")
5678 .create_index(
5679 Some("category"),
5680 IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5681 )
5682 .expect("string inverted on key should succeed")
5683 .create_index(
5684 Some("year"),
5685 IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5686 )
5687 .expect("int inverted on key should succeed")
5688 .create_index(
5689 Some("rating"),
5690 IndexConfig::FloatInverted(FloatInvertedIndexConfig {}),
5691 )
5692 .expect("float inverted on key should succeed")
5693 .create_index(
5694 Some("is_active"),
5695 IndexConfig::BoolInverted(BoolInvertedIndexConfig {}),
5696 )
5697 .expect("bool inverted on key should succeed");
5698
5699 assert!(schema.keys.contains_key(EMBEDDING_KEY));
5702 let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
5703 assert!(embedding.float_list.is_some());
5704 let vector_index = embedding
5705 .float_list
5706 .as_ref()
5707 .unwrap()
5708 .vector_index
5709 .as_ref()
5710 .unwrap();
5711 assert!(vector_index.enabled);
5712 assert_eq!(vector_index.config.space, Some(Space::Cosine));
5713 assert_eq!(
5714 vector_index.config.hnsw.as_ref().unwrap().ef_construction,
5715 Some(200)
5716 );
5717
5718 assert!(schema.keys.contains_key("category"));
5720 assert!(schema.keys.contains_key("year"));
5721 assert!(schema.keys.contains_key("rating"));
5722 assert!(schema.keys.contains_key("is_active"));
5723
5724 let category = schema.keys.get("category").unwrap();
5726 assert!(category.string.is_some());
5727 let string_idx = category
5728 .string
5729 .as_ref()
5730 .unwrap()
5731 .string_inverted_index
5732 .as_ref()
5733 .unwrap();
5734 assert!(string_idx.enabled);
5735
5736 let year = schema.keys.get("year").unwrap();
5738 assert!(year.int.is_some());
5739 let int_idx = year
5740 .int
5741 .as_ref()
5742 .unwrap()
5743 .int_inverted_index
5744 .as_ref()
5745 .unwrap();
5746 assert!(int_idx.enabled);
5747
5748 let schema = schema
5750 .delete_index(
5751 Some("category"),
5752 IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5753 )
5754 .expect("delete string inverted should succeed")
5755 .delete_index(
5756 Some("year"),
5757 IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5758 )
5759 .expect("delete int inverted should succeed");
5760
5761 let category = schema.keys.get("category").unwrap();
5763 let string_idx = category
5764 .string
5765 .as_ref()
5766 .unwrap()
5767 .string_inverted_index
5768 .as_ref()
5769 .unwrap();
5770 assert!(!string_idx.enabled); let year = schema.keys.get("year").unwrap();
5773 let int_idx = year
5774 .int
5775 .as_ref()
5776 .unwrap()
5777 .int_inverted_index
5778 .as_ref()
5779 .unwrap();
5780 assert!(!int_idx.enabled); let rating = schema.keys.get("rating").unwrap();
5784 let float_idx = rating
5785 .float
5786 .as_ref()
5787 .unwrap()
5788 .float_inverted_index
5789 .as_ref()
5790 .unwrap();
5791 assert!(float_idx.enabled); let is_active = schema.keys.get("is_active").unwrap();
5794 let bool_idx = is_active
5795 .boolean
5796 .as_ref()
5797 .unwrap()
5798 .bool_inverted_index
5799 .as_ref()
5800 .unwrap();
5801 assert!(bool_idx.enabled); }
5803
/// Exercises the validation rules applied by `Schema::create_index`.
///
/// Every scenario starts from a fresh default HNSW schema and checks either
/// the exact `SchemaBuilderError` variant that comes back or, for the one
/// accepted case (FTS on the document key), that FTS ends up enabled.
#[test]
fn test_builder_create_index_validation_errors() {
    // Produces an all-`None` sparse vector request; used several times below.
    let blank_sparse = || {
        IndexConfig::SparseVector(SparseVectorIndexConfig {
            embedding_function: None,
            source_key: None,
            bm25: None,
        })
    };

    // A vector index on a user-chosen key is rejected and the key is reported.
    let vector_on_custom_key = Schema::new_default(KnnIndex::Hnsw).create_index(
        Some("my_vectors"),
        IndexConfig::Vector(VectorIndexConfig {
            spann: None,
            hnsw: None,
            source_key: None,
            embedding_function: None,
            space: Some(Space::L2),
        }),
    );
    assert!(matches!(
        vector_on_custom_key,
        Err(SchemaBuilderError::VectorIndexMustBeGlobal { key }) if key == "my_vectors"
    ));

    // FTS on a key other than the document key is rejected...
    let fts_on_custom_key = Schema::new_default(KnnIndex::Hnsw)
        .create_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
    assert!(matches!(
        fts_on_custom_key,
        Err(SchemaBuilderError::FtsIndexOnlyOnDocument)
    ));

    // ...while FTS on the document key itself is accepted.
    let with_fts = Schema::new_default(KnnIndex::Hnsw)
        .create_index(Some(DOCUMENT_KEY), IndexConfig::Fts(FtsIndexConfig {}))
        .expect("FTS on #document should succeed");
    assert!(with_fts.is_fts_enabled());

    // Inverted indexes may not be created on the special keys.
    let string_on_document = Schema::new_default(KnnIndex::Hnsw).create_index(
        Some(DOCUMENT_KEY),
        IndexConfig::StringInverted(StringInvertedIndexConfig {}),
    );
    assert!(matches!(
        string_on_document,
        Err(SchemaBuilderError::SpecialKeyModificationNotAllowed { .. })
    ));

    let int_on_embedding = Schema::new_default(KnnIndex::Hnsw).create_index(
        Some(EMBEDDING_KEY),
        IndexConfig::IntInverted(IntInvertedIndexConfig {}),
    );
    assert!(matches!(
        int_on_embedding,
        Err(SchemaBuilderError::SpecialKeyModificationNotAllowed { .. })
    ));

    // A sparse vector index without a key is rejected.
    let keyless_sparse = Schema::new_default(KnnIndex::Hnsw).create_index(None, blank_sparse());
    assert!(matches!(
        keyless_sparse,
        Err(SchemaBuilderError::SparseVectorRequiresKey)
    ));

    // A second sparse vector index is rejected and the first key is reported.
    let second_sparse = Schema::new_default(KnnIndex::Hnsw)
        .create_index(Some("sparse1"), blank_sparse())
        .expect("first sparse should succeed")
        .create_index(Some("sparse2"), blank_sparse());
    assert!(matches!(
        second_sparse,
        Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key })
            if existing_key == "sparse1"
    ));
}
5905
/// Exercises the validation rules applied by `Schema::delete_index`.
///
/// Covers the rejected deletions (special keys, vector index, sparse vector
/// index) and the one accepted deletion in this test: FTS on the document key.
#[test]
fn test_builder_delete_index_validation_errors() {
    // Inverted indexes on the special keys cannot be deleted.
    let string_on_embedding = Schema::new_default(KnnIndex::Hnsw).delete_index(
        Some(EMBEDDING_KEY),
        IndexConfig::StringInverted(StringInvertedIndexConfig {}),
    );
    assert!(matches!(
        string_on_embedding,
        Err(SchemaBuilderError::SpecialKeyModificationNotAllowed { .. })
    ));

    let int_on_document = Schema::new_default(KnnIndex::Hnsw).delete_index(
        Some(DOCUMENT_KEY),
        IndexConfig::IntInverted(IntInvertedIndexConfig {}),
    );
    assert!(matches!(
        int_on_document,
        Err(SchemaBuilderError::SpecialKeyModificationNotAllowed { .. })
    ));

    // Deleting the (keyless) vector index is not supported.
    let unset_vector = VectorIndexConfig {
        spann: None,
        hnsw: None,
        source_key: None,
        embedding_function: None,
        space: None,
    };
    let vector_deletion = Schema::new_default(KnnIndex::Hnsw)
        .delete_index(None, IndexConfig::Vector(unset_vector));
    assert!(matches!(
        vector_deletion,
        Err(SchemaBuilderError::VectorIndexDeletionNotSupported)
    ));

    // Deleting FTS on the document key succeeds and turns FTS off.
    let without_fts = Schema::new_default(KnnIndex::Hnsw)
        .delete_index(Some(DOCUMENT_KEY), IndexConfig::Fts(FtsIndexConfig {}))
        .expect("FTS deletion should succeed");
    assert!(!without_fts.is_fts_enabled());

    // A sparse vector index can be created but not deleted afterwards.
    let blank_sparse = || {
        IndexConfig::SparseVector(SparseVectorIndexConfig {
            embedding_function: None,
            source_key: None,
            bm25: None,
        })
    };
    let sparse_deletion = Schema::new_default(KnnIndex::Hnsw)
        .create_index(Some("sparse"), blank_sparse())
        .expect("create should succeed")
        .delete_index(Some("sparse"), blank_sparse());
    assert!(matches!(
        sparse_deletion,
        Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported)
    ));
}
5982
/// Creating an FTS index without naming a key must fail with
/// `FtsIndexOnlyOnDocument`.
#[test]
fn test_fts_create_global_without_key_rejected() {
    let fts_request = IndexConfig::Fts(FtsIndexConfig {});
    let outcome = Schema::new_default(KnnIndex::Hnsw).create_index(None, fts_request);

    assert!(matches!(
        outcome,
        Err(SchemaBuilderError::FtsIndexOnlyOnDocument)
    ));
}
5994
/// Deleting an FTS index without naming a key must fail with
/// `FtsIndexDeletionOnlyOnDocument`.
#[test]
fn test_fts_delete_global_without_key_rejected() {
    let fts_request = IndexConfig::Fts(FtsIndexConfig {});
    let outcome = Schema::new_default(KnnIndex::Hnsw).delete_index(None, fts_request);

    assert!(matches!(
        outcome,
        Err(SchemaBuilderError::FtsIndexDeletionOnlyOnDocument)
    ));
}
6006
/// Deleting an FTS index on a user-chosen key must fail with
/// `FtsIndexDeletionOnlyOnDocument`.
#[test]
fn test_fts_delete_on_custom_key_rejected() {
    let fts_request = IndexConfig::Fts(FtsIndexConfig {});
    let outcome = Schema::new_default(KnnIndex::Hnsw).delete_index(Some("my_text"), fts_request);

    assert!(matches!(
        outcome,
        Err(SchemaBuilderError::FtsIndexDeletionOnlyOnDocument)
    ));
}
6018
/// Creating an index on a `#`-prefixed custom key must fail with
/// `ReservedKeyPrefix`, carrying the offending key.
#[test]
fn test_reserved_key_prefix_create_index() {
    let string_index = IndexConfig::StringInverted(StringInvertedIndexConfig {});
    let outcome =
        Schema::new_default(KnnIndex::Hnsw).create_index(Some("#custom_field"), string_index);

    assert!(matches!(
        outcome,
        Err(SchemaBuilderError::ReservedKeyPrefix { key }) if key == "#custom_field"
    ));
}
6032
/// Deleting an index on a `#`-prefixed custom key must fail with
/// `ReservedKeyPrefix`, carrying the offending key.
#[test]
fn test_reserved_key_prefix_delete_index() {
    let string_index = IndexConfig::StringInverted(StringInvertedIndexConfig {});
    let outcome =
        Schema::new_default(KnnIndex::Hnsw).delete_index(Some("#custom_field"), string_index);

    assert!(matches!(
        outcome,
        Err(SchemaBuilderError::ReservedKeyPrefix { key }) if key == "#custom_field"
    ));
}
6046
/// `is_fts_enabled` must report `true` both for the default HNSW schema and
/// for a schema with empty defaults and no key overrides at all.
#[test]
fn test_is_fts_enabled_backward_compatibility() {
    assert!(Schema::new_default(KnnIndex::Hnsw).is_fts_enabled());

    // Hand-built schema with nothing configured.
    let bare = Schema {
        source_attached_function_id: None,
        cmek: None,
        keys: HashMap::new(),
        defaults: ValueTypes::default(),
    };
    assert!(bare.is_fts_enabled());
}
6062
/// After deleting the FTS index on the document key, `is_fts_enabled` must
/// report `false`.
#[test]
fn test_is_fts_enabled_after_disable() {
    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let without_fts = default_schema
        .delete_index(Some(DOCUMENT_KEY), IndexConfig::Fts(FtsIndexConfig {}))
        .expect("FTS deletion should succeed");

    assert!(!without_fts.is_fts_enabled());
}
6071
/// Deleting and then re-creating the FTS index on the document key must leave
/// FTS enabled.
#[test]
fn test_is_fts_enabled_after_reenable() {
    let without_fts = Schema::new_default(KnnIndex::Hnsw)
        .delete_index(Some(DOCUMENT_KEY), IndexConfig::Fts(FtsIndexConfig {}))
        .expect("FTS deletion should succeed");
    let restored = without_fts
        .create_index(Some(DOCUMENT_KEY), IndexConfig::Fts(FtsIndexConfig {}))
        .expect("FTS creation should succeed");

    assert!(restored.is_fts_enabled());
}
6082
/// With FTS deleted, validating a document `Contains` filter must fail with
/// `FilterValidationError::FtsDisabled`.
#[test]
fn test_fts_disabled_blocks_where_document_validation() {
    use crate::{DocumentExpression, DocumentOperator};

    let without_fts = Schema::new_default(KnnIndex::Hnsw)
        .delete_index(Some(DOCUMENT_KEY), IndexConfig::Fts(FtsIndexConfig {}))
        .expect("FTS deletion should succeed");

    let contains_filter = Where::Document(DocumentExpression {
        pattern: String::from("test query"),
        operator: DocumentOperator::Contains,
    });

    let verdict = without_fts.is_metadata_where_indexing_enabled(&contains_filter);
    assert!(matches!(verdict, Err(FilterValidationError::FtsDisabled)));
}
6104
/// On the default schema, validating a document `Contains` filter must
/// succeed.
#[test]
fn test_fts_enabled_allows_where_document_validation() {
    use crate::{DocumentExpression, DocumentOperator};

    let contains_filter = Where::Document(DocumentExpression {
        pattern: String::from("test query"),
        operator: DocumentOperator::Contains,
    });

    let default_schema = Schema::new_default(KnnIndex::Hnsw);
    let verdict = default_schema.is_metadata_where_indexing_enabled(&contains_filter);
    assert!(verdict.is_ok());
}
6120
/// Applies a sequence of create/delete calls and checks the resulting
/// per-key `enabled` flags: everything created stays enabled except `tag2`,
/// which is deleted midway.
#[test]
fn test_builder_pattern_chaining() {
    // Same operations, in the same order, as one long builder chain.
    let mut schema = Schema::new_default(KnnIndex::Hnsw);
    for tag in ["tag1", "tag2", "tag3"] {
        schema = schema
            .create_index(Some(tag), StringInvertedIndexConfig {}.into())
            .unwrap();
    }
    schema = schema
        .create_index(Some("count"), IntInvertedIndexConfig {}.into())
        .unwrap();
    schema = schema
        .delete_index(Some("tag2"), StringInvertedIndexConfig {}.into())
        .unwrap();
    schema = schema
        .create_index(Some("score"), FloatInvertedIndexConfig {}.into())
        .unwrap();

    // Small accessors that drill down to the `enabled` flag of one key.
    let string_enabled = |key: &str| {
        let overrides = schema.keys.get(key).unwrap();
        let string = overrides.string.as_ref().unwrap();
        string.string_inverted_index.as_ref().unwrap().enabled
    };
    let int_enabled = |key: &str| {
        let overrides = schema.keys.get(key).unwrap();
        let int = overrides.int.as_ref().unwrap();
        int.int_inverted_index.as_ref().unwrap().enabled
    };
    let float_enabled = |key: &str| {
        let overrides = schema.keys.get(key).unwrap();
        let float = overrides.float.as_ref().unwrap();
        float.float_inverted_index.as_ref().unwrap().enabled
    };

    assert!(string_enabled("tag1"));
    assert!(!string_enabled("tag2"));
    assert!(string_enabled("tag3"));
    assert!(int_enabled("count"));
    assert!(float_enabled("score"));
}
6213
/// Pins the exact shape of `Schema::default()`.
///
/// NOTE(review): per the test name these values are meant to mirror the
/// Python client's default schema; that correspondence cannot be confirmed
/// from this file.
#[test]
fn test_schema_default_matches_python() {
    let schema = Schema::default();
    let defaults = &schema.defaults;

    // String defaults: FTS off, inverted index on.
    let string_defaults = defaults.string.as_ref().unwrap();
    assert!(!string_defaults.fts_index.as_ref().unwrap().enabled);
    assert!(string_defaults.string_inverted_index.as_ref().unwrap().enabled);

    // Float-list defaults: vector index off with an entirely unset config.
    let default_vector = defaults
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert!(!default_vector.enabled);
    assert!(default_vector.config.space.is_none());
    assert!(default_vector.config.hnsw.is_none());
    assert!(default_vector.config.spann.is_none());
    assert!(default_vector.config.source_key.is_none());

    // Sparse vector defaults: index off.
    let default_sparse = defaults.sparse_vector.as_ref().unwrap();
    assert!(!default_sparse.sparse_vector_index.as_ref().unwrap().enabled);

    // Scalar defaults: every inverted index on.
    let default_int = defaults.int.as_ref().unwrap();
    assert!(default_int.int_inverted_index.as_ref().unwrap().enabled);

    let default_float = defaults.float.as_ref().unwrap();
    assert!(default_float.float_inverted_index.as_ref().unwrap().enabled);

    let default_bool = defaults.boolean.as_ref().unwrap();
    assert!(default_bool.bool_inverted_index.as_ref().unwrap().enabled);

    // Document key override: FTS on, string inverted index off.
    let document_string = schema
        .keys
        .get(DOCUMENT_KEY)
        .unwrap()
        .string
        .as_ref()
        .unwrap();
    assert!(document_string.fts_index.as_ref().unwrap().enabled);
    assert!(!document_string.string_inverted_index.as_ref().unwrap().enabled);

    // Embedding key override: vector index on, sourced from the document key,
    // with no space / HNSW / SPANN settings.
    let embedding_vector = schema
        .keys
        .get(EMBEDDING_KEY)
        .unwrap()
        .float_list
        .as_ref()
        .unwrap()
        .vector_index
        .as_ref()
        .unwrap();
    assert!(embedding_vector.enabled);
    assert_eq!(
        embedding_vector.config.source_key.as_deref(),
        Some(DOCUMENT_KEY)
    );
    assert!(embedding_vector.config.space.is_none());
    assert!(embedding_vector.config.hnsw.is_none());
    assert!(embedding_vector.config.spann.is_none());

    // Only the two special keys carry overrides.
    assert_eq!(schema.keys.len(), 2);
}
6333
/// `Schema::default()` can be extended through the builder: adding one custom
/// key yields exactly three key overrides (the custom key plus the two
/// special keys).
#[test]
fn test_schema_default_works_with_builder() {
    let extended = Schema::default()
        .create_index(Some("category"), StringInvertedIndexConfig {}.into())
        .expect("should succeed");

    for expected_key in ["category", DOCUMENT_KEY, EMBEDDING_KEY] {
        assert!(extended.keys.contains_key(expected_key));
    }
    assert_eq!(extended.keys.len(), 3);
}
6347
6348 #[cfg(feature = "testing")]
6349 mod proptests {
6350 use super::*;
6351 use crate::strategies::{
6352 embedding_function_strategy, internal_collection_configuration_strategy,
6353 internal_hnsw_configuration_strategy, internal_spann_configuration_strategy,
6354 knn_index_strategy, space_strategy, TEST_NAME_PATTERN,
6355 };
6356 use crate::{
6357 HnswIndexConfig, SpannIndexConfig, VectorIndexConfig, DOCUMENT_KEY, EMBEDDING_KEY,
6358 };
6359 use proptest::prelude::*;
6360 use proptest::strategy::BoxedStrategy;
6361 use proptest::string::string_regex;
6362 use serde_json::json;
6363
6364 fn default_embedding_function_strategy(
6365 ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
6366 proptest::option::of(prop_oneof![
6367 Just(EmbeddingFunctionConfiguration::Unknown),
6368 Just(EmbeddingFunctionConfiguration::Known(
6369 EmbeddingFunctionNewConfiguration {
6370 name: "default".to_string(),
6371 config: json!({ "alpha": 1 }),
6372 }
6373 )),
6374 ])
6375 }
6376
6377 fn sparse_embedding_function_strategy(
6378 ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
6379 let known_strategy = string_regex(TEST_NAME_PATTERN).unwrap().prop_map(|name| {
6380 EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
6381 name,
6382 config: json!({ "alpha": 1 }),
6383 })
6384 });
6385
6386 proptest::option::of(prop_oneof![
6387 Just(EmbeddingFunctionConfiguration::Unknown),
6388 known_strategy,
6389 ])
6390 }
6391
6392 fn non_default_internal_collection_configuration_strategy(
6393 ) -> impl Strategy<Value = InternalCollectionConfiguration> {
6394 internal_collection_configuration_strategy()
6395 .prop_filter("non-default configuration", |config| !config.is_default())
6396 }
6397
6398 fn partial_hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
6399 (
6400 proptest::option::of(1usize..=512),
6401 proptest::option::of(1usize..=128),
6402 proptest::option::of(1usize..=512),
6403 proptest::option::of(1usize..=64),
6404 proptest::option::of(2usize..=4096),
6405 proptest::option::of(2usize..=4096),
6406 proptest::option::of(prop_oneof![
6407 Just(0.5f64),
6408 Just(1.0f64),
6409 Just(1.5f64),
6410 Just(2.0f64)
6411 ]),
6412 )
6413 .prop_map(
6414 |(
6415 ef_construction,
6416 max_neighbors,
6417 ef_search,
6418 num_threads,
6419 batch_size,
6420 sync_threshold,
6421 resize_factor,
6422 )| HnswIndexConfig {
6423 ef_construction,
6424 max_neighbors,
6425 ef_search,
6426 num_threads,
6427 batch_size,
6428 sync_threshold,
6429 resize_factor,
6430 },
6431 )
6432 }
6433
6434 fn partial_spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
6435 let epsilon_strategy = prop_oneof![Just(5.0f32), Just(7.5f32), Just(10.0f32)];
6436 (
6437 (
6438 proptest::option::of(1u32..=128), proptest::option::of(Just(1.0f32)), proptest::option::of(epsilon_strategy.clone()), proptest::option::of(1u32..=8), proptest::option::of(Just(1.0f32)), proptest::option::of(epsilon_strategy), proptest::option::of(50u32..=200), proptest::option::of(1usize..=1000), ),
6447 (
6448 proptest::option::of(Just(100.0f32)), proptest::option::of(1u32..=64), proptest::option::of(25u32..=100), proptest::option::of(1u32..=8), proptest::option::of(1u32..=64), proptest::option::of(1usize..=200), proptest::option::of(1usize..=200), proptest::option::of(1usize..=64), ),
6457 )
6458 .prop_map(
6459 |(
6460 (
6461 search_nprobe,
6462 search_rng_factor,
6463 search_rng_epsilon,
6464 nreplica_count,
6465 write_rng_factor,
6466 write_rng_epsilon,
6467 split_threshold,
6468 num_samples_kmeans,
6469 ),
6470 (
6471 initial_lambda,
6472 reassign_neighbor_count,
6473 merge_threshold,
6474 num_centers_to_merge_to,
6475 write_nprobe,
6476 ef_construction,
6477 ef_search,
6478 max_neighbors,
6479 ),
6480 )| SpannIndexConfig {
6481 search_nprobe,
6482 search_rng_factor,
6483 search_rng_epsilon,
6484 nreplica_count,
6485 write_rng_factor,
6486 write_rng_epsilon,
6487 split_threshold,
6488 num_samples_kmeans,
6489 initial_lambda,
6490 reassign_neighbor_count,
6491 merge_threshold,
6492 num_centers_to_merge_to,
6493 write_nprobe,
6494 ef_construction,
6495 ef_search,
6496 max_neighbors,
6497 center_drift_threshold: None,
6498 quantize: Quantization::None,
6499 },
6500 )
6501 }
6502
6503 proptest! {
6504 #[test]
6505 fn merge_hnsw_configs_preserves_user_overrides(
6506 base in partial_hnsw_index_config_strategy(),
6507 user in partial_hnsw_index_config_strategy(),
6508 ) {
6509 let merged = Schema::merge_hnsw_configs(Some(&base), Some(&user))
6510 .expect("merge should return Some when both are Some");
6511
6512 if user.ef_construction.is_some() {
6514 prop_assert_eq!(merged.ef_construction, user.ef_construction);
6515 }
6516 if user.max_neighbors.is_some() {
6517 prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
6518 }
6519 if user.ef_search.is_some() {
6520 prop_assert_eq!(merged.ef_search, user.ef_search);
6521 }
6522 if user.num_threads.is_some() {
6523 prop_assert_eq!(merged.num_threads, user.num_threads);
6524 }
6525 if user.batch_size.is_some() {
6526 prop_assert_eq!(merged.batch_size, user.batch_size);
6527 }
6528 if user.sync_threshold.is_some() {
6529 prop_assert_eq!(merged.sync_threshold, user.sync_threshold);
6530 }
6531 if user.resize_factor.is_some() {
6532 prop_assert_eq!(merged.resize_factor, user.resize_factor);
6533 }
6534 }
6535
6536 #[test]
6537 fn merge_hnsw_configs_falls_back_to_base_when_user_is_none(
6538 base in partial_hnsw_index_config_strategy(),
6539 ) {
6540 let merged = Schema::merge_hnsw_configs(Some(&base), None)
6541 .expect("merge should return Some when base is Some");
6542
6543 prop_assert_eq!(merged, base);
6545 }
6546
6547 #[test]
6548 fn merge_hnsw_configs_returns_user_when_base_is_none(
6549 user in partial_hnsw_index_config_strategy(),
6550 ) {
6551 let merged = Schema::merge_hnsw_configs(None, Some(&user))
6552 .expect("merge should return Some when user is Some");
6553
6554 prop_assert_eq!(merged, user);
6556 }
6557
6558 #[test]
6559 fn merge_spann_configs_preserves_user_overrides(
6560 base in partial_spann_index_config_strategy(),
6561 user in partial_spann_index_config_strategy(),
6562 ) {
6563 let merged = Schema::merge_spann_configs(Some(&base), Some(&user))
6564 .expect("merge should return Ok")
6565 .expect("merge should return Some when both are Some");
6566
6567 if user.search_nprobe.is_some() {
6569 prop_assert_eq!(merged.search_nprobe, user.search_nprobe);
6570 }
6571 if user.search_rng_epsilon.is_some() {
6572 prop_assert_eq!(merged.search_rng_epsilon, user.search_rng_epsilon);
6573 }
6574 if user.split_threshold.is_some() {
6575 prop_assert_eq!(merged.split_threshold, user.split_threshold);
6576 }
6577 if user.ef_construction.is_some() {
6578 prop_assert_eq!(merged.ef_construction, user.ef_construction);
6579 }
6580 if user.ef_search.is_some() {
6581 prop_assert_eq!(merged.ef_search, user.ef_search);
6582 }
6583 if user.max_neighbors.is_some() {
6584 prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
6585 }
6586 }
6587
6588 #[test]
6589 fn merge_spann_configs_falls_back_to_base_when_user_is_none(
6590 base in partial_spann_index_config_strategy(),
6591 ) {
6592 let merged = Schema::merge_spann_configs(Some(&base), None)
6593 .expect("merge should return Ok")
6594 .expect("merge should return Some when base is Some");
6595
6596 prop_assert_eq!(merged, base);
6598 }
6599
6600 #[test]
6601 fn merge_vector_index_config_preserves_user_overrides(
6602 base in vector_index_config_strategy(),
6603 user in vector_index_config_strategy(),
6604 knn in knn_index_strategy(),
6605 ) {
6606 let merged = Schema::merge_vector_index_config(&base, &user, knn)
6607 .expect("merge should succeed");
6608
6609 if user.space.is_some() {
6611 prop_assert_eq!(merged.space, user.space);
6612 }
6613 if user.embedding_function.is_some() {
6614 prop_assert_eq!(merged.embedding_function, user.embedding_function);
6615 }
6616 if user.source_key.is_some() {
6617 prop_assert_eq!(merged.source_key, user.source_key);
6618 }
6619
6620 match knn {
6622 KnnIndex::Hnsw => {
6623 if let (Some(_base_hnsw), Some(user_hnsw)) = (&base.hnsw, &user.hnsw) {
6624 let merged_hnsw = merged.hnsw.as_ref().expect("hnsw should be Some");
6625 if user_hnsw.ef_construction.is_some() {
6626 prop_assert_eq!(merged_hnsw.ef_construction, user_hnsw.ef_construction);
6627 }
6628 }
6629 }
6630 KnnIndex::Spann => {
6631 if let (Some(_base_spann), Some(user_spann)) = (&base.spann, &user.spann) {
6632 let merged_spann = merged.spann.as_ref().expect("spann should be Some");
6633 if user_spann.search_nprobe.is_some() {
6634 prop_assert_eq!(merged_spann.search_nprobe, user_spann.search_nprobe);
6635 }
6636 }
6637 }
6638 }
6639 }
6640 }
6641
6642 fn expected_vector_index_config(
6643 config: &InternalCollectionConfiguration,
6644 ) -> VectorIndexConfig {
6645 match &config.vector_index {
6646 VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
6647 space: Some(hnsw_config.space.clone()),
6648 embedding_function: config.embedding_function.clone(),
6649 source_key: None,
6650 hnsw: Some(HnswIndexConfig {
6651 ef_construction: Some(hnsw_config.ef_construction),
6652 max_neighbors: Some(hnsw_config.max_neighbors),
6653 ef_search: Some(hnsw_config.ef_search),
6654 num_threads: Some(hnsw_config.num_threads),
6655 batch_size: Some(hnsw_config.batch_size),
6656 sync_threshold: Some(hnsw_config.sync_threshold),
6657 resize_factor: Some(hnsw_config.resize_factor),
6658 }),
6659 spann: None,
6660 },
6661 VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
6662 space: Some(spann_config.space.clone()),
6663 embedding_function: config.embedding_function.clone(),
6664 source_key: None,
6665 hnsw: None,
6666 spann: Some(SpannIndexConfig {
6667 search_nprobe: Some(spann_config.search_nprobe),
6668 search_rng_factor: Some(spann_config.search_rng_factor),
6669 search_rng_epsilon: Some(spann_config.search_rng_epsilon),
6670 nreplica_count: Some(spann_config.nreplica_count),
6671 write_rng_factor: Some(spann_config.write_rng_factor),
6672 write_rng_epsilon: Some(spann_config.write_rng_epsilon),
6673 split_threshold: Some(spann_config.split_threshold),
6674 num_samples_kmeans: Some(spann_config.num_samples_kmeans),
6675 initial_lambda: Some(spann_config.initial_lambda),
6676 reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
6677 merge_threshold: Some(spann_config.merge_threshold),
6678 num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
6679 write_nprobe: Some(spann_config.write_nprobe),
6680 ef_construction: Some(spann_config.ef_construction),
6681 ef_search: Some(spann_config.ef_search),
6682 max_neighbors: Some(spann_config.max_neighbors),
6683 center_drift_threshold: None,
6684 quantize: Quantization::None,
6685 }),
6686 },
6687 }
6688 }
6689
6690 fn non_special_key_strategy() -> BoxedStrategy<String> {
6691 string_regex(TEST_NAME_PATTERN)
6692 .unwrap()
6693 .prop_filter("exclude special keys", |key| {
6694 key != DOCUMENT_KEY && key != EMBEDDING_KEY
6695 })
6696 .boxed()
6697 }
6698
6699 fn source_key_strategy() -> BoxedStrategy<Option<String>> {
6700 proptest::option::of(prop_oneof![
6701 Just(DOCUMENT_KEY.to_string()),
6702 string_regex(TEST_NAME_PATTERN).unwrap(),
6703 ])
6704 .boxed()
6705 }
6706
6707 fn fts_index_type_strategy() -> impl Strategy<Value = FtsIndexType> {
6708 any::<bool>().prop_map(|enabled| FtsIndexType {
6709 enabled,
6710 config: FtsIndexConfig {},
6711 })
6712 }
6713
6714 fn string_inverted_index_type_strategy() -> impl Strategy<Value = StringInvertedIndexType> {
6715 any::<bool>().prop_map(|enabled| StringInvertedIndexType {
6716 enabled,
6717 config: StringInvertedIndexConfig {},
6718 })
6719 }
6720
6721 fn string_value_type_strategy() -> BoxedStrategy<Option<StringValueType>> {
6722 proptest::option::of(
6723 (
6724 proptest::option::of(string_inverted_index_type_strategy()),
6725 proptest::option::of(fts_index_type_strategy()),
6726 )
6727 .prop_map(|(string_inverted_index, fts_index)| {
6728 StringValueType {
6729 string_inverted_index,
6730 fts_index,
6731 }
6732 }),
6733 )
6734 .boxed()
6735 }
6736
6737 fn float_inverted_index_type_strategy() -> impl Strategy<Value = FloatInvertedIndexType> {
6738 any::<bool>().prop_map(|enabled| FloatInvertedIndexType {
6739 enabled,
6740 config: FloatInvertedIndexConfig {},
6741 })
6742 }
6743
6744 fn float_value_type_strategy() -> BoxedStrategy<Option<FloatValueType>> {
6745 proptest::option::of(
6746 proptest::option::of(float_inverted_index_type_strategy()).prop_map(
6747 |float_inverted_index| FloatValueType {
6748 float_inverted_index,
6749 },
6750 ),
6751 )
6752 .boxed()
6753 }
6754
6755 fn int_inverted_index_type_strategy() -> impl Strategy<Value = IntInvertedIndexType> {
6756 any::<bool>().prop_map(|enabled| IntInvertedIndexType {
6757 enabled,
6758 config: IntInvertedIndexConfig {},
6759 })
6760 }
6761
6762 fn int_value_type_strategy() -> BoxedStrategy<Option<IntValueType>> {
6763 proptest::option::of(
6764 proptest::option::of(int_inverted_index_type_strategy())
6765 .prop_map(|int_inverted_index| IntValueType { int_inverted_index }),
6766 )
6767 .boxed()
6768 }
6769
6770 fn bool_inverted_index_type_strategy() -> impl Strategy<Value = BoolInvertedIndexType> {
6771 any::<bool>().prop_map(|enabled| BoolInvertedIndexType {
6772 enabled,
6773 config: BoolInvertedIndexConfig {},
6774 })
6775 }
6776
6777 fn bool_value_type_strategy() -> BoxedStrategy<Option<BoolValueType>> {
6778 proptest::option::of(
6779 proptest::option::of(bool_inverted_index_type_strategy()).prop_map(
6780 |bool_inverted_index| BoolValueType {
6781 bool_inverted_index,
6782 },
6783 ),
6784 )
6785 .boxed()
6786 }
6787
6788 fn sparse_vector_index_config_strategy() -> impl Strategy<Value = SparseVectorIndexConfig> {
6789 (
6790 sparse_embedding_function_strategy(),
6791 source_key_strategy(),
6792 proptest::option::of(any::<bool>()),
6793 )
6794 .prop_map(|(embedding_function, source_key, bm25)| {
6795 SparseVectorIndexConfig {
6796 embedding_function,
6797 source_key,
6798 bm25,
6799 }
6800 })
6801 }
6802
6803 fn sparse_vector_value_type_strategy() -> BoxedStrategy<Option<SparseVectorValueType>> {
6804 proptest::option::of(
6805 (
6806 any::<bool>(),
6807 proptest::option::of(sparse_vector_index_config_strategy()),
6808 )
6809 .prop_map(|(enabled, config)| SparseVectorValueType {
6810 sparse_vector_index: config.map(|cfg| SparseVectorIndexType {
6811 enabled,
6812 config: cfg,
6813 }),
6814 }),
6815 )
6816 .boxed()
6817 }
6818
6819 fn hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
6820 internal_hnsw_configuration_strategy().prop_map(|config| HnswIndexConfig {
6821 ef_construction: Some(config.ef_construction),
6822 max_neighbors: Some(config.max_neighbors),
6823 ef_search: Some(config.ef_search),
6824 num_threads: Some(config.num_threads),
6825 batch_size: Some(config.batch_size),
6826 sync_threshold: Some(config.sync_threshold),
6827 resize_factor: Some(config.resize_factor),
6828 })
6829 }
6830
6831 fn spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
6832 internal_spann_configuration_strategy().prop_map(|config| SpannIndexConfig {
6833 search_nprobe: Some(config.search_nprobe),
6834 search_rng_factor: Some(config.search_rng_factor),
6835 search_rng_epsilon: Some(config.search_rng_epsilon),
6836 nreplica_count: Some(config.nreplica_count),
6837 write_rng_factor: Some(config.write_rng_factor),
6838 write_rng_epsilon: Some(config.write_rng_epsilon),
6839 split_threshold: Some(config.split_threshold),
6840 num_samples_kmeans: Some(config.num_samples_kmeans),
6841 initial_lambda: Some(config.initial_lambda),
6842 reassign_neighbor_count: Some(config.reassign_neighbor_count),
6843 merge_threshold: Some(config.merge_threshold),
6844 num_centers_to_merge_to: Some(config.num_centers_to_merge_to),
6845 write_nprobe: Some(config.write_nprobe),
6846 ef_construction: Some(config.ef_construction),
6847 ef_search: Some(config.ef_search),
6848 max_neighbors: Some(config.max_neighbors),
6849 center_drift_threshold: None,
6850 quantize: Quantization::None,
6851 })
6852 }
6853
6854 fn vector_index_config_strategy() -> impl Strategy<Value = VectorIndexConfig> {
6855 (
6856 proptest::option::of(space_strategy()),
6857 embedding_function_strategy(),
6858 source_key_strategy(),
6859 proptest::option::of(hnsw_index_config_strategy()),
6860 proptest::option::of(spann_index_config_strategy()),
6861 )
6862 .prop_map(|(space, embedding_function, source_key, hnsw, spann)| {
6863 VectorIndexConfig {
6864 space,
6865 embedding_function,
6866 source_key,
6867 hnsw,
6868 spann,
6869 }
6870 })
6871 }
6872
6873 fn vector_index_type_strategy() -> impl Strategy<Value = VectorIndexType> {
6874 (any::<bool>(), vector_index_config_strategy())
6875 .prop_map(|(enabled, config)| VectorIndexType { enabled, config })
6876 }
6877
6878 fn float_list_value_type_strategy() -> BoxedStrategy<Option<FloatListValueType>> {
6879 proptest::option::of(
6880 proptest::option::of(vector_index_type_strategy())
6881 .prop_map(|vector_index| FloatListValueType { vector_index }),
6882 )
6883 .boxed()
6884 }
6885
6886 fn value_types_strategy() -> BoxedStrategy<ValueTypes> {
6887 (
6888 string_value_type_strategy(),
6889 float_list_value_type_strategy(),
6890 sparse_vector_value_type_strategy(),
6891 int_value_type_strategy(),
6892 float_value_type_strategy(),
6893 bool_value_type_strategy(),
6894 )
6895 .prop_map(
6896 |(string, float_list, sparse_vector, int, float, boolean)| ValueTypes {
6897 string,
6898 float_list,
6899 sparse_vector,
6900 int,
6901 float,
6902 boolean,
6903 },
6904 )
6905 .boxed()
6906 }
6907
6908 fn schema_strategy() -> BoxedStrategy<Schema> {
6909 (
6910 value_types_strategy(),
6911 proptest::collection::hash_map(
6912 non_special_key_strategy(),
6913 value_types_strategy(),
6914 0..=3,
6915 ),
6916 proptest::option::of(value_types_strategy()),
6917 proptest::option::of(value_types_strategy()),
6918 )
6919 .prop_map(
6920 |(defaults, mut extra_keys, document_override, embedding_override)| {
6921 if let Some(doc) = document_override {
6922 extra_keys.insert(DOCUMENT_KEY.to_string(), doc);
6923 }
6924 if let Some(embed) = embedding_override {
6925 extra_keys.insert(EMBEDDING_KEY.to_string(), embed);
6926 }
6927 Schema {
6928 defaults,
6929 keys: extra_keys,
6930 cmek: None,
6931 source_attached_function_id: None,
6932 }
6933 },
6934 )
6935 .boxed()
6936 }
6937
6938 fn force_non_default_schema(mut schema: Schema) -> Schema {
6939 if schema.is_default() {
6940 if let Some(string_value) = schema
6941 .defaults
6942 .string
6943 .as_mut()
6944 .and_then(|string_value| string_value.string_inverted_index.as_mut())
6945 {
6946 string_value.enabled = !string_value.enabled;
6947 } else {
6948 schema.defaults.string = Some(StringValueType {
6949 string_inverted_index: Some(StringInvertedIndexType {
6950 enabled: false,
6951 config: StringInvertedIndexConfig {},
6952 }),
6953 fts_index: None,
6954 });
6955 }
6956 }
6957 schema
6958 }
6959
6960 fn non_default_schema_strategy() -> BoxedStrategy<Schema> {
6961 schema_strategy().prop_map(force_non_default_schema).boxed()
6962 }
6963
6964 fn extract_vector_configs(schema: &Schema) -> (VectorIndexConfig, VectorIndexConfig) {
6965 let defaults = schema
6966 .defaults
6967 .float_list
6968 .as_ref()
6969 .and_then(|fl| fl.vector_index.as_ref())
6970 .map(|vi| vi.config.clone())
6971 .expect("defaults vector index missing");
6972
6973 let embedding = schema
6974 .keys
6975 .get(EMBEDDING_KEY)
6976 .and_then(|value_types| value_types.float_list.as_ref())
6977 .and_then(|fl| fl.vector_index.as_ref())
6978 .map(|vi| vi.config.clone())
6979 .expect("#embedding vector index missing");
6980
6981 (defaults, embedding)
6982 }
6983
6984 proptest! {
6985 #[test]
6986 fn reconcile_schema_and_config_matches_convert_for_config_only(
6987 config in internal_collection_configuration_strategy(),
6988 knn in knn_index_strategy(),
6989 ) {
6990 let result = Schema::reconcile_schema_and_config(None, Some(&config), knn)
6991 .expect("reconciliation should succeed");
6992
6993 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6994 let expected_config = expected_vector_index_config(&config);
6995
6996 prop_assert_eq!(defaults_vi, expected_config.clone());
6997
6998 let mut expected_embedding_config = expected_config;
6999 expected_embedding_config.source_key = Some(DOCUMENT_KEY.to_string());
7000 prop_assert_eq!(embedding_vi, expected_embedding_config);
7001
7002 prop_assert_eq!(result.keys.len(), 2);
7003 }
7004 }
7005
7006 proptest! {
7007 #[test]
7008 fn reconcile_schema_and_config_errors_when_both_non_default(
7009 config in non_default_internal_collection_configuration_strategy(),
7010 knn in knn_index_strategy(),
7011 ) {
7012 let schema = Schema::try_from(&config)
7013 .expect("conversion should succeed");
7014 prop_assume!(!schema.is_default());
7015
7016 let result = Schema::reconcile_schema_and_config(Some(&schema), Some(&config), knn);
7017
7018 prop_assert!(matches!(result, Err(SchemaError::ConfigAndSchemaConflict)));
7019 }
7020 }
7021
7022 proptest! {
7023 #[test]
7024 fn reconcile_schema_and_config_matches_schema_only_path(
7025 schema in schema_strategy(),
7026 knn in knn_index_strategy(),
7027 ) {
7028 let result = Schema::reconcile_schema_and_config(Some(&schema), None, knn)
7029 .expect("reconciliation should succeed");
7030
7031 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
7032
7033 if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
7035 if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
7036 if let Some(schema_space) = &schema_vi.config.space {
7038 prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
7039 }
7040 if let Some(schema_ef) = &schema_vi.config.embedding_function {
7041 prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
7042 }
7043 match knn {
7045 KnnIndex::Hnsw => {
7046 if let Some(schema_hnsw) = &schema_vi.config.hnsw {
7047 if let Some(merged_hnsw) = &defaults_vi.hnsw {
7048 if let Some(schema_ef_construction) = schema_hnsw.ef_construction {
7049 prop_assert_eq!(merged_hnsw.ef_construction, Some(schema_ef_construction));
7050 }
7051 }
7052 }
7053 }
7054 KnnIndex::Spann => {
7055 if let Some(schema_spann) = &schema_vi.config.spann {
7056 if let Some(merged_spann) = &defaults_vi.spann {
7057 if let Some(schema_search_nprobe) = schema_spann.search_nprobe {
7058 prop_assert_eq!(merged_spann.search_nprobe, Some(schema_search_nprobe));
7059 }
7060 }
7061 }
7062 }
7063 }
7064 }
7065 }
7066
7067 if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
7069 if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
7070 if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
7071 if let Some(schema_space) = &embedding_vi_type.config.space {
7072 prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
7073 }
7074 }
7075 }
7076 }
7077 }
7078 }
7079
7080 proptest! {
7081 #[test]
7082 fn reconcile_schema_and_config_with_default_schema_and_default_config_applies_embedding_function(
7083 embedding_function in default_embedding_function_strategy(),
7084 knn in knn_index_strategy(),
7085 ) {
7086 let schema = Schema::new_default(knn);
7087 let mut config = match knn {
7088 KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
7089 KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
7090 };
7091 config.embedding_function = embedding_function.clone();
7092
7093 let result = Schema::reconcile_schema_and_config(
7094 Some(&schema),
7095 Some(&config),
7096 knn,
7097 )
7098 .expect("reconciliation should succeed");
7099
7100 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
7101
7102 if let Some(ef) = embedding_function {
7104 prop_assert_eq!(defaults_vi.embedding_function, Some(ef.clone()));
7105 prop_assert_eq!(embedding_vi.embedding_function, Some(ef));
7106 } else {
7107 prop_assert_eq!(defaults_vi.embedding_function, None);
7109 prop_assert_eq!(embedding_vi.embedding_function, None);
7110 }
7111 }
7112 }
7113
7114 proptest! {
7115 #[test]
7116 fn reconcile_schema_and_config_with_default_config_keeps_non_default_schema(
7117 schema in non_default_schema_strategy(),
7118 knn in knn_index_strategy(),
7119 ) {
7120 let default_config = match knn {
7121 KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
7122 KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
7123 };
7124
7125 let result = Schema::reconcile_schema_and_config(
7126 Some(&schema),
7127 Some(&default_config),
7128 knn,
7129 )
7130 .expect("reconciliation should succeed");
7131
7132 let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
7133
7134 if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
7137 if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
7138 if let Some(schema_space) = &schema_vi.config.space {
7139 prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
7140 }
7141 if let Some(schema_ef) = &schema_vi.config.embedding_function {
7142 prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
7143 }
7144 }
7145 }
7146
7147 if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
7149 if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
7150 if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
7151 if let Some(schema_space) = &embedding_vi_type.config.space {
7152 prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
7153 }
7154 }
7155 }
7156 }
7157 }
7158 }
7159 }
7160}