1use crate::{
2 collection_schema::is_embedding_function_default, default_batch_size, default_construction_ef,
3 default_construction_ef_spann, default_initial_lambda, default_m, default_m_spann,
4 default_merge_threshold, default_nreplica_count, default_num_centers_to_merge_to,
5 default_num_samples_kmeans, default_num_threads, default_reassign_neighbor_count,
6 default_resize_factor, default_search_ef, default_search_ef_spann, default_search_nprobe,
7 default_search_rng_epsilon, default_search_rng_factor, default_space, default_split_threshold,
8 default_sync_threshold, default_write_nprobe, default_write_rng_epsilon,
9 default_write_rng_factor,
10};
11use crate::{
12 HnswConfiguration, HnswParametersFromSegmentError, InternalHnswConfiguration,
13 InternalSpannConfiguration, Metadata, Schema, Segment, SpannConfiguration,
14 UpdateHnswConfiguration, UpdateSpannConfiguration, VectorIndexConfig, EMBEDDING_KEY,
15};
16use chroma_error::{ChromaError, ErrorCodes};
17use serde::{Deserialize, Serialize};
18use thiserror::Error;
19
20#[derive(Deserialize, Serialize, Clone, Debug, Copy)]
21pub enum KnnIndex {
22 #[serde(alias = "hnsw")]
23 Hnsw,
24 #[serde(alias = "spann")]
25 Spann,
26}
27
28pub fn default_default_knn_index() -> KnnIndex {
29 KnnIndex::Hnsw
30}
31
32#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
33#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
34#[serde(tag = "type")]
35pub enum EmbeddingFunctionConfiguration {
36 #[serde(rename = "legacy")]
37 Legacy,
38 #[serde(rename = "known")]
39 Known(EmbeddingFunctionNewConfiguration),
40 #[serde(rename = "unknown")]
41 Unknown,
42}
43
44impl EmbeddingFunctionConfiguration {
45 pub fn is_default(&self) -> bool {
46 match self {
47 EmbeddingFunctionConfiguration::Legacy => false,
48 EmbeddingFunctionConfiguration::Unknown => true,
49 EmbeddingFunctionConfiguration::Known(config) => config.name == "default",
50 }
51 }
52}
53
54#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
55#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
56pub struct EmbeddingFunctionNewConfiguration {
57 pub name: String,
58 pub config: serde_json::Value,
59}
60
61#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
62#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
63#[serde(rename_all = "snake_case")]
64pub enum VectorIndexConfiguration {
65 Hnsw(InternalHnswConfiguration),
66 Spann(InternalSpannConfiguration),
67}
68
69impl VectorIndexConfiguration {
70 pub fn update(&mut self, vector_index: &VectorIndexConfiguration) {
71 match (self, vector_index) {
72 (VectorIndexConfiguration::Hnsw(hnsw), VectorIndexConfiguration::Hnsw(hnsw_new)) => {
73 *hnsw = hnsw_new.clone();
74 }
75 (
76 VectorIndexConfiguration::Spann(spann),
77 VectorIndexConfiguration::Spann(spann_new),
78 ) => {
79 *spann = spann_new.clone();
80 }
81 (VectorIndexConfiguration::Hnsw(_), VectorIndexConfiguration::Spann(_)) => {
82 }
85 (VectorIndexConfiguration::Spann(_), VectorIndexConfiguration::Hnsw(_)) => {
86 }
89 }
90 }
91}
92impl From<InternalHnswConfiguration> for VectorIndexConfiguration {
93 fn from(config: InternalHnswConfiguration) -> Self {
94 VectorIndexConfiguration::Hnsw(config)
95 }
96}
97
98impl From<InternalSpannConfiguration> for VectorIndexConfiguration {
99 fn from(config: InternalSpannConfiguration) -> Self {
100 VectorIndexConfiguration::Spann(config)
101 }
102}
103
104fn default_vector_index_config() -> VectorIndexConfiguration {
105 VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default())
106}
107
108#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
109#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
110pub struct InternalCollectionConfiguration {
111 #[serde(default = "default_vector_index_config")]
112 pub vector_index: VectorIndexConfiguration,
113 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
114}
115
116impl InternalCollectionConfiguration {
117 pub fn from_legacy_metadata(
118 metadata: Metadata,
119 ) -> Result<Self, HnswParametersFromSegmentError> {
120 let hnsw = InternalHnswConfiguration::from_legacy_segment_metadata(&Some(metadata))?;
121 Ok(Self {
122 vector_index: VectorIndexConfiguration::Hnsw(hnsw),
123 embedding_function: None,
124 })
125 }
126
127 pub fn default_hnsw() -> Self {
128 Self {
129 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default()),
130 embedding_function: None,
131 }
132 }
133
134 pub fn default_spann() -> Self {
135 Self {
136 vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration::default()),
137 embedding_function: None,
138 }
139 }
140
141 pub fn is_default(&self) -> bool {
143 if !is_embedding_function_default(&self.embedding_function) {
144 return false;
145 }
146
147 match &self.vector_index {
149 VectorIndexConfiguration::Hnsw(hnsw_config) => {
150 hnsw_config.ef_construction == default_construction_ef()
151 && hnsw_config.ef_search == default_search_ef()
152 && hnsw_config.max_neighbors == default_m()
153 && hnsw_config.num_threads == default_num_threads()
154 && hnsw_config.batch_size == default_batch_size()
155 && hnsw_config.sync_threshold == default_sync_threshold()
156 && hnsw_config.resize_factor == default_resize_factor()
157 && hnsw_config.space == default_space()
158 }
159 VectorIndexConfiguration::Spann(spann_config) => {
160 spann_config.search_nprobe == default_search_nprobe()
161 && spann_config.search_rng_factor == default_search_rng_factor()
162 && spann_config.search_rng_epsilon == default_search_rng_epsilon()
163 && spann_config.write_nprobe == default_write_nprobe()
164 && spann_config.nreplica_count == default_nreplica_count()
165 && spann_config.write_rng_factor == default_write_rng_factor()
166 && spann_config.write_rng_epsilon == default_write_rng_epsilon()
167 && spann_config.split_threshold == default_split_threshold()
168 && spann_config.num_samples_kmeans == default_num_samples_kmeans()
169 && spann_config.initial_lambda == default_initial_lambda()
170 && spann_config.reassign_neighbor_count == default_reassign_neighbor_count()
171 && spann_config.merge_threshold == default_merge_threshold()
172 && spann_config.num_centers_to_merge_to == default_num_centers_to_merge_to()
173 && spann_config.ef_construction == default_construction_ef_spann()
174 && spann_config.ef_search == default_search_ef_spann()
175 && spann_config.max_neighbors == default_m_spann()
176 && spann_config.space == default_space()
177 }
178 }
179 }
180
181 pub fn get_hnsw_config_with_legacy_fallback(
182 &self,
183 segment: &Segment,
184 ) -> Result<Option<InternalHnswConfiguration>, HnswParametersFromSegmentError> {
185 self.get_hnsw_config_from_legacy_metadata(&segment.metadata)
186 }
187
188 pub fn get_hnsw_config_from_legacy_metadata(
189 &self,
190 metadata: &Option<Metadata>,
191 ) -> Result<Option<InternalHnswConfiguration>, HnswParametersFromSegmentError> {
192 if let Some(config) = self.get_hnsw_config() {
193 let config_from_metadata =
194 InternalHnswConfiguration::from_legacy_segment_metadata(metadata)?;
195
196 if config == InternalHnswConfiguration::default() && config != config_from_metadata {
197 return Ok(Some(config_from_metadata));
198 }
199
200 return Ok(Some(config));
201 }
202
203 Ok(None)
204 }
205
206 pub fn get_spann_config(&self) -> Option<InternalSpannConfiguration> {
207 match &self.vector_index {
208 VectorIndexConfiguration::Spann(config) => Some(config.clone()),
209 _ => None,
210 }
211 }
212
213 fn get_hnsw_config(&self) -> Option<InternalHnswConfiguration> {
214 match &self.vector_index {
215 VectorIndexConfiguration::Hnsw(config) => Some(config.clone()),
216 _ => None,
217 }
218 }
219
220 pub fn update(&mut self, configuration: &InternalUpdateCollectionConfiguration) {
221 if let Some(vector_index) = &configuration.vector_index {
224 match vector_index {
225 UpdateVectorIndexConfiguration::Hnsw(hnsw_config) => {
226 if let VectorIndexConfiguration::Hnsw(current_config) = &mut self.vector_index {
227 if let Some(update_config) = hnsw_config {
228 if let Some(ef_search) = update_config.ef_search {
229 current_config.ef_search = ef_search;
230 }
231 if let Some(max_neighbors) = update_config.max_neighbors {
232 current_config.max_neighbors = max_neighbors;
233 }
234 if let Some(num_threads) = update_config.num_threads {
235 current_config.num_threads = num_threads;
236 }
237 if let Some(resize_factor) = update_config.resize_factor {
238 current_config.resize_factor = resize_factor;
239 }
240 if let Some(sync_threshold) = update_config.sync_threshold {
241 current_config.sync_threshold = sync_threshold;
242 }
243 if let Some(batch_size) = update_config.batch_size {
244 current_config.batch_size = batch_size;
245 }
246 }
247 }
248 }
249 UpdateVectorIndexConfiguration::Spann(spann_config) => {
250 if let VectorIndexConfiguration::Spann(current_config) = &mut self.vector_index
251 {
252 if let Some(update_config) = spann_config {
253 if let Some(search_nprobe) = update_config.search_nprobe {
254 current_config.search_nprobe = search_nprobe;
255 }
256 if let Some(ef_search) = update_config.ef_search {
257 current_config.ef_search = ef_search;
258 }
259 }
260 }
261 }
262 }
263 }
264 if let Some(embedding_function) = &configuration.embedding_function {
266 self.embedding_function = Some(embedding_function.clone());
267 }
268 }
269
270 pub fn try_from_config(
271 value: CollectionConfiguration,
272 default_knn_index: KnnIndex,
273 metadata: Option<Metadata>,
274 ) -> Result<Self, CollectionConfigurationToInternalConfigurationError> {
275 let mut hnsw: Option<HnswConfiguration> = value.hnsw;
276 let spann: Option<SpannConfiguration> = value.spann;
277
278 if hnsw.is_none() && spann.is_none() {
282 let hnsw_config_from_metadata =
283 InternalHnswConfiguration::from_legacy_segment_metadata(&metadata).map_err(|e| {
284 CollectionConfigurationToInternalConfigurationError::HnswParametersFromSegmentError(
285 e,
286 )
287 })?;
288 hnsw = Some(hnsw_config_from_metadata.into());
289 }
290
291 match (hnsw, spann) {
292 (Some(_), Some(_)) => Err(CollectionConfigurationToInternalConfigurationError::MultipleVectorIndexConfigurations),
293 (Some(hnsw), None) => {
294 match default_knn_index {
295 KnnIndex::Spann => {
299 let mut hnsw: InternalHnswConfiguration = hnsw.into();
300 let temp_config = InternalCollectionConfiguration {
301 vector_index: VectorIndexConfiguration::Hnsw(hnsw.clone()),
302 embedding_function: None,
303 };
304 let hnsw_params = temp_config.get_hnsw_config_from_legacy_metadata(&metadata)?;
305 if let Some(hnsw_params) = hnsw_params {
306 hnsw = hnsw_params;
307 }
308 let spann_config = InternalSpannConfiguration {
309 space: hnsw.space,
310 ..Default::default()
311 };
312
313 Ok(InternalCollectionConfiguration {
314 vector_index: VectorIndexConfiguration::Spann(spann_config),
315 embedding_function: value.embedding_function,
316 })
317 },
318 KnnIndex::Hnsw => {
319 let hnsw: InternalHnswConfiguration = hnsw.into();
320 let mut internal_config = InternalCollectionConfiguration {
321 vector_index: VectorIndexConfiguration::Hnsw(hnsw),
322 embedding_function: value.embedding_function,
323 };
324 let hnsw_params = internal_config.get_hnsw_config_from_legacy_metadata(&metadata)?;
325 if let Some(hnsw_params) = hnsw_params {
326 internal_config.vector_index = VectorIndexConfiguration::Hnsw(hnsw_params);
327 }
328 Ok(internal_config)
329 }
330 }
331 }
332 (None, Some(spann)) => {
333 match default_knn_index {
334 KnnIndex::Hnsw => {
338 let internal_config = if let Some(space) = spann.space {
339 InternalHnswConfiguration {
340 space,
341 ..Default::default()
342 }
343 } else {
344 InternalHnswConfiguration::default()
345 };
346 Ok(InternalCollectionConfiguration {
347 vector_index: VectorIndexConfiguration::Hnsw(internal_config),
348 embedding_function: value.embedding_function,
349 })
350 }
351 KnnIndex::Spann => {
352 let spann: InternalSpannConfiguration = spann.into();
353 Ok(InternalCollectionConfiguration {
354 vector_index: spann.into(),
355 embedding_function: value.embedding_function,
356 })
357 }
358 }
359 }
360 (None, None) => {
361 let vector_index = match default_knn_index {
362 KnnIndex::Hnsw => InternalHnswConfiguration::default().into(),
363 KnnIndex::Spann => InternalSpannConfiguration::default().into(),
364 };
365 Ok(InternalCollectionConfiguration {
366 vector_index,
367 embedding_function: value.embedding_function,
368 })
369 }
370 }
371 }
372}
373
374impl TryFrom<CollectionConfiguration> for InternalCollectionConfiguration {
375 type Error = CollectionConfigurationToInternalConfigurationError;
376
377 fn try_from(value: CollectionConfiguration) -> Result<Self, Self::Error> {
378 match (value.hnsw, value.spann) {
379 (Some(_), Some(_)) => Err(Self::Error::MultipleVectorIndexConfigurations),
380 (Some(hnsw), None) => {
381 let hnsw: InternalHnswConfiguration = hnsw.into();
382 Ok(InternalCollectionConfiguration {
383 vector_index: hnsw.into(),
384 embedding_function: value.embedding_function,
385 })
386 }
387 (None, Some(spann)) => {
388 let spann: InternalSpannConfiguration = spann.into();
389 Ok(InternalCollectionConfiguration {
390 vector_index: spann.into(),
391 embedding_function: value.embedding_function,
392 })
393 }
394 (None, None) => Ok(InternalCollectionConfiguration {
395 vector_index: InternalHnswConfiguration::default().into(),
396 embedding_function: value.embedding_function,
397 }),
398 }
399 }
400}
401
402impl TryFrom<&Schema> for InternalCollectionConfiguration {
403 type Error = String;
404
405 fn try_from(schema: &Schema) -> Result<Self, Self::Error> {
406 let vector_config = schema
407 .keys
408 .get(EMBEDDING_KEY)
409 .and_then(|value_types| value_types.float_list.as_ref())
410 .and_then(|float_list| float_list.vector_index.as_ref())
411 .map(|vector_index| vector_index.config.clone())
412 .or_else(|| {
413 schema
414 .defaults
415 .float_list
416 .as_ref()
417 .and_then(|float_list| float_list.vector_index.as_ref())
418 .map(|vector_index| vector_index.config.clone())
419 })
420 .ok_or_else(|| "Missing vector index configuration for #embedding".to_string())?;
421
422 let VectorIndexConfig {
423 space,
424 embedding_function,
425 hnsw,
426 spann,
427 ..
428 } = vector_config;
429
430 match (hnsw, spann) {
431 (Some(_), Some(_)) => Err(
432 "Vector index configuration must not contain both HNSW and SPANN settings"
433 .to_string(),
434 ),
435 (Some(hnsw_config), None) => {
436 let internal_hnsw = (space.as_ref(), Some(&hnsw_config)).into();
437 Ok(InternalCollectionConfiguration {
438 vector_index: VectorIndexConfiguration::Hnsw(internal_hnsw),
439 embedding_function,
440 })
441 }
442 (None, Some(spann_config)) => {
443 let internal_spann = (space.as_ref(), &spann_config).into();
444 Ok(InternalCollectionConfiguration {
445 vector_index: VectorIndexConfiguration::Spann(internal_spann),
446 embedding_function,
447 })
448 }
449 (None, None) => {
450 let internal_hnsw = (space.as_ref(), None).into();
451 Ok(InternalCollectionConfiguration {
452 vector_index: VectorIndexConfiguration::Hnsw(internal_hnsw),
453 embedding_function,
454 })
455 }
456 }
457 }
458}
459
460#[derive(Debug, Error)]
461pub enum CollectionConfigurationToInternalConfigurationError {
462 #[error("Multiple vector index configurations provided")]
463 MultipleVectorIndexConfigurations,
464 #[error("Failed to parse hnsw parameters from segment metadata")]
465 HnswParametersFromSegmentError(#[from] HnswParametersFromSegmentError),
466}
467
468impl ChromaError for CollectionConfigurationToInternalConfigurationError {
469 fn code(&self) -> ErrorCodes {
470 match self {
471 Self::MultipleVectorIndexConfigurations => ErrorCodes::InvalidArgument,
472 Self::HnswParametersFromSegmentError(_) => ErrorCodes::InvalidArgument,
473 }
474 }
475}
476
477#[derive(Default, Deserialize, Serialize, Debug, Clone)]
478#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
479#[cfg_attr(feature = "pyo3", pyo3::pyclass)]
480pub struct CollectionConfiguration {
481 pub hnsw: Option<HnswConfiguration>,
482 pub spann: Option<SpannConfiguration>,
483 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
484}
485
486impl From<InternalCollectionConfiguration> for CollectionConfiguration {
487 fn from(value: InternalCollectionConfiguration) -> Self {
488 Self {
489 hnsw: match value.vector_index.clone() {
490 VectorIndexConfiguration::Hnsw(config) => Some(config.into()),
491 _ => None,
492 },
493 spann: match value.vector_index {
494 VectorIndexConfiguration::Spann(config) => Some(config.into()),
495 _ => None,
496 },
497 embedding_function: value.embedding_function,
498 }
499 }
500}
501
502#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
503#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
504#[serde(rename_all = "snake_case")]
505pub enum UpdateVectorIndexConfiguration {
506 Hnsw(Option<UpdateHnswConfiguration>),
507 Spann(Option<UpdateSpannConfiguration>),
508}
509
510impl From<UpdateHnswConfiguration> for UpdateVectorIndexConfiguration {
511 fn from(config: UpdateHnswConfiguration) -> Self {
512 UpdateVectorIndexConfiguration::Hnsw(Some(config))
513 }
514}
515
516impl From<UpdateSpannConfiguration> for UpdateVectorIndexConfiguration {
517 fn from(config: UpdateSpannConfiguration) -> Self {
518 UpdateVectorIndexConfiguration::Spann(Some(config))
519 }
520}
521
522#[derive(Debug, Error)]
523pub enum UpdateCollectionConfigurationToInternalConfigurationError {
524 #[error("Multiple vector index configurations provided")]
525 MultipleVectorIndexConfigurations,
526}
527
528impl ChromaError for UpdateCollectionConfigurationToInternalConfigurationError {
529 fn code(&self) -> ErrorCodes {
530 match self {
531 Self::MultipleVectorIndexConfigurations => ErrorCodes::InvalidArgument,
532 }
533 }
534}
535
536#[derive(Deserialize, Serialize, Debug, Clone)]
537#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
538#[cfg_attr(feature = "pyo3", pyo3::pyclass)]
539pub struct UpdateCollectionConfiguration {
540 pub hnsw: Option<UpdateHnswConfiguration>,
541 pub spann: Option<UpdateSpannConfiguration>,
542 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
543}
544
545#[derive(Deserialize, Serialize, Debug, Clone)]
546#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
547pub struct InternalUpdateCollectionConfiguration {
548 pub vector_index: Option<UpdateVectorIndexConfiguration>,
549 pub embedding_function: Option<EmbeddingFunctionConfiguration>,
550}
551
552#[derive(Debug, Error)]
553pub enum UpdateCollectionConfigurationToInternalUpdateConfigurationError {
554 #[error("Multiple vector index configurations provided")]
555 MultipleVectorIndexConfigurations,
556}
557
558impl ChromaError for UpdateCollectionConfigurationToInternalUpdateConfigurationError {
559 fn code(&self) -> ErrorCodes {
560 match self {
561 Self::MultipleVectorIndexConfigurations => ErrorCodes::InvalidArgument,
562 }
563 }
564}
565
566impl TryFrom<UpdateCollectionConfiguration> for InternalUpdateCollectionConfiguration {
567 type Error = UpdateCollectionConfigurationToInternalUpdateConfigurationError;
568
569 fn try_from(value: UpdateCollectionConfiguration) -> Result<Self, Self::Error> {
570 match (value.hnsw, value.spann) {
571 (Some(_), Some(_)) => Err(Self::Error::MultipleVectorIndexConfigurations),
572 (Some(hnsw), None) => Ok(InternalUpdateCollectionConfiguration {
573 vector_index: Some(UpdateVectorIndexConfiguration::Hnsw(Some(hnsw))),
574 embedding_function: value.embedding_function,
575 }),
576 (None, Some(spann)) => Ok(InternalUpdateCollectionConfiguration {
577 vector_index: Some(UpdateVectorIndexConfiguration::Spann(Some(spann))),
578 embedding_function: value.embedding_function,
579 }),
580 (None, None) => Ok(InternalUpdateCollectionConfiguration {
581 vector_index: None,
582 embedding_function: value.embedding_function,
583 }),
584 }
585 }
586}
587
588#[cfg(test)]
589mod tests {
590
591 use crate::collection_schema::Schema;
592 use crate::hnsw_configuration::HnswConfiguration;
593 use crate::hnsw_configuration::Space;
594 use crate::metadata::MetadataValue;
595 use crate::spann_configuration::SpannConfiguration;
596 use crate::{test_segment, CollectionUuid, Metadata};
597
598 use super::*;
599
600 #[test]
601 fn metadata_overrides_parameter() {
602 let mut metadata = Metadata::new();
603 metadata.insert(
604 "hnsw:construction_ef".to_string(),
605 crate::MetadataValue::Int(1),
606 );
607
608 let mut segment = test_segment(CollectionUuid::new(), crate::SegmentScope::VECTOR);
609 segment.metadata = Some(metadata);
610
611 let config = InternalCollectionConfiguration::default_hnsw();
612 let overridden_config = config
613 .get_hnsw_config_with_legacy_fallback(&segment)
614 .unwrap()
615 .unwrap();
616
617 assert_eq!(overridden_config.ef_construction, 1);
618 }
619
620 #[test]
621 fn metadata_ignored_when_config_is_not_default() {
622 let mut metadata = Metadata::new();
623 metadata.insert(
624 "hnsw:construction_ef".to_string(),
625 crate::MetadataValue::Int(1),
626 );
627
628 let mut segment = test_segment(CollectionUuid::new(), crate::SegmentScope::VECTOR);
629 segment.metadata = Some(metadata);
630
631 let config = InternalCollectionConfiguration {
632 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
633 ef_construction: 2,
634 ..Default::default()
635 }),
636 embedding_function: None,
637 };
638
639 let overridden_config = config
640 .get_hnsw_config_with_legacy_fallback(&segment)
641 .unwrap()
642 .unwrap();
643
644 assert_eq!(overridden_config.ef_construction, 2);
646 }
647
648 #[test]
649 fn metadata_populates_config_when_not_set() {
650 let mut metadata = Metadata::new();
651 metadata.insert("hnsw:sync_threshold".to_string(), MetadataValue::Int(10));
652 metadata.insert("hnsw:batch_size".to_string(), MetadataValue::Int(7));
653
654 let config = InternalCollectionConfiguration::try_from_config(
655 CollectionConfiguration {
656 hnsw: None,
657 spann: None,
658 embedding_function: None,
659 },
660 KnnIndex::Hnsw,
661 Some(metadata),
662 )
663 .expect("config from metadata should succeed");
664
665 match config.vector_index {
666 VectorIndexConfiguration::Hnsw(hnsw) => {
667 assert_eq!(hnsw.sync_threshold, 10);
668 assert_eq!(hnsw.batch_size, 7);
669 }
670 _ => panic!("expected HNSW configuration"),
671 }
672 }
673
674 #[test]
675 fn schema_reconcile_preserves_metadata_overrides() {
676 let mut metadata = Metadata::new();
677 metadata.insert("hnsw:sync_threshold".to_string(), MetadataValue::Int(10));
678 metadata.insert("hnsw:batch_size".to_string(), MetadataValue::Int(7));
679
680 let config = InternalCollectionConfiguration::try_from_config(
681 CollectionConfiguration {
682 hnsw: None,
683 spann: None,
684 embedding_function: None,
685 },
686 KnnIndex::Hnsw,
687 Some(metadata),
688 )
689 .expect("config from metadata should succeed");
690
691 let schema = Schema::reconcile_schema_and_config(None, Some(&config), KnnIndex::Hnsw)
692 .expect("schema reconcile should succeed");
693
694 let hnsw_config = schema
695 .get_internal_hnsw_config()
696 .expect("schema should contain hnsw config");
697 assert_eq!(hnsw_config.sync_threshold, 10);
698 assert_eq!(hnsw_config.batch_size, 7);
699 }
700
701 #[test]
702 fn test_hnsw_config_with_hnsw_default() {
703 let hnsw_config = HnswConfiguration {
704 max_neighbors: Some(16),
705 ef_construction: Some(100),
706 ef_search: Some(10),
707 batch_size: Some(100),
708 num_threads: Some(4),
709 sync_threshold: Some(500),
710 resize_factor: Some(1.2),
711 space: Some(Space::Cosine),
712 };
713
714 let collection_config = CollectionConfiguration {
715 hnsw: Some(hnsw_config.clone()),
716 spann: None,
717 embedding_function: None,
718 };
719
720 let internal_config_result = InternalCollectionConfiguration::try_from_config(
721 collection_config,
722 KnnIndex::Hnsw,
723 None,
724 );
725
726 assert!(internal_config_result.is_ok());
727 let internal_config = internal_config_result.unwrap();
728
729 let expected_vector_index = VectorIndexConfiguration::Hnsw(hnsw_config.into());
730 assert_eq!(internal_config.vector_index, expected_vector_index);
731 }
732
733 #[test]
734 fn test_hnsw_config_with_spann_default() {
735 let hnsw_config = HnswConfiguration {
736 max_neighbors: Some(16),
737 ef_construction: Some(100),
738 ef_search: Some(10),
739 batch_size: Some(100),
740 num_threads: Some(4),
741 sync_threshold: Some(500),
742 resize_factor: Some(1.2),
743 space: Some(Space::Cosine),
744 };
745
746 let collection_config = CollectionConfiguration {
747 hnsw: Some(hnsw_config.clone()),
748 spann: None,
749 embedding_function: None,
750 };
751
752 let internal_config_result = InternalCollectionConfiguration::try_from_config(
753 collection_config,
754 KnnIndex::Spann,
755 None,
756 );
757
758 assert!(internal_config_result.is_ok());
759 let internal_config = internal_config_result.unwrap();
760
761 let expected_vector_index = VectorIndexConfiguration::Spann(InternalSpannConfiguration {
762 space: hnsw_config.space.unwrap_or(Space::L2),
763 ..Default::default()
764 });
765 assert_eq!(internal_config.vector_index, expected_vector_index);
766 }
767
768 #[test]
769 fn test_spann_config_with_spann_default() {
770 let spann_config = SpannConfiguration {
771 ef_construction: Some(100),
772 ef_search: Some(10),
773 max_neighbors: Some(16),
774 search_nprobe: Some(1),
775 write_nprobe: Some(1),
776 space: Some(Space::Cosine),
777 reassign_neighbor_count: Some(64),
778 split_threshold: Some(200),
779 merge_threshold: Some(100),
780 };
781
782 let collection_config = CollectionConfiguration {
783 hnsw: None,
784 spann: Some(spann_config.clone()),
785 embedding_function: None,
786 };
787
788 let internal_config_result = InternalCollectionConfiguration::try_from_config(
789 collection_config,
790 KnnIndex::Spann,
791 None,
792 );
793
794 assert!(internal_config_result.is_ok());
795 let internal_config = internal_config_result.unwrap();
796
797 let expected_vector_index = VectorIndexConfiguration::Spann(spann_config.into());
798 assert_eq!(internal_config.vector_index, expected_vector_index);
799 }
800
801 #[test]
802 fn test_spann_config_with_hnsw_default() {
803 let spann_config = SpannConfiguration {
804 ef_construction: Some(100),
805 ef_search: Some(10),
806 max_neighbors: Some(16),
807 search_nprobe: Some(1),
808 write_nprobe: Some(1),
809 space: Some(Space::Cosine),
810 reassign_neighbor_count: Some(64),
811 split_threshold: Some(200),
812 merge_threshold: Some(100),
813 };
814
815 let collection_config = CollectionConfiguration {
816 hnsw: None,
817 spann: Some(spann_config.clone()),
818 embedding_function: None,
819 };
820
821 let internal_config_result = InternalCollectionConfiguration::try_from_config(
822 collection_config,
823 KnnIndex::Hnsw,
824 None,
825 );
826
827 let expected_vector_index = VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
828 space: spann_config.space.unwrap_or(Space::L2),
829 ..Default::default()
830 });
831 assert_eq!(
832 internal_config_result.unwrap().vector_index,
833 expected_vector_index
834 );
835 }
836
837 #[test]
838 fn test_no_config_with_metadata_default_hnsw() {
839 let metadata = Metadata::new();
840 let collection_config = CollectionConfiguration {
841 hnsw: None,
842 spann: None,
843 embedding_function: None,
844 };
845
846 let internal_config_result = InternalCollectionConfiguration::try_from_config(
847 collection_config,
848 KnnIndex::Hnsw,
849 Some(metadata),
850 );
851
852 assert!(internal_config_result.is_ok());
853 let internal_config = internal_config_result.unwrap();
854
855 assert_eq!(
856 internal_config.vector_index,
857 VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default())
858 );
859 }
860
861 #[test]
862 fn test_no_config_with_metadata_default_spann() {
863 let metadata = Metadata::new();
864 let collection_config = CollectionConfiguration {
865 hnsw: None,
866 spann: None,
867 embedding_function: None,
868 };
869
870 let internal_config_result = InternalCollectionConfiguration::try_from_config(
871 collection_config,
872 KnnIndex::Spann,
873 Some(metadata),
874 );
875
876 assert!(internal_config_result.is_ok());
877 let internal_config = internal_config_result.unwrap();
878
879 assert_eq!(
880 internal_config.vector_index,
881 VectorIndexConfiguration::Spann(InternalSpannConfiguration::default())
882 );
883 }
884
885 #[test]
886 fn test_legacy_metadata_with_hnsw_config() {
887 let mut metadata = Metadata::new();
888 metadata.insert(
889 "hnsw:space".to_string(),
890 crate::MetadataValue::Str("cosine".to_string()),
891 );
892 metadata.insert(
893 "hnsw:construction_ef".to_string(),
894 crate::MetadataValue::Int(1),
895 );
896
897 let collection_config = CollectionConfiguration {
898 hnsw: None,
899 spann: None,
900 embedding_function: None,
901 };
902
903 let internal_config_result = InternalCollectionConfiguration::try_from_config(
904 collection_config,
905 KnnIndex::Hnsw,
906 Some(metadata),
907 );
908
909 assert!(internal_config_result.is_ok());
910 let internal_config = internal_config_result.unwrap();
911
912 assert_eq!(
913 internal_config.vector_index,
914 VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
915 space: Space::Cosine,
916 ef_construction: 1,
917 ..Default::default()
918 })
919 );
920 }
921
922 #[test]
923 fn test_legacy_metadata_with_spann_config() {
924 let mut metadata = Metadata::new();
925 metadata.insert(
926 "hnsw:space".to_string(),
927 crate::MetadataValue::Str("cosine".to_string()),
928 );
929 metadata.insert(
930 "hnsw:construction_ef".to_string(),
931 crate::MetadataValue::Int(1),
932 );
933
934 let collection_config = CollectionConfiguration {
935 hnsw: None,
936 spann: None,
937 embedding_function: None,
938 };
939
940 let internal_config_result = InternalCollectionConfiguration::try_from_config(
941 collection_config,
942 KnnIndex::Spann,
943 Some(metadata),
944 );
945
946 assert!(internal_config_result.is_ok());
947
948 let internal_config = internal_config_result.unwrap();
949
950 assert_eq!(
951 internal_config.vector_index,
952 VectorIndexConfiguration::Spann(InternalSpannConfiguration {
953 space: Space::Cosine,
954 ..Default::default()
955 })
956 );
957 }
958
959 #[test]
960 fn test_update_collection_configuration_with_hnsw() {
961 let mut config = InternalCollectionConfiguration {
962 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
963 space: Space::Cosine,
964 ..Default::default()
965 }),
966 embedding_function: Some(EmbeddingFunctionConfiguration::Known(
967 EmbeddingFunctionNewConfiguration {
968 name: "test".to_string(),
969 config: serde_json::Value::Null,
970 },
971 )),
972 };
973 let update_config = UpdateCollectionConfiguration {
974 hnsw: Some(UpdateHnswConfiguration {
975 ef_search: Some(1),
976 ..Default::default()
977 }),
978 spann: None,
979 embedding_function: None,
980 };
981 config.update(&update_config.try_into().unwrap());
982 assert_eq!(
983 config.vector_index,
984 VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
985 space: Space::Cosine,
986 ef_search: 1,
987 ..Default::default()
988 })
989 );
990
991 assert_eq!(
992 config.embedding_function,
993 Some(EmbeddingFunctionConfiguration::Known(
994 EmbeddingFunctionNewConfiguration {
995 name: "test".to_string(),
996 config: serde_json::Value::Null,
997 },
998 ))
999 );
1000 }
1001
1002 #[test]
1003 fn test_update_collection_configuration_with_spann() {
1004 let mut config = InternalCollectionConfiguration {
1005 vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
1006 space: Space::Cosine,
1007 ..Default::default()
1008 }),
1009 embedding_function: Some(EmbeddingFunctionConfiguration::Known(
1010 EmbeddingFunctionNewConfiguration {
1011 name: "test".to_string(),
1012 config: serde_json::Value::Null,
1013 },
1014 )),
1015 };
1016 let update_config = UpdateCollectionConfiguration {
1017 hnsw: None,
1018 spann: Some(UpdateSpannConfiguration {
1019 ef_search: Some(1),
1020 ..Default::default()
1021 }),
1022 embedding_function: None,
1023 };
1024 config.update(&update_config.try_into().unwrap());
1025 assert_eq!(
1026 config.vector_index,
1027 VectorIndexConfiguration::Spann(InternalSpannConfiguration {
1028 space: Space::Cosine,
1029 ef_search: 1,
1030 ..Default::default()
1031 })
1032 );
1033
1034 assert_eq!(
1035 config.embedding_function,
1036 Some(EmbeddingFunctionConfiguration::Known(
1037 EmbeddingFunctionNewConfiguration {
1038 name: "test".to_string(),
1039 config: serde_json::Value::Null,
1040 },
1041 ))
1042 );
1043 }
1044
1045 #[test]
1046 fn test_update_collection_configuration_with_embedding_function() {
1047 let mut config = InternalCollectionConfiguration {
1048 vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default()),
1049 embedding_function: Some(EmbeddingFunctionConfiguration::Known(
1050 EmbeddingFunctionNewConfiguration {
1051 name: "test".to_string(),
1052 config: serde_json::Value::Null,
1053 },
1054 )),
1055 };
1056 let emb_fn_config = EmbeddingFunctionNewConfiguration {
1057 name: "test2".to_string(),
1058 config: serde_json::Value::Object(serde_json::Map::from_iter([(
1059 "test".to_string(),
1060 serde_json::Value::String("test".to_string()),
1061 )])),
1062 };
1063 let update_config = UpdateCollectionConfiguration {
1064 hnsw: None,
1065 spann: None,
1066 embedding_function: Some(EmbeddingFunctionConfiguration::Known(emb_fn_config)),
1067 };
1068 config.update(&update_config.try_into().unwrap());
1069 assert_eq!(
1070 config.embedding_function,
1071 Some(EmbeddingFunctionConfiguration::Known(
1072 EmbeddingFunctionNewConfiguration {
1073 name: "test2".to_string(),
1074 config: serde_json::Value::Object(serde_json::Map::from_iter([(
1075 "test".to_string(),
1076 serde_json::Value::String("test".to_string()),
1077 )])),
1078 },
1079 ))
1080 );
1081 }
1082}