chroma_types/
collection_configuration.rs

1use crate::{
2    collection_schema::is_embedding_function_default, default_batch_size, default_construction_ef,
3    default_construction_ef_spann, default_initial_lambda, default_m, default_m_spann,
4    default_merge_threshold, default_nreplica_count, default_num_centers_to_merge_to,
5    default_num_samples_kmeans, default_num_threads, default_reassign_neighbor_count,
6    default_resize_factor, default_search_ef, default_search_ef_spann, default_search_nprobe,
7    default_search_rng_epsilon, default_search_rng_factor, default_space, default_split_threshold,
8    default_sync_threshold, default_write_nprobe, default_write_rng_epsilon,
9    default_write_rng_factor,
10};
11use crate::{
12    HnswConfiguration, HnswParametersFromSegmentError, InternalHnswConfiguration,
13    InternalSpannConfiguration, Metadata, Schema, Segment, SpannConfiguration,
14    UpdateHnswConfiguration, UpdateSpannConfiguration, VectorIndexConfig, EMBEDDING_KEY,
15};
16use chroma_error::{ChromaError, ErrorCodes};
17use serde::{Deserialize, Serialize};
18use thiserror::Error;
19
/// Selects which kind of KNN vector index should be used.
///
/// When deserializing, each variant is accepted under its own name as well as
/// the lowercase alias declared on it (`"hnsw"` / `"spann"`).
#[derive(Deserialize, Serialize, Clone, Debug, Copy)]
pub enum KnnIndex {
    /// HNSW index.
    #[serde(alias = "hnsw")]
    Hnsw,
    /// SPANN index.
    #[serde(alias = "spann")]
    Spann,
}
27
/// Returns the default value for the "default KNN index" setting, which is
/// [`KnnIndex::Hnsw`].
pub fn default_default_knn_index() -> KnnIndex {
    KnnIndex::Hnsw
}
31
// Describes the embedding function attached to a collection.
//
// Serialized as an internally tagged enum: the `type` field carries one of
// `"legacy"`, `"known"` or `"unknown"`. Only the `known` variant carries a
// payload (`EmbeddingFunctionNewConfiguration`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(tag = "type")]
pub enum EmbeddingFunctionConfiguration {
    #[serde(rename = "legacy")]
    Legacy,
    #[serde(rename = "known")]
    Known(EmbeddingFunctionNewConfiguration),
    #[serde(rename = "unknown")]
    Unknown,
}
43
44impl EmbeddingFunctionConfiguration {
45    pub fn is_default(&self) -> bool {
46        match self {
47            EmbeddingFunctionConfiguration::Legacy => false,
48            EmbeddingFunctionConfiguration::Unknown => true,
49            EmbeddingFunctionConfiguration::Known(config) => config.name == "default",
50        }
51    }
52}
53
// Payload of `EmbeddingFunctionConfiguration::Known`: the embedding
// function's name plus an arbitrary JSON value holding its settings.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct EmbeddingFunctionNewConfiguration {
    // Compared against "default" by `EmbeddingFunctionConfiguration::is_default`.
    pub name: String,
    // Free-form JSON; this file never inspects its contents.
    pub config: serde_json::Value,
}
60
// Fully resolved vector index configuration: exactly one of HNSW or SPANN.
//
// Variant names are serialized in snake_case (`hnsw` / `spann`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(rename_all = "snake_case")]
pub enum VectorIndexConfiguration {
    Hnsw(InternalHnswConfiguration),
    Spann(InternalSpannConfiguration),
}
68
69impl VectorIndexConfiguration {
70    pub fn update(&mut self, vector_index: &VectorIndexConfiguration) {
71        match (self, vector_index) {
72            (VectorIndexConfiguration::Hnsw(hnsw), VectorIndexConfiguration::Hnsw(hnsw_new)) => {
73                *hnsw = hnsw_new.clone();
74            }
75            (
76                VectorIndexConfiguration::Spann(spann),
77                VectorIndexConfiguration::Spann(spann_new),
78            ) => {
79                *spann = spann_new.clone();
80            }
81            (VectorIndexConfiguration::Hnsw(_), VectorIndexConfiguration::Spann(_)) => {
82                // For now, we don't support converting between different index types
83                // This could be implemented in the future if needed
84            }
85            (VectorIndexConfiguration::Spann(_), VectorIndexConfiguration::Hnsw(_)) => {
86                // For now, we don't support converting between different index types
87                // This could be implemented in the future if needed
88            }
89        }
90    }
91}
92impl From<InternalHnswConfiguration> for VectorIndexConfiguration {
93    fn from(config: InternalHnswConfiguration) -> Self {
94        VectorIndexConfiguration::Hnsw(config)
95    }
96}
97
98impl From<InternalSpannConfiguration> for VectorIndexConfiguration {
99    fn from(config: InternalSpannConfiguration) -> Self {
100        VectorIndexConfiguration::Spann(config)
101    }
102}
103
104fn default_vector_index_config() -> VectorIndexConfiguration {
105    VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default())
106}
107
// Internal, fully resolved collection configuration: one concrete vector
// index configuration plus an optional embedding function description.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct InternalCollectionConfiguration {
    // When this field is missing during deserialization, serde fills it in via
    // `default_vector_index_config` (a default HNSW configuration).
    #[serde(default = "default_vector_index_config")]
    pub vector_index: VectorIndexConfiguration,
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
}
115
116impl InternalCollectionConfiguration {
117    pub fn from_legacy_metadata(
118        metadata: Metadata,
119    ) -> Result<Self, HnswParametersFromSegmentError> {
120        let hnsw = InternalHnswConfiguration::from_legacy_segment_metadata(&Some(metadata))?;
121        Ok(Self {
122            vector_index: VectorIndexConfiguration::Hnsw(hnsw),
123            embedding_function: None,
124        })
125    }
126
127    pub fn default_hnsw() -> Self {
128        Self {
129            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default()),
130            embedding_function: None,
131        }
132    }
133
134    pub fn default_spann() -> Self {
135        Self {
136            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration::default()),
137            embedding_function: None,
138        }
139    }
140
141    /// Check if this collection configuration is default
142    pub fn is_default(&self) -> bool {
143        if !is_embedding_function_default(&self.embedding_function) {
144            return false;
145        }
146
147        // Check vector index configuration
148        match &self.vector_index {
149            VectorIndexConfiguration::Hnsw(hnsw_config) => {
150                hnsw_config.ef_construction == default_construction_ef()
151                    && hnsw_config.ef_search == default_search_ef()
152                    && hnsw_config.max_neighbors == default_m()
153                    && hnsw_config.num_threads == default_num_threads()
154                    && hnsw_config.batch_size == default_batch_size()
155                    && hnsw_config.sync_threshold == default_sync_threshold()
156                    && hnsw_config.resize_factor == default_resize_factor()
157                    && hnsw_config.space == default_space()
158            }
159            VectorIndexConfiguration::Spann(spann_config) => {
160                spann_config.search_nprobe == default_search_nprobe()
161                    && spann_config.search_rng_factor == default_search_rng_factor()
162                    && spann_config.search_rng_epsilon == default_search_rng_epsilon()
163                    && spann_config.write_nprobe == default_write_nprobe()
164                    && spann_config.nreplica_count == default_nreplica_count()
165                    && spann_config.write_rng_factor == default_write_rng_factor()
166                    && spann_config.write_rng_epsilon == default_write_rng_epsilon()
167                    && spann_config.split_threshold == default_split_threshold()
168                    && spann_config.num_samples_kmeans == default_num_samples_kmeans()
169                    && spann_config.initial_lambda == default_initial_lambda()
170                    && spann_config.reassign_neighbor_count == default_reassign_neighbor_count()
171                    && spann_config.merge_threshold == default_merge_threshold()
172                    && spann_config.num_centers_to_merge_to == default_num_centers_to_merge_to()
173                    && spann_config.ef_construction == default_construction_ef_spann()
174                    && spann_config.ef_search == default_search_ef_spann()
175                    && spann_config.max_neighbors == default_m_spann()
176                    && spann_config.space == default_space()
177            }
178        }
179    }
180
    /// Resolves the HNSW configuration for `segment`, taking the segment's
    /// legacy metadata into account.
    ///
    /// This is a thin wrapper that forwards `segment.metadata` to
    /// [`Self::get_hnsw_config_from_legacy_metadata`]; see that method for the
    /// exact resolution rules and error behavior.
    pub fn get_hnsw_config_with_legacy_fallback(
        &self,
        segment: &Segment,
    ) -> Result<Option<InternalHnswConfiguration>, HnswParametersFromSegmentError> {
        self.get_hnsw_config_from_legacy_metadata(&segment.metadata)
    }
187
188    pub fn get_hnsw_config_from_legacy_metadata(
189        &self,
190        metadata: &Option<Metadata>,
191    ) -> Result<Option<InternalHnswConfiguration>, HnswParametersFromSegmentError> {
192        if let Some(config) = self.get_hnsw_config() {
193            let config_from_metadata =
194                InternalHnswConfiguration::from_legacy_segment_metadata(metadata)?;
195
196            if config == InternalHnswConfiguration::default() && config != config_from_metadata {
197                return Ok(Some(config_from_metadata));
198            }
199
200            return Ok(Some(config));
201        }
202
203        Ok(None)
204    }
205
206    pub fn get_spann_config(&self) -> Option<InternalSpannConfiguration> {
207        match &self.vector_index {
208            VectorIndexConfiguration::Spann(config) => Some(config.clone()),
209            _ => None,
210        }
211    }
212
213    fn get_hnsw_config(&self) -> Option<InternalHnswConfiguration> {
214        match &self.vector_index {
215            VectorIndexConfiguration::Hnsw(config) => Some(config.clone()),
216            _ => None,
217        }
218    }
219
220    pub fn update(&mut self, configuration: &InternalUpdateCollectionConfiguration) {
221        // Update vector_index if it exists in the update configuration
222
223        if let Some(vector_index) = &configuration.vector_index {
224            match vector_index {
225                UpdateVectorIndexConfiguration::Hnsw(hnsw_config) => {
226                    if let VectorIndexConfiguration::Hnsw(current_config) = &mut self.vector_index {
227                        if let Some(update_config) = hnsw_config {
228                            if let Some(ef_search) = update_config.ef_search {
229                                current_config.ef_search = ef_search;
230                            }
231                            if let Some(max_neighbors) = update_config.max_neighbors {
232                                current_config.max_neighbors = max_neighbors;
233                            }
234                            if let Some(num_threads) = update_config.num_threads {
235                                current_config.num_threads = num_threads;
236                            }
237                            if let Some(resize_factor) = update_config.resize_factor {
238                                current_config.resize_factor = resize_factor;
239                            }
240                            if let Some(sync_threshold) = update_config.sync_threshold {
241                                current_config.sync_threshold = sync_threshold;
242                            }
243                            if let Some(batch_size) = update_config.batch_size {
244                                current_config.batch_size = batch_size;
245                            }
246                        }
247                    }
248                }
249                UpdateVectorIndexConfiguration::Spann(spann_config) => {
250                    if let VectorIndexConfiguration::Spann(current_config) = &mut self.vector_index
251                    {
252                        if let Some(update_config) = spann_config {
253                            if let Some(search_nprobe) = update_config.search_nprobe {
254                                current_config.search_nprobe = search_nprobe;
255                            }
256                            if let Some(ef_search) = update_config.ef_search {
257                                current_config.ef_search = ef_search;
258                            }
259                        }
260                    }
261                }
262            }
263        }
264        // Update embedding_function if it exists in the update configuration
265        if let Some(embedding_function) = &configuration.embedding_function {
266            self.embedding_function = Some(embedding_function.clone());
267        }
268    }
269
270    pub fn try_from_config(
271        value: CollectionConfiguration,
272        default_knn_index: KnnIndex,
273        metadata: Option<Metadata>,
274    ) -> Result<Self, CollectionConfigurationToInternalConfigurationError> {
275        let mut hnsw: Option<HnswConfiguration> = value.hnsw;
276        let spann: Option<SpannConfiguration> = value.spann;
277
278        // if neither hnsw nor spann is provided, use the collection metadata to build an hnsw configuration
279        // the match then handles cases where hnsw is provided, and correctly routes to either spann or hnsw configuration
280        // based on the default_knn_index
281        if hnsw.is_none() && spann.is_none() {
282            let hnsw_config_from_metadata =
283            InternalHnswConfiguration::from_legacy_segment_metadata(&metadata).map_err(|e| {
284                CollectionConfigurationToInternalConfigurationError::HnswParametersFromSegmentError(
285                    e,
286                )
287            })?;
288            hnsw = Some(hnsw_config_from_metadata.into());
289        }
290
291        match (hnsw, spann) {
292            (Some(_), Some(_)) => Err(CollectionConfigurationToInternalConfigurationError::MultipleVectorIndexConfigurations),
293            (Some(hnsw), None) => {
294                match default_knn_index {
295                    // Create a spann index. Only inherit the space if it exists in the hnsw config.
296                    // This is for backwards compatibility so that users who migrate to distributed
297                    // from local don't break their code.
298                    KnnIndex::Spann => {
299                        let internal_config = if let Some(space) = hnsw.space {
300                            InternalSpannConfiguration {
301                                space,
302                                ..Default::default()
303                            }
304                        } else {
305                            InternalSpannConfiguration::default()
306                        };
307
308                        Ok(InternalCollectionConfiguration {
309                            vector_index: VectorIndexConfiguration::Spann(internal_config),
310                            embedding_function: value.embedding_function,
311                        })
312                    },
313                    KnnIndex::Hnsw => {
314                        let hnsw: InternalHnswConfiguration = hnsw.into();
315                        Ok(InternalCollectionConfiguration {
316                            vector_index: hnsw.into(),
317                            embedding_function: value.embedding_function,
318                        })
319                    }
320                }
321            }
322            (None, Some(spann)) => {
323                match default_knn_index {
324                    // Create a hnsw index. Only inherit the space if it exists in the spann config.
325                    // This is for backwards compatibility so that users who migrate to local
326                    // from distributed don't break their code.
327                    KnnIndex::Hnsw => {
328                        let internal_config = if let Some(space) = spann.space {
329                            InternalHnswConfiguration {
330                                space,
331                                ..Default::default()
332                            }
333                        } else {
334                            InternalHnswConfiguration::default()
335                        };
336                        Ok(InternalCollectionConfiguration {
337                            vector_index: VectorIndexConfiguration::Hnsw(internal_config),
338                            embedding_function: value.embedding_function,
339                        })
340                    }
341                    KnnIndex::Spann => {
342                        let spann: InternalSpannConfiguration = spann.into();
343                        Ok(InternalCollectionConfiguration {
344                            vector_index: spann.into(),
345                            embedding_function: value.embedding_function,
346                        })
347                    }
348                }
349            }
350            (None, None) => {
351                let vector_index = match default_knn_index {
352                    KnnIndex::Hnsw => InternalHnswConfiguration::default().into(),
353                    KnnIndex::Spann => InternalSpannConfiguration::default().into(),
354                };
355                Ok(InternalCollectionConfiguration {
356                    vector_index,
357                    embedding_function: value.embedding_function,
358                })
359            }
360        }
361    }
362}
363
364impl TryFrom<CollectionConfiguration> for InternalCollectionConfiguration {
365    type Error = CollectionConfigurationToInternalConfigurationError;
366
367    fn try_from(value: CollectionConfiguration) -> Result<Self, Self::Error> {
368        match (value.hnsw, value.spann) {
369            (Some(_), Some(_)) => Err(Self::Error::MultipleVectorIndexConfigurations),
370            (Some(hnsw), None) => {
371                let hnsw: InternalHnswConfiguration = hnsw.into();
372                Ok(InternalCollectionConfiguration {
373                    vector_index: hnsw.into(),
374                    embedding_function: value.embedding_function,
375                })
376            }
377            (None, Some(spann)) => {
378                let spann: InternalSpannConfiguration = spann.into();
379                Ok(InternalCollectionConfiguration {
380                    vector_index: spann.into(),
381                    embedding_function: value.embedding_function,
382                })
383            }
384            (None, None) => Ok(InternalCollectionConfiguration {
385                vector_index: InternalHnswConfiguration::default().into(),
386                embedding_function: value.embedding_function,
387            }),
388        }
389    }
390}
391
392impl TryFrom<&Schema> for InternalCollectionConfiguration {
393    type Error = String;
394
395    fn try_from(schema: &Schema) -> Result<Self, Self::Error> {
396        let vector_config = schema
397            .keys
398            .get(EMBEDDING_KEY)
399            .and_then(|value_types| value_types.float_list.as_ref())
400            .and_then(|float_list| float_list.vector_index.as_ref())
401            .map(|vector_index| vector_index.config.clone())
402            .or_else(|| {
403                schema
404                    .defaults
405                    .float_list
406                    .as_ref()
407                    .and_then(|float_list| float_list.vector_index.as_ref())
408                    .map(|vector_index| vector_index.config.clone())
409            })
410            .ok_or_else(|| "Missing vector index configuration for #embedding".to_string())?;
411
412        let VectorIndexConfig {
413            space,
414            embedding_function,
415            hnsw,
416            spann,
417            ..
418        } = vector_config;
419
420        match (hnsw, spann) {
421            (Some(_), Some(_)) => Err(
422                "Vector index configuration must not contain both HNSW and SPANN settings"
423                    .to_string(),
424            ),
425            (Some(hnsw_config), None) => {
426                let internal_hnsw = (space.as_ref(), Some(&hnsw_config)).into();
427                Ok(InternalCollectionConfiguration {
428                    vector_index: VectorIndexConfiguration::Hnsw(internal_hnsw),
429                    embedding_function,
430                })
431            }
432            (None, Some(spann_config)) => {
433                let internal_spann = (space.as_ref(), &spann_config).into();
434                Ok(InternalCollectionConfiguration {
435                    vector_index: VectorIndexConfiguration::Spann(internal_spann),
436                    embedding_function,
437                })
438            }
439            (None, None) => {
440                let internal_hnsw = (space.as_ref(), None).into();
441                Ok(InternalCollectionConfiguration {
442                    vector_index: VectorIndexConfiguration::Hnsw(internal_hnsw),
443                    embedding_function,
444                })
445            }
446        }
447    }
448}
449
/// Errors produced while converting a `CollectionConfiguration` into an
/// `InternalCollectionConfiguration`.
#[derive(Debug, Error)]
pub enum CollectionConfigurationToInternalConfigurationError {
    /// Both `hnsw` and `spann` were set on the input configuration.
    #[error("Multiple vector index configurations provided")]
    MultipleVectorIndexConfigurations,
    /// Legacy metadata could not be parsed into HNSW parameters. `#[from]`
    /// lets `?` convert the underlying error automatically.
    #[error("Failed to parse hnsw parameters from segment metadata")]
    HnswParametersFromSegmentError(#[from] HnswParametersFromSegmentError),
}
457
458impl ChromaError for CollectionConfigurationToInternalConfigurationError {
459    fn code(&self) -> ErrorCodes {
460        match self {
461            Self::MultipleVectorIndexConfigurations => ErrorCodes::InvalidArgument,
462            Self::HnswParametersFromSegmentError(_) => ErrorCodes::InvalidArgument,
463        }
464    }
465}
466
// User-facing collection configuration.
//
// `hnsw` and `spann` are both optional; the conversions into
// `InternalCollectionConfiguration` in this file reject inputs that set both.
#[derive(Default, Deserialize, Serialize, Debug, Clone)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[cfg_attr(feature = "pyo3", pyo3::pyclass)]
pub struct CollectionConfiguration {
    pub hnsw: Option<HnswConfiguration>,
    pub spann: Option<SpannConfiguration>,
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
}
475
476impl From<InternalCollectionConfiguration> for CollectionConfiguration {
477    fn from(value: InternalCollectionConfiguration) -> Self {
478        Self {
479            hnsw: match value.vector_index.clone() {
480                VectorIndexConfiguration::Hnsw(config) => Some(config.into()),
481                _ => None,
482            },
483            spann: match value.vector_index {
484                VectorIndexConfiguration::Spann(config) => Some(config.into()),
485                _ => None,
486            },
487            embedding_function: value.embedding_function,
488        }
489    }
490}
491
// Partial update for a vector index, tagged by index kind.
//
// The payload is optional. In `InternalCollectionConfiguration::update`, a
// `None` payload changes nothing. Variant names are serialized in snake_case.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(rename_all = "snake_case")]
pub enum UpdateVectorIndexConfiguration {
    Hnsw(Option<UpdateHnswConfiguration>),
    Spann(Option<UpdateSpannConfiguration>),
}
499
500impl From<UpdateHnswConfiguration> for UpdateVectorIndexConfiguration {
501    fn from(config: UpdateHnswConfiguration) -> Self {
502        UpdateVectorIndexConfiguration::Hnsw(Some(config))
503    }
504}
505
506impl From<UpdateSpannConfiguration> for UpdateVectorIndexConfiguration {
507    fn from(config: UpdateSpannConfiguration) -> Self {
508        UpdateVectorIndexConfiguration::Spann(Some(config))
509    }
510}
511
/// Error type for update-configuration conversions.
///
/// NOTE(review): nothing in the visible part of this file constructs this
/// type; the `TryFrom<UpdateCollectionConfiguration>` impl uses
/// `UpdateCollectionConfigurationToInternalUpdateConfigurationError` instead.
/// Confirm whether other modules still depend on it.
#[derive(Debug, Error)]
pub enum UpdateCollectionConfigurationToInternalConfigurationError {
    /// More than one vector index configuration was provided.
    #[error("Multiple vector index configurations provided")]
    MultipleVectorIndexConfigurations,
}
517
impl ChromaError for UpdateCollectionConfigurationToInternalConfigurationError {
    /// Maps the error to `ErrorCodes::InvalidArgument`. The match is written
    /// out so that adding a variant forces this mapping to be revisited.
    fn code(&self) -> ErrorCodes {
        match self {
            Self::MultipleVectorIndexConfigurations => ErrorCodes::InvalidArgument,
        }
    }
}
525
// User-facing partial update for a collection configuration.
//
// Converting to `InternalUpdateCollectionConfiguration` fails when both
// `hnsw` and `spann` are set.
#[derive(Deserialize, Serialize, Debug, Clone)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[cfg_attr(feature = "pyo3", pyo3::pyclass)]
pub struct UpdateCollectionConfiguration {
    pub hnsw: Option<UpdateHnswConfiguration>,
    pub spann: Option<UpdateSpannConfiguration>,
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
}
534
// Internal form of a collection configuration update, consumed by
// `InternalCollectionConfiguration::update`. A `None` field means "leave that
// part of the configuration unchanged".
#[derive(Deserialize, Serialize, Debug, Clone)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
pub struct InternalUpdateCollectionConfiguration {
    pub vector_index: Option<UpdateVectorIndexConfiguration>,
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
}
541
/// Errors produced while converting an `UpdateCollectionConfiguration` into an
/// `InternalUpdateCollectionConfiguration`.
#[derive(Debug, Error)]
pub enum UpdateCollectionConfigurationToInternalUpdateConfigurationError {
    /// Both `hnsw` and `spann` updates were set on the input.
    #[error("Multiple vector index configurations provided")]
    MultipleVectorIndexConfigurations,
}
547
impl ChromaError for UpdateCollectionConfigurationToInternalUpdateConfigurationError {
    /// Maps the error to `ErrorCodes::InvalidArgument`. The match is written
    /// out so that adding a variant forces this mapping to be revisited.
    fn code(&self) -> ErrorCodes {
        match self {
            Self::MultipleVectorIndexConfigurations => ErrorCodes::InvalidArgument,
        }
    }
}
555
556impl TryFrom<UpdateCollectionConfiguration> for InternalUpdateCollectionConfiguration {
557    type Error = UpdateCollectionConfigurationToInternalUpdateConfigurationError;
558
559    fn try_from(value: UpdateCollectionConfiguration) -> Result<Self, Self::Error> {
560        match (value.hnsw, value.spann) {
561            (Some(_), Some(_)) => Err(Self::Error::MultipleVectorIndexConfigurations),
562            (Some(hnsw), None) => Ok(InternalUpdateCollectionConfiguration {
563                vector_index: Some(UpdateVectorIndexConfiguration::Hnsw(Some(hnsw))),
564                embedding_function: value.embedding_function,
565            }),
566            (None, Some(spann)) => Ok(InternalUpdateCollectionConfiguration {
567                vector_index: Some(UpdateVectorIndexConfiguration::Spann(Some(spann))),
568                embedding_function: value.embedding_function,
569            }),
570            (None, None) => Ok(InternalUpdateCollectionConfiguration {
571                vector_index: None,
572                embedding_function: value.embedding_function,
573            }),
574        }
575    }
576}
577
578#[cfg(test)]
579mod tests {
580
581    use crate::hnsw_configuration::HnswConfiguration;
582    use crate::hnsw_configuration::Space;
583    use crate::spann_configuration::SpannConfiguration;
584    use crate::{test_segment, CollectionUuid, Metadata};
585
586    use super::*;
587
    // A default HNSW config must yield to values found in the segment's legacy
    // metadata.
    #[test]
    fn metadata_overrides_parameter() {
        let mut metadata = Metadata::new();
        metadata.insert(
            "hnsw:construction_ef".to_string(),
            crate::MetadataValue::Int(1),
        );

        let mut segment = test_segment(CollectionUuid::new(), crate::SegmentScope::VECTOR);
        segment.metadata = Some(metadata);

        // The stored config is entirely default, so the metadata value wins.
        let config = InternalCollectionConfiguration::default_hnsw();
        let overridden_config = config
            .get_hnsw_config_with_legacy_fallback(&segment)
            .unwrap()
            .unwrap();

        assert_eq!(overridden_config.ef_construction, 1);
    }
607
    // A non-default HNSW config must take precedence over legacy metadata.
    #[test]
    fn metadata_ignored_when_config_is_not_default() {
        let mut metadata = Metadata::new();
        metadata.insert(
            "hnsw:construction_ef".to_string(),
            crate::MetadataValue::Int(1),
        );

        let mut segment = test_segment(CollectionUuid::new(), crate::SegmentScope::VECTOR);
        segment.metadata = Some(metadata);

        // Explicitly set `ef_construction` so the config is no longer default.
        let config = InternalCollectionConfiguration {
            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
                ef_construction: 2,
                ..Default::default()
            }),
            embedding_function: None,
        };

        let overridden_config = config
            .get_hnsw_config_with_legacy_fallback(&segment)
            .unwrap()
            .unwrap();

        // Setting from metadata is ignored since the config is not default
        assert_eq!(overridden_config.ef_construction, 2);
    }
635
    // HNSW settings + HNSW default index: the settings are converted as-is.
    #[test]
    fn test_hnsw_config_with_hnsw_default() {
        let hnsw_config = HnswConfiguration {
            max_neighbors: Some(16),
            ef_construction: Some(100),
            ef_search: Some(10),
            batch_size: Some(100),
            num_threads: Some(4),
            sync_threshold: Some(500),
            resize_factor: Some(1.2),
            space: Some(Space::Cosine),
        };

        let collection_config = CollectionConfiguration {
            hnsw: Some(hnsw_config.clone()),
            spann: None,
            embedding_function: None,
        };

        let internal_config_result = InternalCollectionConfiguration::try_from_config(
            collection_config,
            KnnIndex::Hnsw,
            None,
        );

        assert!(internal_config_result.is_ok());
        let internal_config = internal_config_result.unwrap();

        // Expect exactly what the HnswConfiguration -> internal conversion yields.
        let expected_vector_index = VectorIndexConfiguration::Hnsw(hnsw_config.into());
        assert_eq!(internal_config.vector_index, expected_vector_index);
    }
667
    // HNSW settings + SPANN default index: a default SPANN config is produced
    // and only `space` is inherited from the HNSW settings.
    #[test]
    fn test_hnsw_config_with_spann_default() {
        let hnsw_config = HnswConfiguration {
            max_neighbors: Some(16),
            ef_construction: Some(100),
            ef_search: Some(10),
            batch_size: Some(100),
            num_threads: Some(4),
            sync_threshold: Some(500),
            resize_factor: Some(1.2),
            space: Some(Space::Cosine),
        };

        let collection_config = CollectionConfiguration {
            hnsw: Some(hnsw_config.clone()),
            spann: None,
            embedding_function: None,
        };

        let internal_config_result = InternalCollectionConfiguration::try_from_config(
            collection_config,
            KnnIndex::Spann,
            None,
        );

        assert!(internal_config_result.is_ok());
        let internal_config = internal_config_result.unwrap();

        // All other SPANN fields are expected to stay at their defaults.
        let expected_vector_index = VectorIndexConfiguration::Spann(InternalSpannConfiguration {
            space: hnsw_config.space.unwrap_or(Space::L2),
            ..Default::default()
        });
        assert_eq!(internal_config.vector_index, expected_vector_index);
    }
702
    // SPANN settings + SPANN default index: the settings are converted as-is.
    #[test]
    fn test_spann_config_with_spann_default() {
        let spann_config = SpannConfiguration {
            ef_construction: Some(100),
            ef_search: Some(10),
            max_neighbors: Some(16),
            search_nprobe: Some(1),
            write_nprobe: Some(1),
            space: Some(Space::Cosine),
            reassign_neighbor_count: Some(64),
            split_threshold: Some(200),
            merge_threshold: Some(100),
        };

        let collection_config = CollectionConfiguration {
            hnsw: None,
            spann: Some(spann_config.clone()),
            embedding_function: None,
        };

        let internal_config_result = InternalCollectionConfiguration::try_from_config(
            collection_config,
            KnnIndex::Spann,
            None,
        );

        assert!(internal_config_result.is_ok());
        let internal_config = internal_config_result.unwrap();

        // Expect exactly what the SpannConfiguration -> internal conversion yields.
        let expected_vector_index = VectorIndexConfiguration::Spann(spann_config.into());
        assert_eq!(internal_config.vector_index, expected_vector_index);
    }
735
    // SPANN settings + HNSW default index: a default HNSW config is produced
    // and only `space` is inherited from the SPANN settings.
    #[test]
    fn test_spann_config_with_hnsw_default() {
        let spann_config = SpannConfiguration {
            ef_construction: Some(100),
            ef_search: Some(10),
            max_neighbors: Some(16),
            search_nprobe: Some(1),
            write_nprobe: Some(1),
            space: Some(Space::Cosine),
            reassign_neighbor_count: Some(64),
            split_threshold: Some(200),
            merge_threshold: Some(100),
        };

        let collection_config = CollectionConfiguration {
            hnsw: None,
            spann: Some(spann_config.clone()),
            embedding_function: None,
        };

        let internal_config_result = InternalCollectionConfiguration::try_from_config(
            collection_config,
            KnnIndex::Hnsw,
            None,
        );

        // All other HNSW fields are expected to stay at their defaults.
        let expected_vector_index = VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
            space: spann_config.space.unwrap_or(Space::L2),
            ..Default::default()
        });
        assert_eq!(
            internal_config_result.unwrap().vector_index,
            expected_vector_index
        );
    }
771
772    #[test]
773    fn test_no_config_with_metadata_default_hnsw() {
774        let metadata = Metadata::new();
775        let collection_config = CollectionConfiguration {
776            hnsw: None,
777            spann: None,
778            embedding_function: None,
779        };
780
781        let internal_config_result = InternalCollectionConfiguration::try_from_config(
782            collection_config,
783            KnnIndex::Hnsw,
784            Some(metadata),
785        );
786
787        assert!(internal_config_result.is_ok());
788        let internal_config = internal_config_result.unwrap();
789
790        assert_eq!(
791            internal_config.vector_index,
792            VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default())
793        );
794    }
795
796    #[test]
797    fn test_no_config_with_metadata_default_spann() {
798        let metadata = Metadata::new();
799        let collection_config = CollectionConfiguration {
800            hnsw: None,
801            spann: None,
802            embedding_function: None,
803        };
804
805        let internal_config_result = InternalCollectionConfiguration::try_from_config(
806            collection_config,
807            KnnIndex::Spann,
808            Some(metadata),
809        );
810
811        assert!(internal_config_result.is_ok());
812        let internal_config = internal_config_result.unwrap();
813
814        assert_eq!(
815            internal_config.vector_index,
816            VectorIndexConfiguration::Spann(InternalSpannConfiguration::default())
817        );
818    }
819
820    #[test]
821    fn test_legacy_metadata_with_hnsw_config() {
822        let mut metadata = Metadata::new();
823        metadata.insert(
824            "hnsw:space".to_string(),
825            crate::MetadataValue::Str("cosine".to_string()),
826        );
827        metadata.insert(
828            "hnsw:construction_ef".to_string(),
829            crate::MetadataValue::Int(1),
830        );
831
832        let collection_config = CollectionConfiguration {
833            hnsw: None,
834            spann: None,
835            embedding_function: None,
836        };
837
838        let internal_config_result = InternalCollectionConfiguration::try_from_config(
839            collection_config,
840            KnnIndex::Hnsw,
841            Some(metadata),
842        );
843
844        assert!(internal_config_result.is_ok());
845        let internal_config = internal_config_result.unwrap();
846
847        assert_eq!(
848            internal_config.vector_index,
849            VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
850                space: Space::Cosine,
851                ef_construction: 1,
852                ..Default::default()
853            })
854        );
855    }
856
857    #[test]
858    fn test_legacy_metadata_with_spann_config() {
859        let mut metadata = Metadata::new();
860        metadata.insert(
861            "hnsw:space".to_string(),
862            crate::MetadataValue::Str("cosine".to_string()),
863        );
864        metadata.insert(
865            "hnsw:construction_ef".to_string(),
866            crate::MetadataValue::Int(1),
867        );
868
869        let collection_config = CollectionConfiguration {
870            hnsw: None,
871            spann: None,
872            embedding_function: None,
873        };
874
875        let internal_config_result = InternalCollectionConfiguration::try_from_config(
876            collection_config,
877            KnnIndex::Spann,
878            Some(metadata),
879        );
880
881        assert!(internal_config_result.is_ok());
882
883        let internal_config = internal_config_result.unwrap();
884
885        assert_eq!(
886            internal_config.vector_index,
887            VectorIndexConfiguration::Spann(InternalSpannConfiguration {
888                space: Space::Cosine,
889                ..Default::default()
890            })
891        );
892    }
893
894    #[test]
895    fn test_update_collection_configuration_with_hnsw() {
896        let mut config = InternalCollectionConfiguration {
897            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
898                space: Space::Cosine,
899                ..Default::default()
900            }),
901            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
902                EmbeddingFunctionNewConfiguration {
903                    name: "test".to_string(),
904                    config: serde_json::Value::Null,
905                },
906            )),
907        };
908        let update_config = UpdateCollectionConfiguration {
909            hnsw: Some(UpdateHnswConfiguration {
910                ef_search: Some(1),
911                ..Default::default()
912            }),
913            spann: None,
914            embedding_function: None,
915        };
916        config.update(&update_config.try_into().unwrap());
917        assert_eq!(
918            config.vector_index,
919            VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
920                space: Space::Cosine,
921                ef_search: 1,
922                ..Default::default()
923            })
924        );
925
926        assert_eq!(
927            config.embedding_function,
928            Some(EmbeddingFunctionConfiguration::Known(
929                EmbeddingFunctionNewConfiguration {
930                    name: "test".to_string(),
931                    config: serde_json::Value::Null,
932                },
933            ))
934        );
935    }
936
937    #[test]
938    fn test_update_collection_configuration_with_spann() {
939        let mut config = InternalCollectionConfiguration {
940            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
941                space: Space::Cosine,
942                ..Default::default()
943            }),
944            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
945                EmbeddingFunctionNewConfiguration {
946                    name: "test".to_string(),
947                    config: serde_json::Value::Null,
948                },
949            )),
950        };
951        let update_config = UpdateCollectionConfiguration {
952            hnsw: None,
953            spann: Some(UpdateSpannConfiguration {
954                ef_search: Some(1),
955                ..Default::default()
956            }),
957            embedding_function: None,
958        };
959        config.update(&update_config.try_into().unwrap());
960        assert_eq!(
961            config.vector_index,
962            VectorIndexConfiguration::Spann(InternalSpannConfiguration {
963                space: Space::Cosine,
964                ef_search: 1,
965                ..Default::default()
966            })
967        );
968
969        assert_eq!(
970            config.embedding_function,
971            Some(EmbeddingFunctionConfiguration::Known(
972                EmbeddingFunctionNewConfiguration {
973                    name: "test".to_string(),
974                    config: serde_json::Value::Null,
975                },
976            ))
977        );
978    }
979
980    #[test]
981    fn test_update_collection_configuration_with_embedding_function() {
982        let mut config = InternalCollectionConfiguration {
983            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default()),
984            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
985                EmbeddingFunctionNewConfiguration {
986                    name: "test".to_string(),
987                    config: serde_json::Value::Null,
988                },
989            )),
990        };
991        let emb_fn_config = EmbeddingFunctionNewConfiguration {
992            name: "test2".to_string(),
993            config: serde_json::Value::Object(serde_json::Map::from_iter([(
994                "test".to_string(),
995                serde_json::Value::String("test".to_string()),
996            )])),
997        };
998        let update_config = UpdateCollectionConfiguration {
999            hnsw: None,
1000            spann: None,
1001            embedding_function: Some(EmbeddingFunctionConfiguration::Known(emb_fn_config)),
1002        };
1003        config.update(&update_config.try_into().unwrap());
1004        assert_eq!(
1005            config.embedding_function,
1006            Some(EmbeddingFunctionConfiguration::Known(
1007                EmbeddingFunctionNewConfiguration {
1008                    name: "test2".to_string(),
1009                    config: serde_json::Value::Object(serde_json::Map::from_iter([(
1010                        "test".to_string(),
1011                        serde_json::Value::String("test".to_string()),
1012                    )])),
1013                },
1014            ))
1015        );
1016    }
1017}