Skip to main content

selene_graph/
vector_index.rs

1//! Built-in vector row-set indexes for node properties.
2//!
3//! Vector indexes are durable registrations plus derived in-memory accelerators
4//! over primary node values. Every kind keeps a row bitmap for alive nodes whose
5//! `(label, property)` value is a vector with the declared dimension; ANN kinds
6//! also maintain a derived search accelerator. Registration and live
7//! maintenance are strict so search cannot hide dimensionality or metric drift;
8//! recovery rebuild remains lenient for corrupted/legacy state and is checked by
9//! the debug consistency net.
10
11use std::collections::BTreeSet;
12use std::mem::size_of;
13
14use roaring::RoaringBitmap;
15use rustc_hash::FxHashMap;
16#[path = "vector_index/build.rs"]
17mod build;
18#[path = "vector_index/config.rs"]
19mod config;
20#[path = "vector_index/hnsw.rs"]
21mod hnsw;
22#[path = "vector_index/ivf.rs"]
23mod ivf;
24#[path = "vector_index/ivf_adapter.rs"]
25mod ivf_adapter;
26#[path = "vector_index/memory.rs"]
27mod memory;
28#[path = "vector_index/rebuild.rs"]
29mod rebuild;
30#[path = "vector_index/search_hit.rs"]
31mod search_hit;
32#[path = "vector_index/turbo_quant.rs"]
33mod turbo_quant;
34#[path = "vector_index/turbo_quant_adapter.rs"]
35mod turbo_quant_adapter;
36
37use selene_core::{
38    DbString, HnswIndexConfig, IvfIndexConfig, LabelSet, PropertyMap, Value, VectorMetric,
39    VectorValue,
40};
41use serde::{Deserialize, Serialize};
42
43use crate::error::{GraphError, GraphResult};
44use crate::graph::VectorIndexEntry;
45pub(crate) use build::{
46    build_vector_index_lenient_with_configs, build_vector_index_with_configs,
47    maintain_vector_indexes_strict, rebuild_vector_indexes, rebuild_vector_indexes_strict,
48};
49pub(crate) use config::MAX_IVF_TARGET_CENTROIDS;
50use config::{hnsw_config_for_kind, ivf_config_for_kind};
51pub(crate) use hnsw::HnswSearchScratch;
52use hnsw::HnswVectorIndex;
53use ivf::IvfVectorIndex;
54pub use memory::{
55    IVF_REBUILD_MIN_PENDING_RETRAIN_ENTRIES, IVF_REBUILD_PENDING_RETRAIN_BASIS_POINTS,
56    VectorIndexMemoryUsage,
57};
58pub use rebuild::{
59    VectorIndexMaintenancePolicy, VectorIndexRebuildEntry, VectorIndexRebuildReport,
60};
61pub(crate) use search_hit::VectorIndexSearchHit;
62use search_hit::{hnsw_hits, ivf_hits};
63use turbo_quant::TurboQuantVectorIndex;
64
65type VectorIndexMap = FxHashMap<(DbString, DbString), VectorIndexEntry>;
66
67/// Vector index algorithm kind.
68#[derive(
69    Clone,
70    Copy,
71    Debug,
72    Deserialize,
73    Eq,
74    Hash,
75    PartialEq,
76    rkyv::Archive,
77    rkyv::Deserialize,
78    rkyv::Serialize,
79    Serialize,
80)]
81pub enum VectorIndexKind {
82    /// Exact row-set index over full-precision vectors stored on graph rows.
83    Flat,
84    /// Approximate HNSW index using squared Euclidean distance.
85    HnswSquaredEuclidean,
86    /// Approximate HNSW index using cosine distance.
87    HnswCosine,
88    /// Approximate HNSW index using negative inner product distance.
89    HnswNegativeInnerProduct,
90    /// Approximate IVF index using squared Euclidean distance.
91    IvfSquaredEuclidean,
92    /// Approximate IVF index using cosine distance.
93    IvfCosine,
94    /// Approximate IVF index using negative inner product distance.
95    IvfNegativeInnerProduct,
96    /// Compressed TurboQuant candidate index using cosine distance.
97    TurboQuantCosine,
98}
99
100/// Optional ANN construction config for one vector-index registration.
101#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
102pub struct VectorIndexConfig {
103    /// HNSW construction config for HNSW vector indexes.
104    pub hnsw: Option<HnswIndexConfig>,
105    /// IVF construction config for IVF vector indexes.
106    pub ivf: Option<IvfIndexConfig>,
107}
108
109impl VectorIndexConfig {
110    /// Construct a vector-index config from optional ANN-specific configs.
111    #[must_use]
112    pub const fn new(hnsw: Option<HnswIndexConfig>, ivf: Option<IvfIndexConfig>) -> Self {
113        Self { hnsw, ivf }
114    }
115
116    /// Construct a vector-index config with HNSW settings only.
117    #[must_use]
118    pub const fn hnsw(config: HnswIndexConfig) -> Self {
119        Self {
120            hnsw: Some(config),
121            ivf: None,
122        }
123    }
124
125    /// Construct a vector-index config with IVF settings only.
126    #[must_use]
127    pub const fn ivf(config: IvfIndexConfig) -> Self {
128        Self {
129            hnsw: None,
130            ivf: Some(config),
131        }
132    }
133}
134
135impl VectorIndexKind {
136    /// Return the HNSW metric for approximate vector-index kinds.
137    #[must_use]
138    pub const fn hnsw_metric(self) -> Option<VectorMetric> {
139        match self {
140            Self::Flat => None,
141            Self::HnswSquaredEuclidean => Some(VectorMetric::SquaredEuclidean),
142            Self::HnswCosine => Some(VectorMetric::Cosine),
143            Self::HnswNegativeInnerProduct => Some(VectorMetric::NegativeInnerProduct),
144            Self::IvfSquaredEuclidean
145            | Self::IvfCosine
146            | Self::IvfNegativeInnerProduct
147            | Self::TurboQuantCosine => None,
148        }
149    }
150
151    /// Return the IVF metric for inverted-file vector-index kinds.
152    #[must_use]
153    pub const fn ivf_metric(self) -> Option<VectorMetric> {
154        match self {
155            Self::Flat
156            | Self::HnswSquaredEuclidean
157            | Self::HnswCosine
158            | Self::HnswNegativeInnerProduct
159            | Self::TurboQuantCosine => None,
160            Self::IvfSquaredEuclidean => Some(VectorMetric::SquaredEuclidean),
161            Self::IvfCosine => Some(VectorMetric::Cosine),
162            Self::IvfNegativeInnerProduct => Some(VectorMetric::NegativeInnerProduct),
163        }
164    }
165
166    /// Return the ANN metric for approximate vector-index kinds.
167    #[must_use]
168    pub const fn ann_metric(self) -> Option<VectorMetric> {
169        match self {
170            Self::Flat => None,
171            Self::HnswSquaredEuclidean | Self::IvfSquaredEuclidean => {
172                Some(VectorMetric::SquaredEuclidean)
173            }
174            Self::HnswCosine | Self::IvfCosine => Some(VectorMetric::Cosine),
175            Self::HnswNegativeInnerProduct | Self::IvfNegativeInnerProduct => {
176                Some(VectorMetric::NegativeInnerProduct)
177            }
178            Self::TurboQuantCosine => Some(VectorMetric::Cosine),
179        }
180    }
181}
182
183/// Built-in vector index state for one `(label, property)` registration.
184#[derive(Clone, Debug)]
185pub struct VectorIndex {
186    kind: VectorIndexKind,
187    dimension: u32,
188    hnsw_config: Option<HnswIndexConfig>,
189    ivf_config: Option<IvfIndexConfig>,
190    rows: RoaringBitmap,
191    hnsw: Option<HnswVectorIndex>,
192    ivf: Option<IvfVectorIndex>,
193    turbo_quant: Option<TurboQuantVectorIndex>,
194}
195
196impl VectorIndex {
197    /// Construct an empty vector index.
198    ///
199    /// # Errors
200    ///
201    /// Returns [`GraphError::VectorIndexInvalidDimension`] when `dimension` is
202    /// zero.
203    pub fn new(kind: VectorIndexKind, dimension: u32) -> GraphResult<Self> {
204        Self::new_with_configs(kind, dimension, None, None)
205    }
206
207    /// Construct an empty vector index with optional HNSW configuration.
208    ///
209    /// # Errors
210    ///
211    /// Returns [`GraphError::VectorIndexInvalidDimension`] when `dimension` is
212    /// zero, or [`GraphError::VectorIndexInvalidHnswConfig`] when the supplied
213    /// HNSW parameters are invalid for the chosen kind.
214    pub fn new_with_hnsw_config(
215        kind: VectorIndexKind,
216        dimension: u32,
217        hnsw_config: Option<HnswIndexConfig>,
218    ) -> GraphResult<Self> {
219        Self::new_with_configs(kind, dimension, hnsw_config, None)
220    }
221
222    /// Construct an empty vector index with optional ANN construction config.
223    ///
224    /// # Errors
225    ///
226    /// Returns [`GraphError::VectorIndexInvalidDimension`] when `dimension` is
227    /// zero, [`GraphError::VectorIndexInvalidHnswConfig`] when supplied HNSW
228    /// parameters are invalid for the chosen kind, or
229    /// [`GraphError::VectorIndexInvalidIvfConfig`] when supplied IVF parameters
230    /// are invalid for the chosen kind.
231    pub fn new_with_configs(
232        kind: VectorIndexKind,
233        dimension: u32,
234        hnsw_config: Option<HnswIndexConfig>,
235        ivf_config: Option<IvfIndexConfig>,
236    ) -> GraphResult<Self> {
237        ensure_dimension(dimension)?;
238        let hnsw_config = hnsw_config_for_kind(kind, hnsw_config)?;
239        let ivf_config = ivf_config_for_kind(kind, ivf_config)?;
240        let hnsw = kind.hnsw_metric().map(|metric| {
241            HnswVectorIndex::with_config(metric, hnsw_config.expect("HNSW kind stores config"))
242        });
243        let ivf = kind
244            .ivf_metric()
245            .map(|metric| IvfVectorIndex::with_config(metric, ivf_config));
246        let turbo_quant = match kind {
247            VectorIndexKind::TurboQuantCosine => Some(TurboQuantVectorIndex::new(dimension)?),
248            _ => None,
249        };
250        Ok(Self {
251            kind,
252            dimension,
253            hnsw_config,
254            ivf_config,
255            rows: RoaringBitmap::new(),
256            hnsw,
257            ivf,
258            turbo_quant,
259        })
260    }
261
262    /// Return the vector index algorithm kind.
263    #[must_use]
264    pub const fn kind(&self) -> VectorIndexKind {
265        self.kind
266    }
267
268    /// Return the required vector dimensionality.
269    #[must_use]
270    pub const fn dimension(&self) -> u32 {
271        self.dimension
272    }
273
274    /// Return the HNSW construction config, if this is an HNSW index.
275    #[must_use]
276    pub const fn hnsw_config(&self) -> Option<HnswIndexConfig> {
277        self.hnsw_config
278    }
279
280    /// Return the IVF construction config, if this is a configured IVF index.
281    #[must_use]
282    pub const fn ivf_config(&self) -> Option<IvfIndexConfig> {
283        self.ivf_config
284    }
285
286    /// Return the number of indexed rows.
287    #[must_use]
288    pub fn cardinality(&self) -> u64 {
289        self.rows.len()
290    }
291
292    /// Borrow the indexed row bitmap.
293    #[must_use]
294    pub const fn rows(&self) -> &RoaringBitmap {
295        &self.rows
296    }
297
298    /// Return true when this index has an ANN graph.
299    #[must_use]
300    pub const fn is_hnsw(&self) -> bool {
301        self.kind.hnsw_metric().is_some()
302    }
303
304    /// Return true when this index has an IVF accelerator.
305    #[must_use]
306    pub const fn is_ivf(&self) -> bool {
307        self.kind.ivf_metric().is_some()
308    }
309
310    /// Return true when this index has a TurboQuant compressed accelerator.
311    #[must_use]
312    pub const fn is_turbo_quant(&self) -> bool {
313        matches!(self.kind, VectorIndexKind::TurboQuantCosine)
314    }
315
316    /// Return the HNSW metric, if this is an HNSW index.
317    #[must_use]
318    pub const fn hnsw_metric(&self) -> Option<VectorMetric> {
319        self.kind.hnsw_metric()
320    }
321
322    /// Return the ANN metric, if this is an approximate index.
323    #[must_use]
324    pub const fn ann_metric(&self) -> Option<VectorMetric> {
325        self.kind.ann_metric()
326    }
327
328    /// Return an estimated memory usage snapshot for this index.
329    #[must_use]
330    pub fn memory_usage(&self) -> VectorIndexMemoryUsage {
331        let row_bitmap_bytes = roaring_heap_bytes(&self.rows);
332        let row_bitmap_serialized_bytes = self.rows.serialized_size();
333        let hnsw = self
334            .hnsw
335            .as_ref()
336            .map(HnswVectorIndex::memory_usage)
337            .unwrap_or_default();
338        let ivf = self
339            .ivf
340            .as_ref()
341            .map(IvfVectorIndex::memory_usage)
342            .unwrap_or_default();
343        let turbo_quant = self
344            .turbo_quant
345            .as_ref()
346            .map(TurboQuantVectorIndex::memory_usage)
347            .unwrap_or_default();
348        let estimated_index_bytes = size_of::<Self>()
349            .saturating_add(row_bitmap_bytes)
350            .saturating_add(hnsw.estimated_heap_bytes)
351            .saturating_add(ivf.estimated_heap_bytes)
352            .saturating_add(turbo_quant.estimated_heap_bytes);
353        VectorIndexMemoryUsage {
354            indexed_rows: self.cardinality(),
355            row_bitmap_bytes,
356            row_bitmap_serialized_bytes,
357            hnsw_index_bytes: hnsw.estimated_heap_bytes,
358            hnsw_referenced_vector_bytes: hnsw.referenced_vector_bytes,
359            hnsw_entries: hnsw.entries,
360            hnsw_live_entries: hnsw.live_entries,
361            hnsw_deleted_entries: hnsw.deleted_entries,
362            hnsw_link_count: hnsw.link_count,
363            hnsw_level_zero_link_count: hnsw.level_zero_link_count,
364            hnsw_upper_layer_link_count: hnsw.upper_layer_link_count,
365            hnsw_max_layer_count: hnsw.max_layer_count,
366            hnsw_max_links_per_layer: hnsw.max_links_per_layer,
367            hnsw_average_links_per_entry_basis_points: hnsw.average_links_per_entry_basis_points,
368            ivf_index_bytes: ivf.estimated_heap_bytes,
369            ivf_referenced_vector_bytes: ivf.referenced_vector_bytes,
370            ivf_entries: ivf.entries,
371            ivf_live_entries: ivf.live_entries,
372            ivf_deleted_entries: ivf.deleted_entries,
373            ivf_centroids: ivf.centroids,
374            ivf_list_count: ivf.list_count,
375            ivf_non_empty_list_count: ivf.non_empty_list_count,
376            ivf_max_list_len: ivf.max_list_len,
377            ivf_average_list_len_basis_points: ivf.average_list_len_basis_points,
378            ivf_assigned_entries: ivf.assigned_entries,
379            ivf_pending_retrain_entries: ivf.pending_retrain_entries,
380            turbo_quant_index_bytes: turbo_quant.estimated_heap_bytes,
381            turbo_quant_referenced_vector_bytes: turbo_quant.referenced_vector_bytes,
382            turbo_quant_entries: turbo_quant.entries,
383            turbo_quant_live_entries: turbo_quant.live_entries,
384            turbo_quant_deleted_entries: turbo_quant.deleted_entries,
385            turbo_quant_code_bytes: turbo_quant.code_bytes,
386            turbo_quant_codebook_bytes: turbo_quant.codebook_bytes,
387            turbo_quant_calibration_bytes: turbo_quant.calibration_bytes,
388            estimated_index_bytes,
389            estimated_reachable_bytes: estimated_index_bytes
390                .saturating_add(hnsw.referenced_vector_bytes)
391                .saturating_add(ivf.referenced_vector_bytes)
392                .saturating_add(turbo_quant.referenced_vector_bytes),
393        }
394    }
395
396    pub(crate) fn insert_value(&mut self, row: u32, vector: &VectorValue) -> GraphResult<()> {
397        let mut scratch = HnswSearchScratch::default();
398        self.insert_value_with_scratch(row, vector, &mut scratch)
399    }
400
401    pub(crate) fn insert_value_with_scratch(
402        &mut self,
403        row: u32,
404        vector: &VectorValue,
405        scratch: &mut HnswSearchScratch,
406    ) -> GraphResult<()> {
407        self.rows.insert(row);
408        if let Some(hnsw) = &mut self.hnsw {
409            hnsw.insert_with_scratch(row, vector.clone(), scratch)?;
410        }
411        if let Some(ivf) = &mut self.ivf {
412            ivf.insert(row, vector.clone())?;
413        }
414        if let Some(turbo_quant) = &mut self.turbo_quant {
415            turbo_quant.insert(row, vector)?;
416        }
417        Ok(())
418    }
419
420    pub(crate) fn finish_bulk_load(&mut self) -> GraphResult<()> {
421        if let Some(hnsw) = &mut self.hnsw {
422            hnsw.finish_bulk_load();
423        }
424        if let Some(ivf) = &mut self.ivf {
425            ivf.finish_bulk_load()?;
426        }
427        if let Some(turbo_quant) = &mut self.turbo_quant {
428            turbo_quant.finish_bulk_load()?;
429        }
430        Ok(())
431    }
432
433    pub(crate) fn remove_row(&mut self, row: u32) {
434        self.rows.remove(row);
435        if let Some(hnsw) = &mut self.hnsw {
436            hnsw.remove(row);
437        }
438        if let Some(ivf) = &mut self.ivf {
439            ivf.remove(row);
440        }
441        if let Some(turbo_quant) = &mut self.turbo_quant {
442            turbo_quant.remove(row);
443        }
444    }
445
446    pub(crate) fn rows_eq(&self, reference: &Self) -> bool {
447        self.kind == reference.kind
448            && self.dimension == reference.dimension
449            && self.hnsw_config == reference.hnsw_config
450            && self.ivf_config == reference.ivf_config
451            && self.rows == reference.rows
452    }
453
454    pub(crate) fn ann_search(
455        &self,
456        query: &VectorValue,
457        k: usize,
458        search_width: usize,
459    ) -> Option<selene_core::CoreResult<Vec<VectorIndexSearchHit>>> {
460        if let Some(hnsw) = &self.hnsw {
461            return Some(hnsw.search(query, k, search_width).map(hnsw_hits));
462        }
463        if let Some(ivf) = &self.ivf {
464            return Some(ivf.search(query, k, search_width).map(ivf_hits));
465        }
466        None
467    }
468
469    pub(crate) fn ann_search_with_scratch(
470        &self,
471        query: &VectorValue,
472        k: usize,
473        search_width: usize,
474        scratch: &mut HnswSearchScratch,
475    ) -> Option<selene_core::CoreResult<Vec<VectorIndexSearchHit>>> {
476        if let Some(hnsw) = &self.hnsw {
477            return Some(
478                hnsw.search_with_scratch(query, k, search_width, scratch)
479                    .map(hnsw_hits),
480            );
481        }
482        if let Some(ivf) = &self.ivf {
483            return Some(ivf.search(query, k, search_width).map(ivf_hits));
484        }
485        None
486    }
487}
488
489fn roaring_heap_bytes(rows: &RoaringBitmap) -> usize {
490    let statistics = rows.statistics();
491    u64_to_usize_saturating(
492        statistics
493            .n_bytes_array_containers
494            .saturating_add(statistics.n_bytes_run_containers)
495            .saturating_add(statistics.n_bytes_bitset_containers),
496    )
497}
498
499/// Error returned when a value cannot be admitted to a vector index.
500#[derive(Clone, Debug, Eq, PartialEq, thiserror::Error)]
501pub enum VectorIndexValueError {
502    /// Value is not a vector.
503    #[error("kind mismatch: observed {observed}")]
504    KindMismatch {
505        /// Observed value kind.
506        observed: &'static str,
507    },
508    /// Vector dimensionality differs from the registration.
509    #[error("dimension mismatch: expected {expected}, observed {observed}")]
510    DimensionMismatch {
511        /// Expected vector dimensionality.
512        expected: u32,
513        /// Observed vector dimensionality.
514        observed: usize,
515    },
516    /// Vector is structurally valid but invalid for the index metric.
517    #[error("metric rejection: {observed}")]
518    MetricRejected {
519        /// Observed metric rejection reason.
520        observed: String,
521    },
522}
523
524impl VectorIndexValueError {
525    fn observed(&self) -> String {
526        match self {
527            Self::KindMismatch { observed } => (*observed).to_owned(),
528            Self::DimensionMismatch { observed, .. } => format!("VECTOR<{observed}>"),
529            Self::MetricRejected { observed } => observed.clone(),
530        }
531    }
532}
533
534pub(crate) fn apply_node_create(
535    indexes: &mut VectorIndexMap,
536    labels: &LabelSet,
537    props: &PropertyMap,
538    row: u32,
539) -> GraphResult<()> {
540    for label in labels.iter() {
541        for (property, value) in props.iter() {
542            if is_null(value) {
543                continue;
544            }
545            insert_commit(indexes, label.clone(), property.clone(), value, row)?;
546        }
547    }
548    Ok(())
549}
550
551pub(crate) fn apply_node_delete(
552    indexes: &mut VectorIndexMap,
553    labels: &LabelSet,
554    props: &PropertyMap,
555    row: u32,
556) -> GraphResult<()> {
557    for label in labels.iter() {
558        for (property, value) in props.iter() {
559            if is_null(value) {
560                continue;
561            }
562            remove_commit(indexes, label.clone(), property.clone(), value, row)?;
563        }
564    }
565    Ok(())
566}
567
568pub(crate) fn apply_node_update(
569    indexes: &mut VectorIndexMap,
570    old_labels: &LabelSet,
571    old_props: &PropertyMap,
572    new_labels: &LabelSet,
573    new_props: &PropertyMap,
574    row: u32,
575) -> GraphResult<()> {
576    let candidates = candidate_keys(indexes, old_labels, old_props, new_labels, new_props);
577    for (label, property) in candidates {
578        match (
579            indexable_value(old_labels, old_props, &label, &property),
580            indexable_value(new_labels, new_props, &label, &property),
581        ) {
582            (Some(old_value), Some(new_value)) => {
583                replace_commit(
584                    indexes,
585                    label.clone(),
586                    property.clone(),
587                    old_value,
588                    new_value,
589                    row,
590                )?;
591            }
592            (Some(value), None) => {
593                remove_commit(indexes, label.clone(), property.clone(), value, row)?;
594            }
595            (None, Some(value)) => {
596                insert_commit(indexes, label.clone(), property.clone(), value, row)?;
597            }
598            (None, None) => {}
599        }
600    }
601    Ok(())
602}
603
604fn candidate_keys(
605    indexes: &VectorIndexMap,
606    old_labels: &LabelSet,
607    old_props: &PropertyMap,
608    new_labels: &LabelSet,
609    new_props: &PropertyMap,
610) -> BTreeSet<(DbString, DbString)> {
611    if indexes.is_empty() {
612        return BTreeSet::new();
613    }
614    let mut labels: BTreeSet<DbString> = BTreeSet::new();
615    labels.extend(old_labels.iter().cloned());
616    labels.extend(new_labels.iter().cloned());
617
618    let mut properties: BTreeSet<DbString> = BTreeSet::new();
619    properties.extend(old_props.keys().cloned());
620    properties.extend(new_props.keys().cloned());
621
622    let mut candidates = BTreeSet::new();
623    for label in &labels {
624        for property in &properties {
625            let key = (label.clone(), property.clone());
626            if indexes.contains_key(&key) {
627                candidates.insert(key);
628            }
629        }
630    }
631    candidates
632}
633
634fn indexable_value<'a>(
635    labels: &LabelSet,
636    props: &'a PropertyMap,
637    label: &DbString,
638    property: &DbString,
639) -> Option<&'a Value> {
640    if !labels.contains(label) {
641        return None;
642    }
643    props.get(property).filter(|value| !is_null(value))
644}
645
646fn insert_commit(
647    indexes: &mut VectorIndexMap,
648    label: DbString,
649    property: DbString,
650    value: &Value,
651    row: u32,
652) -> GraphResult<()> {
653    if let Some(entry) = indexes.get_mut(&(label.clone(), property.clone())) {
654        let vector = admit(value, entry.kind(), entry.dimension())
655            .map_err(|err| index_rejection(label, property, entry.dimension(), err))?;
656        std::sync::Arc::make_mut(&mut entry.index).insert_value(row, vector)?;
657    }
658    Ok(())
659}
660
661fn remove_commit(
662    indexes: &mut VectorIndexMap,
663    label: DbString,
664    property: DbString,
665    value: &Value,
666    row: u32,
667) -> GraphResult<()> {
668    if let Some(entry) = indexes.get_mut(&(label.clone(), property.clone())) {
669        admit(value, entry.kind(), entry.dimension())
670            .map_err(|err| index_rejection(label, property, entry.dimension(), err))?;
671        std::sync::Arc::make_mut(&mut entry.index).remove_row(row);
672    }
673    Ok(())
674}
675
676fn replace_commit(
677    indexes: &mut VectorIndexMap,
678    label: DbString,
679    property: DbString,
680    old_value: &Value,
681    new_value: &Value,
682    row: u32,
683) -> GraphResult<()> {
684    if let Some(entry) = indexes.get_mut(&(label.clone(), property.clone())) {
685        admit(old_value, entry.kind(), entry.dimension()).map_err(|err| {
686            index_rejection(label.clone(), property.clone(), entry.dimension(), err)
687        })?;
688        let vector = admit(new_value, entry.kind(), entry.dimension())
689            .map_err(|err| index_rejection(label, property, entry.dimension(), err))?;
690        std::sync::Arc::make_mut(&mut entry.index).insert_value(row, vector)?;
691    }
692    Ok(())
693}
694
695fn admit(
696    value: &Value,
697    kind: VectorIndexKind,
698    expected_dimension: u32,
699) -> Result<&VectorValue, VectorIndexValueError> {
700    let Value::Vector(vector) = value else {
701        return Err(VectorIndexValueError::KindMismatch {
702            observed: value_kind_name(value),
703        });
704    };
705    if vector.dimension() != expected_dimension as usize {
706        return Err(VectorIndexValueError::DimensionMismatch {
707            expected: expected_dimension,
708            observed: vector.dimension(),
709        });
710    }
711    if let Some(metric) = kind.ann_metric() {
712        metric
713            .distance(vector, vector)
714            .map_err(|err| VectorIndexValueError::MetricRejected {
715                observed: err.to_string(),
716            })?;
717    }
718    Ok(vector)
719}
720
721fn ensure_dimension(dimension: u32) -> GraphResult<()> {
722    if dimension == 0 || dimension as usize > selene_core::MAX_VECTOR_DIMENSION {
723        Err(GraphError::VectorIndexInvalidDimension { dimension })
724    } else {
725        Ok(())
726    }
727}
728
729fn u64_to_usize_saturating(value: u64) -> usize {
730    usize::try_from(value).unwrap_or(usize::MAX)
731}
732
733fn index_rejection(
734    label: DbString,
735    property: DbString,
736    expected_dimension: u32,
737    err: VectorIndexValueError,
738) -> GraphError {
739    GraphError::VectorIndexValueRejected {
740        label,
741        property,
742        expected_dimension,
743        observed: err.observed(),
744    }
745}
746
747fn warn_rejected(
748    op: &'static str,
749    label: DbString,
750    property: DbString,
751    row: u32,
752    err: &VectorIndexValueError,
753) {
754    tracing::warn!(
755        op,
756        %label,
757        %property,
758        row,
759        error = %err,
760        "skipped vector-index update for value that does not match the registered vector index"
761    );
762}
763
764const fn is_null(value: &Value) -> bool {
765    matches!(value, Value::Null)
766}
767
768const fn value_kind_name(value: &Value) -> &'static str {
769    match value {
770        Value::Null => "Null",
771        Value::Bool(_) => "Bool",
772        Value::Int(_) => "Int",
773        Value::Uint(_) => "Uint",
774        Value::Int128(_) => "Int128",
775        Value::Uint128(_) => "Uint128",
776        Value::Float(_) => "Float",
777        Value::Float32(_) => "Float32",
778        Value::Decimal(_) => "Decimal",
779        Value::String(_) => "String",
780        Value::Bytes(_) => "Bytes",
781        Value::List(_) => "List",
782        Value::Record(_) => "Record",
783        Value::RecordTyped(_) => "RecordTyped",
784        Value::Path(_) => "Path",
785        Value::NodeRef(_) => "NodeRef",
786        Value::EdgeRef(_) => "EdgeRef",
787        Value::GraphRef(_) => "GraphRef",
788        Value::TableRef(_) => "TableRef",
789        Value::ZonedDateTime(_) => "ZonedDateTime",
790        Value::LocalDateTime(_) => "LocalDateTime",
791        Value::Date(_) => "Date",
792        Value::ZonedTime(_) => "ZonedTime",
793        Value::LocalTime(_) => "LocalTime",
794        Value::Duration(_) => "Duration",
795        Value::Extended { .. } => "Extended",
796        Value::Uuid(_) => "Uuid",
797        Value::Vector(_) => "Vector",
798        Value::Json(_) => "Json",
799        _ => "Unknown",
800    }
801}
802
803#[cfg(test)]
804#[path = "vector_index/config_tests.rs"]
805mod config_tests;
806
807#[cfg(test)]
808#[path = "vector_index/tests.rs"]
809mod tests;