Skip to main content

velesdb_core/database/
vector_ops.rs

1//! Vector collection creation and retrieval operations.
2
3use crate::collection::VectorCollection;
4use crate::index::hnsw::HnswParams;
5use crate::{CollectionType, DistanceMetric, Result, StorageMode};
6
7use super::Database;
8
9impl Database {
10    /// Creates a new vector collection.
11    ///
12    /// # Errors
13    ///
14    /// Returns an error if a collection with the same name already exists.
15    pub fn create_vector_collection(
16        &self,
17        name: &str,
18        dimension: usize,
19        metric: DistanceMetric,
20    ) -> Result<()> {
21        self.create_vector_collection_with_options(name, dimension, metric, StorageMode::default())
22    }
23
24    /// Creates a new vector collection with custom storage options.
25    ///
26    /// # Errors
27    ///
28    /// Returns an error if a collection with the same name already exists
29    /// or if the dimension exceeds the configured `max_dimensions` limit.
30    pub fn create_vector_collection_with_options(
31        &self,
32        name: &str,
33        dimension: usize,
34        metric: DistanceMetric,
35        storage_mode: StorageMode,
36    ) -> Result<()> {
37        self.ensure_collection_name_available(name)?;
38        self.enforce_vector_dimension_limit(dimension)?;
39        let path = self.data_dir.join(name);
40        let coll = VectorCollection::create(path, name, dimension, metric, storage_mode)?;
41        self.register_vector_collection(name, &coll, dimension, metric, storage_mode);
42        Ok(())
43    }
44
45    /// Creates a new vector collection with custom HNSW parameters.
46    ///
47    /// When `m` or `ef_construction` are `Some`, those values override the
48    /// dimension-based auto-tuned defaults from [`HnswParams::auto`].
49    ///
50    /// Shortcut for [`Database::create_vector_collection_with_params`] that
51    /// only overrides `max_connections` and `ef_construction`.
52    ///
53    /// # Errors
54    ///
55    /// Returns an error if a collection with the same name already exists.
56    pub fn create_vector_collection_with_hnsw(
57        &self,
58        name: &str,
59        dimension: usize,
60        metric: DistanceMetric,
61        storage_mode: StorageMode,
62        m: Option<usize>,
63        ef_construction: Option<usize>,
64    ) -> Result<()> {
65        self.ensure_collection_name_available(name)?;
66        self.enforce_vector_dimension_limit(dimension)?;
67        let path = self.data_dir.join(name);
68        let coll = VectorCollection::create_with_hnsw(
69            path,
70            name,
71            dimension,
72            metric,
73            storage_mode,
74            m,
75            ef_construction,
76        )?;
77        self.register_vector_collection(name, &coll, dimension, metric, storage_mode);
78        Ok(())
79    }
80
81    /// Creates a new vector collection with a fully specified
82    /// [`HnswParams`] and an explicit `pq_rescore_oversampling` override.
83    ///
84    /// This is the most expressive vector constructor exposed by
85    /// `Database`: callers pass every HNSW parameter — `max_connections`,
86    /// `ef_construction`, `max_elements`, `alpha`, storage mode — via a
87    /// single value, and override the PQ rescore factor explicitly rather
88    /// than implicitly falling back to the engine default of `Some(4)`.
89    /// Passing `pq_rescore_oversampling = None` keeps the persisted config
90    /// in "no explicit override" mode so later migrations can recompute
91    /// the factor from dataset shape.
92    ///
93    /// The storage mode argument wins over `hnsw_params.storage_mode` if
94    /// they disagree — the field on `HnswParams` is a legacy denormalised
95    /// copy that the engine keeps in sync with the collection-level value.
96    ///
97    /// # Errors
98    ///
99    /// Returns an error if a collection with the same name already exists
100    /// or if the underlying directory cannot be created.
101    pub fn create_vector_collection_with_params(
102        &self,
103        name: &str,
104        dimension: usize,
105        metric: DistanceMetric,
106        storage_mode: StorageMode,
107        hnsw_params: HnswParams,
108        pq_rescore_oversampling: Option<u32>,
109    ) -> Result<()> {
110        self.ensure_collection_name_available(name)?;
111        self.enforce_vector_dimension_limit(dimension)?;
112        let path = self.data_dir.join(name);
113        let coll = VectorCollection::create_with_params(
114            path,
115            dimension,
116            metric,
117            storage_mode,
118            hnsw_params,
119            pq_rescore_oversampling,
120        )?;
121        self.register_vector_collection(name, &coll, dimension, metric, storage_mode);
122        Ok(())
123    }
124
125    /// Registers a vector collection in the typed registry,
126    /// notifies the observer, and bumps the schema version.
127    fn register_vector_collection(
128        &self,
129        name: &str,
130        coll: &VectorCollection,
131        dimension: usize,
132        metric: DistanceMetric,
133        storage_mode: StorageMode,
134    ) {
135        self.vector_colls
136            .write()
137            .insert(name.to_string(), coll.clone());
138
139        if let Some(ref obs) = self.observer {
140            let kind = CollectionType::Vector {
141                dimension,
142                metric,
143                storage_mode,
144            };
145            obs.on_collection_created(name, &kind);
146        }
147
148        self.schema_version
149            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
150    }
151
152    /// Returns a `VectorCollection` by name.
153    ///
154    /// Checks the typed registry first.  If not found there, falls back to
155    /// opening the collection directory from disk (e.g. for collections created
156    /// via the legacy `create_collection` API that were not registered in the
157    /// typed registry).  The opened instance is cached back into the registry
158    /// so subsequent calls avoid the disk round-trip.
159    ///
160    /// Returns `None` if the collection does not exist on disk.
161    #[must_use]
162    pub fn get_vector_collection(&self, name: &str) -> Option<VectorCollection> {
163        if let Some(c) = self.vector_colls.read().get(name).cloned() {
164            return Some(c);
165        }
166        self.open_vector_collection_from_disk(name)
167    }
168
169    /// Disk fallback for `get_vector_collection`.
170    fn open_vector_collection_from_disk(&self, name: &str) -> Option<VectorCollection> {
171        let cfg = self.read_collection_config(name)?;
172        if cfg.graph_schema.is_some() || cfg.metadata_only {
173            return None;
174        }
175        let coll = VectorCollection::open(self.data_dir.join(name)).ok()?;
176        self.vector_colls
177            .write()
178            .insert(name.to_string(), coll.clone());
179        Some(coll)
180    }
181}