velesdb_core/database/vector_ops.rs
1//! Vector collection creation and retrieval operations.
2
3use crate::collection::VectorCollection;
4use crate::index::hnsw::HnswParams;
5use crate::{CollectionType, DistanceMetric, Result, StorageMode};
6
7use super::Database;
8
9impl Database {
10 /// Creates a new vector collection.
11 ///
12 /// # Errors
13 ///
14 /// Returns an error if a collection with the same name already exists.
15 pub fn create_vector_collection(
16 &self,
17 name: &str,
18 dimension: usize,
19 metric: DistanceMetric,
20 ) -> Result<()> {
21 self.create_vector_collection_with_options(name, dimension, metric, StorageMode::default())
22 }
23
24 /// Creates a new vector collection with custom storage options.
25 ///
26 /// # Errors
27 ///
28 /// Returns an error if a collection with the same name already exists
29 /// or if the dimension exceeds the configured `max_dimensions` limit.
30 pub fn create_vector_collection_with_options(
31 &self,
32 name: &str,
33 dimension: usize,
34 metric: DistanceMetric,
35 storage_mode: StorageMode,
36 ) -> Result<()> {
37 self.ensure_collection_name_available(name)?;
38 self.enforce_vector_dimension_limit(dimension)?;
39 let path = self.data_dir.join(name);
40 let coll = VectorCollection::create(path, name, dimension, metric, storage_mode)?;
41 self.register_vector_collection(name, &coll, dimension, metric, storage_mode);
42 Ok(())
43 }
44
45 /// Creates a new vector collection with custom HNSW parameters.
46 ///
47 /// When `m` or `ef_construction` are `Some`, those values override the
48 /// dimension-based auto-tuned defaults from [`HnswParams::auto`].
49 ///
50 /// Shortcut for [`Database::create_vector_collection_with_params`] that
51 /// only overrides `max_connections` and `ef_construction`.
52 ///
53 /// # Errors
54 ///
55 /// Returns an error if a collection with the same name already exists.
56 pub fn create_vector_collection_with_hnsw(
57 &self,
58 name: &str,
59 dimension: usize,
60 metric: DistanceMetric,
61 storage_mode: StorageMode,
62 m: Option<usize>,
63 ef_construction: Option<usize>,
64 ) -> Result<()> {
65 self.ensure_collection_name_available(name)?;
66 self.enforce_vector_dimension_limit(dimension)?;
67 let path = self.data_dir.join(name);
68 let coll = VectorCollection::create_with_hnsw(
69 path,
70 name,
71 dimension,
72 metric,
73 storage_mode,
74 m,
75 ef_construction,
76 )?;
77 self.register_vector_collection(name, &coll, dimension, metric, storage_mode);
78 Ok(())
79 }
80
81 /// Creates a new vector collection with a fully specified
82 /// [`HnswParams`] and an explicit `pq_rescore_oversampling` override.
83 ///
84 /// This is the most expressive vector constructor exposed by
85 /// `Database`: callers pass every HNSW parameter — `max_connections`,
86 /// `ef_construction`, `max_elements`, `alpha`, storage mode — via a
87 /// single value, and override the PQ rescore factor explicitly rather
88 /// than implicitly falling back to the engine default of `Some(4)`.
89 /// Passing `pq_rescore_oversampling = None` keeps the persisted config
90 /// in "no explicit override" mode so later migrations can recompute
91 /// the factor from dataset shape.
92 ///
93 /// The storage mode argument wins over `hnsw_params.storage_mode` if
94 /// they disagree — the field on `HnswParams` is a legacy denormalised
95 /// copy that the engine keeps in sync with the collection-level value.
96 ///
97 /// # Errors
98 ///
99 /// Returns an error if a collection with the same name already exists
100 /// or if the underlying directory cannot be created.
101 pub fn create_vector_collection_with_params(
102 &self,
103 name: &str,
104 dimension: usize,
105 metric: DistanceMetric,
106 storage_mode: StorageMode,
107 hnsw_params: HnswParams,
108 pq_rescore_oversampling: Option<u32>,
109 ) -> Result<()> {
110 self.ensure_collection_name_available(name)?;
111 self.enforce_vector_dimension_limit(dimension)?;
112 let path = self.data_dir.join(name);
113 let coll = VectorCollection::create_with_params(
114 path,
115 dimension,
116 metric,
117 storage_mode,
118 hnsw_params,
119 pq_rescore_oversampling,
120 )?;
121 self.register_vector_collection(name, &coll, dimension, metric, storage_mode);
122 Ok(())
123 }
124
125 /// Registers a vector collection in the typed registry,
126 /// notifies the observer, and bumps the schema version.
127 fn register_vector_collection(
128 &self,
129 name: &str,
130 coll: &VectorCollection,
131 dimension: usize,
132 metric: DistanceMetric,
133 storage_mode: StorageMode,
134 ) {
135 self.vector_colls
136 .write()
137 .insert(name.to_string(), coll.clone());
138
139 if let Some(ref obs) = self.observer {
140 let kind = CollectionType::Vector {
141 dimension,
142 metric,
143 storage_mode,
144 };
145 obs.on_collection_created(name, &kind);
146 }
147
148 self.schema_version
149 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
150 }
151
152 /// Returns a `VectorCollection` by name.
153 ///
154 /// Checks the typed registry first. If not found there, falls back to
155 /// opening the collection directory from disk (e.g. for collections created
156 /// via the legacy `create_collection` API that were not registered in the
157 /// typed registry). The opened instance is cached back into the registry
158 /// so subsequent calls avoid the disk round-trip.
159 ///
160 /// Returns `None` if the collection does not exist on disk.
161 #[must_use]
162 pub fn get_vector_collection(&self, name: &str) -> Option<VectorCollection> {
163 if let Some(c) = self.vector_colls.read().get(name).cloned() {
164 return Some(c);
165 }
166 self.open_vector_collection_from_disk(name)
167 }
168
169 /// Disk fallback for `get_vector_collection`.
170 fn open_vector_collection_from_disk(&self, name: &str) -> Option<VectorCollection> {
171 let cfg = self.read_collection_config(name)?;
172 if cfg.graph_schema.is_some() || cfg.metadata_only {
173 return None;
174 }
175 let coll = VectorCollection::open(self.data_dir.join(name)).ok()?;
176 self.vector_colls
177 .write()
178 .insert(name.to_string(), coll.clone());
179 Some(coll)
180 }
181}