velesdb_core/collection/vector_collection/lifecycle.rs
1//! Constructor and persistence methods for `VectorCollection`.
2
3use std::path::PathBuf;
4
5use crate::collection::types::Collection;
6use crate::distance::DistanceMetric;
7use crate::error::Result;
8use crate::quantization::StorageMode;
9
10use super::VectorCollection;
11
12impl VectorCollection {
13 /// Creates a new `VectorCollection` at the given path.
14 ///
15 /// # Errors
16 ///
17 /// Returns an error if the directory cannot be created or storage fails.
18 pub fn create(
19 path: PathBuf,
20 _name: &str,
21 dimension: usize,
22 metric: DistanceMetric,
23 storage_mode: StorageMode,
24 ) -> Result<Self> {
25 Ok(Self {
26 inner: Collection::create_with_options(path, dimension, metric, storage_mode)?,
27 })
28 }
29
30 /// Creates a new `VectorCollection` with custom HNSW parameters.
31 ///
32 /// When `m` or `ef_construction` are `Some`, those values override the
33 /// auto-tuned defaults. When both are `None`, this is equivalent to
34 /// [`VectorCollection::create`].
35 ///
36 /// Shortcut for [`VectorCollection::create_with_params`] that only
37 /// overrides `max_connections` and `ef_construction`; every other
38 /// HNSW field stays at the dimension-based auto-tuned default, and
39 /// `pq_rescore_oversampling` uses the engine default of `Some(4)`.
40 ///
41 /// # Errors
42 ///
43 /// Returns an error if the directory cannot be created or storage fails.
44 pub fn create_with_hnsw(
45 path: PathBuf,
46 _name: &str,
47 dimension: usize,
48 metric: DistanceMetric,
49 storage_mode: StorageMode,
50 m: Option<usize>,
51 ef_construction: Option<usize>,
52 ) -> Result<Self> {
53 let mut params = crate::index::hnsw::HnswParams::auto(dimension);
54 if let Some(m) = m {
55 params.max_connections = m;
56 }
57 if let Some(ef) = ef_construction {
58 params.ef_construction = ef;
59 }
60 params.storage_mode = storage_mode;
61 Self::create_with_params(path, dimension, metric, storage_mode, params, None)
62 }
63
64 /// Creates a new `VectorCollection` with a fully specified
65 /// [`HnswParams`](crate::index::hnsw::HnswParams) and an explicit
66 /// `pq_rescore_oversampling` override.
67 ///
68 /// This is the most expressive constructor exposed by
69 /// `VectorCollection`: callers pass the full params object directly,
70 /// including `alpha` (VAMANA neighbour diversification),
71 /// `max_elements` (initial HNSW capacity), and any future field added
72 /// to `HnswParams`, without going through the `(m, ef_construction)`
73 /// shortcut. Passing `pq_rescore_oversampling = None` keeps the
74 /// persisted config in "no explicit override" mode so later migrations
75 /// can recompute the factor from dataset shape.
76 ///
77 /// # Errors
78 ///
79 /// Returns an error if the directory cannot be created or storage fails.
80 pub fn create_with_params(
81 path: PathBuf,
82 dimension: usize,
83 metric: DistanceMetric,
84 storage_mode: StorageMode,
85 mut hnsw_params: crate::index::hnsw::HnswParams,
86 pq_rescore_oversampling: Option<u32>,
87 ) -> Result<Self> {
88 // Make sure the storage mode baked into the params matches the
89 // per-collection storage mode argument. If a caller passed
90 // mismatching values we deliberately let the function argument
91 // win — it is the more direct, less ambiguous source.
92 hnsw_params.storage_mode = storage_mode;
93 Ok(Self {
94 inner: Collection::create_with_full_config(
95 path,
96 dimension,
97 metric,
98 storage_mode,
99 hnsw_params,
100 pq_rescore_oversampling,
101 )?,
102 })
103 }
104
105 /// Opens an existing `VectorCollection` from disk.
106 ///
107 /// # Errors
108 ///
109 /// Returns an error if the config file cannot be read or storage cannot be opened.
110 pub fn open(path: PathBuf) -> Result<Self> {
111 Ok(Self {
112 inner: Collection::open(path)?,
113 })
114 }
115
116 /// Creates a new `VectorCollection` with an async index builder configuration.
117 ///
118 /// # Errors
119 ///
120 /// Returns an error if the directory cannot be created or the config cannot be saved.
121 pub fn create_with_async_builder(
122 path: PathBuf,
123 dimension: usize,
124 metric: DistanceMetric,
125 async_builder_config: crate::collection::streaming::AsyncIndexBuilderConfig,
126 ) -> Result<Self> {
127 Ok(Self {
128 inner: Collection::create_with_async_builder(
129 path,
130 dimension,
131 metric,
132 async_builder_config,
133 )?,
134 })
135 }
136
137 /// Flushes all engines to disk and saves the config.
138 ///
139 /// Issue #423: This fast-path flush skips `vectors.idx` serialization.
140 /// The WAL provides crash recovery for the vector index.
141 ///
142 /// # Errors
143 ///
144 /// Returns an error if any flush operation fails.
145 pub fn flush(&self) -> Result<()> {
146 self.inner.flush()
147 }
148
149 /// Full durability flush including `vectors.idx` serialization.
150 ///
151 /// Issue #423: Use on graceful shutdown to avoid a full WAL replay
152 /// on the next startup.
153 ///
154 /// # Errors
155 ///
156 /// Returns an error if any flush operation fails.
157 pub fn flush_full(&self) -> Result<()> {
158 self.inner.flush_full()
159 }
160}