1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
//! Vector collection creation and retrieval operations.
use crate::collection::VectorCollection;
use crate::index::hnsw::HnswParams;
use crate::{CollectionType, DistanceMetric, Result, StorageMode};
use super::Database;
impl Database {
/// Creates a new vector collection.
///
/// # Errors
///
/// Returns an error if a collection with the same name already exists.
pub fn create_vector_collection(
&self,
name: &str,
dimension: usize,
metric: DistanceMetric,
) -> Result<()> {
self.create_vector_collection_with_options(name, dimension, metric, StorageMode::default())
}
/// Creates a new vector collection with custom storage options.
///
/// # Errors
///
/// Returns an error if a collection with the same name already exists
/// or if the dimension exceeds the configured `max_dimensions` limit.
pub fn create_vector_collection_with_options(
&self,
name: &str,
dimension: usize,
metric: DistanceMetric,
storage_mode: StorageMode,
) -> Result<()> {
self.ensure_collection_name_available(name)?;
self.enforce_vector_dimension_limit(dimension)?;
let path = self.data_dir.join(name);
let coll = VectorCollection::create(path, name, dimension, metric, storage_mode)?;
self.register_vector_collection(name, &coll, dimension, metric, storage_mode);
Ok(())
}
/// Creates a new vector collection with custom HNSW parameters.
///
/// When `m` or `ef_construction` are `Some`, those values override the
/// dimension-based auto-tuned defaults from [`HnswParams::auto`].
///
/// Shortcut for [`Database::create_vector_collection_with_params`] that
/// only overrides `max_connections` and `ef_construction`.
///
/// # Errors
///
/// Returns an error if a collection with the same name already exists.
pub fn create_vector_collection_with_hnsw(
&self,
name: &str,
dimension: usize,
metric: DistanceMetric,
storage_mode: StorageMode,
m: Option<usize>,
ef_construction: Option<usize>,
) -> Result<()> {
self.ensure_collection_name_available(name)?;
self.enforce_vector_dimension_limit(dimension)?;
let path = self.data_dir.join(name);
let coll = VectorCollection::create_with_hnsw(
path,
name,
dimension,
metric,
storage_mode,
m,
ef_construction,
)?;
self.register_vector_collection(name, &coll, dimension, metric, storage_mode);
Ok(())
}
/// Creates a new vector collection with a fully specified
/// [`HnswParams`] and an explicit `pq_rescore_oversampling` override.
///
/// This is the most expressive vector constructor exposed by
/// `Database`: callers pass every HNSW parameter — `max_connections`,
/// `ef_construction`, `max_elements`, `alpha`, storage mode — via a
/// single value, and override the PQ rescore factor explicitly rather
/// than implicitly falling back to the engine default of `Some(4)`.
/// Passing `pq_rescore_oversampling = None` keeps the persisted config
/// in "no explicit override" mode so later migrations can recompute
/// the factor from dataset shape.
///
/// The storage mode argument wins over `hnsw_params.storage_mode` if
/// they disagree — the field on `HnswParams` is a legacy denormalised
/// copy that the engine keeps in sync with the collection-level value.
///
/// # Errors
///
/// Returns an error if a collection with the same name already exists
/// or if the underlying directory cannot be created.
pub fn create_vector_collection_with_params(
&self,
name: &str,
dimension: usize,
metric: DistanceMetric,
storage_mode: StorageMode,
hnsw_params: HnswParams,
pq_rescore_oversampling: Option<u32>,
) -> Result<()> {
self.ensure_collection_name_available(name)?;
self.enforce_vector_dimension_limit(dimension)?;
let path = self.data_dir.join(name);
let coll = VectorCollection::create_with_params(
path,
dimension,
metric,
storage_mode,
hnsw_params,
pq_rescore_oversampling,
)?;
self.register_vector_collection(name, &coll, dimension, metric, storage_mode);
Ok(())
}
/// Registers a vector collection in the typed registry,
/// notifies the observer, and bumps the schema version.
fn register_vector_collection(
&self,
name: &str,
coll: &VectorCollection,
dimension: usize,
metric: DistanceMetric,
storage_mode: StorageMode,
) {
self.vector_colls
.write()
.insert(name.to_string(), coll.clone());
if let Some(ref obs) = self.observer {
let kind = CollectionType::Vector {
dimension,
metric,
storage_mode,
};
obs.on_collection_created(name, &kind);
}
self.schema_version
.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
}
/// Returns a `VectorCollection` by name.
///
/// Checks the typed registry first. If not found there, falls back to
/// opening the collection directory from disk (e.g. for collections created
/// via the legacy `create_collection` API that were not registered in the
/// typed registry). The opened instance is cached back into the registry
/// so subsequent calls avoid the disk round-trip.
///
/// Returns `None` if the collection does not exist on disk.
#[must_use]
pub fn get_vector_collection(&self, name: &str) -> Option<VectorCollection> {
if let Some(c) = self.vector_colls.read().get(name).cloned() {
return Some(c);
}
self.open_vector_collection_from_disk(name)
}
/// Disk fallback for `get_vector_collection`.
fn open_vector_collection_from_disk(&self, name: &str) -> Option<VectorCollection> {
let cfg = self.read_collection_config(name)?;
if cfg.graph_schema.is_some() || cfg.metadata_only {
return None;
}
let coll = VectorCollection::open(self.data_dir.join(name)).ok()?;
self.vector_colls
.write()
.insert(name.to_string(), coll.clone());
Some(coll)
}
}