1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
//! Collection CRUD dispatcher: create, delete, list, get, and diagnostics.
//!
//! Type-specific operations are in sibling modules:
//! - [`vector_ops`] — vector collection create/get
//! - [`graph_ops`] — graph collection create/get
//! - [`metadata_ops`] — metadata-only collection create/get
use crate::collection::AnyCollection;
use crate::{CollectionType, DistanceMetric, Error, Result, StorageMode};
use super::Database;
impl Database {
/// Ensures a collection name is valid, free in memory, and free on disk.
///
/// Validates the name against path traversal and forbidden characters
/// **before** any filesystem operation, then checks that no collection
/// with the same name already exists in any registry or on disk, and
/// finally enforces the `LimitsConfig::max_collections` cap so that
/// callers are refused cleanly instead of filling the registry past
/// the configured ceiling.
pub(super) fn ensure_collection_name_available(&self, name: &str) -> Result<()> {
crate::validation::validate_collection_name(name)?;
if self.collection_exists_in_registry(name) {
return Err(Error::CollectionExists(name.to_string()));
}
let collection_path = self.data_dir.join(name);
if collection_path.exists() {
return Err(Error::CollectionExists(name.to_string()));
}
// Wave 3 Commit 7 — enforce `LimitsConfig::max_collections`.
//
// Counted across every typed registry (vector + graph + metadata)
// because the limit is tenant-wide, not per-type. Evaluated after
// the name validation and duplicate checks so the typed error
// precedence stays unchanged: invalid name and duplicate still
// win over the cap — callers that want to detect "too many
// collections" specifically rely on the `GuardRail` variant.
let total_collections = self.vector_colls.read().len()
+ self.graph_colls.read().len()
+ self.metadata_colls.read().len();
let cap = self.config.limits.max_collections;
if total_collections >= cap {
return Err(Error::GuardRail(format!(
"max_collections limit reached ({total_collections} / {cap}); \
raise `limits.max_collections` in VelesConfig to create more"
)));
}
Ok(())
}
/// Checks whether a collection name exists in any of the typed registries.
///
/// The vector, graph and metadata registries are probed in that order under
/// their read locks; evaluation short-circuits on the first registry that
/// contains `name`.
///
/// Returns `true` if at least one registry has an entry keyed by `name`,
/// `false` otherwise. Only in-memory state is consulted — the data
/// directory is not inspected here.
fn collection_exists_in_registry(&self, name: &str) -> bool {
self.vector_colls.read().contains_key(name)
|| self.graph_colls.read().contains_key(name)
|| self.metadata_colls.read().contains_key(name)
}
/// Enforces `LimitsConfig::max_dimensions` on a prospective vector
/// collection creation.
///
/// Complements [`crate::validation::validate_dimension`] (the static
/// `65_536` hard ceiling): the config-driven limit is typically tighter
/// — 4096 by default — and is consulted here so the guard-rail can
/// be relaxed per tenant via [`Database::open_with_config`] without
/// touching the static constant.
///
/// Dimension `0` is accepted because it is the sentinel used by
/// metadata-only and graph-without-embeddings collections. Callers
/// that need to reject zero should do so upstream via
/// [`crate::validation::validate_dimension`].
pub(super) fn enforce_vector_dimension_limit(&self, dimension: usize) -> Result<()> {
if dimension == 0 {
return Ok(());
}
let cap = self.config.limits.max_dimensions;
if dimension > cap {
return Err(Error::GuardRail(format!(
"vector dimension {dimension} exceeds configured max_dimensions cap of {cap}; \
raise `limits.max_dimensions` in VelesConfig to allow larger vectors"
)));
}
Ok(())
}
/// Creates a new collection using the default [`StorageMode`].
///
/// This is a convenience wrapper around
/// [`Self::create_collection_with_options`] that fills in
/// `StorageMode::default()` for the storage mode.
///
/// # Arguments
///
/// * `name` - Unique name for the collection
/// * `dimension` - Vector dimension (e.g., 768 for many embedding models)
/// * `metric` - Distance metric to use for similarity calculations
///
/// # Errors
///
/// - Returns `Error::CollectionExists` if a collection with the same name already exists.
/// - Returns an error if the directory cannot be created or storage initialization fails.
///
/// # Examples
///
/// ```rust,no_run
/// # use velesdb_core::{Database, DistanceMetric};
/// let db = Database::open("./data")?;
/// db.create_collection("documents", 768, DistanceMetric::Cosine)?;
/// # Ok::<(), velesdb_core::Error>(())
/// ```
pub fn create_collection(
    &self,
    name: &str,
    dimension: usize,
    metric: DistanceMetric,
) -> Result<()> {
    let default_mode = StorageMode::default();
    self.create_collection_with_options(name, dimension, metric, default_mode)
}
/// Creates a new collection with custom storage options.
///
/// Thin entry point that forwards every argument unchanged to
/// `create_vector_collection_with_options`; the collection created through
/// this method is therefore always a vector collection.
///
/// # Arguments
///
/// * `name` - Name of the collection to create
/// * `dimension` - Vector dimension
/// * `metric` - Distance metric used for similarity calculations
/// * `storage_mode` - Storage mode to use instead of `StorageMode::default()`
///
/// # Errors
///
/// Returns an error if a collection with the same name already exists.
/// Any other error produced by the vector-collection creation path is
/// returned unchanged.
pub fn create_collection_with_options(
&self,
name: &str,
dimension: usize,
metric: DistanceMetric,
storage_mode: StorageMode,
) -> Result<()> {
self.create_vector_collection_with_options(name, dimension, metric, storage_mode)
}
/// Looks up a collection by name and returns it as a type-erased
/// [`AnyCollection`] handle.
///
/// The typed lookups are tried in the order vector → graph → metadata and
/// the first hit is wrapped in the matching `AnyCollection` variant. Later
/// lookups are only performed when the earlier ones miss.
///
/// Returns `None` if no collection with the given name exists.
#[must_use]
pub fn get_any_collection(&self, name: &str) -> Option<AnyCollection> {
    self.get_vector_collection(name)
        .map(AnyCollection::Vector)
        .or_else(|| self.get_graph_collection(name).map(AnyCollection::Graph))
        .or_else(|| {
            self.get_metadata_collection(name)
                .map(AnyCollection::Metadata)
        })
}
/// Returns the write generation of the collection called `name`.
///
/// The vector, graph and metadata registries are consulted in that order;
/// the generation of the first match is returned. Each registry's read
/// lock is released before the next registry is probed.
///
/// Returns `None` if no registry contains `name`.
#[must_use]
pub fn collection_write_generation(&self, name: &str) -> Option<u64> {
    let from_vector = self
        .vector_colls
        .read()
        .get(name)
        .map(|coll| coll.inner.write_generation());
    if from_vector.is_some() {
        return from_vector;
    }

    let from_graph = self
        .graph_colls
        .read()
        .get(name)
        .map(|coll| coll.inner.write_generation());
    if from_graph.is_some() {
        return from_graph;
    }

    let from_metadata = self
        .metadata_colls
        .read()
        .get(name)
        .map(|coll| coll.inner.write_generation());
    from_metadata
}
/// Returns the payload field names that are covered by a secondary index
/// on the named collection (issue #607).
///
/// The vector, graph and metadata registries are consulted in that order
/// and the first match supplies the result. The returned set is empty when
/// the collection has no indexes or when no collection called `name`
/// exists.
///
/// Used by `Database::build_plan_with_stats` to thread the real
/// indexed-field set into `QueryPlan::from_query_with_stats` so that
/// `IndexLookup` plan nodes are generated in the EXPLAIN tree when a
/// WHERE clause targets an indexed column.
#[must_use]
pub fn indexed_fields_for(&self, name: &str) -> std::collections::HashSet<String> {
    let from_vector = self
        .vector_colls
        .read()
        .get(name)
        .map(|coll| coll.inner.indexed_field_names());
    if let Some(fields) = from_vector {
        return fields;
    }

    let from_graph = self
        .graph_colls
        .read()
        .get(name)
        .map(|coll| coll.inner.indexed_field_names());
    if let Some(fields) = from_graph {
        return fields;
    }

    let from_metadata = self
        .metadata_colls
        .read()
        .get(name)
        .map(|coll| coll.inner.indexed_field_names());
    // Unknown collection: fall back to an empty set.
    from_metadata.unwrap_or_default()
}
/// Returns the analyze generation of the collection called `name`
/// (issue #608), or `None` if no registry contains it.
///
/// This is the counterpart of [`Self::collection_write_generation`]: it
/// tracks `ANALYZE` invocations rather than data mutations. The value is
/// threaded into the compiled plan cache key so that an `ANALYZE` run
/// alone invalidates cached plans whose cost estimates pre-date the fresh
/// calibrated statistics.
///
/// The vector, graph and metadata registries are consulted in that order;
/// each registry's read lock is released before the next one is probed.
#[must_use]
pub fn collection_analyze_generation(&self, name: &str) -> Option<u64> {
    let from_vector = self
        .vector_colls
        .read()
        .get(name)
        .map(|coll| coll.inner.analyze_generation());
    if from_vector.is_some() {
        return from_vector;
    }

    let from_graph = self
        .graph_colls
        .read()
        .get(name)
        .map(|coll| coll.inner.analyze_generation());
    if from_graph.is_some() {
        return from_graph;
    }

    let from_metadata = self
        .metadata_colls
        .read()
        .get(name)
        .map(|coll| coll.inner.analyze_generation());
    from_metadata
}
/// Returns the names of all collections in the database, sorted and
/// without duplicates.
///
/// Names are gathered from the vector, graph and metadata registries, so
/// collections created via any typed API are included. A name that appears
/// in more than one registry is reported once. All three read locks are
/// held until the result has been assembled.
pub fn list_collections(&self) -> Vec<String> {
    let vectors = self.vector_colls.read();
    let graphs = self.graph_colls.read();
    let metadata = self.metadata_colls.read();

    let mut names: Vec<String> = vectors
        .keys()
        .chain(graphs.keys())
        .chain(metadata.keys())
        .cloned()
        .collect();

    // Sorting first makes equal names adjacent so `dedup` removes every
    // cross-registry duplicate.
    names.sort_unstable();
    names.dedup();
    names
}
/// Deletes the collection called `name` from disk and from memory.
///
/// Steps, in order:
///
/// 1. Validate the name before any path is built from it.
/// 2. Require the name to be present in at least one registry.
/// 3. Remove the collection directory under the data directory, if it
///    exists.
/// 4. Drop the entry from every registry and from the stats cache.
/// 5. Notify the observer, when one is configured.
/// 6. Increment the schema version counter.
///
/// # Errors
///
/// - Returns the validation error if the name is invalid.
/// - Returns [`Error::CollectionNotFound`] if the collection does not
///   exist in any registry.
/// - Propagates the I/O error if the collection directory cannot be
///   removed; in that case the registries are left untouched.
pub fn delete_collection(&self, name: &str) -> Result<()> {
    use std::sync::atomic::Ordering;

    crate::validation::validate_collection_name(name)?;

    let is_registered = self.collection_exists_in_registry(name);
    if !is_registered {
        return Err(Error::CollectionNotFound(name.to_string()));
    }

    // Disk first: a failed removal returns early and keeps the in-memory
    // entry in place.
    let on_disk = self.data_dir.join(name);
    if on_disk.exists() {
        std::fs::remove_dir_all(&on_disk)?;
    }

    self.remove_from_all_registries(name);

    if let Some(observer) = &self.observer {
        observer.on_collection_deleted(name);
    }

    self.schema_version.fetch_add(1, Ordering::Relaxed);
    Ok(())
}
/// Drops every in-memory trace of the collection called `name`.
///
/// The entry is removed from the vector, graph and metadata registries and
/// then from the stats cache. Each write lock is taken and released on its
/// own, so no two of these locks are held at the same time. Removing a
/// name that is absent from a given map is a no-op for that map.
fn remove_from_all_registries(&self, name: &str) {
    {
        let mut vectors = self.vector_colls.write();
        vectors.remove(name);
    }
    {
        let mut graphs = self.graph_colls.write();
        graphs.remove(name);
    }
    {
        let mut metadata = self.metadata_colls.write();
        metadata.remove(name);
    }
    {
        let mut stats = self.collection_stats.write();
        stats.remove(name);
    }
}
/// Creates a new collection whose kind is described by `collection_type`
/// (Vector, Graph, or `MetadataOnly`).
///
/// The call is dispatched to the creation routine that matches the
/// variant:
///
/// - `Vector` → `create_vector_collection_with_options`
/// - `Graph` → `create_graph_collection_from_type`
/// - `MetadataOnly` → `create_metadata_collection`
///
/// # Errors
///
/// Returns an error if a collection with the same name already exists.
/// Any other error from the selected creation routine is returned
/// unchanged.
pub fn create_collection_typed(
    &self,
    name: &str,
    collection_type: &CollectionType,
) -> Result<()> {
    match collection_type {
        CollectionType::MetadataOnly => self.create_metadata_collection(name),
        CollectionType::Vector {
            dimension: dim,
            metric: distance,
            storage_mode: mode,
        } => self.create_vector_collection_with_options(name, *dim, *distance, *mode),
        CollectionType::Graph {
            dimension: dim,
            metric: distance,
            schema: graph_schema,
        } => self.create_graph_collection_from_type(name, *dim, *distance, graph_schema),
    }
}
/// Loads `config.json` from the directory of the collection called `name`
/// and deserializes it.
///
/// The name is validated before it is joined onto the data directory, so
/// an invalid name never reaches the filesystem.
///
/// Returns `None` if the name is invalid, the config file does not exist,
/// the file cannot be read, or its contents cannot be parsed.
pub(super) fn read_collection_config(
    &self,
    name: &str,
) -> Option<crate::collection::CollectionConfig> {
    crate::validation::validate_collection_name(name).ok()?;

    let config_file = self.data_dir.join(name).join("config.json");
    if !config_file.exists() {
        return None;
    }

    let raw = std::fs::read_to_string(&config_file).ok()?;
    serde_json::from_str(&raw).ok()
}
/// Pushes the given query limits to the guard rails of every collection
/// currently held in the vector, graph and metadata registries.
///
/// Each registry is walked under its own read lock, one registry after
/// the other.
pub fn update_guardrails(&self, limits: &crate::guardrails::QueryLimits) {
    self.vector_colls.read().values().for_each(|coll| {
        coll.guard_rails().update_limits(limits);
    });
    self.graph_colls.read().values().for_each(|coll| {
        coll.inner.guard_rails().update_limits(limits);
    });
    self.metadata_colls.read().values().for_each(|coll| {
        coll.inner.guard_rails().update_limits(limits);
    });
}
/// Returns diagnostics for a named collection.
///
/// # Errors
///
/// Returns `Error::CollectionNotFound` if the collection does not exist.
pub fn collection_diagnostics(
&self,
name: &str,
) -> Result<crate::collection::CollectionDiagnostics> {
if let Some(c) = self.get_vector_collection(name) {
return Ok(c.diagnostics());
}
if let Some(c) = self.get_graph_collection(name) {
return Ok(c.diagnostics());
}
if let Some(c) = self.get_metadata_collection(name) {
return Ok(c.diagnostics());
}
Err(Error::CollectionNotFound(name.to_string()))
}
}