velesdb_core/database/mod.rs
1//! Database facade and orchestration layer for collection lifecycle and query routing.
2//!
3//! This module is split into focused submodules:
4//!
5//! - [`collection_ops`] — Collection CRUD dispatcher (create, delete, list, get)
6//! - [`vector_ops`] — Vector collection create/get
7//! - [`graph_ops`] — Graph collection create/get
8//! - [`metadata_ops`] — Metadata-only collection create/get
9//! - [`query_engine`] — `VelesQL` query execution, plan caching, DML dispatch
10//! - [`query_join`] — JOIN execution strategies (lookup, filtered, condition pushdown)
11//! - [`dml_executor`] — DML mutations (INSERT EDGE, DELETE, DELETE EDGE, SELECT EDGES, INSERT NODE)
12//! - [`persistence`] — Loading collections from disk at startup
13//! - [`training`] — `TRAIN QUANTIZER` statement execution
14//! - [`stats`] — Collection statistics (analyze, cache)
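//! - [`database_helpers`] — DML value conversion and JOIN column store helpers
//!
//! The following submodules are also declared below but are not described in
//! this list: `admin_executor`, `cross_collection`, `ddl_executor`,
//! `introspection_executor`, `join_pushdown`, and `query_engine_dml`.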
16
17use crate::collection::{GraphCollection, MetadataCollection, VectorCollection};
18use crate::observer::DatabaseObserver;
19use crate::simd_dispatch;
20use crate::{ColumnStore, Error, Result};
21
22mod admin_executor;
23mod collection_ops;
24mod cross_collection;
25mod ddl_executor;
26mod dml_executor;
27mod graph_ops;
28mod introspection_executor;
29mod join_pushdown;
30mod metadata_ops;
31mod persistence;
32mod query_engine;
33mod query_engine_dml;
34mod query_join;
35mod stats;
36mod training;
37mod vector_ops;
38
39#[cfg(feature = "persistence")]
40mod database_helpers;
41
42#[cfg(all(test, feature = "persistence"))]
43mod collection_ops_tests;
44#[cfg(all(test, feature = "persistence"))]
45mod database_helpers_tests;
46#[cfg(all(test, feature = "persistence"))]
47mod database_tests;
48#[cfg(all(test, feature = "persistence"))]
49mod ddl_executor_tests;
50#[cfg(all(test, feature = "persistence"))]
51mod graph_ops_tests;
52#[cfg(all(test, feature = "persistence"))]
53mod query_engine_tests;
54#[cfg(all(test, feature = "persistence"))]
55mod stats_tests;
56
/// Database instance managing collections and storage.
///
/// # Lifecycle
///
/// `Database::open()` automatically loads all previously created collections from disk.
/// There is no need to call `load_collections()` separately.
///
/// # Extension (Premium)
///
/// Use [`Database::open_with_observer`] to inject a [`DatabaseObserver`] implementation
/// from `velesdb-premium` without modifying this crate.
///
/// # Field order
///
/// Rust drops struct fields in declaration order. The exclusive lock file is
/// therefore declared **last**, so that every registry, cache and observer is
/// torn down while the lock is still held; the lock is only released once
/// nothing else owned by this handle remains alive.
#[cfg(feature = "persistence")]
pub struct Database {
    /// Path to the data directory.
    data_dir: std::path::PathBuf,
    /// Root configuration applied to every subsystem.
    ///
    /// Stored as an `Arc` so `Database::config()` can hand out cheap,
    /// cloneable references without forcing the whole struct onto the
    /// heap or locking. The value is populated at construction time
    /// (`open`, `open_with_observer`, or `open_with_config`) and is
    /// immutable for the life of the `Database` — Wave 3 never needs
    /// to mutate the root config at runtime, and making it immutable
    /// rules out a large class of surprising behaviours.
    config: std::sync::Arc<crate::config::VelesConfig>,
    /// Typed registry: vector collections, keyed by collection name.
    vector_colls: parking_lot::RwLock<std::collections::HashMap<String, VectorCollection>>,
    /// Typed registry: graph collections, keyed by collection name.
    graph_colls: parking_lot::RwLock<std::collections::HashMap<String, GraphCollection>>,
    /// Typed registry: metadata-only collections, keyed by collection name.
    metadata_colls: parking_lot::RwLock<std::collections::HashMap<String, MetadataCollection>>,
    /// Cached collection statistics for CBO planning.
    collection_stats: parking_lot::RwLock<
        std::collections::HashMap<String, crate::collection::stats::CollectionStats>,
    >,
    /// Optional lifecycle observer (used by velesdb-premium for RBAC, audit, multi-tenant).
    observer: Option<std::sync::Arc<dyn DatabaseObserver>>,
    /// Monotonic DDL schema version counter (CACHE-01).
    ///
    /// Incremented on every create/drop collection operation.
    /// Used by `CompiledPlanCache` to invalidate cached query plans.
    schema_version: std::sync::atomic::AtomicU64,
    /// Compiled query plan cache (CACHE-02).
    ///
    /// Stores recently compiled `QueryPlan` instances keyed by `PlanKey`.
    /// Default sizing: L1 = 1K hot entries, L2 = 10K LRU entries.
    compiled_plan_cache: crate::cache::CompiledPlanCache,
    /// Exclusive file lock preventing multi-process corruption.
    ///
    /// The lock is held for the lifetime of the `Database` and released on `Drop`.
    /// The `_` prefix signals this field is kept for its RAII side effect.
    ///
    /// Declared last on purpose: fields are dropped in declaration order, so
    /// keeping the lock at the end guarantees it outlives the collection
    /// registries and any teardown work they perform. Do not move this field
    /// above the registries.
    _lock_file: std::fs::File,
}
110
#[cfg(feature = "persistence")]
impl Database {
    /// Opens (or creates) the database rooted at `path` and loads every
    /// collection already present on disk.
    ///
    /// Loading happens as part of this call, so the former two-step
    /// `open()` + `load_collections()` sequence is no longer needed.
    ///
    /// The default [`VelesConfig`](crate::config::VelesConfig) is applied and
    /// no observer is registered. Callers that need to customise subsystem
    /// limits or WAL batching should use [`Database::open_with_config`].
    ///
    /// # Errors
    ///
    /// Returns an error if the configuration fails validation, the data
    /// directory or lock file cannot be created, the exclusive lock cannot be
    /// acquired (`Error::DatabaseLocked`), or loading the existing
    /// collections fails.
    pub fn open<P>(path: P) -> Result<Self>
    where
        P: AsRef<std::path::Path>,
    {
        Self::open_impl(path, None, None)
    }

    /// Opens a database using the supplied [`VelesConfig`](crate::config::VelesConfig).
    ///
    /// The config is validated, then moved into the `Database` behind an
    /// `Arc` and kept for the lifetime of the handle, so subsystems that
    /// honour a config field (HNSW defaults, WAL batching, runtime limits,
    /// search quality) can consult it without re-parsing a TOML file.
    ///
    /// # Errors
    ///
    /// Returns an error if `config` fails validation (`Error::Config`), the
    /// directory cannot be created, the lock cannot be acquired, or loading
    /// the already-present collections fails — for example when a collection
    /// exceeds the limits declared in `config.limits`.
    pub fn open_with_config<P>(path: P, config: crate::config::VelesConfig) -> Result<Self>
    where
        P: AsRef<std::path::Path>,
    {
        Self::open_impl(path, None, Some(config))
    }

    /// Opens a database with a [`DatabaseObserver`] attached (used by velesdb-premium).
    ///
    /// The observer receives lifecycle hooks for collection operations,
    /// enabling RBAC, audit logging, multi-tenant routing, etc.
    ///
    /// Behaves like [`Database::open`] with the observer injected; the
    /// default [`VelesConfig`](crate::config::VelesConfig) is applied.
    ///
    /// # Errors
    ///
    /// Same failure conditions as [`Database::open`].
    pub fn open_with_observer<P>(
        path: P,
        observer: std::sync::Arc<dyn DatabaseObserver>,
    ) -> Result<Self>
    where
        P: AsRef<std::path::Path>,
    {
        Self::open_impl(path, Some(observer), None)
    }

    /// Opens a database with both an explicit [`crate::VelesConfig`] and a
    /// [`crate::DatabaseObserver`]. Used by the premium shell that layers
    /// RBAC/audit on top of a tenant-specific config file.
    ///
    /// # Errors
    ///
    /// Same as [`Database::open_with_config`].
    pub fn open_with_observer_and_config<P>(
        path: P,
        observer: std::sync::Arc<dyn DatabaseObserver>,
        config: crate::config::VelesConfig,
    ) -> Result<Self>
    where
        P: AsRef<std::path::Path>,
    {
        Self::open_impl(path, Some(observer), Some(config))
    }

    /// Shared constructor behind every public `open*` entry point.
    ///
    /// Steps, in order: resolve and validate the config, create the data
    /// directory, take the exclusive `velesdb.lock` file lock, log the
    /// detected SIMD features, build the empty `Database`, and finally load
    /// the collections found on disk.
    fn open_impl<P>(
        path: P,
        observer: Option<std::sync::Arc<dyn DatabaseObserver>>,
        config: Option<crate::config::VelesConfig>,
    ) -> Result<Self>
    where
        P: AsRef<std::path::Path>,
    {
        use parking_lot::RwLock;
        use std::collections::HashMap;
        use std::sync::atomic::AtomicU64;
        use std::sync::Arc;

        // Validate at the consumption boundary. A `VelesConfig` assembled in
        // code (rather than through a loader) has never been through
        // `validate()`, so without this check an out-of-range field would
        // reach the engine. Loaders validate too; repeating it here is cheap.
        let effective_config = config.unwrap_or_default();
        if let Err(reason) = effective_config.validate() {
            return Err(Error::Config(reason.to_string()));
        }

        let root = path.as_ref().to_path_buf();
        std::fs::create_dir_all(&root)?;

        // Take the exclusive lock that guards against multi-process
        // corruption. Any failure to lock is reported as `DatabaseLocked`.
        let lock_file = std::fs::File::create(root.join("velesdb.lock"))?;
        if fs2::FileExt::try_lock_exclusive(&lock_file).is_err() {
            return Err(Error::DatabaseLocked(root.display().to_string()));
        }

        // Record which SIMD features were detected at startup.
        let simd = simd_dispatch::simd_features_info();
        tracing::info!(
            avx512 = simd.avx512f,
            avx2 = simd.avx2,
            "SIMD features detected - direct dispatch enabled"
        );

        let database = Self {
            data_dir: root,
            _lock_file: lock_file,
            config: Arc::new(effective_config),
            vector_colls: RwLock::new(HashMap::new()),
            graph_colls: RwLock::new(HashMap::new()),
            metadata_colls: RwLock::new(HashMap::new()),
            collection_stats: RwLock::new(HashMap::new()),
            observer,
            schema_version: AtomicU64::new(0),
            // Plan cache sizing: 1K hot (L1) entries, 10K LRU (L2) entries.
            compiled_plan_cache: crate::cache::CompiledPlanCache::new(1_000, 10_000),
        };

        // Populate the registries from disk so callers never need a separate
        // `load_collections()` step.
        database.load_collections()?;

        Ok(database)
    }

    /// Borrows the root [`VelesConfig`](crate::config::VelesConfig) this
    /// database was opened with (the default one when opened through
    /// [`Database::open`]).
    ///
    /// Sub-systems (`vector_ops`, `query_engine`, `stats`, …) read it through
    /// `database.config()` when they need to honour a user-supplied limit or
    /// toggle. The config sits behind an `Arc`, so no lock is taken.
    #[must_use]
    pub fn config(&self) -> &crate::config::VelesConfig {
        self.config.as_ref()
    }

    /// Returns a cheap, cloneable handle to the root config.
    ///
    /// Prefer this over [`Database::config`] when the config has to move into
    /// a thread or long-lived closure that outlives the current `&self` borrow.
    #[must_use]
    pub fn config_arc(&self) -> std::sync::Arc<crate::config::VelesConfig> {
        std::sync::Arc::clone(&self.config)
    }

    /// Returns the path to the data directory.
    #[must_use]
    pub fn data_dir(&self) -> &std::path::Path {
        self.data_dir.as_path()
    }

    /// Returns the current DDL schema version counter.
    ///
    /// The counter is read with `Relaxed` ordering.
    #[must_use]
    pub fn schema_version(&self) -> u64 {
        use std::sync::atomic::Ordering;

        self.schema_version.load(Ordering::Relaxed)
    }

    /// Returns a reference to the compiled query plan cache.
    #[must_use]
    pub fn plan_cache(&self) -> &crate::cache::CompiledPlanCache {
        &self.compiled_plan_cache
    }

    // =========================================================================
    // Observer notification helpers (called by server handlers after operations)
    // =========================================================================

    /// Forwards an "upsert happened" event to the registered observer.
    ///
    /// **Caller contract**: [`Database`] internals never call this
    /// themselves. HTTP handlers and SDK bindings must invoke it after a
    /// successful upsert, passing the number of points written; otherwise
    /// the observer receives no upsert event for that operation.
    ///
    /// No-op when no observer is registered.
    pub fn notify_upsert(&self, collection: &str, point_count: usize) {
        if let Some(observer) = &self.observer {
            observer.on_upsert(collection, point_count);
        }
    }

    /// Forwards a "query executed" event, with its duration, to the
    /// registered observer.
    ///
    /// **Caller contract**: [`Database::execute_query`] does not call this
    /// automatically. Callers measure the wall-clock duration themselves
    /// (e.g. `std::time::Instant::now()` before the call) and pass the
    /// elapsed microseconds here afterwards.
    ///
    /// No-op when no observer is registered.
    pub fn notify_query(&self, collection: &str, duration_us: u64) {
        if let Some(observer) = &self.observer {
            observer.on_query(collection, duration_us);
        }
    }
}