velesdb_core/database/mod.rs
1//! Database facade and orchestration layer for collection lifecycle and query routing.
2//!
//! This module is split into focused submodules, including (the list is not exhaustive):
4//!
5//! - [`collection_ops`] — Collection CRUD dispatcher (create, delete, list, get)
6//! - [`vector_ops`] — Vector collection create/get
7//! - [`graph_ops`] — Graph collection create/get
8//! - [`metadata_ops`] — Metadata-only collection create/get
9//! - [`query_engine`] — `VelesQL` query execution, plan caching, DML dispatch
10//! - [`query_join`] — JOIN execution strategies (lookup, filtered, condition pushdown)
11//! - [`dml_executor`] — DML mutations (INSERT EDGE, DELETE, DELETE EDGE, SELECT EDGES, INSERT NODE)
12//! - [`persistence`] — Loading collections from disk at startup
13//! - [`training`] — `TRAIN QUANTIZER` statement execution
14//! - [`stats`] — Collection statistics (analyze, cache)
15//! - [`database_helpers`] — DML value conversion and JOIN column store helpers
16
17use crate::collection::{GraphCollection, MetadataCollection, VectorCollection};
18use crate::observer::DatabaseObserver;
19use crate::simd_dispatch;
20use crate::{ColumnStore, Error, Result};
21
22mod admin_executor;
23mod collection_ops;
24mod cross_collection;
25mod ddl_executor;
26mod dml_executor;
27mod graph_ops;
28mod introspection_executor;
29mod join_pushdown;
30mod metadata_ops;
31mod persistence;
32mod query_engine;
33mod query_engine_dml;
34mod query_join;
35mod stats;
36mod training;
37mod vector_ops;
38
39#[cfg(feature = "persistence")]
40mod database_helpers;
41
42#[cfg(all(test, feature = "persistence"))]
43mod collection_ops_tests;
44#[cfg(all(test, feature = "persistence"))]
45mod database_helpers_tests;
46#[cfg(all(test, feature = "persistence"))]
47mod database_tests;
48#[cfg(all(test, feature = "persistence"))]
49mod ddl_executor_tests;
50#[cfg(all(test, feature = "persistence"))]
51mod graph_ops_tests;
52#[cfg(all(test, feature = "persistence"))]
53mod query_engine_tests;
54#[cfg(all(test, feature = "persistence"))]
55mod stats_tests;
56
/// Database instance managing collections and storage.
///
/// # Lifecycle
///
/// `Database::open()` automatically loads all previously created collections from disk.
/// There is no need to call `load_collections()` separately.
///
/// # Extension (Premium)
///
/// Use [`Database::open_with_observer`] to inject a [`DatabaseObserver`] implementation
/// from `velesdb-premium` without modifying this crate.
#[cfg(feature = "persistence")]
pub struct Database {
    /// Path to the data directory
    data_dir: std::path::PathBuf,
    /// Root configuration applied to every subsystem.
    ///
    /// Stored as an `Arc` so `Database::config()` can hand out cheap,
    /// cloneable references without forcing the whole struct onto the
    /// heap or locking. The value is populated at construction time
    /// (`open`, `open_with_observer`, or `open_with_config`) and is
    /// immutable for the life of the `Database` — Wave 3 never needs
    /// to mutate the root config at runtime, and making it immutable
    /// rules out a large class of surprising behaviours.
    config: std::sync::Arc<crate::config::VelesConfig>,
    /// Typed registry: vector collections.
    vector_colls: parking_lot::RwLock<std::collections::HashMap<String, VectorCollection>>,
    /// Typed registry: graph collections.
    graph_colls: parking_lot::RwLock<std::collections::HashMap<String, GraphCollection>>,
    /// Typed registry: metadata-only collections.
    metadata_colls: parking_lot::RwLock<std::collections::HashMap<String, MetadataCollection>>,
    /// Cached collection statistics for CBO planning.
    collection_stats: parking_lot::RwLock<
        std::collections::HashMap<String, crate::collection::stats::CollectionStats>,
    >,
    /// Optional lifecycle observer (used by velesdb-premium for RBAC, audit, multi-tenant).
    observer: Option<std::sync::Arc<dyn DatabaseObserver>>,
    /// Monotonic DDL schema version counter (CACHE-01).
    ///
    /// Incremented on every create/drop collection operation.
    /// Used by `CompiledPlanCache` to invalidate cached query plans.
    schema_version: std::sync::atomic::AtomicU64,
    /// Compiled query plan cache (CACHE-02).
    ///
    /// Stores recently compiled `QueryPlan` instances keyed by `PlanKey`.
    /// Default sizing: L1 = 1K hot entries, L2 = 10K LRU entries.
    compiled_plan_cache: crate::cache::CompiledPlanCache,
    /// Exclusive file lock preventing multi-process corruption.
    ///
    /// The lock is held for the lifetime of the `Database` and released on `Drop`.
    /// The `_` prefix signals this field is kept for its RAII side effect.
    ///
    /// This field is intentionally declared **last**: Rust drops struct
    /// fields in declaration order, so keeping the lock file at the end
    /// means every other field (collection registries, plan cache, …) is
    /// torn down while the lock is still held. Any work those fields may
    /// perform when dropped therefore cannot overlap with another process
    /// that is waiting to open the same data directory.
    // NOTE(review): do not move this field above the collection registries
    // without re-checking the drop-order guarantee described above.
    _lock_file: std::fs::File,
}
110
#[cfg(feature = "persistence")]
impl Database {
    /// Opens or creates a database, **automatically loading all existing collections**.
    ///
    /// This replaces the previous `open()` + `load_collections()` two-step pattern.
    /// The new `open()` is a strict auto-load: all `config.json` directories under
    /// `path` are loaded on startup.
    ///
    /// Uses the default [`VelesConfig`](crate::config::VelesConfig) — every
    /// subsystem behaves identically to the pre-Wave-3 version of this
    /// function, so existing callers keep their exact behaviour. Users
    /// that need to customise subsystem limits or WAL batching should
    /// call [`Database::open_with_config`] instead.
    ///
    /// # Errors
    ///
    /// Returns an error if the data directory cannot be created, the lock
    /// file cannot be opened, the exclusive lock cannot be acquired
    /// ([`Error::DatabaseLocked`]), or loading the existing collections fails.
    pub fn open<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
        Self::open_impl(path, None, None)
    }

    /// Opens a database with an explicit [`VelesConfig`](crate::config::VelesConfig).
    ///
    /// Every subsystem that honours a config field (HNSW defaults, WAL
    /// batching, runtime limits, search quality) reads from the passed
    /// instance. The config is moved into the `Database` (wrapped in an
    /// `Arc`) and retained for the lifetime of the handle so sub-systems
    /// can consult it without re-parsing a TOML file.
    ///
    /// # Errors
    ///
    /// Returns an error if the directory cannot be created, the lock
    /// cannot be acquired ([`Error::DatabaseLocked`]), or loading the
    /// already-present collections fails — which is documented to include
    /// any collection that exceeds the limits declared in `config.limits`.
    pub fn open_with_config<P: AsRef<std::path::Path>>(
        path: P,
        config: crate::config::VelesConfig,
    ) -> Result<Self> {
        Self::open_impl(path, None, Some(config))
    }

    /// Opens a database with a [`DatabaseObserver`] (used by velesdb-premium).
    ///
    /// The observer receives lifecycle hooks for every collection operation,
    /// enabling RBAC, audit logging, multi-tenant routing, etc.
    ///
    /// Equivalent to [`Database::open`] plus the observer injection —
    /// applies the default [`VelesConfig`](crate::config::VelesConfig).
    ///
    /// # Errors
    ///
    /// Same as [`Database::open`].
    pub fn open_with_observer<P: AsRef<std::path::Path>>(
        path: P,
        observer: std::sync::Arc<dyn DatabaseObserver>,
    ) -> Result<Self> {
        Self::open_impl(path, Some(observer), None)
    }

    /// Opens a database with both an explicit [`crate::VelesConfig`] and a
    /// [`crate::DatabaseObserver`]. Used by the premium shell that layers
    /// RBAC/audit on top of a tenant-specific config file.
    ///
    /// # Errors
    ///
    /// Same as [`Database::open_with_config`].
    pub fn open_with_observer_and_config<P: AsRef<std::path::Path>>(
        path: P,
        observer: std::sync::Arc<dyn DatabaseObserver>,
        config: crate::config::VelesConfig,
    ) -> Result<Self> {
        Self::open_impl(path, Some(observer), Some(config))
    }

    /// Shared implementation behind every public `open*` constructor.
    ///
    /// Steps, in order:
    ///
    /// 1. create the data directory (and parents) if missing;
    /// 2. open `velesdb.lock` inside it and take an exclusive, non-blocking lock;
    /// 3. log the detected SIMD features;
    /// 4. build the `Database` with empty registries, using `config` or the
    ///    default config when `None`;
    /// 5. load the collections already present on disk.
    ///
    /// # Errors
    ///
    /// Propagates I/O errors from directory creation and from opening the
    /// lock file, returns [`Error::DatabaseLocked`] when the exclusive lock
    /// cannot be acquired, and propagates any error from `load_collections()`.
    fn open_impl<P: AsRef<std::path::Path>>(
        path: P,
        observer: Option<std::sync::Arc<dyn DatabaseObserver>>,
        config: Option<crate::config::VelesConfig>,
    ) -> Result<Self> {
        let data_dir = path.as_ref().to_path_buf();
        std::fs::create_dir_all(&data_dir)?;

        // Acquire exclusive file lock to prevent multi-process corruption.
        //
        // The lock file is opened WITHOUT truncation: at this point we do not
        // own the lock yet, so we must not modify a file that another process
        // may currently hold locked. (`File::create` would truncate it.)
        let lock_path = data_dir.join("velesdb.lock");
        let lock_file = std::fs::OpenOptions::new()
            .write(true)
            .create(true)
            .truncate(false)
            .open(&lock_path)?;
        fs2::FileExt::try_lock_exclusive(&lock_file).map_err(|err| {
            // Callers only see `DatabaseLocked`; keep the underlying OS error
            // available in the logs for diagnosis.
            tracing::debug!(
                error = %err,
                "failed to acquire exclusive database lock"
            );
            Error::DatabaseLocked(data_dir.display().to_string())
        })?;

        // Log SIMD features detected at startup
        let features = simd_dispatch::simd_features_info();
        tracing::info!(
            avx512 = features.avx512f,
            avx2 = features.avx2,
            "SIMD features detected - direct dispatch enabled"
        );

        let db = Self {
            data_dir,
            _lock_file: lock_file,
            config: std::sync::Arc::new(config.unwrap_or_default()),
            vector_colls: parking_lot::RwLock::new(std::collections::HashMap::new()),
            graph_colls: parking_lot::RwLock::new(std::collections::HashMap::new()),
            metadata_colls: parking_lot::RwLock::new(std::collections::HashMap::new()),
            collection_stats: parking_lot::RwLock::new(std::collections::HashMap::new()),
            observer,
            schema_version: std::sync::atomic::AtomicU64::new(0),
            // Default plan-cache sizing: L1 = 1K hot entries, L2 = 10K LRU entries.
            compiled_plan_cache: crate::cache::CompiledPlanCache::new(1_000, 10_000),
        };

        // Auto-load all existing collections from disk (replaces manual load_collections()).
        db.load_collections()?;

        Ok(db)
    }

    /// Returns a reference to the root [`VelesConfig`](crate::config::VelesConfig)
    /// that was supplied at construction (or the default if the database
    /// was opened without an explicit config, e.g. via [`Database::open`]).
    ///
    /// Sub-systems (`vector_ops`, `query_engine`, `stats`, …) consult this
    /// through `database.config()` when they need to honour a user-supplied
    /// limit or toggle. The call takes no lock; use [`Database::config_arc`]
    /// when the config has to be moved into another thread.
    #[must_use]
    pub fn config(&self) -> &crate::config::VelesConfig {
        &self.config
    }

    /// Returns a cheap, cloneable handle to the root config.
    ///
    /// Use this when you need to move the config into a thread or
    /// long-lived closure that outlives the current `&self` borrow.
    /// Only the `Arc` reference count is bumped; the config is not copied.
    #[must_use]
    pub fn config_arc(&self) -> std::sync::Arc<crate::config::VelesConfig> {
        std::sync::Arc::clone(&self.config)
    }

    /// Returns the path to the data directory.
    #[must_use]
    pub fn data_dir(&self) -> &std::path::Path {
        &self.data_dir
    }

    /// Returns the current DDL schema version counter.
    ///
    /// The value starts at `0` when the database is opened.
    #[must_use]
    pub fn schema_version(&self) -> u64 {
        // Relaxed: only the counter value itself is read here; this load is
        // not used to synchronise access to any other memory.
        self.schema_version
            .load(std::sync::atomic::Ordering::Relaxed)
    }

    /// Returns a reference to the compiled query plan cache.
    #[must_use]
    pub fn plan_cache(&self) -> &crate::cache::CompiledPlanCache {
        &self.compiled_plan_cache
    }

    // =========================================================================
    // Observer notification helpers (called by server handlers after operations)
    // =========================================================================

    /// Notifies the observer that points were upserted into a collection.
    ///
    /// **Caller contract**: this method is NOT called automatically by
    /// [`Database`] internals. HTTP handlers and SDK bindings are responsible
    /// for calling it after a successful upsert, passing the number of points
    /// written. Forgetting to call it means the observer receives no upsert
    /// events for that operation.
    ///
    /// No-op when no observer is registered.
    pub fn notify_upsert(&self, collection: &str, point_count: usize) {
        if let Some(obs) = self.observer.as_ref() {
            obs.on_upsert(collection, point_count);
        }
    }

    /// Notifies the observer that a query was executed, with its duration.
    ///
    /// **Caller contract**: this method is NOT called automatically by
    /// [`Database::execute_query`]. Callers must measure the wall-clock
    /// duration themselves (e.g. `std::time::Instant::now()` before the call)
    /// and invoke this method afterwards with the elapsed microseconds.
    ///
    /// No-op when no observer is registered.
    pub fn notify_query(&self, collection: &str, duration_us: u64) {
        if let Some(obs) = self.observer.as_ref() {
            obs.on_query(collection, duration_us);
        }
    }
}