// velesdb_core/database/mod.rs

//! Database facade and orchestration layer for collection lifecycle and query routing.
//!
//! This module is split into focused submodules:
//!
//! - [`collection_ops`] — Collection CRUD dispatcher (create, delete, list, get)
//! - [`vector_ops`] — Vector collection create/get
//! - [`graph_ops`] — Graph collection create/get
//! - [`metadata_ops`] — Metadata-only collection create/get
//! - [`query_engine`] — `VelesQL` query execution, plan caching, DML dispatch
//! - [`query_join`] — JOIN execution strategies (lookup, filtered, condition pushdown)
//! - [`dml_executor`] — DML mutations (INSERT EDGE, DELETE, DELETE EDGE, SELECT EDGES, INSERT NODE)
//! - [`persistence`] — Loading collections from disk at startup
//! - [`training`] — `TRAIN QUANTIZER` statement execution
//! - [`stats`] — Collection statistics (analyze, cache)
//! - [`database_helpers`] — DML value conversion and JOIN column store helpers
//!   (compiled only when the `persistence` feature is enabled)
//!
//! The submodules `admin_executor`, `cross_collection`, `ddl_executor`,
//! `introspection_executor`, `join_pushdown`, and `query_engine_dml` are also
//! declared in this file but are not yet described in the list above.
16
17use crate::collection::{GraphCollection, MetadataCollection, VectorCollection};
18use crate::observer::DatabaseObserver;
19use crate::simd_dispatch;
20use crate::{ColumnStore, Error, Result};
21
22mod admin_executor;
23mod collection_ops;
24mod cross_collection;
25mod ddl_executor;
26mod dml_executor;
27mod graph_ops;
28mod introspection_executor;
29mod join_pushdown;
30mod metadata_ops;
31mod persistence;
32mod query_engine;
33mod query_engine_dml;
34mod query_join;
35mod stats;
36mod training;
37mod vector_ops;
38
39#[cfg(feature = "persistence")]
40mod database_helpers;
41
42#[cfg(all(test, feature = "persistence"))]
43mod collection_ops_tests;
44#[cfg(all(test, feature = "persistence"))]
45mod database_helpers_tests;
46#[cfg(all(test, feature = "persistence"))]
47mod database_tests;
48#[cfg(all(test, feature = "persistence"))]
49mod ddl_executor_tests;
50#[cfg(all(test, feature = "persistence"))]
51mod graph_ops_tests;
52#[cfg(all(test, feature = "persistence"))]
53mod query_engine_tests;
54#[cfg(all(test, feature = "persistence"))]
55mod stats_tests;
56
/// Database instance managing collections and storage.
///
/// # Lifecycle
///
/// `Database::open()` automatically loads all previously created collections from disk.
/// There is no need to call `load_collections()` separately.
///
/// # Extension (Premium)
///
/// Use [`Database::open_with_observer`] to inject a [`DatabaseObserver`] implementation
/// from `velesdb-premium` without modifying this crate.
///
/// # Field order and drop order
///
/// Rust drops struct fields in declaration order. The exclusive lock handle
/// (`_lock_file`) is therefore declared **last**, so the cross-process lock is
/// released only after every collection registry, the statistics cache, the
/// observer and the plan cache have been dropped. Keep it last when adding
/// new fields.
#[cfg(feature = "persistence")]
pub struct Database {
    /// Path to the data directory.
    data_dir: std::path::PathBuf,
    /// Root configuration applied to every subsystem.
    ///
    /// Stored as an `Arc` so `Database::config()` can hand out cheap,
    /// cloneable references without forcing the whole struct onto the
    /// heap or locking. The value is populated at construction time
    /// (`open`, `open_with_observer`, `open_with_config`, or
    /// `open_with_observer_and_config`) and is immutable for the life of
    /// the `Database` — Wave 3 never needs to mutate the root config at
    /// runtime, and making it immutable rules out a large class of
    /// surprising behaviours.
    config: std::sync::Arc<crate::config::VelesConfig>,
    /// Typed registry: vector collections, keyed by collection name.
    vector_colls: parking_lot::RwLock<std::collections::HashMap<String, VectorCollection>>,
    /// Typed registry: graph collections, keyed by collection name.
    graph_colls: parking_lot::RwLock<std::collections::HashMap<String, GraphCollection>>,
    /// Typed registry: metadata-only collections, keyed by collection name.
    metadata_colls: parking_lot::RwLock<std::collections::HashMap<String, MetadataCollection>>,
    /// Cached collection statistics for CBO planning.
    collection_stats: parking_lot::RwLock<
        std::collections::HashMap<String, crate::collection::stats::CollectionStats>,
    >,
    /// Optional lifecycle observer (used by velesdb-premium for RBAC, audit, multi-tenant).
    observer: Option<std::sync::Arc<dyn DatabaseObserver>>,
    /// Monotonic DDL schema version counter (CACHE-01).
    ///
    /// Incremented on every create/drop collection operation.
    /// Used by `CompiledPlanCache` to invalidate cached query plans.
    schema_version: std::sync::atomic::AtomicU64,
    /// Compiled query plan cache (CACHE-02).
    ///
    /// Stores recently compiled `QueryPlan` instances keyed by `PlanKey`.
    /// Default sizing: L1 = 1K hot entries, L2 = 10K LRU entries.
    compiled_plan_cache: crate::cache::CompiledPlanCache,
    /// Exclusive file lock preventing multi-process corruption.
    ///
    /// The lock is held for the lifetime of the `Database` and released when
    /// this file handle is dropped. The `_` prefix signals this field is kept
    /// for its RAII side effect only.
    ///
    /// INVARIANT: this must stay the last field. Fields are dropped in
    /// declaration order, so declaring the lock any earlier would release it
    /// while the collections above are still being torn down, letting another
    /// process open the same data directory in the meantime.
    _lock_file: std::fs::File,
}
110
#[cfg(feature = "persistence")]
impl Database {
    /// Opens the database rooted at `path`, creating the directory if needed,
    /// with the default [`VelesConfig`](crate::config::VelesConfig) and no observer.
    ///
    /// Existing collections are loaded as part of this call, so callers do not
    /// need a separate `load_collections()` step. Use
    /// [`Database::open_with_config`] to customise the configuration.
    ///
    /// # Errors
    ///
    /// Returns an error if the data directory or its lock file cannot be
    /// created, if the exclusive lock cannot be acquired
    /// ([`Error::DatabaseLocked`]), or if loading the existing collections fails.
    pub fn open<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
        let observer = None;
        let config = None;
        Self::open_impl(path, observer, config)
    }

    /// Opens the database rooted at `path` with an explicit
    /// [`VelesConfig`](crate::config::VelesConfig) and no observer.
    ///
    /// The supplied `config` is moved into the `Database` (behind an `Arc`)
    /// and stays available through [`Database::config`] for the lifetime of
    /// the handle.
    ///
    /// # Errors
    ///
    /// Returns an error if the data directory or its lock file cannot be
    /// created, if the exclusive lock cannot be acquired
    /// ([`Error::DatabaseLocked`]), or if loading the already-present
    /// collections fails (for example when one exceeds the limits declared in
    /// `config.limits`).
    pub fn open_with_config<P: AsRef<std::path::Path>>(
        path: P,
        config: crate::config::VelesConfig,
    ) -> Result<Self> {
        let config = Some(config);
        Self::open_impl(path, None, config)
    }

    /// Opens the database rooted at `path` with a [`DatabaseObserver`]
    /// (used by velesdb-premium) and the default
    /// [`VelesConfig`](crate::config::VelesConfig).
    ///
    /// The observer is stored on the `Database` and receives lifecycle hooks,
    /// enabling RBAC, audit logging, multi-tenant routing, etc.
    ///
    /// # Errors
    ///
    /// Same as [`Database::open`].
    pub fn open_with_observer<P: AsRef<std::path::Path>>(
        path: P,
        observer: std::sync::Arc<dyn DatabaseObserver>,
    ) -> Result<Self> {
        let observer = Some(observer);
        Self::open_impl(path, observer, None)
    }

    /// Opens the database rooted at `path` with both an explicit
    /// [`crate::VelesConfig`] and a [`crate::DatabaseObserver`].
    ///
    /// Used by the premium shell that layers RBAC/audit on top of a
    /// tenant-specific config file.
    ///
    /// # Errors
    ///
    /// Same as [`Database::open_with_config`].
    pub fn open_with_observer_and_config<P: AsRef<std::path::Path>>(
        path: P,
        observer: std::sync::Arc<dyn DatabaseObserver>,
        config: crate::config::VelesConfig,
    ) -> Result<Self> {
        let observer = Some(observer);
        let config = Some(config);
        Self::open_impl(path, observer, config)
    }

    /// Shared constructor behind every public `open*` entry point.
    ///
    /// Steps, in order:
    /// 1. create the data directory (and any missing parents);
    /// 2. create `velesdb.lock` inside it and take an exclusive, non-blocking lock;
    /// 3. log the detected SIMD features;
    /// 4. build the `Database` with empty registries, schema version `0`, and
    ///    a plan cache sized `1_000` / `10_000`;
    /// 5. load the collections already present on disk.
    ///
    /// A missing `config` falls back to `VelesConfig::default()`.
    ///
    /// # Errors
    ///
    /// Propagates I/O errors from directory / lock-file creation, maps any
    /// lock-acquisition failure to [`Error::DatabaseLocked`] (carrying the
    /// data directory path), and propagates errors from `load_collections()`.
    fn open_impl<P: AsRef<std::path::Path>>(
        path: P,
        observer: Option<std::sync::Arc<dyn DatabaseObserver>>,
        config: Option<crate::config::VelesConfig>,
    ) -> Result<Self> {
        use parking_lot::RwLock;
        use std::collections::HashMap;
        use std::sync::atomic::AtomicU64;
        use std::sync::Arc;

        let root = path.as_ref().to_path_buf();
        std::fs::create_dir_all(&root)?;

        // Exclusive, non-blocking lock: a second process opening the same
        // directory fails fast instead of corrupting shared state.
        let lock_handle = std::fs::File::create(root.join("velesdb.lock"))?;
        let lock_outcome = fs2::FileExt::try_lock_exclusive(&lock_handle);
        lock_outcome.map_err(|_| Error::DatabaseLocked(root.display().to_string()))?;

        // Report which SIMD paths are available on this machine.
        let simd = simd_dispatch::simd_features_info();
        tracing::info!(
            avx512 = simd.avx512f,
            avx2 = simd.avx2,
            "SIMD features detected - direct dispatch enabled"
        );

        let effective_config = config.unwrap_or_default();
        let database = Self {
            data_dir: root,
            _lock_file: lock_handle,
            config: Arc::new(effective_config),
            vector_colls: RwLock::new(HashMap::new()),
            graph_colls: RwLock::new(HashMap::new()),
            metadata_colls: RwLock::new(HashMap::new()),
            collection_stats: RwLock::new(HashMap::new()),
            observer,
            schema_version: AtomicU64::new(0),
            compiled_plan_cache: crate::cache::CompiledPlanCache::new(1_000, 10_000),
        };

        // Populate the registries from disk before handing the handle out.
        database.load_collections()?;

        Ok(database)
    }

    /// Borrows the root [`VelesConfig`](crate::config::VelesConfig) chosen at
    /// construction time (the default one when no config was supplied).
    ///
    /// No lock is taken; the config lives behind a shared `Arc`.
    #[must_use]
    pub fn config(&self) -> &crate::config::VelesConfig {
        &*self.config
    }

    /// Returns a new `Arc` handle to the root config.
    ///
    /// Use this when the config must be moved into a thread or a long-lived
    /// closure that outlives the current `&self` borrow.
    #[must_use]
    pub fn config_arc(&self) -> std::sync::Arc<crate::config::VelesConfig> {
        let shared = &self.config;
        std::sync::Arc::clone(shared)
    }

    /// Returns the path of the data directory this database was opened on.
    #[must_use]
    pub fn data_dir(&self) -> &std::path::Path {
        self.data_dir.as_path()
    }

    /// Returns the current value of the DDL schema version counter
    /// (read with relaxed atomic ordering).
    #[must_use]
    pub fn schema_version(&self) -> u64 {
        use std::sync::atomic::Ordering;

        self.schema_version.load(Ordering::Relaxed)
    }

    /// Borrows the compiled query plan cache.
    #[must_use]
    pub fn plan_cache(&self) -> &crate::cache::CompiledPlanCache {
        let cache = &self.compiled_plan_cache;
        cache
    }

    // =========================================================================
    // Observer notification helpers (called by server handlers after operations)
    // =========================================================================

    /// Forwards an "upsert happened" event to the registered observer.
    ///
    /// **Caller contract**: [`Database`] internals do NOT call this
    /// automatically. HTTP handlers and SDK bindings must invoke it after a
    /// successful upsert, passing the number of points written; otherwise the
    /// observer never sees that operation.
    ///
    /// Does nothing when no observer is registered.
    pub fn notify_upsert(&self, collection: &str, point_count: usize) {
        if let Some(watcher) = self.observer.as_deref() {
            watcher.on_upsert(collection, point_count);
        }
    }

    /// Forwards a "query executed" event, with its duration, to the
    /// registered observer.
    ///
    /// **Caller contract**: [`Database::execute_query`] does NOT call this
    /// automatically. Callers measure the wall-clock duration themselves
    /// (e.g. `std::time::Instant::now()` before the call) and pass the elapsed
    /// microseconds as `duration_us`.
    ///
    /// Does nothing when no observer is registered.
    pub fn notify_query(&self, collection: &str, duration_us: u64) {
        if let Some(watcher) = self.observer.as_deref() {
            watcher.on_query(collection, duration_us);
        }
    }
}