sqlitegraph/lib.rs
1//! SQLite-based graph database with unified backend support.
2//!
3//! `sqlitegraph` provides a lightweight, deterministic graph database for embedded Rust applications.
4//! It supports both SQLite and Native storage backends through a unified API.
5//!
6//! # Architecture
7//!
8//! The crate is organized into focused modules:
9//!
10//! ```text
11//! sqlitegraph/
12//! ├── graph/ # Core graph database (SqliteGraph, GraphEntity, GraphEdge)
13//! ├── backend/ # Unified backend trait (GraphBackend, SqliteGraphBackend, NativeGraphBackend)
14//! ├── algo/ # Graph algorithms (PageRank, Betweenness, Louvain, Label Propagation)
15//! ├── hnsw/ # Vector similarity search (HNSW index, distance metrics)
16//! ├── cache/ # LRU-K adjacency cache for traversal optimization
17//! ├── introspection/ # Debugging and observability APIs
18//! ├── progress/ # Progress tracking for long-running operations
19//! ├── mvcc/ # MVCC-lite snapshot system
20//! ├── pattern_engine/ # Triple pattern matching
21//! ├── query/ # High-level query interface
22//! └── recovery/ # Backup and restore utilities
23//! ```
24//!
25//! # Features
26//!
27//! - **Dual Backend Support**: Choose between SQLite (feature-rich) and Native (performance-optimized) backends
28//! - **Entity and Edge Storage**: Rich metadata support with JSON serialization
29//! - **Pattern Matching**: Efficient triple pattern matching with cache-enabled fast-path
30//! - **Traversal Algorithms**: Built-in BFS, k-hop, and shortest path algorithms
31//! - **Graph Algorithms**: PageRank, Betweenness Centrality, Louvain, Label Propagation
32//! - **Vector Search**: HNSW approximate nearest neighbor search with persistence
33//! - **MVCC Snapshots**: Read isolation with snapshot consistency
34//! - **Bulk Operations**: High-performance batch insertions for large datasets
35//! - **Introspection**: Debugging APIs for cache stats, file sizes, edge counts
36//! - **Progress Tracking**: Callback-based progress for long-running algorithms
37//!
38//! # Quick Start
39//!
40//! ```rust,ignore
41//! use sqlitegraph::{open_graph, GraphConfig, BackendKind};
42//!
43//! // Use SQLite backend (default)
44//! let cfg = GraphConfig::sqlite();
45//! let graph = open_graph("my_graph.db", &cfg)?;
46//!
47//! // Or use Native backend
48//! let cfg = GraphConfig::native();
49//! let graph = open_graph("my_graph.db", &cfg)?;
50//!
51//! // Both backends support the same operations
52//! let node_id = graph.insert_node(/* node spec */)?;
53//! let neighbor_ids = graph.neighbors(node_id, /* query */)?;
54//! ```
55//!
56//! # Backend Selection
57//!
58//! ## Feature Matrix
59//!
60//! | Feature | SQLite Backend | Native Backend |
61//! |---------|----------------|----------------|
62//! | **ACID Transactions** | ✅ Full | ✅ WAL-based |
63//! | **Graph Algorithms** | ✅ Full support | ✅ Full support |
64//! | **HNSW Vector Search** | ✅ With persistence | ✅ In-memory |
65//! | **MVCC Snapshots** | ✅ | ✅ |
66//! | **Pattern Matching** | ✅ | ✅ |
67//! | **Raw SQL Access** | ✅ Native | ❌ Not supported |
68//! | **File Format** | SQLite DB | Custom binary |
69//! | **Startup Time** | Fast | Faster |
70//! | **Dependencies** | libsqlite3 | None (pure Rust) |
71//! | **Write Performance** | Good | Better |
72//! | **Query Performance** | Good | Better |
73//!
74//! ## When to Use SQLite Backend
75//!
76//! Choose SQLite backend when:
77//! - **ACID guarantees** are critical for your application
78//! - **Raw SQL access** needed for complex queries or joins
79//! - **Database compatibility** with SQLite tools (sqlite3, DB Browser)
80//! - **Mature ecosystem** with third-party tooling
81//! - **HNSW persistence** required (vectors survive restarts)
82//!
83//! ## When to Use Native Backend
84//!
85//! Choose Native backend when:
86//! - **Performance is critical** (faster reads/writes)
87//! - **No external dependencies** desired (pure Rust)
88//! - **Fast startup** with large datasets
89//! - **Custom binary format** acceptable
//! - **In-memory HNSW index** is acceptable (vectors persist in a separate file)
91//!
92//! # Thread Safety
93//!
94//! ## SqliteGraph is NOT Thread-Safe
95//!
96//! `SqliteGraph` uses interior mutability (`RefCell`) and is **not `Sync`**:
97//!
98//! ```rust,ignore
99//! use sqlitegraph::SqliteGraph;
100//! use std::thread;
101//!
102//! let graph = SqliteGraph::open("test.db")?;
103//!
104//! // ❌ WRONG: Sharing graph across threads for writes
105//! let graph_clone = graph;
106//! thread::spawn(move || {
107//! graph_clone.insert_node(...)?; // DATA RACE!
108//! });
109//!
110//! // ✅ CORRECT: Use snapshots for concurrent reads
111//! let snapshot = graph.snapshot()?;
112//! thread::spawn(move || {
113//! let neighbors = snapshot.neighbors(node_id)?; // Thread-safe
114//! });
115//! ```
116//!
117//! ## Concurrent Read Access
118//!
119//! Use [`GraphSnapshot`] for thread-safe concurrent reads:
120//!
121//! ```rust,ignore
122//! use sqlitegraph::{GraphSnapshot, SqliteGraph};
123//!
124//! let graph = SqliteGraph::open("my_graph.db")?;
125//!
126//! // Create multiple snapshots for concurrent reads
127//! let snapshot1 = graph.snapshot()?;
128//! let snapshot2 = graph.snapshot()?;
129//!
130//! // Both snapshots can be used concurrently (thread-safe)
131//! let handle1 = std::thread::spawn(move || {
132//! snapshot1.neighbors(node_id)
133//! });
134//!
135//! let handle2 = std::thread::spawn(move || {
136//! snapshot2.neighbors(node_id)
137//! });
138//! ```
139//!
140//! ## Write Serialization
141//!
142//! All writes must be serialized:
143//!
144//! ```rust,ignore
145//! // ✅ CORRECT: Single thread for all writes
146//! let graph = SqliteGraph::open("my_graph.db")?;
147//! for i in 0..1000 {
148//! graph.insert_node(...)?;
149//! graph.insert_edge(...)?;
150//! }
151//!
152//! // ❌ WRONG: Concurrent writes
153//! let graph = Arc::new(Mutex::new(graph));
154//! let handle1 = thread::spawn(|| {
155//! let g = graph.lock().unwrap();
156//! g.insert_node(...)
157//! });
158//! let handle2 = thread::spawn(|| {
159//! let g = graph.lock().unwrap();
160//! g.insert_node(...)
161//! });
162//! // Even with Mutex, this can cause issues due to RefCell
163//! ```
164//!
165//! # Error Handling
166//!
167//! All operations return [`Result<T, SqliteGraphError>`]:
168//!
169//! ```rust,ignore
170//! use sqlitegraph::{SqliteGraph, SqliteGraphError};
171//!
172//! let graph = SqliteGraph::open("my_graph.db")?;
173//!
174//! match graph.insert_node(node_spec) {
175//! Ok(node_id) => println!("Created node {}", node_id),
176//! Err(SqliteGraphError::EntityNotFound) => {
177//! println!("Node not found");
178//! }
179//! Err(SqliteGraphError::DatabaseError(e)) => {
180//! eprintln!("Database error: {}", e);
181//! }
182//! Err(e) => {
183//! eprintln!("Other error: {}", e);
184//! }
185//! }
186//! ```
187//!
188//! # Performance Comparison
189//!
190//! ## Read Performance
191//! - **SQLite Backend**: 10-100μs per neighbor lookup (cached: ~100ns)
192//! - **Native Backend**: 1-10μs per neighbor lookup (cached: ~100ns)
193//! - **Cache hit ratio**: 80-95% for traversal workloads
194//!
195//! ## Write Performance
196//! - **SQLite Backend**: 100-500μs per insert (transaction-batched)
197//! - **Native Backend**: 10-100μs per insert (transaction-batched)
198//! - **Bulk insert**: 10-100x faster with `bulk_insert_entities()`
199//!
200//! ## Memory Usage
201//! - **Base overhead**: O(V + E) for graph storage
202//! - **Cache overhead**: 10-20% additional memory
203//! - **HNSW index**: 2-3x vector data size
204//!
205//! # Public API Organization
206//!
207//! This crate exports a clean, stable public API organized as follows:
208//!
209//! ## Core Types
210//! - [`GraphEntity`] - Graph node/vertex representation
211//! - [`GraphEdge`] - Graph edge/relationship representation
212//! - [`GraphBackend`] - Unified trait for backend implementations
213//! - [`SqliteGraphBackend`] - SQLite backend implementation
214//! - [`NativeGraphBackend`] - Native backend implementation
215//!
216//! ## Configuration
217//! - [`BackendKind`] - Runtime backend selection enum
218//! - [`GraphConfig`] - Unified configuration for both backends
219//! - [`SqliteConfig`] - SQLite-specific options
220//! - [`NativeConfig`] - Native-specific options
221//! - [`open_graph()`] - Unified factory function
222//!
223//! ## Operations
//! - `insert_node()`, `insert_edge()` - Single entity/edge insertion
//! - [`bulk_insert_entities()`], [`bulk_insert_edges()`] - Batch operations
//! - `neighbors()` - Direct neighbor queries
//! - `bfs()`, `k_hop()`, `shortest_path()` - Graph traversal algorithms
228//! - [`pattern_engine`] - Pattern matching and triple storage
229//!
230//! ## Graph Algorithms
231//! - [`pagerank`] - PageRank centrality
232//! - [`betweenness_centrality`] - Betweenness centrality
233//! - [`louvain_communities`] - Louvain community detection
234//! - [`label_propagation`] - Label propagation algorithm
235//!
236//! ## Vector Search
237//! - [`hnsw::HnswIndex`] - HNSW vector search index
238//! - [`hnsw::HnswConfig`] - HNSW configuration
239//! - [`hnsw::DistanceMetric`] - Distance metrics (Cosine, Euclidean, etc.)
240//!
241//! ## Utilities
242//! - [`SqliteGraphError`] - Comprehensive error handling
243//! - [`GraphSnapshot`] - MVCC snapshot system
244//! - [`GraphIntrospection`] - Introspection and debugging APIs
245//! - [`ProgressCallback`] - Algorithm progress tracking
246//! - [`recovery`] - Database backup and restore utilities
247
248// Core public modules
249pub mod backend;
250pub mod config;
251pub mod debug;
252pub mod errors;
253pub mod graph;
254pub mod introspection;
255pub mod snapshot;
256
257// Re-export core utilities that are stable public APIs
258pub use api_ergonomics::{Label, NodeId, PropertyKey, PropertyValue};
259pub use graph_opt::{
260 GraphEdgeCreate, GraphEntityCreate, bulk_insert_edges, bulk_insert_entities, cache_stats,
261};
262pub use index::{add_label, add_property};
263pub use mvcc::{GraphSnapshot, SnapshotState};
264pub use pattern_engine::{PatternTriple, TripleMatch, match_triples};
265pub use pattern_engine_cache::match_triples_fast;
266pub use query::GraphQuery;
267pub use recovery::{dump_graph_to_path, load_graph_from_path, load_graph_from_reader};
268pub use snapshot::SnapshotId;
269
270// Re-export backend implementations
271pub use backend::{BackendDirection, ChainStep, GraphBackend};
272pub use backend::{BackupResult, EdgeSpec, NativeGraphBackend, NeighborQuery, NodeSpec, SqliteGraphBackend};
273
274// Re-export backup API for convenience
275#[cfg(feature = "native-v2")]
276pub use backend::native::v2::backup::{BackupConfig, create_backup as database_backup};
277
278// Re-export restore API for convenience
279#[cfg(feature = "native-v2")]
280pub use backend::native::v2::restore::{RestoreConfig, RestoreResult, restore_backup as database_restore};
281
282// Re-export WAL functionality for native backend
283#[cfg(feature = "native-v2")]
284pub use backend::native::v2::wal::{
285 V2WALConfig, V2WALManager,
286 IsolationLevel, WALManagerMetrics,
287};
288
289// Re-export WAL integration for advanced usage
290#[cfg(feature = "native-v2")]
291pub use backend::native::v2::wal::{
292 V2GraphWALIntegrator, GraphWALIntegrationConfig,
293 GraphOperationResult, OperationMetrics,
294};
295
296// Re-export configuration and factory
297pub use config::{BackendKind, GraphConfig, NativeConfig, SqliteConfig, open_graph};
298
299// Re-export error types
300pub use errors::SqliteGraphError;
301
302// Re-export graph core types
303pub use graph::{GraphEdge, GraphEntity, SqliteGraph};
304
305// Re-export graph algorithms
306pub use algo::{
307 betweenness_centrality, label_propagation, louvain_communities, pagerank,
308 betweenness_centrality_with_progress, louvain_communities_with_progress, pagerank_with_progress,
309};
310
311// Re-export progress tracking
312pub use progress::{ConsoleProgress, NoProgress, ProgressCallback, ProgressState};
313
314// Re-export introspection API
315pub use introspection::{GraphIntrospection, EdgeCount, IntrospectError};
316
317// Internal modules - not part of public API
318pub mod algo; // Public for tests
319pub mod progress; // Public for tests and progress API usage
320mod api_ergonomics;
321pub mod backend_selector;
322pub mod bfs; // Public for tests
323pub mod cache; // Public for tests
324mod client; // Public for binary
325mod fault_injection; // Public for tests
326pub mod graph_opt; // Public for tests
327pub mod index; // Public for tests
328pub mod multi_hop; // Public for tests
329mod pattern_engine_cache; // Already moved to core above
330pub mod query_cache; // Public for internal use and tests
331mod reasoning; // Public for binary
pub mod schema; // Public for tests
333
334// Core public modules (these were accidentally removed)
335pub mod mvcc; // Already exported above
336pub mod pattern_engine; // Already exported above
337pub mod query; // Already exported above
338pub mod recovery; // Already exported above
339
340// Modules that need to remain public for specific use cases
341pub mod bench_gates; // Public for tests
342pub mod bench_meta; // Public for tests
343pub mod bench_regression; // Public for tests
344pub mod bench_utils; // Public for tests
345pub mod dsl; // Public for examples
pub mod hnsw; // HNSW vector search capabilities
pub mod pattern; // Public for binary
348
349// Dependency monitoring module (feature-gated)
350#[cfg(feature = "dependency-monitoring")]
351pub mod dependency_monitor;
352
353// Re-export cache statistics for benchmarking
354pub use cache::CacheStats;
355
/// Create a backup of a SQLiteGraph Native V2 database.
///
/// Convenience wrapper around `database_backup`: it builds a `BackupConfig`
/// from `backup_dir` with `BackupConfig::new`, runs the backup, and copies the
/// returned fields into the [`BackupResult`] re-exported from `backend`.
/// For more control over backup options, call `database_backup` with your own
/// `BackupConfig`.
///
/// # Arguments
/// * `db_path` - Path to the database file
/// * `backup_dir` - Directory where backup will be stored
///
/// # Errors
/// Any error returned by `database_backup` is converted to its string form and
/// wrapped with `SqliteGraphError::connection`.
///
/// # Example
/// ```no_run
/// use std::path::Path;
///
/// let result = sqlitegraph::create_backup(
///     Path::new("mydb.v2"),
///     Path::new("backups"),
/// )?;
/// # Ok::<(), sqlitegraph::SqliteGraphError>(())
/// ```
#[cfg(feature = "native-v2")]
pub fn create_backup(
    db_path: &std::path::Path,
    backup_dir: &std::path::Path,
) -> Result<BackupResult, SqliteGraphError> {
    let backup_config = crate::backend::native::v2::backup::BackupConfig::new(backup_dir);

    database_backup(db_path, backup_config)
        // Copy the native result field by field into the crate-level struct.
        .map(|native| BackupResult {
            snapshot_path: native.snapshot_path,
            manifest_path: native.manifest_path,
            size_bytes: native.size_bytes,
            checksum: native.checksum,
            record_count: native.record_count,
            duration_secs: native.duration_secs,
            timestamp: native.timestamp,
            checkpoint_performed: native.checkpoint_performed,
        })
        .map_err(|err| SqliteGraphError::connection(err.to_string()))
}
394
/// Restore a SQLiteGraph Native V2 database from a backup.
///
/// Convenience wrapper around `database_restore`: it builds a `RestoreConfig`
/// from `backup_dir` and `target_path`, applies `overwrite` through
/// `with_overwrite`, and passes the config on. For more control over restore
/// options, use `database_restore` with your own `RestoreConfig`.
///
/// # Arguments
/// * `backup_dir` - Directory containing backup files (snapshot + manifest)
/// * `target_path` - Path where database will be restored
/// * `overwrite` - Whether to overwrite an existing file. This argument is
///   required (Rust has no default arguments); pass `false` to leave
///   overwriting disabled.
///
/// # Errors
/// Any error returned by `database_restore` is converted to its string form
/// and wrapped with `SqliteGraphError::connection`.
///
/// # Example
/// ```no_run
/// use std::path::Path;
///
/// let result = sqlitegraph::restore_from_backup(
///     Path::new("backups/backup_123456"),
///     Path::new("restored.v2"),
///     true, // overwrite if exists
/// )?;
/// # Ok::<(), sqlitegraph::SqliteGraphError>(())
/// ```
#[cfg(feature = "native-v2")]
pub fn restore_from_backup(
    backup_dir: &std::path::Path,
    target_path: &std::path::Path,
    overwrite: bool,
) -> Result<RestoreResult, SqliteGraphError> {
    let config = RestoreConfig::new(backup_dir, target_path).with_overwrite(overwrite);
    database_restore(config).map_err(|e| SqliteGraphError::connection(e.to_string()))
}