Skip to main content

sqlitegraph/
introspection.rs

1//! Graph introspection APIs for debugging and observability.
2//!
3//! This module provides structured introspection capabilities for SQLiteGraph,
4//! enabling developers and AI agents to inspect internal graph state for debugging
5//! and observability purposes.
6//!
7//! # What is GraphIntrospection?
8//!
9//! [`GraphIntrospection`] provides a JSON-serializable snapshot of graph state,
10//! designed for both human debugging and LLM consumption. It exposes internal
11//! metrics that are otherwise difficult to access:
12//!
13//! - **Backend type**: SQLite vs Native backend
14//! - **Node/edge counts**: Graph size metrics
15//! - **Cache statistics**: Hit ratios and entry counts
16//! - **File sizes**: Database and WAL file sizes
17//! - **Memory usage**: In-memory vs file-based detection
18//!
19//! # Key Types
20//!
21//! - [`GraphIntrospection`] - Comprehensive introspection snapshot
22//! - [`EdgeCount`] - Edge count with exact/estimate/unavailable states
23//! - [`IntrospectError`] - Introspection-specific errors
24//!
25//! # Usage for Debugging
26//!
27//! ## Basic Introspection
28//!
29//! ```rust,ignore
30//! use sqlitegraph::{open_graph, GraphConfig};
31//!
32//! let graph = open_graph("my_graph.db", &GraphConfig::sqlite())?;
33//! let intro = graph.introspect()?;
34//!
35//! println!("Backend: {}", intro.backend_type);
36//! println!("Nodes: {}", intro.node_count);
37//! println!("Edges: {:?}", intro.edge_count);
38//! println!("Cache hit ratio: {:.2}%", intro.cache_stats.hit_ratio().unwrap_or(0.0));
39//! ```
40//!
41//! ## Cache Performance Analysis
42//!
43//! ```rust,ignore
44//! let intro = graph.introspect()?;
45//!
46//! match intro.cache_stats.hit_ratio() {
47//!     Some(ratio) if ratio < 50.0 => {
48//!         println!("Warning: Low cache hit ratio ({:.1}%)", ratio);
49//!         println!("Consider adjusting cache size or workload");
50//!     }
51//!     Some(ratio) => {
52//!         println!("Good cache performance: {:.1}% hit ratio", ratio);
53//!     }
54//!     None => {
55//!         println!("No cache activity yet");
56//!     }
57//! }
58//! ```
59//!
60//! # Edge Count Strategy
61//!
62//! The [`EdgeCount`] enum provides **adaptive edge counting** based on graph size:
63//!
64//! ## Exact Count (< 10K edges)
65//!
66//! For small to medium graphs, edges are counted exactly:
67//!
68//! ```rust,ignore
69//! match intro.edge_count {
70//!     EdgeCount::Exact(count) => {
71//!         println!("Graph has {} edges", count);
72//!     }
73//!     _ => {}
74//! }
75//! ```
76//!
77//! ## Sampled Estimate (≥ 10K edges)
78//!
79//! For large graphs, edges are estimated via sampling to avoid expensive scans:
80//!
81//! ```rust,ignore
82//! match intro.edge_count {
83//!     EdgeCount::Estimate { count, min, max, sample_size } => {
84//!         println!("Estimated {} edges (95% CI: {}-{})", count, min, max);
85//!         println!("Based on {} node sample", sample_size);
86//!     }
87//!     _ => {}
88//! }
89//! ```
90//!
91//! ### Estimation Algorithm
92//!
93//! - **Sample size**: 1000 nodes (or all nodes if smaller)
94//! - **Confidence interval**: 95% via binomial proportion
95//! - **Accuracy**: Typically ±5% for uniform degree distributions
96//! - **Cost**: O(sample_size) vs O(V) for exact count
97//!
98//! ## Unavailable (Backend-Specific)
99//!
100//! Some backends may not support edge counting:
101//!
102//! ```rust,ignore
103//! match intro.edge_count {
104//!     EdgeCount::Unavailable => {
105//!         println!("Edge counting not available for this backend");
106//!     }
107//!     _ => {}
108//! }
109//! ```
110//!
111//! # File Size Detection
112//!
113//! Introspection provides **file size metrics** for file-based databases:
114//!
115//! ## Database File Size
116//!
117//! ```rust,ignore
118//! if let Some(size) = intro.file_size {
119//!     println!("Database file: {} MB", size / 1_048_576);
120//! } else {
121//!     println!("In-memory database (no file)");
122//! }
123//! ```
124//!
125//! ## WAL File Size
126//!
127//! ```rust,ignore
128//! if let Some(wal_size) = intro.wal_size {
129//!     println!("WAL file: {} MB", wal_size / 1_048_576);
130//!     if wal_size > 100_000_000 {
131//!         println!("Warning: Large WAL - consider checkpoint");
132//!     }
133//! }
134//! ```
135//!
136//! # JSON Serialization for LLMs
137//!
138//! The introspection data structure is fully JSON-serializable for LLM consumption:
139//!
140//! ```rust,ignore
141//! use serde_json;
142//!
143//! let intro = graph.introspect()?;
144//! let json = serde_json::to_string_pretty(&intro)?;
145//!
146//! // Pass to LLM for analysis
147//! let analysis = llm.analyze(&json)?;
148//! ```
149//!
150//! Example JSON output:
151//!
152//! ```json
153//! {
154//!   "backend_type": "sqlite",
155//!   "node_count": 10000,
156//!   "edge_count": {
157//!     "Estimate": {
158//!       "count": 45000,
159//!       "min": 44000,
160//!       "max": 46000,
161//!       "sample_size": 1000
162//!     }
163//!   },
164//!   "cache_stats": {
165//!     "hits": 85000,
166//!     "misses": 15000,
167//!     "entries": 5000
168//!   },
169//!   "file_size": 10485760,
170//!   "wal_size": 524288,
171//!   "is_in_memory": false
172//! }
173//! ```
174//!
175//! # Performance Considerations
176//!
177//! - **Introspection cost**: O(sample_size) for edge estimation, O(1) for other metrics
178//! - **Cache stats**: Aggregated from atomic counters (no locking)
179//! - **File sizes**: Cached `stat()` calls (negligible overhead)
180//! - **Safe for production**: Minimal performance impact
181
182use serde::Serialize;
183use std::path::Path;
184
185use crate::cache::CacheStats;
186use crate::errors::SqliteGraphError;
187
188/// Comprehensive introspection data for a graph instance.
189///
190/// This struct provides a JSON-serializable snapshot of graph state,
191/// designed for both human debugging and LLM consumption.
192///
193/// # Example
194///
195/// ```rust,ignore
196/// use sqlitegraph::{open_graph, GraphConfig};
197///
198/// let graph = open_graph("my_graph.db", &GraphConfig::sqlite())?;
199/// let intro = graph.introspect()?;
200///
201/// println!("Backend: {}", intro.backend_type);
202/// println!("Nodes: {}", intro.node_count);
203/// println!("Cache hit ratio: {:.2}%", intro.cache_stats.hit_ratio());
204///
205/// // Serialize to JSON for LLM consumption
206/// let json = serde_json::to_string_pretty(&intro)?;
207/// ```
208#[derive(Debug, Clone, Serialize)]
209pub struct GraphIntrospection {
210    /// Backend type identifier ("sqlite" or "native-v2")
211    pub backend_type: String,
212
213    /// Total number of nodes in the graph
214    pub node_count: usize,
215
216    /// Total number of edges in the graph (estimated for large graphs)
217    pub edge_count: EdgeCount,
218
219    /// Adjacency cache statistics
220    pub cache_stats: CacheStats,
221
222    /// Memory usage estimate in bytes (if available)
223    pub memory_usage: Option<usize>,
224
225    /// Database file size in bytes (for file-based backends)
226    pub file_size: Option<u64>,
227
228    /// WAL file size in bytes (for backends with WAL enabled)
229    pub wal_size: Option<u64>,
230
231    /// Whether this is an in-memory database
232    pub is_in_memory: bool,
233}
234
235/// Edge count representation.
236///
237/// Provides either an exact count or an estimate for large graphs
238/// where counting would be prohibitively expensive.
239#[derive(Debug, Clone, Serialize)]
240pub enum EdgeCount {
241    /// Exact edge count (for small to medium graphs)
242    Exact(usize),
243
244    /// Estimated edge count with confidence interval
245    Estimate {
246        /// Estimated count
247        count: usize,
248        /// Lower bound of confidence interval
249        min: usize,
250        /// Upper bound of confidence interval
251        max: usize,
252        /// Sample size used for estimation
253        sample_size: usize,
254    },
255
256    /// Edge counting not available for this backend
257    Unavailable,
258}
259
260impl EdgeCount {
261    /// Get the primary count value (exact or estimated).
262    pub fn value(&self) -> Option<usize> {
263        match self {
264            EdgeCount::Exact(count) => Some(*count),
265            EdgeCount::Estimate { count, .. } => Some(*count),
266            EdgeCount::Unavailable => None,
267        }
268    }
269}
270
271impl CacheStats {
272    /// Calculate cache hit ratio as a percentage.
273    ///
274    /// Returns None if there have been no cache accesses.
275    pub fn hit_ratio(&self) -> Option<f64> {
276        let total = self.hits + self.misses;
277        if total == 0 {
278            None
279        } else {
280            Some((self.hits as f64 / total as f64) * 100.0)
281        }
282    }
283}
284
285/// Introspection-specific errors.
286#[derive(Debug, Clone, thiserror::Error)]
287pub enum IntrospectError {
288    /// Failed to query node count
289    #[error("Failed to query node count: {0}")]
290    NodeCountError(String),
291
292    /// Failed to query edge count
293    #[error("Failed to query edge count: {0}")]
294    EdgeCountError(String),
295
296    /// Failed to query file size
297    #[error("Failed to query file size: {0}")]
298    FileSizeError(String),
299
300    /// Backend not supported for introspection
301    #[error("Backend not supported: {0}")]
302    UnsupportedBackend(String),
303}
304
305impl From<IntrospectError> for SqliteGraphError {
306    fn from(err: IntrospectError) -> Self {
307        SqliteGraphError::invalid_input(err.to_string())
308    }
309}
310
/// Returns the size in bytes of the filesystem entry at `path`.
///
/// Any failure to read the metadata (for example, the path does not exist)
/// is collapsed into `None`. The function does not check that `path` is a
/// regular file; it reports whatever length the metadata contains.
pub fn get_file_size<P: AsRef<Path>>(path: P) -> Option<u64> {
    match std::fs::metadata(path.as_ref()) {
        Ok(meta) => Some(meta.len()),
        Err(_) => None,
    }
}
315
/// Returns the size in bytes of the write-ahead log belonging to the
/// database at `path`.
///
/// Two naming conventions are probed, in this order:
///
/// 1. `<path>-wal` — the name SQLite gives its WAL file (the suffix is
///    appended to the full database file name, e.g. `graph.db-wal`).
/// 2. `<path>` with its extension replaced by `wal` (e.g. `graph.wal`) — the
///    only location this function checked previously, kept as a fallback so
///    existing callers see no regression.
///
/// Returns `None` when `path` is empty or when neither candidate's metadata
/// can be read (typically because no WAL file exists).
pub fn get_wal_size<P: AsRef<Path>>(path: P) -> Option<u64> {
    let db_path = path.as_ref();

    // An empty path has no file name to derive a WAL name from; without this
    // guard the first probe would look for a stray "-wal" in the current
    // directory.
    if db_path.as_os_str().is_empty() {
        return None;
    }

    // Append to the raw OS string rather than using `with_extension`, which
    // would replace ".db" instead of extending it.
    let mut sqlite_wal = db_path.as_os_str().to_os_string();
    sqlite_wal.push("-wal");

    std::fs::metadata(&sqlite_wal)
        .or_else(|_| std::fs::metadata(db_path.with_extension("wal")))
        .ok()
        .map(|meta| meta.len())
}
321
#[cfg(test)]
mod tests {
    use super::*;

    /// 80 hits out of 100 accesses must be reported as 80%.
    #[test]
    fn test_cache_hit_ratio() {
        let ratio = CacheStats {
            hits: 80,
            misses: 20,
            entries: 100,
        }
        .hit_ratio();

        assert_eq!(ratio, Some(80.0));
    }

    /// With no recorded accesses the ratio is undefined.
    #[test]
    fn test_cache_hit_ratio_no_accesses() {
        let untouched = CacheStats {
            hits: 0,
            misses: 0,
            entries: 0,
        };

        assert_eq!(untouched.hit_ratio(), None);
    }

    /// An exact count is returned unchanged.
    #[test]
    fn test_edge_count_exact() {
        assert_eq!(EdgeCount::Exact(1000).value(), Some(1000));
    }

    /// An estimate reports its point estimate, not its bounds.
    #[test]
    fn test_edge_count_estimate() {
        let estimate = EdgeCount::Estimate {
            count: 1000,
            min: 900,
            max: 1100,
            sample_size: 100,
        };

        assert_eq!(estimate.value(), Some(1000));
    }

    /// An unavailable count has no value.
    #[test]
    fn test_edge_count_unavailable() {
        assert_eq!(EdgeCount::Unavailable.value(), None);
    }
}
371}