sqlitegraph/introspection.rs
1//! Graph introspection APIs for debugging and observability.
2//!
3//! This module provides structured introspection capabilities for SQLiteGraph,
4//! enabling developers and AI agents to inspect internal graph state for debugging
5//! and observability purposes.
6//!
7//! # What is GraphIntrospection?
8//!
9//! [`GraphIntrospection`] provides a JSON-serializable snapshot of graph state,
10//! designed for both human debugging and LLM consumption. It exposes internal
11//! metrics that are otherwise difficult to access:
12//!
13//! - **Backend type**: SQLite vs Native backend
14//! - **Node/edge counts**: Graph size metrics
15//! - **Cache statistics**: Hit ratios and entry counts
16//! - **File sizes**: Database and WAL file sizes
//! - **Memory usage**: Optional memory estimate, plus in-memory vs file-based detection
18//!
19//! # Key Types
20//!
21//! - [`GraphIntrospection`] - Comprehensive introspection snapshot
22//! - [`EdgeCount`] - Edge count with exact/estimate/unavailable states
23//! - [`IntrospectError`] - Introspection-specific errors
24//!
25//! # Usage for Debugging
26//!
27//! ## Basic Introspection
28//!
29//! ```rust,ignore
30//! use sqlitegraph::{open_graph, GraphConfig};
31//!
32//! let graph = open_graph("my_graph.db", &GraphConfig::sqlite())?;
33//! let intro = graph.introspect()?;
34//!
35//! println!("Backend: {}", intro.backend_type);
36//! println!("Nodes: {}", intro.node_count);
37//! println!("Edges: {:?}", intro.edge_count);
38//! println!("Cache hit ratio: {:.2}%", intro.cache_stats.hit_ratio().unwrap_or(0.0));
39//! ```
40//!
41//! ## Cache Performance Analysis
42//!
43//! ```rust,ignore
44//! let intro = graph.introspect()?;
45//!
46//! match intro.cache_stats.hit_ratio() {
47//! Some(ratio) if ratio < 50.0 => {
48//! println!("Warning: Low cache hit ratio ({:.1}%)", ratio);
49//! println!("Consider adjusting cache size or workload");
50//! }
51//! Some(ratio) => {
52//! println!("Good cache performance: {:.1}% hit ratio", ratio);
53//! }
54//! None => {
55//! println!("No cache activity yet");
56//! }
57//! }
58//! ```
59//!
60//! # Edge Count Strategy
61//!
62//! The [`EdgeCount`] enum provides **adaptive edge counting** based on graph size:
63//!
64//! ## Exact Count (< 10K edges)
65//!
66//! For small to medium graphs, edges are counted exactly:
67//!
68//! ```rust,ignore
69//! match intro.edge_count {
70//! EdgeCount::Exact(count) => {
71//! println!("Graph has {} edges", count);
72//! }
73//! _ => {}
74//! }
75//! ```
76//!
77//! ## Sampled Estimate (≥ 10K edges)
78//!
79//! For large graphs, edges are estimated via sampling to avoid expensive scans:
80//!
81//! ```rust,ignore
82//! match intro.edge_count {
83//! EdgeCount::Estimate { count, min, max, sample_size } => {
84//! println!("Estimated {} edges (95% CI: {}-{})", count, min, max);
85//! println!("Based on {} node sample", sample_size);
86//! }
87//! _ => {}
88//! }
89//! ```
90//!
91//! ### Estimation Algorithm
92//!
93//! - **Sample size**: 1000 nodes (or all nodes if smaller)
94//! - **Confidence interval**: 95% via binomial proportion
95//! - **Accuracy**: Typically ±5% for uniform degree distributions
96//! - **Cost**: O(sample_size) vs O(V) for exact count
97//!
98//! ## Unavailable (Backend-Specific)
99//!
100//! Some backends may not support edge counting:
101//!
102//! ```rust,ignore
103//! match intro.edge_count {
104//! EdgeCount::Unavailable => {
105//! println!("Edge counting not available for this backend");
106//! }
107//! _ => {}
108//! }
109//! ```
110//!
111//! # File Size Detection
112//!
113//! Introspection provides **file size metrics** for file-based databases:
114//!
115//! ## Database File Size
116//!
117//! ```rust,ignore
118//! if let Some(size) = intro.file_size {
119//! println!("Database file: {} MB", size / 1_048_576);
120//! } else {
121//! println!("In-memory database (no file)");
122//! }
123//! ```
124//!
125//! ## WAL File Size
126//!
127//! ```rust,ignore
128//! if let Some(wal_size) = intro.wal_size {
129//! println!("WAL file: {} MB", wal_size / 1_048_576);
130//! if wal_size > 100_000_000 {
131//! println!("Warning: Large WAL - consider checkpoint");
132//! }
133//! }
134//! ```
135//!
136//! # JSON Serialization for LLMs
137//!
138//! The introspection data structure is fully JSON-serializable for LLM consumption:
139//!
140//! ```rust,ignore
141//! use serde_json;
142//!
143//! let intro = graph.introspect()?;
144//! let json = serde_json::to_string_pretty(&intro)?;
145//!
146//! // Pass to LLM for analysis
147//! let analysis = llm.analyze(&json)?;
148//! ```
149//!
150//! Example JSON output:
151//!
152//! ```json
153//! {
154//! "backend_type": "sqlite",
155//! "node_count": 10000,
156//! "edge_count": {
157//! "Estimate": {
158//! "count": 45000,
159//! "min": 44000,
160//! "max": 46000,
161//! "sample_size": 1000
162//! }
163//! },
164//! "cache_stats": {
165//! "hits": 85000,
166//! "misses": 15000,
167//! "entries": 5000
168//! },
//!   "memory_usage": null,
//!   "file_size": 10485760,
170//! "wal_size": 524288,
171//! "is_in_memory": false
172//! }
173//! ```
174//!
175//! # Performance Considerations
176//!
177//! - **Introspection cost**: O(sample_size) for edge estimation, O(1) for other metrics
178//! - **Cache stats**: Aggregated from atomic counters (no locking)
//! - **File sizes**: Filesystem metadata (`stat()`-style) lookups performed on each call; results are not cached (negligible overhead)
180//! - **Safe for production**: Minimal performance impact
181
182use serde::Serialize;
183use std::path::Path;
184
185use crate::cache::CacheStats;
186use crate::errors::SqliteGraphError;
187
/// Comprehensive introspection data for a graph instance.
///
/// This struct provides a JSON-serializable snapshot of graph state,
/// designed for both human debugging and LLM consumption. It is a plain
/// data carrier: all fields are public and the type derives `Debug`,
/// `Clone` and `Serialize`.
///
/// # Example
///
/// ```rust,ignore
/// use sqlitegraph::{open_graph, GraphConfig};
///
/// let graph = open_graph("my_graph.db", &GraphConfig::sqlite())?;
/// let intro = graph.introspect()?;
///
/// println!("Backend: {}", intro.backend_type);
/// println!("Nodes: {}", intro.node_count);
/// // `hit_ratio()` returns `Option<f64>`; it is `None` when there have
/// // been no cache accesses, so supply a fallback before formatting.
/// println!(
///     "Cache hit ratio: {:.2}%",
///     intro.cache_stats.hit_ratio().unwrap_or(0.0)
/// );
///
/// // Serialize to JSON for LLM consumption
/// let json = serde_json::to_string_pretty(&intro)?;
/// ```
#[derive(Debug, Clone, Serialize)]
pub struct GraphIntrospection {
    /// Backend type identifier ("sqlite" or "native-v2")
    pub backend_type: String,

    /// Total number of nodes in the graph
    pub node_count: usize,

    /// Total number of edges in the graph (estimated for large graphs).
    ///
    /// See [`EdgeCount`] for the exact / estimate / unavailable states.
    pub edge_count: EdgeCount,

    /// Adjacency cache statistics
    pub cache_stats: CacheStats,

    /// Memory usage estimate in bytes (if available)
    pub memory_usage: Option<usize>,

    /// Database file size in bytes (for file-based backends)
    pub file_size: Option<u64>,

    /// WAL file size in bytes (for backends with WAL enabled)
    pub wal_size: Option<u64>,

    /// Whether this is an in-memory database
    pub is_in_memory: bool,
}
234
/// Edge count representation.
///
/// Provides either an exact count or an estimate for large graphs
/// where counting would be prohibitively expensive, plus an explicit
/// "unavailable" state for backends that cannot report a count at all.
///
/// Use [`EdgeCount::value`] to obtain the primary number without matching
/// on the variant. No `#[serde(...)]` attributes are applied, so the derived
/// `Serialize` implementation uses serde's default enum representation.
#[derive(Debug, Clone, Serialize)]
pub enum EdgeCount {
    /// Exact edge count (for small to medium graphs)
    Exact(usize),

    /// Estimated edge count with confidence interval
    Estimate {
        /// Estimated count
        count: usize,
        /// Lower bound of confidence interval
        min: usize,
        /// Upper bound of confidence interval
        max: usize,
        /// Sample size used for estimation
        sample_size: usize,
    },

    /// Edge counting not available for this backend
    Unavailable,
}
259
260impl EdgeCount {
261 /// Get the primary count value (exact or estimated).
262 pub fn value(&self) -> Option<usize> {
263 match self {
264 EdgeCount::Exact(count) => Some(*count),
265 EdgeCount::Estimate { count, .. } => Some(*count),
266 EdgeCount::Unavailable => None,
267 }
268 }
269}
270
271impl CacheStats {
272 /// Calculate cache hit ratio as a percentage.
273 ///
274 /// Returns None if there have been no cache accesses.
275 pub fn hit_ratio(&self) -> Option<f64> {
276 let total = self.hits + self.misses;
277 if total == 0 {
278 None
279 } else {
280 Some((self.hits as f64 / total as f64) * 100.0)
281 }
282 }
283}
284
/// Introspection-specific errors.
///
/// Every variant carries a free-form `String` with the underlying failure
/// detail; the `thiserror`-generated `Display` output prefixes that detail
/// with a fixed, variant-specific message. A `From<IntrospectError>`
/// conversion into [`SqliteGraphError`] is provided in this module.
#[derive(Debug, Clone, thiserror::Error)]
pub enum IntrospectError {
    /// Failed to query node count
    #[error("Failed to query node count: {0}")]
    NodeCountError(String),

    /// Failed to query edge count
    #[error("Failed to query edge count: {0}")]
    EdgeCountError(String),

    /// Failed to query file size
    #[error("Failed to query file size: {0}")]
    FileSizeError(String),

    /// Backend not supported for introspection
    #[error("Backend not supported: {0}")]
    UnsupportedBackend(String),
}
304
305impl From<IntrospectError> for SqliteGraphError {
306 fn from(err: IntrospectError) -> Self {
307 SqliteGraphError::invalid_input(err.to_string())
308 }
309}
310
/// Look up the on-disk size, in bytes, of the file at `path`.
///
/// Returns `None` when the filesystem metadata cannot be read (for example
/// because nothing exists at `path`); the underlying I/O error is discarded.
pub fn get_file_size<P: AsRef<Path>>(path: P) -> Option<u64> {
    match std::fs::metadata(path.as_ref()) {
        Ok(meta) => Some(meta.len()),
        Err(_) => None,
    }
}
317
/// Get WAL file size, in bytes, for a database path.
///
/// Two naming conventions are probed, in this order:
///
/// 1. the database path with its extension replaced by `wal`
///    (`graph.db` -> `graph.wal`), which is the lookup this function has
///    always performed;
/// 2. SQLite's convention of appending `-wal` to the full database file name
///    (`graph.db` -> `graph.db-wal`).
///
/// The size of the first candidate whose metadata can be read is returned.
///
/// Returns `None` when neither file can be inspected, or when `path` has no
/// final file-name component (e.g. an empty path or `..`), since no WAL name
/// can be derived from such a path.
pub fn get_wal_size<P: AsRef<Path>>(path: P) -> Option<u64> {
    let db_path = path.as_ref();

    // Without a file name, `with_extension` would leave the path untouched
    // and we would end up reporting the size of the path itself.
    let mut sqlite_wal_name = db_path.file_name()?.to_os_string();
    sqlite_wal_name.push("-wal");

    let candidates = [
        db_path.with_extension("wal"),
        db_path.with_file_name(sqlite_wal_name),
    ];

    // `find_map` is lazy: the second candidate is only stat'ed when the
    // first one is missing or unreadable.
    candidates
        .iter()
        .find_map(|candidate| std::fs::metadata(candidate).ok())
        .map(|meta| meta.len())
}
325
#[cfg(test)]
mod tests {
    use super::*;

    /// 80 hits out of 100 total accesses must be reported as exactly 80%.
    #[test]
    fn test_cache_hit_ratio() {
        let ratio = CacheStats {
            hits: 80,
            misses: 20,
            entries: 100,
        }
        .hit_ratio();

        assert_eq!(ratio, Some(80.0));
    }

    /// With no recorded accesses there is no meaningful ratio.
    #[test]
    fn test_cache_hit_ratio_no_accesses() {
        let untouched = CacheStats {
            hits: 0,
            misses: 0,
            entries: 0,
        };

        assert_eq!(untouched.hit_ratio(), None);
    }

    /// An exact count is returned unchanged.
    #[test]
    fn test_edge_count_exact() {
        assert_eq!(EdgeCount::Exact(1000).value(), Some(1000));
    }

    /// An estimate reports its central `count`, not the interval bounds.
    #[test]
    fn test_edge_count_estimate() {
        let estimate = EdgeCount::Estimate {
            count: 1000,
            min: 900,
            max: 1100,
            sample_size: 100,
        };

        assert_eq!(estimate.value(), Some(1000));
    }

    /// An unavailable count has no value.
    #[test]
    fn test_edge_count_unavailable() {
        assert_eq!(EdgeCount::Unavailable.value(), None);
    }
}
375}