cadi_core/graph/
store.rs

1//! Sled-backed Graph Store
2//!
3//! The core persistent storage for the CADI dependency graph.
4//! Uses sled for embedded, ACID-compliant storage with O(1) lookups.
5
6use sled::{Db, Tree};
7use std::collections::{HashSet, VecDeque};
8use std::path::Path;
9use std::time::Instant;
10
11use super::{EdgeType, GraphNode, GraphQuery, QueryNode, QueryResult, TraversalDirection};
12use crate::error::{CadiError, CadiResult};
13
14/// The main graph store
15pub struct GraphStore {
16    /// The underlying sled database
17    db: Db,
18
19    /// Chunk metadata: chunk_id -> GraphNode (serialized)
20    nodes: Tree,
21
22    /// Forward edges: chunk_id -> [dependency chunk_ids]
23    /// (things this chunk depends on)
24    dependencies: Tree,
25
26    /// Reverse edges: chunk_id -> [dependent chunk_ids]  
27    /// (things that depend on this chunk)
28    dependents: Tree,
29
30    /// Symbol index: symbol_name -> chunk_id
31    /// For fast "who defines this symbol?" lookups
32    symbols: Tree,
33
34    /// Alias index: alias -> chunk_id
35    /// For human-readable lookups
36    aliases: Tree,
37
38    /// Chunk content storage: chunk_id -> content bytes
39    content: Tree,
40}
41
42impl GraphStore {
43    /// Open or create a graph store at the given path
44    pub fn open(path: impl AsRef<Path>) -> CadiResult<Self> {
45        let db = sled::open(path.as_ref()).map_err(|e| {
46            CadiError::StorageError(format!("Failed to open graph store: {}", e))
47        })?;
48
49        Ok(Self {
50            nodes: db.open_tree("nodes")?,
51            dependencies: db.open_tree("dependencies")?,
52            dependents: db.open_tree("dependents")?,
53            symbols: db.open_tree("symbols")?,
54            aliases: db.open_tree("aliases")?,
55            content: db.open_tree("content")?,
56            db,
57        })
58    }
59
60    /// Create an in-memory graph store (for testing)
61    pub fn in_memory() -> CadiResult<Self> {
62        let config = sled::Config::new().temporary(true);
63        let db = config.open().map_err(|e| {
64            CadiError::StorageError(format!("Failed to create in-memory store: {}", e))
65        })?;
66
67        Ok(Self {
68            nodes: db.open_tree("nodes")?,
69            dependencies: db.open_tree("dependencies")?,
70            dependents: db.open_tree("dependents")?,
71            symbols: db.open_tree("symbols")?,
72            aliases: db.open_tree("aliases")?,
73            content: db.open_tree("content")?,
74            db,
75        })
76    }
77
78    // ========================================================================
79    // Node Operations
80    // ========================================================================
81
82    /// Insert or update a graph node
83    pub fn insert_node(&self, node: &GraphNode) -> CadiResult<()> {
84        let key = node.chunk_id.as_bytes();
85        let value = node.to_bytes()?;
86        
87        self.nodes.insert(key, value)?;
88
89        // Index symbols
90        for symbol in &node.symbols_defined {
91            self.symbols.insert(symbol.as_bytes(), key)?;
92        }
93
94        // Index aliases
95        for alias in &node.aliases {
96            self.aliases.insert(alias.as_bytes(), key)?;
97        }
98
99        // Update edge indices
100        self.update_edge_indices(&node.chunk_id, &node.outgoing_edges, &node.incoming_edges)?;
101
102        Ok(())
103    }
104
105    /// Get a node by chunk ID
106    pub fn get_node(&self, chunk_id: &str) -> CadiResult<Option<GraphNode>> {
107        match self.nodes.get(chunk_id.as_bytes())? {
108            Some(bytes) => Ok(Some(GraphNode::from_bytes(&bytes)?)),
109            None => Ok(None),
110        }
111    }
112
113    /// Check if a node exists
114    pub fn node_exists(&self, chunk_id: &str) -> CadiResult<bool> {
115        Ok(self.nodes.contains_key(chunk_id.as_bytes())?)
116    }
117
118    /// Delete a node and its edges
119    pub fn delete_node(&self, chunk_id: &str) -> CadiResult<bool> {
120        // Get the node first to clean up indices
121        if let Some(node) = self.get_node(chunk_id)? {
122            // Remove from symbol index
123            for symbol in &node.symbols_defined {
124                self.symbols.remove(symbol.as_bytes())?;
125            }
126
127            // Remove from alias index
128            for alias in &node.aliases {
129                self.aliases.remove(alias.as_bytes())?;
130            }
131
132            // Remove edges
133            self.dependencies.remove(chunk_id.as_bytes())?;
134            self.dependents.remove(chunk_id.as_bytes())?;
135
136            // Remove content
137            self.content.remove(chunk_id.as_bytes())?;
138
139            // Remove node
140            self.nodes.remove(chunk_id.as_bytes())?;
141
142            Ok(true)
143        } else {
144            Ok(false)
145        }
146    }
147
148    // ========================================================================
149    // Content Operations
150    // ========================================================================
151
152    /// Store chunk content
153    pub fn store_content(&self, chunk_id: &str, content: &[u8]) -> CadiResult<()> {
154        self.content.insert(chunk_id.as_bytes(), content)?;
155        Ok(())
156    }
157
158    /// Get chunk content
159    pub fn get_content(&self, chunk_id: &str) -> CadiResult<Option<Vec<u8>>> {
160        Ok(self.content.get(chunk_id.as_bytes())?.map(|v| v.to_vec()))
161    }
162
163    /// Get content as string
164    pub fn get_content_str(&self, chunk_id: &str) -> CadiResult<Option<String>> {
165        match self.get_content(chunk_id)? {
166            Some(bytes) => Ok(Some(String::from_utf8_lossy(&bytes).to_string())),
167            None => Ok(None),
168        }
169    }
170
171    // ========================================================================
172    // Edge Operations
173    // ========================================================================
174
175    /// Add a dependency edge: source depends on target
176    pub fn add_dependency(&self, source: &str, target: &str, edge_type: EdgeType) -> CadiResult<()> {
177        // Add to forward index (source -> dependencies)
178        self.add_to_edge_list(&self.dependencies, source, target, edge_type)?;
179        
180        // Add to reverse index (target -> dependents)
181        self.add_to_edge_list(&self.dependents, target, source, edge_type)?;
182
183        Ok(())
184    }
185
186    /// Get all dependencies of a chunk (things it needs)
187    pub fn get_dependencies(&self, chunk_id: &str) -> CadiResult<Vec<(EdgeType, String)>> {
188        self.get_edge_list(&self.dependencies, chunk_id)
189    }
190
191    /// Get all dependents of a chunk (things that need it)
192    pub fn get_dependents(&self, chunk_id: &str) -> CadiResult<Vec<(EdgeType, String)>> {
193        self.get_edge_list(&self.dependents, chunk_id)
194    }
195
196    /// Get dependencies filtered by edge type
197    pub fn get_dependencies_of_type(
198        &self,
199        chunk_id: &str,
200        edge_type: EdgeType,
201    ) -> CadiResult<Vec<String>> {
202        Ok(self
203            .get_dependencies(chunk_id)?
204            .into_iter()
205            .filter(|(et, _)| *et == edge_type)
206            .map(|(_, id)| id)
207            .collect())
208    }
209
210    // ========================================================================
211    // Symbol & Alias Lookups
212    // ========================================================================
213
214    /// Find the chunk that defines a symbol
215    pub fn find_symbol(&self, symbol: &str) -> CadiResult<Option<String>> {
216        match self.symbols.get(symbol.as_bytes())? {
217            Some(bytes) => Ok(Some(String::from_utf8_lossy(&bytes).to_string())),
218            None => Ok(None),
219        }
220    }
221
222    /// Resolve an alias to chunk ID
223    pub fn resolve_alias(&self, alias: &str) -> CadiResult<Option<String>> {
224        match self.aliases.get(alias.as_bytes())? {
225            Some(bytes) => Ok(Some(String::from_utf8_lossy(&bytes).to_string())),
226            None => Ok(None),
227        }
228    }
229
230    /// Get all symbols defined by a chunk
231    pub fn get_symbols_for_chunk(&self, chunk_id: &str) -> CadiResult<Vec<String>> {
232        if let Some(node) = self.get_node(chunk_id)? {
233            Ok(node.symbols_defined)
234        } else {
235            Ok(Vec::new())
236        }
237    }
238
239    // ========================================================================
240    // Graph Queries
241    // ========================================================================
242
243    /// Execute a graph query
244    pub fn query(&self, query: &GraphQuery) -> CadiResult<QueryResult> {
245        let start = Instant::now();
246        let mut visited = HashSet::new();
247        let mut result_nodes = Vec::new();
248        let mut queue = VecDeque::new();
249
250        // Initialize queue with start nodes
251        for chunk_id in &query.start_nodes {
252            if query.include_start {
253                if let Some(node) = self.get_node(chunk_id)? {
254                    result_nodes.push(QueryNode {
255                        chunk_id: chunk_id.clone(),
256                        alias: node.primary_alias,
257                        depth: 0,
258                        reached_via: None,
259                        parent: None,
260                        token_estimate: node.token_estimate,
261                    });
262                }
263            }
264            visited.insert(chunk_id.clone());
265            queue.push_back((chunk_id.clone(), 0, None, None));
266        }
267
268        // BFS traversal
269        while let Some((current_id, depth, _reached_via, _parent)) = queue.pop_front() {
270            if depth >= query.max_depth {
271                continue;
272            }
273            if result_nodes.len() >= query.max_results {
274                break;
275            }
276
277            // Get edges based on direction
278            let edges = match query.direction {
279                TraversalDirection::Outgoing => self.get_dependencies(&current_id)?,
280                TraversalDirection::Incoming => self.get_dependents(&current_id)?,
281                TraversalDirection::Both => {
282                    let mut all = self.get_dependencies(&current_id)?;
283                    all.extend(self.get_dependents(&current_id)?);
284                    all
285                }
286            };
287
288            for (edge_type, target_id) in edges {
289                if visited.contains(&target_id) {
290                    continue;
291                }
292                if !query.should_follow_edge(edge_type) {
293                    continue;
294                }
295
296                visited.insert(target_id.clone());
297
298                // Get node details
299                if let Some(node) = self.get_node(&target_id)? {
300                    // Apply filters
301                    if let Some(ref lang) = query.language_filter {
302                        if &node.language != lang {
303                            continue;
304                        }
305                    }
306                    if let Some(ref gran) = query.granularity_filter {
307                        if &node.granularity != gran {
308                            continue;
309                        }
310                    }
311
312                    result_nodes.push(QueryNode {
313                        chunk_id: target_id.clone(),
314                        alias: node.primary_alias,
315                        depth: depth + 1,
316                        reached_via: Some(edge_type),
317                        parent: Some(current_id.clone()),
318                        token_estimate: node.token_estimate,
319                    });
320
321                    queue.push_back((
322                        target_id,
323                        depth + 1,
324                        Some(edge_type),
325                        Some(current_id.clone()),
326                    ));
327                }
328            }
329        }
330
331        Ok(QueryResult {
332            nodes: result_nodes,
333            nodes_visited: visited.len(),
334            truncated: visited.len() > query.max_results,
335            execution_time_ms: start.elapsed().as_millis() as u64,
336        })
337    }
338
339    /// Get token estimate for a chunk
340    pub fn get_token_estimate(&self, chunk_id: &str) -> CadiResult<usize> {
341        if let Some(node) = self.get_node(chunk_id)? {
342            Ok(node.token_estimate)
343        } else {
344            Ok(0)
345        }
346    }
347
348    // ========================================================================
349    // Statistics
350    // ========================================================================
351
352    /// Get store statistics
353    pub fn stats(&self) -> CadiResult<GraphStoreStats> {
354        Ok(GraphStoreStats {
355            node_count: self.nodes.len(),
356            symbol_count: self.symbols.len(),
357            alias_count: self.aliases.len(),
358            dependency_entries: self.dependencies.len(),
359            dependent_entries: self.dependents.len(),
360            content_entries: self.content.len(),
361            db_size_bytes: self.db.size_on_disk()?,
362        })
363    }
364
365    /// Flush all pending writes to disk
366    pub fn flush(&self) -> CadiResult<()> {
367        self.db.flush()?;
368        Ok(())
369    }
370
371    // ========================================================================
372    // Private Helpers
373    // ========================================================================
374
375    fn update_edge_indices(
376        &self,
377        chunk_id: &str,
378        outgoing: &[(EdgeType, String)],
379        _incoming: &[(EdgeType, String)],
380    ) -> CadiResult<()> {
381        // Store outgoing edges
382        if !outgoing.is_empty() {
383            let serialized = serde_json::to_vec(outgoing)?;
384            self.dependencies.insert(chunk_id.as_bytes(), serialized)?;
385        }
386
387        // Update reverse indices for each outgoing edge
388        for (edge_type, target) in outgoing {
389            self.add_to_edge_list(&self.dependents, target, chunk_id, *edge_type)?;
390        }
391
392        Ok(())
393    }
394
395    fn add_to_edge_list(
396        &self,
397        tree: &Tree,
398        key: &str,
399        value: &str,
400        edge_type: EdgeType,
401    ) -> CadiResult<()> {
402        let mut edges = self.get_edge_list(tree, key)?;
403        
404        // Avoid duplicates
405        if !edges.iter().any(|(et, v)| et == &edge_type && v == value) {
406            edges.push((edge_type, value.to_string()));
407            let serialized = serde_json::to_vec(&edges)?;
408            tree.insert(key.as_bytes(), serialized)?;
409        }
410        
411        Ok(())
412    }
413
414    fn get_edge_list(&self, tree: &Tree, key: &str) -> CadiResult<Vec<(EdgeType, String)>> {
415        match tree.get(key.as_bytes())? {
416            Some(bytes) => Ok(serde_json::from_slice(&bytes)?),
417            None => Ok(Vec::new()),
418        }
419    }
420}
421
422/// Statistics about the graph store
423#[derive(Debug, Clone)]
424pub struct GraphStoreStats {
425    pub node_count: usize,
426    pub symbol_count: usize,
427    pub alias_count: usize,
428    pub dependency_entries: usize,
429    pub dependent_entries: usize,
430    pub content_entries: usize,
431    pub db_size_bytes: u64,
432}
433
434impl std::fmt::Display for GraphStoreStats {
435    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
436        write!(
437            f,
438            "GraphStore: {} nodes, {} symbols, {} aliases, {} deps, {} size",
439            self.node_count,
440            self.symbol_count,
441            self.alias_count,
442            self.dependency_entries,
443            human_bytes(self.db_size_bytes)
444        )
445    }
446}
447
448fn human_bytes(bytes: u64) -> String {
449    const KB: u64 = 1024;
450    const MB: u64 = KB * 1024;
451    const GB: u64 = MB * 1024;
452
453    if bytes >= GB {
454        format!("{:.2} GB", bytes as f64 / GB as f64)
455    } else if bytes >= MB {
456        format!("{:.2} MB", bytes as f64 / MB as f64)
457    } else if bytes >= KB {
458        format!("{:.2} KB", bytes as f64 / KB as f64)
459    } else {
460        format!("{} bytes", bytes)
461    }
462}
463
464// Implement From for sled errors
465impl From<sled::Error> for CadiError {
466    fn from(e: sled::Error) -> Self {
467        CadiError::StorageError(e.to_string())
468    }
469}
470
471#[cfg(test)]
472mod tests {
473    use super::*;
474
475    #[test]
476    fn test_store_creation() {
477        let store = GraphStore::in_memory().unwrap();
478        let stats = store.stats().unwrap();
479        assert_eq!(stats.node_count, 0);
480    }
481
482    #[test]
483    fn test_node_crud() {
484        let store = GraphStore::in_memory().unwrap();
485
486        let node = GraphNode::new("chunk:sha256:abc123", "abc123")
487            .with_alias("test/node")
488            .with_language("rust")
489            .with_size(1000)
490            .with_defines(vec!["my_function".to_string()]);
491
492        // Insert
493        store.insert_node(&node).unwrap();
494        assert!(store.node_exists("chunk:sha256:abc123").unwrap());
495
496        // Get
497        let retrieved = store.get_node("chunk:sha256:abc123").unwrap().unwrap();
498        assert_eq!(retrieved.chunk_id, "chunk:sha256:abc123");
499        assert_eq!(retrieved.primary_alias, Some("test/node".to_string()));
500
501        // Symbol lookup
502        let found = store.find_symbol("my_function").unwrap();
503        assert_eq!(found, Some("chunk:sha256:abc123".to_string()));
504
505        // Alias lookup
506        let found = store.resolve_alias("test/node").unwrap();
507        assert_eq!(found, Some("chunk:sha256:abc123".to_string()));
508
509        // Delete
510        assert!(store.delete_node("chunk:sha256:abc123").unwrap());
511        assert!(!store.node_exists("chunk:sha256:abc123").unwrap());
512    }
513
514    #[test]
515    fn test_content_storage() {
516        let store = GraphStore::in_memory().unwrap();
517
518        let content = b"fn hello() { println!(\"Hello\"); }";
519        store.store_content("chunk:abc", content).unwrap();
520
521        let retrieved = store.get_content("chunk:abc").unwrap().unwrap();
522        assert_eq!(retrieved, content);
523
524        let as_str = store.get_content_str("chunk:abc").unwrap().unwrap();
525        assert!(as_str.contains("hello"));
526    }
527
528    #[test]
529    fn test_dependency_edges() {
530        let store = GraphStore::in_memory().unwrap();
531
532        // Create nodes
533        let node_a = GraphNode::new("chunk:a", "a").with_defines(vec!["A".to_string()]);
534        let node_b = GraphNode::new("chunk:b", "b").with_defines(vec!["B".to_string()]);
535        let node_c = GraphNode::new("chunk:c", "c").with_defines(vec!["C".to_string()]);
536
537        store.insert_node(&node_a).unwrap();
538        store.insert_node(&node_b).unwrap();
539        store.insert_node(&node_c).unwrap();
540
541        // A depends on B (imports)
542        // A depends on C (type ref)
543        store.add_dependency("chunk:a", "chunk:b", EdgeType::Imports).unwrap();
544        store.add_dependency("chunk:a", "chunk:c", EdgeType::TypeRef).unwrap();
545
546        // Check dependencies of A
547        let deps = store.get_dependencies("chunk:a").unwrap();
548        assert_eq!(deps.len(), 2);
549
550        // Check dependents of B
551        let deps = store.get_dependents("chunk:b").unwrap();
552        assert_eq!(deps.len(), 1);
553        assert_eq!(deps[0].1, "chunk:a");
554
555        // Filter by type
556        let import_deps = store.get_dependencies_of_type("chunk:a", EdgeType::Imports).unwrap();
557        assert_eq!(import_deps.len(), 1);
558        assert_eq!(import_deps[0], "chunk:b");
559    }
560
561    #[test]
562    fn test_graph_query() {
563        let store = GraphStore::in_memory().unwrap();
564
565        // Create a dependency chain: A -> B -> C
566        for (id, alias) in [("a", "root"), ("b", "mid"), ("c", "leaf")] {
567            let node = GraphNode::new(format!("chunk:{}", id), id)
568                .with_alias(alias)
569                .with_size(100);
570            store.insert_node(&node).unwrap();
571        }
572
573        store.add_dependency("chunk:a", "chunk:b", EdgeType::Imports).unwrap();
574        store.add_dependency("chunk:b", "chunk:c", EdgeType::Imports).unwrap();
575
576        // Query dependencies of A with depth 2
577        let query = GraphQuery::dependencies("chunk:a").with_depth(2);
578        let result = store.query(&query).unwrap();
579
580        assert_eq!(result.nodes.len(), 3); // A, B, C
581        assert!(result.nodes.iter().any(|n| n.chunk_id == "chunk:a"));
582        assert!(result.nodes.iter().any(|n| n.chunk_id == "chunk:b"));
583        assert!(result.nodes.iter().any(|n| n.chunk_id == "chunk:c"));
584    }
585}