1use sled::{Db, Tree};
7use std::collections::{HashSet, VecDeque};
8use std::path::Path;
9use std::time::Instant;
10
11use super::{EdgeType, GraphNode, GraphQuery, QueryNode, QueryResult, TraversalDirection};
12use crate::error::{CadiError, CadiResult};
13
14pub struct GraphStore {
16 db: Db,
18
19 nodes: Tree,
21
22 dependencies: Tree,
25
26 dependents: Tree,
29
30 symbols: Tree,
33
34 aliases: Tree,
37
38 content: Tree,
40}
41
42impl GraphStore {
43 pub fn open(path: impl AsRef<Path>) -> CadiResult<Self> {
45 let db = sled::open(path.as_ref()).map_err(|e| {
46 CadiError::StorageError(format!("Failed to open graph store: {}", e))
47 })?;
48
49 Ok(Self {
50 nodes: db.open_tree("nodes")?,
51 dependencies: db.open_tree("dependencies")?,
52 dependents: db.open_tree("dependents")?,
53 symbols: db.open_tree("symbols")?,
54 aliases: db.open_tree("aliases")?,
55 content: db.open_tree("content")?,
56 db,
57 })
58 }
59
60 pub fn in_memory() -> CadiResult<Self> {
62 let config = sled::Config::new().temporary(true);
63 let db = config.open().map_err(|e| {
64 CadiError::StorageError(format!("Failed to create in-memory store: {}", e))
65 })?;
66
67 Ok(Self {
68 nodes: db.open_tree("nodes")?,
69 dependencies: db.open_tree("dependencies")?,
70 dependents: db.open_tree("dependents")?,
71 symbols: db.open_tree("symbols")?,
72 aliases: db.open_tree("aliases")?,
73 content: db.open_tree("content")?,
74 db,
75 })
76 }
77
78 pub fn insert_node(&self, node: &GraphNode) -> CadiResult<()> {
84 let key = node.chunk_id.as_bytes();
85 let value = node.to_bytes()?;
86
87 self.nodes.insert(key, value)?;
88
89 for symbol in &node.symbols_defined {
91 self.symbols.insert(symbol.as_bytes(), key)?;
92 }
93
94 for alias in &node.aliases {
96 self.aliases.insert(alias.as_bytes(), key)?;
97 }
98
99 self.update_edge_indices(&node.chunk_id, &node.outgoing_edges, &node.incoming_edges)?;
101
102 Ok(())
103 }
104
105 pub fn get_node(&self, chunk_id: &str) -> CadiResult<Option<GraphNode>> {
107 match self.nodes.get(chunk_id.as_bytes())? {
108 Some(bytes) => Ok(Some(GraphNode::from_bytes(&bytes)?)),
109 None => Ok(None),
110 }
111 }
112
113 pub fn node_exists(&self, chunk_id: &str) -> CadiResult<bool> {
115 Ok(self.nodes.contains_key(chunk_id.as_bytes())?)
116 }
117
118 pub fn delete_node(&self, chunk_id: &str) -> CadiResult<bool> {
120 if let Some(node) = self.get_node(chunk_id)? {
122 for symbol in &node.symbols_defined {
124 self.symbols.remove(symbol.as_bytes())?;
125 }
126
127 for alias in &node.aliases {
129 self.aliases.remove(alias.as_bytes())?;
130 }
131
132 self.dependencies.remove(chunk_id.as_bytes())?;
134 self.dependents.remove(chunk_id.as_bytes())?;
135
136 self.content.remove(chunk_id.as_bytes())?;
138
139 self.nodes.remove(chunk_id.as_bytes())?;
141
142 Ok(true)
143 } else {
144 Ok(false)
145 }
146 }
147
148 pub fn store_content(&self, chunk_id: &str, content: &[u8]) -> CadiResult<()> {
154 self.content.insert(chunk_id.as_bytes(), content)?;
155 Ok(())
156 }
157
158 pub fn get_content(&self, chunk_id: &str) -> CadiResult<Option<Vec<u8>>> {
160 Ok(self.content.get(chunk_id.as_bytes())?.map(|v| v.to_vec()))
161 }
162
163 pub fn get_content_str(&self, chunk_id: &str) -> CadiResult<Option<String>> {
165 match self.get_content(chunk_id)? {
166 Some(bytes) => Ok(Some(String::from_utf8_lossy(&bytes).to_string())),
167 None => Ok(None),
168 }
169 }
170
171 pub fn add_dependency(&self, source: &str, target: &str, edge_type: EdgeType) -> CadiResult<()> {
177 self.add_to_edge_list(&self.dependencies, source, target, edge_type)?;
179
180 self.add_to_edge_list(&self.dependents, target, source, edge_type)?;
182
183 Ok(())
184 }
185
186 pub fn get_dependencies(&self, chunk_id: &str) -> CadiResult<Vec<(EdgeType, String)>> {
188 self.get_edge_list(&self.dependencies, chunk_id)
189 }
190
191 pub fn get_dependents(&self, chunk_id: &str) -> CadiResult<Vec<(EdgeType, String)>> {
193 self.get_edge_list(&self.dependents, chunk_id)
194 }
195
196 pub fn get_dependencies_of_type(
198 &self,
199 chunk_id: &str,
200 edge_type: EdgeType,
201 ) -> CadiResult<Vec<String>> {
202 Ok(self
203 .get_dependencies(chunk_id)?
204 .into_iter()
205 .filter(|(et, _)| *et == edge_type)
206 .map(|(_, id)| id)
207 .collect())
208 }
209
210 pub fn find_symbol(&self, symbol: &str) -> CadiResult<Option<String>> {
216 match self.symbols.get(symbol.as_bytes())? {
217 Some(bytes) => Ok(Some(String::from_utf8_lossy(&bytes).to_string())),
218 None => Ok(None),
219 }
220 }
221
222 pub fn resolve_alias(&self, alias: &str) -> CadiResult<Option<String>> {
224 match self.aliases.get(alias.as_bytes())? {
225 Some(bytes) => Ok(Some(String::from_utf8_lossy(&bytes).to_string())),
226 None => Ok(None),
227 }
228 }
229
230 pub fn get_symbols_for_chunk(&self, chunk_id: &str) -> CadiResult<Vec<String>> {
232 if let Some(node) = self.get_node(chunk_id)? {
233 Ok(node.symbols_defined)
234 } else {
235 Ok(Vec::new())
236 }
237 }
238
239 pub fn query(&self, query: &GraphQuery) -> CadiResult<QueryResult> {
245 let start = Instant::now();
246 let mut visited = HashSet::new();
247 let mut result_nodes = Vec::new();
248 let mut queue = VecDeque::new();
249
250 for chunk_id in &query.start_nodes {
252 if query.include_start {
253 if let Some(node) = self.get_node(chunk_id)? {
254 result_nodes.push(QueryNode {
255 chunk_id: chunk_id.clone(),
256 alias: node.primary_alias,
257 depth: 0,
258 reached_via: None,
259 parent: None,
260 token_estimate: node.token_estimate,
261 });
262 }
263 }
264 visited.insert(chunk_id.clone());
265 queue.push_back((chunk_id.clone(), 0, None, None));
266 }
267
268 while let Some((current_id, depth, _reached_via, _parent)) = queue.pop_front() {
270 if depth >= query.max_depth {
271 continue;
272 }
273 if result_nodes.len() >= query.max_results {
274 break;
275 }
276
277 let edges = match query.direction {
279 TraversalDirection::Outgoing => self.get_dependencies(¤t_id)?,
280 TraversalDirection::Incoming => self.get_dependents(¤t_id)?,
281 TraversalDirection::Both => {
282 let mut all = self.get_dependencies(¤t_id)?;
283 all.extend(self.get_dependents(¤t_id)?);
284 all
285 }
286 };
287
288 for (edge_type, target_id) in edges {
289 if visited.contains(&target_id) {
290 continue;
291 }
292 if !query.should_follow_edge(edge_type) {
293 continue;
294 }
295
296 visited.insert(target_id.clone());
297
298 if let Some(node) = self.get_node(&target_id)? {
300 if let Some(ref lang) = query.language_filter {
302 if &node.language != lang {
303 continue;
304 }
305 }
306 if let Some(ref gran) = query.granularity_filter {
307 if &node.granularity != gran {
308 continue;
309 }
310 }
311
312 result_nodes.push(QueryNode {
313 chunk_id: target_id.clone(),
314 alias: node.primary_alias,
315 depth: depth + 1,
316 reached_via: Some(edge_type),
317 parent: Some(current_id.clone()),
318 token_estimate: node.token_estimate,
319 });
320
321 queue.push_back((
322 target_id,
323 depth + 1,
324 Some(edge_type),
325 Some(current_id.clone()),
326 ));
327 }
328 }
329 }
330
331 Ok(QueryResult {
332 nodes: result_nodes,
333 nodes_visited: visited.len(),
334 truncated: visited.len() > query.max_results,
335 execution_time_ms: start.elapsed().as_millis() as u64,
336 })
337 }
338
339 pub fn get_token_estimate(&self, chunk_id: &str) -> CadiResult<usize> {
341 if let Some(node) = self.get_node(chunk_id)? {
342 Ok(node.token_estimate)
343 } else {
344 Ok(0)
345 }
346 }
347
348 pub fn stats(&self) -> CadiResult<GraphStoreStats> {
354 Ok(GraphStoreStats {
355 node_count: self.nodes.len(),
356 symbol_count: self.symbols.len(),
357 alias_count: self.aliases.len(),
358 dependency_entries: self.dependencies.len(),
359 dependent_entries: self.dependents.len(),
360 content_entries: self.content.len(),
361 db_size_bytes: self.db.size_on_disk()?,
362 })
363 }
364
365 pub fn flush(&self) -> CadiResult<()> {
367 self.db.flush()?;
368 Ok(())
369 }
370
371 fn update_edge_indices(
376 &self,
377 chunk_id: &str,
378 outgoing: &[(EdgeType, String)],
379 _incoming: &[(EdgeType, String)],
380 ) -> CadiResult<()> {
381 if !outgoing.is_empty() {
383 let serialized = serde_json::to_vec(outgoing)?;
384 self.dependencies.insert(chunk_id.as_bytes(), serialized)?;
385 }
386
387 for (edge_type, target) in outgoing {
389 self.add_to_edge_list(&self.dependents, target, chunk_id, *edge_type)?;
390 }
391
392 Ok(())
393 }
394
395 fn add_to_edge_list(
396 &self,
397 tree: &Tree,
398 key: &str,
399 value: &str,
400 edge_type: EdgeType,
401 ) -> CadiResult<()> {
402 let mut edges = self.get_edge_list(tree, key)?;
403
404 if !edges.iter().any(|(et, v)| et == &edge_type && v == value) {
406 edges.push((edge_type, value.to_string()));
407 let serialized = serde_json::to_vec(&edges)?;
408 tree.insert(key.as_bytes(), serialized)?;
409 }
410
411 Ok(())
412 }
413
414 fn get_edge_list(&self, tree: &Tree, key: &str) -> CadiResult<Vec<(EdgeType, String)>> {
415 match tree.get(key.as_bytes())? {
416 Some(bytes) => Ok(serde_json::from_slice(&bytes)?),
417 None => Ok(Vec::new()),
418 }
419 }
420}
421
422#[derive(Debug, Clone)]
424pub struct GraphStoreStats {
425 pub node_count: usize,
426 pub symbol_count: usize,
427 pub alias_count: usize,
428 pub dependency_entries: usize,
429 pub dependent_entries: usize,
430 pub content_entries: usize,
431 pub db_size_bytes: u64,
432}
433
434impl std::fmt::Display for GraphStoreStats {
435 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
436 write!(
437 f,
438 "GraphStore: {} nodes, {} symbols, {} aliases, {} deps, {} size",
439 self.node_count,
440 self.symbol_count,
441 self.alias_count,
442 self.dependency_entries,
443 human_bytes(self.db_size_bytes)
444 )
445 }
446}
447
448fn human_bytes(bytes: u64) -> String {
449 const KB: u64 = 1024;
450 const MB: u64 = KB * 1024;
451 const GB: u64 = MB * 1024;
452
453 if bytes >= GB {
454 format!("{:.2} GB", bytes as f64 / GB as f64)
455 } else if bytes >= MB {
456 format!("{:.2} MB", bytes as f64 / MB as f64)
457 } else if bytes >= KB {
458 format!("{:.2} KB", bytes as f64 / KB as f64)
459 } else {
460 format!("{} bytes", bytes)
461 }
462}
463
464impl From<sled::Error> for CadiError {
466 fn from(e: sled::Error) -> Self {
467 CadiError::StorageError(e.to_string())
468 }
469}
470
471#[cfg(test)]
472mod tests {
473 use super::*;
474
475 #[test]
476 fn test_store_creation() {
477 let store = GraphStore::in_memory().unwrap();
478 let stats = store.stats().unwrap();
479 assert_eq!(stats.node_count, 0);
480 }
481
482 #[test]
483 fn test_node_crud() {
484 let store = GraphStore::in_memory().unwrap();
485
486 let node = GraphNode::new("chunk:sha256:abc123", "abc123")
487 .with_alias("test/node")
488 .with_language("rust")
489 .with_size(1000)
490 .with_defines(vec!["my_function".to_string()]);
491
492 store.insert_node(&node).unwrap();
494 assert!(store.node_exists("chunk:sha256:abc123").unwrap());
495
496 let retrieved = store.get_node("chunk:sha256:abc123").unwrap().unwrap();
498 assert_eq!(retrieved.chunk_id, "chunk:sha256:abc123");
499 assert_eq!(retrieved.primary_alias, Some("test/node".to_string()));
500
501 let found = store.find_symbol("my_function").unwrap();
503 assert_eq!(found, Some("chunk:sha256:abc123".to_string()));
504
505 let found = store.resolve_alias("test/node").unwrap();
507 assert_eq!(found, Some("chunk:sha256:abc123".to_string()));
508
509 assert!(store.delete_node("chunk:sha256:abc123").unwrap());
511 assert!(!store.node_exists("chunk:sha256:abc123").unwrap());
512 }
513
514 #[test]
515 fn test_content_storage() {
516 let store = GraphStore::in_memory().unwrap();
517
518 let content = b"fn hello() { println!(\"Hello\"); }";
519 store.store_content("chunk:abc", content).unwrap();
520
521 let retrieved = store.get_content("chunk:abc").unwrap().unwrap();
522 assert_eq!(retrieved, content);
523
524 let as_str = store.get_content_str("chunk:abc").unwrap().unwrap();
525 assert!(as_str.contains("hello"));
526 }
527
528 #[test]
529 fn test_dependency_edges() {
530 let store = GraphStore::in_memory().unwrap();
531
532 let node_a = GraphNode::new("chunk:a", "a").with_defines(vec!["A".to_string()]);
534 let node_b = GraphNode::new("chunk:b", "b").with_defines(vec!["B".to_string()]);
535 let node_c = GraphNode::new("chunk:c", "c").with_defines(vec!["C".to_string()]);
536
537 store.insert_node(&node_a).unwrap();
538 store.insert_node(&node_b).unwrap();
539 store.insert_node(&node_c).unwrap();
540
541 store.add_dependency("chunk:a", "chunk:b", EdgeType::Imports).unwrap();
544 store.add_dependency("chunk:a", "chunk:c", EdgeType::TypeRef).unwrap();
545
546 let deps = store.get_dependencies("chunk:a").unwrap();
548 assert_eq!(deps.len(), 2);
549
550 let deps = store.get_dependents("chunk:b").unwrap();
552 assert_eq!(deps.len(), 1);
553 assert_eq!(deps[0].1, "chunk:a");
554
555 let import_deps = store.get_dependencies_of_type("chunk:a", EdgeType::Imports).unwrap();
557 assert_eq!(import_deps.len(), 1);
558 assert_eq!(import_deps[0], "chunk:b");
559 }
560
561 #[test]
562 fn test_graph_query() {
563 let store = GraphStore::in_memory().unwrap();
564
565 for (id, alias) in [("a", "root"), ("b", "mid"), ("c", "leaf")] {
567 let node = GraphNode::new(format!("chunk:{}", id), id)
568 .with_alias(alias)
569 .with_size(100);
570 store.insert_node(&node).unwrap();
571 }
572
573 store.add_dependency("chunk:a", "chunk:b", EdgeType::Imports).unwrap();
574 store.add_dependency("chunk:b", "chunk:c", EdgeType::Imports).unwrap();
575
576 let query = GraphQuery::dependencies("chunk:a").with_depth(2);
578 let result = store.query(&query).unwrap();
579
580 assert_eq!(result.nodes.len(), 3); assert!(result.nodes.iter().any(|n| n.chunk_id == "chunk:a"));
582 assert!(result.nodes.iter().any(|n| n.chunk_id == "chunk:b"));
583 assert!(result.nodes.iter().any(|n| n.chunk_id == "chunk:c"));
584 }
585}