1use std::collections::{HashMap, HashSet, BTreeSet};
15use serde::{Deserialize, Serialize};
16use indexmap::IndexMap;
17use dashmap::DashMap;
18use parking_lot::RwLock;
19use scribe_core::{Result, error::ScribeError};
20
21pub type NodeId = String;
23
24pub type EdgeWeight = f64;
26
27#[derive(Debug, Clone)]
29pub struct DependencyGraph {
30 forward_edges: IndexMap<NodeId, BTreeSet<NodeId>>,
32
33 reverse_edges: IndexMap<NodeId, BTreeSet<NodeId>>,
35
36 nodes: BTreeSet<NodeId>,
38
39 node_cache: HashMap<NodeId, NodeMetadata>,
41
42 stats_cache: Option<GraphStatistics>,
44}
45
46#[derive(Debug, Clone, PartialEq)]
48pub struct NodeMetadata {
49 pub file_path: String,
51 pub language: Option<String>,
53 pub is_entrypoint: bool,
55 pub is_test: bool,
57 pub size_bytes: u64,
59}
60
61impl NodeMetadata {
62 pub fn new(file_path: String) -> Self {
64 let language = detect_language_from_extension(&file_path);
65 let is_entrypoint = is_entrypoint_file(&file_path);
66 let is_test = is_test_file(&file_path);
67
68 Self {
69 file_path,
70 language,
71 is_entrypoint,
72 is_test,
73 size_bytes: 0,
74 }
75 }
76
77 pub fn with_size(mut self, size_bytes: u64) -> Self {
79 self.size_bytes = size_bytes;
80 self
81 }
82}
83
84impl DependencyGraph {
86 pub fn new() -> Self {
88 Self {
89 forward_edges: IndexMap::new(),
90 reverse_edges: IndexMap::new(),
91 nodes: BTreeSet::new(),
92 node_cache: HashMap::new(),
93 stats_cache: None,
94 }
95 }
96
97 pub fn with_capacity(capacity: usize) -> Self {
99 Self {
100 forward_edges: IndexMap::with_capacity(capacity),
101 reverse_edges: IndexMap::with_capacity(capacity),
102 nodes: BTreeSet::new(),
103 node_cache: HashMap::with_capacity(capacity),
104 stats_cache: None,
105 }
106 }
107
108 pub fn add_node(&mut self, node_id: NodeId) -> Result<()> {
110 self.nodes.insert(node_id.clone());
111
112 self.forward_edges.entry(node_id.clone()).or_insert_with(BTreeSet::new);
114 self.reverse_edges.entry(node_id.clone()).or_insert_with(BTreeSet::new);
115
116 if !self.node_cache.contains_key(&node_id) {
118 self.node_cache.insert(node_id.clone(), NodeMetadata::new(node_id));
119 }
120
121 self.stats_cache = None;
123
124 Ok(())
125 }
126
127 pub fn add_node_with_metadata(&mut self, node_id: NodeId, metadata: NodeMetadata) -> Result<()> {
129 self.add_node(node_id.clone())?;
130 self.node_cache.insert(node_id, metadata);
131 Ok(())
132 }
133
134 pub fn add_edge(&mut self, from_node: NodeId, to_node: NodeId) -> Result<()> {
136 self.add_node(from_node.clone())?;
138 self.add_node(to_node.clone())?;
139
140 if let Some(forward_set) = self.forward_edges.get_mut(&from_node) {
142 forward_set.insert(to_node.clone());
143 }
144
145 if let Some(reverse_set) = self.reverse_edges.get_mut(&to_node) {
147 reverse_set.insert(from_node);
148 }
149
150 self.stats_cache = None;
152
153 Ok(())
154 }
155
156 pub fn add_edges(&mut self, edges: &[(NodeId, NodeId)]) -> Result<()> {
158 for (from_node, to_node) in edges {
159 self.add_edge(from_node.clone(), to_node.clone())?;
160 }
161 Ok(())
162 }
163
164 pub fn remove_node(&mut self, node_id: &NodeId) -> Result<bool> {
166 if !self.nodes.contains(node_id) {
167 return Ok(false);
168 }
169
170 self.nodes.remove(node_id);
172
173 if let Some(outgoing) = self.forward_edges.shift_remove(node_id) {
175 for target in &outgoing {
176 if let Some(reverse_set) = self.reverse_edges.get_mut(target) {
177 reverse_set.remove(node_id);
178 }
179 }
180 }
181
182 if let Some(incoming) = self.reverse_edges.shift_remove(node_id) {
184 for source in &incoming {
185 if let Some(forward_set) = self.forward_edges.get_mut(source) {
186 forward_set.remove(node_id);
187 }
188 }
189 }
190
191 self.node_cache.remove(node_id);
193
194 self.stats_cache = None;
196
197 Ok(true)
198 }
199
200 pub fn remove_edge(&mut self, from_node: &NodeId, to_node: &NodeId) -> Result<bool> {
202 let forward_removed = if let Some(forward_set) = self.forward_edges.get_mut(from_node) {
203 forward_set.remove(to_node)
204 } else {
205 false
206 };
207
208 let reverse_removed = if let Some(reverse_set) = self.reverse_edges.get_mut(to_node) {
209 reverse_set.remove(from_node)
210 } else {
211 false
212 };
213
214 if forward_removed || reverse_removed {
215 self.stats_cache = None;
216 }
217
218 Ok(forward_removed || reverse_removed)
219 }
220
221 pub fn contains_node(&self, node_id: &NodeId) -> bool {
223 self.nodes.contains(node_id)
224 }
225
226 pub fn contains_edge(&self, from_node: &NodeId, to_node: &NodeId) -> bool {
228 self.forward_edges
229 .get(from_node)
230 .map_or(false, |edges| edges.contains(to_node))
231 }
232
233 pub fn node_count(&self) -> usize {
235 self.nodes.len()
236 }
237
238 pub fn edge_count(&self) -> usize {
240 self.forward_edges.values().map(|edges| edges.len()).sum()
241 }
242
243 pub fn nodes(&self) -> impl Iterator<Item = &NodeId> {
245 self.nodes.iter()
246 }
247
248 pub fn edges(&self) -> impl Iterator<Item = (&NodeId, &NodeId)> {
250 self.forward_edges.iter()
251 .flat_map(|(from, targets)| targets.iter().map(move |to| (from, to)))
252 }
253}
254
255impl DependencyGraph {
257 pub fn in_degree(&self, node_id: &NodeId) -> usize {
259 self.reverse_edges.get(node_id).map_or(0, |edges| edges.len())
260 }
261
262 pub fn out_degree(&self, node_id: &NodeId) -> usize {
264 self.forward_edges.get(node_id).map_or(0, |edges| edges.len())
265 }
266
267 pub fn degree(&self, node_id: &NodeId) -> usize {
269 self.in_degree(node_id) + self.out_degree(node_id)
270 }
271
272 pub fn outgoing_neighbors(&self, node_id: &NodeId) -> Option<&BTreeSet<NodeId>> {
274 self.forward_edges.get(node_id)
275 }
276
277 pub fn incoming_neighbors(&self, node_id: &NodeId) -> Option<&BTreeSet<NodeId>> {
279 self.reverse_edges.get(node_id)
280 }
281
282 pub fn all_neighbors(&self, node_id: &NodeId) -> HashSet<&NodeId> {
284 let mut neighbors = HashSet::new();
285
286 if let Some(outgoing) = self.forward_edges.get(node_id) {
287 neighbors.extend(outgoing.iter());
288 }
289
290 if let Some(incoming) = self.reverse_edges.get(node_id) {
291 neighbors.extend(incoming.iter());
292 }
293
294 neighbors
295 }
296
297 pub fn get_degree_info(&self, node_id: &NodeId) -> Option<DegreeInfo> {
299 if !self.contains_node(node_id) {
300 return None;
301 }
302
303 Some(DegreeInfo {
304 node_id: node_id.clone(),
305 in_degree: self.in_degree(node_id),
306 out_degree: self.out_degree(node_id),
307 total_degree: self.degree(node_id),
308 })
309 }
310}
311
312impl DependencyGraph {
314 pub fn node_metadata(&self, node_id: &NodeId) -> Option<&NodeMetadata> {
316 self.node_cache.get(node_id)
317 }
318
319 pub fn set_node_metadata(&mut self, node_id: NodeId, metadata: NodeMetadata) -> Result<()> {
321 if !self.contains_node(&node_id) {
322 return Err(ScribeError::invalid_operation(
323 format!("Node {} does not exist in graph", node_id),
324 "set_node_metadata"
325 ));
326 }
327
328 self.node_cache.insert(node_id, metadata);
329 Ok(())
330 }
331
332 pub fn entrypoint_nodes(&self) -> Vec<&NodeId> {
334 self.node_cache.iter()
335 .filter_map(|(id, meta)| if meta.is_entrypoint { Some(id) } else { None })
336 .collect()
337 }
338
339 pub fn test_nodes(&self) -> Vec<&NodeId> {
341 self.node_cache.iter()
342 .filter_map(|(id, meta)| if meta.is_test { Some(id) } else { None })
343 .collect()
344 }
345
346 pub fn nodes_by_language(&self, language: &str) -> Vec<&NodeId> {
348 self.node_cache.iter()
349 .filter_map(|(id, meta)| {
350 if meta.language.as_deref() == Some(language) {
351 Some(id)
352 } else {
353 None
354 }
355 })
356 .collect()
357 }
358}
359
360impl DependencyGraph {
362 pub fn pagerank_iterator(&self) -> impl Iterator<Item = (&NodeId, Option<&BTreeSet<NodeId>>)> {
364 self.nodes.iter().map(move |node| {
365 (node, self.reverse_edges.get(node))
366 })
367 }
368
369 pub fn dangling_nodes(&self) -> Vec<&NodeId> {
371 self.nodes.iter()
372 .filter(|&node| self.out_degree(node) == 0)
373 .collect()
374 }
375
376 pub fn estimate_scc_count(&self) -> usize {
378 if self.nodes.is_empty() {
379 return 0;
380 }
381
382 let potential_scc_nodes = self.nodes.iter()
384 .filter(|&node| self.in_degree(node) > 0 && self.out_degree(node) > 0)
385 .count();
386
387 let estimated_scc = if potential_scc_nodes > 0 {
389 std::cmp::max(1, potential_scc_nodes / 3)
390 } else {
391 0
392 };
393
394 let isolated_nodes = self.nodes.len() - potential_scc_nodes;
396 estimated_scc + isolated_nodes
397 }
398
399 pub fn is_strongly_connected(&self) -> bool {
401 if self.nodes.is_empty() {
402 return true;
403 }
404
405 self.nodes.iter().all(|node| {
407 self.in_degree(node) > 0 && self.out_degree(node) > 0
408 })
409 }
410}
411
412impl DependencyGraph {
414 pub fn into_concurrent(self) -> ConcurrentDependencyGraph {
416 ConcurrentDependencyGraph {
417 forward_edges: DashMap::from_iter(self.forward_edges),
418 reverse_edges: DashMap::from_iter(self.reverse_edges),
419 nodes: RwLock::new(self.nodes),
420 node_cache: DashMap::from_iter(self.node_cache),
421 stats_cache: RwLock::new(self.stats_cache),
422 }
423 }
424}
425
426#[derive(Debug)]
428pub struct ConcurrentDependencyGraph {
429 forward_edges: DashMap<NodeId, BTreeSet<NodeId>>,
430 reverse_edges: DashMap<NodeId, BTreeSet<NodeId>>,
431 nodes: RwLock<BTreeSet<NodeId>>,
432 node_cache: DashMap<NodeId, NodeMetadata>,
433 stats_cache: RwLock<Option<GraphStatistics>>,
434}
435
436impl ConcurrentDependencyGraph {
437 pub fn add_node(&self, node_id: NodeId) -> Result<()> {
439 {
440 let mut nodes = self.nodes.write();
441 nodes.insert(node_id.clone());
442 }
443
444 self.forward_edges.entry(node_id.clone()).or_insert_with(BTreeSet::new);
445 self.reverse_edges.entry(node_id.clone()).or_insert_with(BTreeSet::new);
446
447 if !self.node_cache.contains_key(&node_id) {
448 self.node_cache.insert(node_id.clone(), NodeMetadata::new(node_id));
449 }
450
451 *self.stats_cache.write() = None;
453
454 Ok(())
455 }
456
457 pub fn in_degree(&self, node_id: &NodeId) -> usize {
459 self.reverse_edges.get(node_id).map_or(0, |entry| entry.len())
460 }
461
462 pub fn out_degree(&self, node_id: &NodeId) -> usize {
464 self.forward_edges.get(node_id).map_or(0, |entry| entry.len())
465 }
466
467 pub fn into_sequential(self) -> DependencyGraph {
469 let nodes = self.nodes.into_inner();
470 let stats_cache = self.stats_cache.into_inner();
471
472 DependencyGraph {
473 forward_edges: self.forward_edges.into_iter().collect(),
474 reverse_edges: self.reverse_edges.into_iter().collect(),
475 nodes,
476 node_cache: self.node_cache.into_iter().collect(),
477 stats_cache,
478 }
479 }
480}
481
482#[derive(Debug, Clone, PartialEq)]
484pub struct DegreeInfo {
485 pub node_id: NodeId,
486 pub in_degree: usize,
487 pub out_degree: usize,
488 pub total_degree: usize,
489}
490
491#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
493pub struct GraphStatistics {
494 pub total_nodes: usize,
496 pub total_edges: usize,
498 pub in_degree_avg: f64,
500 pub in_degree_max: usize,
502 pub out_degree_avg: f64,
504 pub out_degree_max: usize,
506 pub strongly_connected_components: usize,
508 pub graph_density: f64,
510 pub isolated_nodes: usize,
512 pub dangling_nodes: usize,
514}
515
516impl GraphStatistics {
517 pub fn empty() -> Self {
519 Self {
520 total_nodes: 0,
521 total_edges: 0,
522 in_degree_avg: 0.0,
523 in_degree_max: 0,
524 out_degree_avg: 0.0,
525 out_degree_max: 0,
526 strongly_connected_components: 0,
527 graph_density: 0.0,
528 isolated_nodes: 0,
529 dangling_nodes: 0,
530 }
531 }
532}
533
534fn detect_language_from_extension(file_path: &str) -> Option<String> {
536 let ext = std::path::Path::new(file_path)
537 .extension()
538 .and_then(|s| s.to_str())
539 .map(|s| s.to_lowercase())?;
540
541 match ext.as_str() {
542 "py" => Some("python".to_string()),
543 "js" | "jsx" | "mjs" => Some("javascript".to_string()),
544 "ts" | "tsx" => Some("typescript".to_string()),
545 "rs" => Some("rust".to_string()),
546 "go" => Some("go".to_string()),
547 "java" | "kt" => Some("java".to_string()),
548 "cpp" | "cc" | "cxx" | "hpp" | "h" => Some("cpp".to_string()),
549 "c" => Some("c".to_string()),
550 "rb" => Some("ruby".to_string()),
551 "php" => Some("php".to_string()),
552 "cs" => Some("csharp".to_string()),
553 "swift" => Some("swift".to_string()),
554 _ => None,
555 }
556}
557
558fn is_entrypoint_file(file_path: &str) -> bool {
559 let file_name = std::path::Path::new(file_path)
560 .file_name()
561 .and_then(|s| s.to_str())
562 .unwrap_or("")
563 .to_lowercase();
564
565 matches!(file_name.as_str(), "main.py" | "main.rs" | "main.go" | "main.js" | "main.ts" |
566 "index.py" | "index.rs" | "index.go" | "index.js" | "index.ts" |
567 "app.py" | "app.rs" | "app.go" | "app.js" | "app.ts" |
568 "server.py" | "server.rs" | "server.go" | "server.js" | "server.ts" |
569 "lib.rs" | "__init__.py")
570}
571
572fn is_test_file(file_path: &str) -> bool {
573 let path_lower = file_path.to_lowercase();
574 path_lower.contains("test") ||
575 path_lower.contains("spec") ||
576 path_lower.contains("__tests__") ||
577 path_lower.ends_with("_test.py") ||
578 path_lower.ends_with("_test.rs") ||
579 path_lower.ends_with("_test.go") ||
580 path_lower.ends_with(".test.js") ||
581 path_lower.ends_with(".test.ts") ||
582 path_lower.ends_with("_spec.rb")
583}
584
585impl Default for DependencyGraph {
586 fn default() -> Self {
587 Self::new()
588 }
589}
590
591#[cfg(test)]
592mod tests {
593 use super::*;
594
595 #[test]
596 fn test_graph_creation() {
597 let graph = DependencyGraph::new();
598 assert_eq!(graph.node_count(), 0);
599 assert_eq!(graph.edge_count(), 0);
600 }
601
602 #[test]
603 fn test_node_operations() {
604 let mut graph = DependencyGraph::new();
605
606 graph.add_node("main.py".to_string()).unwrap();
608 graph.add_node("utils.py".to_string()).unwrap();
609
610 assert_eq!(graph.node_count(), 2);
611 assert!(graph.contains_node(&"main.py".to_string()));
612 assert!(graph.contains_node(&"utils.py".to_string()));
613
614 let removed = graph.remove_node(&"utils.py".to_string()).unwrap();
616 assert!(removed);
617 assert_eq!(graph.node_count(), 1);
618 assert!(!graph.contains_node(&"utils.py".to_string()));
619 }
620
621 #[test]
622 fn test_edge_operations() {
623 let mut graph = DependencyGraph::new();
624
625 graph.add_edge("main.py".to_string(), "utils.py".to_string()).unwrap();
627
628 assert_eq!(graph.node_count(), 2);
629 assert_eq!(graph.edge_count(), 1);
630 assert!(graph.contains_edge(&"main.py".to_string(), &"utils.py".to_string()));
631
632 assert_eq!(graph.out_degree(&"main.py".to_string()), 1);
634 assert_eq!(graph.in_degree(&"utils.py".to_string()), 1);
635 assert_eq!(graph.in_degree(&"main.py".to_string()), 0);
636 assert_eq!(graph.out_degree(&"utils.py".to_string()), 0);
637 }
638
639 #[test]
640 fn test_multiple_edges() {
641 let mut graph = DependencyGraph::new();
642
643 let edges = vec![
644 ("main.py".to_string(), "utils.py".to_string()),
645 ("main.py".to_string(), "config.py".to_string()),
646 ("utils.py".to_string(), "config.py".to_string()),
647 ];
648
649 graph.add_edges(&edges).unwrap();
650
651 assert_eq!(graph.node_count(), 3);
652 assert_eq!(graph.edge_count(), 3);
653
654 assert_eq!(graph.out_degree(&"main.py".to_string()), 2);
656
657 assert_eq!(graph.in_degree(&"config.py".to_string()), 2);
659 }
660
661 #[test]
662 fn test_node_metadata() {
663 let mut graph = DependencyGraph::new();
664
665 let metadata = NodeMetadata::new("main.py".to_string()).with_size(1024);
666 graph.add_node_with_metadata("main.py".to_string(), metadata).unwrap();
667
668 let retrieved = graph.node_metadata(&"main.py".to_string()).unwrap();
669 assert_eq!(retrieved.file_path, "main.py");
670 assert_eq!(retrieved.language, Some("python".to_string()));
671 assert!(retrieved.is_entrypoint);
672 assert!(!retrieved.is_test);
673 assert_eq!(retrieved.size_bytes, 1024);
674 }
675
676 #[test]
677 fn test_language_detection() {
678 assert_eq!(detect_language_from_extension("main.py"), Some("python".to_string()));
679 assert_eq!(detect_language_from_extension("app.js"), Some("javascript".to_string()));
680 assert_eq!(detect_language_from_extension("lib.rs"), Some("rust".to_string()));
681 assert_eq!(detect_language_from_extension("server.go"), Some("go".to_string()));
682 assert_eq!(detect_language_from_extension("component.tsx"), Some("typescript".to_string()));
683 assert_eq!(detect_language_from_extension("unknown.xyz"), None);
684 }
685
686 #[test]
687 fn test_file_classification() {
688 assert!(is_entrypoint_file("main.py"));
689 assert!(is_entrypoint_file("index.js"));
690 assert!(is_entrypoint_file("lib.rs"));
691 assert!(!is_entrypoint_file("utils.py"));
692
693 assert!(is_test_file("test_utils.py"));
694 assert!(is_test_file("utils.test.js"));
695 assert!(is_test_file("integration_test.rs"));
696 assert!(!is_test_file("utils.py"));
697 }
698
699 #[test]
700 fn test_pagerank_iterator() {
701 let mut graph = DependencyGraph::new();
702
703 graph.add_edge("A".to_string(), "B".to_string()).unwrap();
705 graph.add_edge("B".to_string(), "C".to_string()).unwrap();
706 graph.add_edge("C".to_string(), "A".to_string()).unwrap();
707
708 let pagerank_data: Vec<_> = graph.pagerank_iterator().collect();
709 assert_eq!(pagerank_data.len(), 3);
710
711 for (node, reverse_edges) in pagerank_data {
713 assert!(reverse_edges.is_some());
714 assert!(!reverse_edges.unwrap().is_empty());
715 }
716 }
717
718 #[test]
719 fn test_dangling_nodes() {
720 let mut graph = DependencyGraph::new();
721
722 graph.add_edge("A".to_string(), "B".to_string()).unwrap();
724 graph.add_node("C".to_string()).unwrap();
725 graph.add_edge("B".to_string(), "D".to_string()).unwrap();
726
727 let dangling = graph.dangling_nodes();
728
729 assert_eq!(dangling.len(), 2);
731 assert!(dangling.contains(&&"C".to_string()));
732 assert!(dangling.contains(&&"D".to_string()));
733 }
734
735 #[test]
736 fn test_concurrent_graph() {
737 let mut graph = DependencyGraph::new();
738 graph.add_edge("A".to_string(), "B".to_string()).unwrap();
739 graph.add_edge("B".to_string(), "C".to_string()).unwrap();
740
741 let concurrent = graph.into_concurrent();
742
743 assert_eq!(concurrent.in_degree(&"B".to_string()), 1);
745 assert_eq!(concurrent.out_degree(&"B".to_string()), 1);
746
747 concurrent.add_node("D".to_string()).unwrap();
749
750 let sequential = concurrent.into_sequential();
752 assert_eq!(sequential.node_count(), 4); }
754
755 #[test]
756 fn test_scc_estimation() {
757 let mut graph = DependencyGraph::new();
758
759 graph.add_edge("A".to_string(), "B".to_string()).unwrap();
761 graph.add_edge("B".to_string(), "A".to_string()).unwrap();
762 graph.add_edge("C".to_string(), "D".to_string()).unwrap();
763 graph.add_node("E".to_string()).unwrap(); let scc_count = graph.estimate_scc_count();
766
767 assert!(scc_count >= 3); }
770
771 #[test]
772 fn test_nodes_by_language() {
773 let mut graph = DependencyGraph::new();
774
775 graph.add_node("main.py".to_string()).unwrap();
776 graph.add_node("utils.py".to_string()).unwrap();
777 graph.add_node("app.js".to_string()).unwrap();
778 graph.add_node("lib.rs".to_string()).unwrap();
779
780 let python_nodes = graph.nodes_by_language("python");
781 let js_nodes = graph.nodes_by_language("javascript");
782 let rust_nodes = graph.nodes_by_language("rust");
783
784 assert_eq!(python_nodes.len(), 2);
785 assert_eq!(js_nodes.len(), 1);
786 assert_eq!(rust_nodes.len(), 1);
787
788 assert!(python_nodes.contains(&&"main.py".to_string()));
789 assert!(python_nodes.contains(&&"utils.py".to_string()));
790 }
791}