scribe_graph/
graph.rs

1//! # Graph Data Structures for PageRank Centrality
2//!
3//! Efficient graph representation optimized for dependency analysis and PageRank computation.
4//! This module implements the core graph structures used in the PageRank centrality algorithm
5//! with emphasis on reverse edges for code dependency analysis.
6//!
7//! ## Design Philosophy
8//! - **Forward edges**: A imports B (A -> B)  
9//! - **Reverse edges**: B is imported by A (B <- A)
10//! - **PageRank flows along reverse edges** (importance flows to imported files)
11//! - **Memory-efficient adjacency list representation** for large graphs (10k+ nodes)
12//! - **Fast degree queries** and statistics calculation
13
14use std::collections::{HashMap, HashSet, BTreeSet};
15use serde::{Deserialize, Serialize};
16use indexmap::IndexMap;
17use dashmap::DashMap;
18use parking_lot::RwLock;
19use scribe_core::{Result, error::ScribeError};
20
21/// Node identifier type for the dependency graph
22pub type NodeId = String;
23
24/// Edge weight type (unused in unweighted PageRank, but reserved for extensions)
25pub type EdgeWeight = f64;
26
27/// Efficient dependency graph representation optimized for PageRank computation
28#[derive(Debug, Clone)]
29pub struct DependencyGraph {
30    /// Forward adjacency list: file -> files it imports
31    forward_edges: IndexMap<NodeId, BTreeSet<NodeId>>,
32    
33    /// Reverse adjacency list: file -> files that import it (for PageRank)
34    reverse_edges: IndexMap<NodeId, BTreeSet<NodeId>>,
35    
36    /// All nodes in the graph (includes isolated nodes)
37    nodes: BTreeSet<NodeId>,
38    
39    /// Node metadata cache
40    node_cache: HashMap<NodeId, NodeMetadata>,
41    
42    /// Graph statistics cache (invalidated on mutations)
43    stats_cache: Option<GraphStatistics>,
44}
45
46/// Metadata associated with each node in the graph
47#[derive(Debug, Clone, PartialEq)]
48pub struct NodeMetadata {
49    /// File path of the node
50    pub file_path: String,
51    /// Programming language detected
52    pub language: Option<String>,
53    /// Whether this is an entrypoint file
54    pub is_entrypoint: bool,
55    /// Whether this is a test file
56    pub is_test: bool,
57    /// File size in bytes (for statistics)
58    pub size_bytes: u64,
59}
60
61impl NodeMetadata {
62    /// Create new node metadata
63    pub fn new(file_path: String) -> Self {
64        let language = detect_language_from_extension(&file_path);
65        let is_entrypoint = is_entrypoint_file(&file_path);
66        let is_test = is_test_file(&file_path);
67        
68        Self {
69            file_path,
70            language,
71            is_entrypoint,
72            is_test,
73            size_bytes: 0,
74        }
75    }
76    
77    /// Create with size information
78    pub fn with_size(mut self, size_bytes: u64) -> Self {
79        self.size_bytes = size_bytes;
80        self
81    }
82}
83
84/// Graph construction and manipulation operations
85impl DependencyGraph {
86    /// Create a new empty dependency graph
87    pub fn new() -> Self {
88        Self {
89            forward_edges: IndexMap::new(),
90            reverse_edges: IndexMap::new(),
91            nodes: BTreeSet::new(),
92            node_cache: HashMap::new(),
93            stats_cache: None,
94        }
95    }
96    
97    /// Create with initial capacity hint for performance optimization
98    pub fn with_capacity(capacity: usize) -> Self {
99        Self {
100            forward_edges: IndexMap::with_capacity(capacity),
101            reverse_edges: IndexMap::with_capacity(capacity),
102            nodes: BTreeSet::new(),
103            node_cache: HashMap::with_capacity(capacity),
104            stats_cache: None,
105        }
106    }
107    
108    /// Add a node to the graph (can exist without edges)
109    pub fn add_node(&mut self, node_id: NodeId) -> Result<()> {
110        self.nodes.insert(node_id.clone());
111        
112        // Initialize empty adjacency lists if not present
113        self.forward_edges.entry(node_id.clone()).or_insert_with(BTreeSet::new);
114        self.reverse_edges.entry(node_id.clone()).or_insert_with(BTreeSet::new);
115        
116        // Add default metadata if not present
117        if !self.node_cache.contains_key(&node_id) {
118            self.node_cache.insert(node_id.clone(), NodeMetadata::new(node_id));
119        }
120        
121        // Invalidate cache
122        self.stats_cache = None;
123        
124        Ok(())
125    }
126    
127    /// Add a node with metadata
128    pub fn add_node_with_metadata(&mut self, node_id: NodeId, metadata: NodeMetadata) -> Result<()> {
129        self.add_node(node_id.clone())?;
130        self.node_cache.insert(node_id, metadata);
131        Ok(())
132    }
133    
134    /// Add an import edge: from_file imports to_file
135    pub fn add_edge(&mut self, from_node: NodeId, to_node: NodeId) -> Result<()> {
136        // Ensure both nodes exist
137        self.add_node(from_node.clone())?;
138        self.add_node(to_node.clone())?;
139        
140        // Add forward edge: from_node -> to_node
141        if let Some(forward_set) = self.forward_edges.get_mut(&from_node) {
142            forward_set.insert(to_node.clone());
143        }
144        
145        // Add reverse edge: to_node <- from_node
146        if let Some(reverse_set) = self.reverse_edges.get_mut(&to_node) {
147            reverse_set.insert(from_node);
148        }
149        
150        // Invalidate cache
151        self.stats_cache = None;
152        
153        Ok(())
154    }
155    
156    /// Add multiple edges efficiently (batch operation)
157    pub fn add_edges(&mut self, edges: &[(NodeId, NodeId)]) -> Result<()> {
158        for (from_node, to_node) in edges {
159            self.add_edge(from_node.clone(), to_node.clone())?;
160        }
161        Ok(())
162    }
163    
164    /// Remove a node and all its edges
165    pub fn remove_node(&mut self, node_id: &NodeId) -> Result<bool> {
166        if !self.nodes.contains(node_id) {
167            return Ok(false);
168        }
169        
170        // Remove from nodes set
171        self.nodes.remove(node_id);
172        
173        // Get outgoing edges to clean up reverse references
174        if let Some(outgoing) = self.forward_edges.shift_remove(node_id) {
175            for target in &outgoing {
176                if let Some(reverse_set) = self.reverse_edges.get_mut(target) {
177                    reverse_set.remove(node_id);
178                }
179            }
180        }
181        
182        // Get incoming edges to clean up forward references  
183        if let Some(incoming) = self.reverse_edges.shift_remove(node_id) {
184            for source in &incoming {
185                if let Some(forward_set) = self.forward_edges.get_mut(source) {
186                    forward_set.remove(node_id);
187                }
188            }
189        }
190        
191        // Remove metadata
192        self.node_cache.remove(node_id);
193        
194        // Invalidate cache
195        self.stats_cache = None;
196        
197        Ok(true)
198    }
199    
200    /// Remove an edge between two nodes
201    pub fn remove_edge(&mut self, from_node: &NodeId, to_node: &NodeId) -> Result<bool> {
202        let forward_removed = if let Some(forward_set) = self.forward_edges.get_mut(from_node) {
203            forward_set.remove(to_node)
204        } else {
205            false
206        };
207        
208        let reverse_removed = if let Some(reverse_set) = self.reverse_edges.get_mut(to_node) {
209            reverse_set.remove(from_node)
210        } else {
211            false
212        };
213        
214        if forward_removed || reverse_removed {
215            self.stats_cache = None;
216        }
217        
218        Ok(forward_removed || reverse_removed)
219    }
220    
221    /// Check if a node exists in the graph
222    pub fn contains_node(&self, node_id: &NodeId) -> bool {
223        self.nodes.contains(node_id)
224    }
225    
226    /// Check if an edge exists between two nodes
227    pub fn contains_edge(&self, from_node: &NodeId, to_node: &NodeId) -> bool {
228        self.forward_edges
229            .get(from_node)
230            .map_or(false, |edges| edges.contains(to_node))
231    }
232    
233    /// Get the number of nodes in the graph
234    pub fn node_count(&self) -> usize {
235        self.nodes.len()
236    }
237    
238    /// Get the total number of edges in the graph
239    pub fn edge_count(&self) -> usize {
240        self.forward_edges.values().map(|edges| edges.len()).sum()
241    }
242    
243    /// Get all nodes in the graph
244    pub fn nodes(&self) -> impl Iterator<Item = &NodeId> {
245        self.nodes.iter()
246    }
247    
248    /// Get all edges in the graph as (from, to) pairs
249    pub fn edges(&self) -> impl Iterator<Item = (&NodeId, &NodeId)> {
250        self.forward_edges.iter()
251            .flat_map(|(from, targets)| targets.iter().map(move |to| (from, to)))
252    }
253}
254
255/// Degree and neighbor query operations
256impl DependencyGraph {
257    /// Get in-degree of a node (number of files that import this node)
258    pub fn in_degree(&self, node_id: &NodeId) -> usize {
259        self.reverse_edges.get(node_id).map_or(0, |edges| edges.len())
260    }
261    
262    /// Get out-degree of a node (number of files this node imports)
263    pub fn out_degree(&self, node_id: &NodeId) -> usize {
264        self.forward_edges.get(node_id).map_or(0, |edges| edges.len())
265    }
266    
267    /// Get total degree of a node (in + out)
268    pub fn degree(&self, node_id: &NodeId) -> usize {
269        self.in_degree(node_id) + self.out_degree(node_id)
270    }
271    
272    /// Get nodes that this node imports (outgoing edges)
273    pub fn outgoing_neighbors(&self, node_id: &NodeId) -> Option<&BTreeSet<NodeId>> {
274        self.forward_edges.get(node_id)
275    }
276    
277    /// Get nodes that import this node (incoming edges) - important for PageRank
278    pub fn incoming_neighbors(&self, node_id: &NodeId) -> Option<&BTreeSet<NodeId>> {
279        self.reverse_edges.get(node_id)
280    }
281    
282    /// Get both incoming and outgoing neighbors
283    pub fn all_neighbors(&self, node_id: &NodeId) -> HashSet<&NodeId> {
284        let mut neighbors = HashSet::new();
285        
286        if let Some(outgoing) = self.forward_edges.get(node_id) {
287            neighbors.extend(outgoing.iter());
288        }
289        
290        if let Some(incoming) = self.reverse_edges.get(node_id) {
291            neighbors.extend(incoming.iter());
292        }
293        
294        neighbors
295    }
296    
297    /// Get degree information for a node
298    pub fn get_degree_info(&self, node_id: &NodeId) -> Option<DegreeInfo> {
299        if !self.contains_node(node_id) {
300            return None;
301        }
302        
303        Some(DegreeInfo {
304            node_id: node_id.clone(),
305            in_degree: self.in_degree(node_id),
306            out_degree: self.out_degree(node_id),
307            total_degree: self.degree(node_id),
308        })
309    }
310}
311
312/// Node metadata and information queries
313impl DependencyGraph {
314    /// Get metadata for a node
315    pub fn node_metadata(&self, node_id: &NodeId) -> Option<&NodeMetadata> {
316        self.node_cache.get(node_id)
317    }
318    
319    /// Set metadata for a node
320    pub fn set_node_metadata(&mut self, node_id: NodeId, metadata: NodeMetadata) -> Result<()> {
321        if !self.contains_node(&node_id) {
322            return Err(ScribeError::invalid_operation(
323                format!("Node {} does not exist in graph", node_id),
324                "set_node_metadata"
325            ));
326        }
327        
328        self.node_cache.insert(node_id, metadata);
329        Ok(())
330    }
331    
332    /// Get all entrypoint nodes
333    pub fn entrypoint_nodes(&self) -> Vec<&NodeId> {
334        self.node_cache.iter()
335            .filter_map(|(id, meta)| if meta.is_entrypoint { Some(id) } else { None })
336            .collect()
337    }
338    
339    /// Get all test nodes
340    pub fn test_nodes(&self) -> Vec<&NodeId> {
341        self.node_cache.iter()
342            .filter_map(|(id, meta)| if meta.is_test { Some(id) } else { None })
343            .collect()
344    }
345    
346    /// Get nodes by language
347    pub fn nodes_by_language(&self, language: &str) -> Vec<&NodeId> {
348        self.node_cache.iter()
349            .filter_map(|(id, meta)| {
350                if meta.language.as_deref() == Some(language) {
351                    Some(id)
352                } else {
353                    None
354                }
355            })
356            .collect()
357    }
358}
359
360/// Specialized operations for PageRank computation
361impl DependencyGraph {
362    /// Get all nodes with their reverse edge neighbors (for PageRank iteration)
363    pub fn pagerank_iterator(&self) -> impl Iterator<Item = (&NodeId, Option<&BTreeSet<NodeId>>)> {
364        self.nodes.iter().map(move |node| {
365            (node, self.reverse_edges.get(node))
366        })
367    }
368    
369    /// Get dangling nodes (nodes with no outgoing edges)
370    pub fn dangling_nodes(&self) -> Vec<&NodeId> {
371        self.nodes.iter()
372            .filter(|&node| self.out_degree(node) == 0)
373            .collect()
374    }
375    
376    /// Get strongly connected components (simplified estimation for statistics)
377    pub fn estimate_scc_count(&self) -> usize {
378        if self.nodes.is_empty() {
379            return 0;
380        }
381        
382        // Count nodes with both in and out edges (likely in cycles)
383        let potential_scc_nodes = self.nodes.iter()
384            .filter(|&node| self.in_degree(node) > 0 && self.out_degree(node) > 0)
385            .count();
386        
387        // Rough estimate: most SCCs are small, assume average size of 3
388        let estimated_scc = if potential_scc_nodes > 0 {
389            std::cmp::max(1, potential_scc_nodes / 3)
390        } else {
391            0
392        };
393        
394        // Add isolated nodes and simple chains
395        let isolated_nodes = self.nodes.len() - potential_scc_nodes;
396        estimated_scc + isolated_nodes
397    }
398    
399    /// Check if the graph is strongly connected (simplified check)
400    pub fn is_strongly_connected(&self) -> bool {
401        if self.nodes.is_empty() {
402            return true;
403        }
404        
405        // Simplified check: all nodes have both in and out edges
406        self.nodes.iter().all(|node| {
407            self.in_degree(node) > 0 && self.out_degree(node) > 0
408        })
409    }
410}
411
412/// Concurrent graph operations for performance
413impl DependencyGraph {
414    /// Create a thread-safe concurrent graph for parallel operations
415    pub fn into_concurrent(self) -> ConcurrentDependencyGraph {
416        ConcurrentDependencyGraph {
417            forward_edges: DashMap::from_iter(self.forward_edges),
418            reverse_edges: DashMap::from_iter(self.reverse_edges),
419            nodes: RwLock::new(self.nodes),
420            node_cache: DashMap::from_iter(self.node_cache),
421            stats_cache: RwLock::new(self.stats_cache),
422        }
423    }
424}
425
426/// Thread-safe concurrent version of DependencyGraph
427#[derive(Debug)]
428pub struct ConcurrentDependencyGraph {
429    forward_edges: DashMap<NodeId, BTreeSet<NodeId>>,
430    reverse_edges: DashMap<NodeId, BTreeSet<NodeId>>,
431    nodes: RwLock<BTreeSet<NodeId>>,
432    node_cache: DashMap<NodeId, NodeMetadata>,
433    stats_cache: RwLock<Option<GraphStatistics>>,
434}
435
436impl ConcurrentDependencyGraph {
437    /// Add a node concurrently
438    pub fn add_node(&self, node_id: NodeId) -> Result<()> {
439        {
440            let mut nodes = self.nodes.write();
441            nodes.insert(node_id.clone());
442        }
443        
444        self.forward_edges.entry(node_id.clone()).or_insert_with(BTreeSet::new);
445        self.reverse_edges.entry(node_id.clone()).or_insert_with(BTreeSet::new);
446        
447        if !self.node_cache.contains_key(&node_id) {
448            self.node_cache.insert(node_id.clone(), NodeMetadata::new(node_id));
449        }
450        
451        // Invalidate stats cache
452        *self.stats_cache.write() = None;
453        
454        Ok(())
455    }
456    
457    /// Get in-degree concurrently
458    pub fn in_degree(&self, node_id: &NodeId) -> usize {
459        self.reverse_edges.get(node_id).map_or(0, |entry| entry.len())
460    }
461    
462    /// Get out-degree concurrently  
463    pub fn out_degree(&self, node_id: &NodeId) -> usize {
464        self.forward_edges.get(node_id).map_or(0, |entry| entry.len())
465    }
466    
467    /// Convert back to single-threaded graph
468    pub fn into_sequential(self) -> DependencyGraph {
469        let nodes = self.nodes.into_inner();
470        let stats_cache = self.stats_cache.into_inner();
471        
472        DependencyGraph {
473            forward_edges: self.forward_edges.into_iter().collect(),
474            reverse_edges: self.reverse_edges.into_iter().collect(),
475            nodes,
476            node_cache: self.node_cache.into_iter().collect(),
477            stats_cache,
478        }
479    }
480}
481
482/// Degree information for a node
483#[derive(Debug, Clone, PartialEq)]
484pub struct DegreeInfo {
485    pub node_id: NodeId,
486    pub in_degree: usize,
487    pub out_degree: usize,
488    pub total_degree: usize,
489}
490
491/// Graph statistics computed lazily and cached
492#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
493pub struct GraphStatistics {
494    /// Total number of nodes
495    pub total_nodes: usize,
496    /// Total number of edges
497    pub total_edges: usize,
498    /// Average in-degree
499    pub in_degree_avg: f64,
500    /// Maximum in-degree
501    pub in_degree_max: usize,
502    /// Average out-degree
503    pub out_degree_avg: f64,
504    /// Maximum out-degree
505    pub out_degree_max: usize,
506    /// Estimated number of strongly connected components
507    pub strongly_connected_components: usize,
508    /// Graph density (actual_edges / possible_edges)
509    pub graph_density: f64,
510    /// Number of isolated nodes (no edges)
511    pub isolated_nodes: usize,
512    /// Number of dangling nodes (no outgoing edges)
513    pub dangling_nodes: usize,
514}
515
516impl GraphStatistics {
517    /// Create empty statistics
518    pub fn empty() -> Self {
519        Self {
520            total_nodes: 0,
521            total_edges: 0,
522            in_degree_avg: 0.0,
523            in_degree_max: 0,
524            out_degree_avg: 0.0,
525            out_degree_max: 0,
526            strongly_connected_components: 0,
527            graph_density: 0.0,
528            isolated_nodes: 0,
529            dangling_nodes: 0,
530        }
531    }
532}
533
534/// Utility functions for node classification
535fn detect_language_from_extension(file_path: &str) -> Option<String> {
536    let ext = std::path::Path::new(file_path)
537        .extension()
538        .and_then(|s| s.to_str())
539        .map(|s| s.to_lowercase())?;
540    
541    match ext.as_str() {
542        "py" => Some("python".to_string()),
543        "js" | "jsx" | "mjs" => Some("javascript".to_string()),
544        "ts" | "tsx" => Some("typescript".to_string()),
545        "rs" => Some("rust".to_string()),
546        "go" => Some("go".to_string()),
547        "java" | "kt" => Some("java".to_string()),
548        "cpp" | "cc" | "cxx" | "hpp" | "h" => Some("cpp".to_string()),
549        "c" => Some("c".to_string()),
550        "rb" => Some("ruby".to_string()),
551        "php" => Some("php".to_string()),
552        "cs" => Some("csharp".to_string()),
553        "swift" => Some("swift".to_string()),
554        _ => None,
555    }
556}
557
558fn is_entrypoint_file(file_path: &str) -> bool {
559    let file_name = std::path::Path::new(file_path)
560        .file_name()
561        .and_then(|s| s.to_str())
562        .unwrap_or("")
563        .to_lowercase();
564    
565    matches!(file_name.as_str(), "main.py" | "main.rs" | "main.go" | "main.js" | "main.ts" | 
566                                 "index.py" | "index.rs" | "index.go" | "index.js" | "index.ts" |
567                                 "app.py" | "app.rs" | "app.go" | "app.js" | "app.ts" |
568                                 "server.py" | "server.rs" | "server.go" | "server.js" | "server.ts" |
569                                 "lib.rs" | "__init__.py")
570}
571
572fn is_test_file(file_path: &str) -> bool {
573    let path_lower = file_path.to_lowercase();
574    path_lower.contains("test") || 
575    path_lower.contains("spec") ||
576    path_lower.contains("__tests__") ||
577    path_lower.ends_with("_test.py") ||
578    path_lower.ends_with("_test.rs") ||
579    path_lower.ends_with("_test.go") ||
580    path_lower.ends_with(".test.js") ||
581    path_lower.ends_with(".test.ts") ||
582    path_lower.ends_with("_spec.rb")
583}
584
585impl Default for DependencyGraph {
586    fn default() -> Self {
587        Self::new()
588    }
589}
590
591#[cfg(test)]
592mod tests {
593    use super::*;
594    
595    #[test]
596    fn test_graph_creation() {
597        let graph = DependencyGraph::new();
598        assert_eq!(graph.node_count(), 0);
599        assert_eq!(graph.edge_count(), 0);
600    }
601    
602    #[test]
603    fn test_node_operations() {
604        let mut graph = DependencyGraph::new();
605        
606        // Add nodes
607        graph.add_node("main.py".to_string()).unwrap();
608        graph.add_node("utils.py".to_string()).unwrap();
609        
610        assert_eq!(graph.node_count(), 2);
611        assert!(graph.contains_node(&"main.py".to_string()));
612        assert!(graph.contains_node(&"utils.py".to_string()));
613        
614        // Remove node
615        let removed = graph.remove_node(&"utils.py".to_string()).unwrap();
616        assert!(removed);
617        assert_eq!(graph.node_count(), 1);
618        assert!(!graph.contains_node(&"utils.py".to_string()));
619    }
620    
621    #[test]
622    fn test_edge_operations() {
623        let mut graph = DependencyGraph::new();
624        
625        // Add edge (automatically creates nodes)
626        graph.add_edge("main.py".to_string(), "utils.py".to_string()).unwrap();
627        
628        assert_eq!(graph.node_count(), 2);
629        assert_eq!(graph.edge_count(), 1);
630        assert!(graph.contains_edge(&"main.py".to_string(), &"utils.py".to_string()));
631        
632        // Check degrees
633        assert_eq!(graph.out_degree(&"main.py".to_string()), 1);
634        assert_eq!(graph.in_degree(&"utils.py".to_string()), 1);
635        assert_eq!(graph.in_degree(&"main.py".to_string()), 0);
636        assert_eq!(graph.out_degree(&"utils.py".to_string()), 0);
637    }
638    
639    #[test]
640    fn test_multiple_edges() {
641        let mut graph = DependencyGraph::new();
642        
643        let edges = vec![
644            ("main.py".to_string(), "utils.py".to_string()),
645            ("main.py".to_string(), "config.py".to_string()),
646            ("utils.py".to_string(), "config.py".to_string()),
647        ];
648        
649        graph.add_edges(&edges).unwrap();
650        
651        assert_eq!(graph.node_count(), 3);
652        assert_eq!(graph.edge_count(), 3);
653        
654        // main.py should have out-degree 2
655        assert_eq!(graph.out_degree(&"main.py".to_string()), 2);
656        
657        // config.py should have in-degree 2
658        assert_eq!(graph.in_degree(&"config.py".to_string()), 2);
659    }
660    
661    #[test]
662    fn test_node_metadata() {
663        let mut graph = DependencyGraph::new();
664        
665        let metadata = NodeMetadata::new("main.py".to_string()).with_size(1024);
666        graph.add_node_with_metadata("main.py".to_string(), metadata).unwrap();
667        
668        let retrieved = graph.node_metadata(&"main.py".to_string()).unwrap();
669        assert_eq!(retrieved.file_path, "main.py");
670        assert_eq!(retrieved.language, Some("python".to_string()));
671        assert!(retrieved.is_entrypoint);
672        assert!(!retrieved.is_test);
673        assert_eq!(retrieved.size_bytes, 1024);
674    }
675    
676    #[test]
677    fn test_language_detection() {
678        assert_eq!(detect_language_from_extension("main.py"), Some("python".to_string()));
679        assert_eq!(detect_language_from_extension("app.js"), Some("javascript".to_string()));
680        assert_eq!(detect_language_from_extension("lib.rs"), Some("rust".to_string()));
681        assert_eq!(detect_language_from_extension("server.go"), Some("go".to_string()));
682        assert_eq!(detect_language_from_extension("component.tsx"), Some("typescript".to_string()));
683        assert_eq!(detect_language_from_extension("unknown.xyz"), None);
684    }
685    
686    #[test]
687    fn test_file_classification() {
688        assert!(is_entrypoint_file("main.py"));
689        assert!(is_entrypoint_file("index.js"));
690        assert!(is_entrypoint_file("lib.rs"));
691        assert!(!is_entrypoint_file("utils.py"));
692        
693        assert!(is_test_file("test_utils.py"));
694        assert!(is_test_file("utils.test.js"));
695        assert!(is_test_file("integration_test.rs"));
696        assert!(!is_test_file("utils.py"));
697    }
698    
699    #[test]
700    fn test_pagerank_iterator() {
701        let mut graph = DependencyGraph::new();
702        
703        // Build a small graph: A -> B -> C, C -> A (creates cycle)
704        graph.add_edge("A".to_string(), "B".to_string()).unwrap();
705        graph.add_edge("B".to_string(), "C".to_string()).unwrap();
706        graph.add_edge("C".to_string(), "A".to_string()).unwrap();
707        
708        let pagerank_data: Vec<_> = graph.pagerank_iterator().collect();
709        assert_eq!(pagerank_data.len(), 3);
710        
711        // Each node should have incoming edges (reverse edges)
712        for (node, reverse_edges) in pagerank_data {
713            assert!(reverse_edges.is_some());
714            assert!(!reverse_edges.unwrap().is_empty());
715        }
716    }
717    
718    #[test]
719    fn test_dangling_nodes() {
720        let mut graph = DependencyGraph::new();
721        
722        // A -> B, C is isolated, D has no outgoing edges
723        graph.add_edge("A".to_string(), "B".to_string()).unwrap();
724        graph.add_node("C".to_string()).unwrap();
725        graph.add_edge("B".to_string(), "D".to_string()).unwrap();
726        
727        let dangling = graph.dangling_nodes();
728        
729        // C and D should be dangling (no outgoing edges)
730        assert_eq!(dangling.len(), 2);
731        assert!(dangling.contains(&&"C".to_string()));
732        assert!(dangling.contains(&&"D".to_string()));
733    }
734    
735    #[test]
736    fn test_concurrent_graph() {
737        let mut graph = DependencyGraph::new();
738        graph.add_edge("A".to_string(), "B".to_string()).unwrap();
739        graph.add_edge("B".to_string(), "C".to_string()).unwrap();
740        
741        let concurrent = graph.into_concurrent();
742        
743        // Test concurrent operations
744        assert_eq!(concurrent.in_degree(&"B".to_string()), 1);
745        assert_eq!(concurrent.out_degree(&"B".to_string()), 1);
746        
747        // Add node concurrently
748        concurrent.add_node("D".to_string()).unwrap();
749        
750        // Convert back to sequential
751        let sequential = concurrent.into_sequential();
752        assert_eq!(sequential.node_count(), 4); // A, B, C, D
753    }
754    
755    #[test]
756    fn test_scc_estimation() {
757        let mut graph = DependencyGraph::new();
758        
759        // Create a graph with potential cycles: A <-> B, C -> D
760        graph.add_edge("A".to_string(), "B".to_string()).unwrap();
761        graph.add_edge("B".to_string(), "A".to_string()).unwrap();
762        graph.add_edge("C".to_string(), "D".to_string()).unwrap();
763        graph.add_node("E".to_string()).unwrap(); // Isolated
764        
765        let scc_count = graph.estimate_scc_count();
766        
767        // Should estimate: 1 SCC (A,B have both in/out), plus isolated/chain nodes (C,D,E)
768        assert!(scc_count >= 3); // At least C, D, E as separate components
769    }
770    
771    #[test]
772    fn test_nodes_by_language() {
773        let mut graph = DependencyGraph::new();
774        
775        graph.add_node("main.py".to_string()).unwrap();
776        graph.add_node("utils.py".to_string()).unwrap();
777        graph.add_node("app.js".to_string()).unwrap();
778        graph.add_node("lib.rs".to_string()).unwrap();
779        
780        let python_nodes = graph.nodes_by_language("python");
781        let js_nodes = graph.nodes_by_language("javascript");
782        let rust_nodes = graph.nodes_by_language("rust");
783        
784        assert_eq!(python_nodes.len(), 2);
785        assert_eq!(js_nodes.len(), 1);
786        assert_eq!(rust_nodes.len(), 1);
787        
788        assert!(python_nodes.contains(&&"main.py".to_string()));
789        assert!(python_nodes.contains(&&"utils.py".to_string()));
790    }
791}