scribe_graph/
graph.rs

1//! # Graph Data Structures for PageRank Centrality
2//!
3//! Efficient graph representation optimized for dependency analysis and PageRank computation.
4//! This module implements the core graph structures used in the PageRank centrality algorithm
5//! with emphasis on reverse edges for code dependency analysis.
6//!
7//! ## Design Philosophy
8//! - **Forward edges**: A imports B (A -> B)  
9//! - **Reverse edges**: B is imported by A (B <- A)
10//! - **PageRank flows along reverse edges** (importance flows to imported files)
11//! - **Memory-efficient adjacency list representation** for large graphs (10k+ nodes)
12//! - **Fast degree queries** and statistics calculation
13
14use std::collections::{HashMap, HashSet};
15use serde::{Deserialize, Serialize};
16use dashmap::DashMap;
17use parking_lot::RwLock;
18use scribe_core::{Result, error::ScribeError};
19
20/// Internal node identifier type for efficient graph operations (usize for array indexing)
21pub type InternalNodeId = usize;
22
23/// External node identifier type for the dependency graph (file paths)
24pub type NodeId = String;
25
26/// Edge weight type (unused in unweighted PageRank, but reserved for extensions)
27pub type EdgeWeight = f64;
28
29/// Efficient dependency graph representation optimized for PageRank computation
30/// Uses integer-based internal representation for massive performance improvements
31#[derive(Debug, Clone)]
32pub struct DependencyGraph {
33    /// Forward adjacency list: internal_id -> set of internal_ids it imports
34    forward_edges: Vec<HashSet<InternalNodeId>>,
35    
36    /// Reverse adjacency list: internal_id -> set of internal_ids that import it (for PageRank)
37    reverse_edges: Vec<HashSet<InternalNodeId>>,
38    
39    /// Mapping from file path to internal node ID
40    path_to_id: HashMap<NodeId, InternalNodeId>,
41    
42    /// Mapping from internal node ID to file path
43    id_to_path: Vec<NodeId>,
44    
45    /// Node metadata cache (indexed by internal ID)
46    node_metadata: Vec<Option<NodeMetadata>>,
47    
48    /// Graph statistics cache (invalidated on mutations)
49    stats_cache: Option<GraphStatistics>,
50    
51    /// Next available internal node ID
52    next_id: InternalNodeId,
53}
54
55/// Metadata associated with each node in the graph
56#[derive(Debug, Clone, PartialEq)]
57pub struct NodeMetadata {
58    /// File path of the node
59    pub file_path: String,
60    /// Programming language detected
61    pub language: Option<String>,
62    /// Whether this is an entrypoint file
63    pub is_entrypoint: bool,
64    /// Whether this is a test file
65    pub is_test: bool,
66    /// File size in bytes (for statistics)
67    pub size_bytes: u64,
68}
69
70impl NodeMetadata {
71    /// Create new node metadata
72    pub fn new(file_path: String) -> Self {
73        let language = detect_language_from_extension(&file_path);
74        let is_entrypoint = is_entrypoint_file(&file_path);
75        let is_test = is_test_file(&file_path);
76        
77        Self {
78            file_path,
79            language,
80            is_entrypoint,
81            is_test,
82            size_bytes: 0,
83        }
84    }
85    
86    /// Create with size information
87    pub fn with_size(mut self, size_bytes: u64) -> Self {
88        self.size_bytes = size_bytes;
89        self
90    }
91}
92
93/// Graph construction and manipulation operations
94impl DependencyGraph {
95    /// Create a new empty dependency graph
96    pub fn new() -> Self {
97        Self {
98            forward_edges: Vec::new(),
99            reverse_edges: Vec::new(),
100            path_to_id: HashMap::new(),
101            id_to_path: Vec::new(),
102            node_metadata: Vec::new(),
103            stats_cache: None,
104            next_id: 0,
105        }
106    }
107    
108    /// Create with initial capacity hint for performance optimization
109    pub fn with_capacity(capacity: usize) -> Self {
110        Self {
111            forward_edges: Vec::with_capacity(capacity),
112            reverse_edges: Vec::with_capacity(capacity),
113            path_to_id: HashMap::with_capacity(capacity),
114            id_to_path: Vec::with_capacity(capacity),
115            node_metadata: Vec::with_capacity(capacity),
116            stats_cache: None,
117            next_id: 0,
118        }
119    }
120    
121    /// Add a node to the graph (can exist without edges)
122    pub fn add_node(&mut self, node_id: NodeId) -> Result<InternalNodeId> {
123        // Check if node already exists
124        if let Some(&existing_id) = self.path_to_id.get(&node_id) {
125            return Ok(existing_id);
126        }
127        
128        let internal_id = self.next_id;
129        self.next_id += 1;
130        
131        // Add to mappings
132        self.path_to_id.insert(node_id.clone(), internal_id);
133        self.id_to_path.push(node_id.clone());
134        
135        // Initialize empty adjacency lists
136        self.forward_edges.push(HashSet::new());
137        self.reverse_edges.push(HashSet::new());
138        
139        // Add default metadata
140        self.node_metadata.push(Some(NodeMetadata::new(node_id)));
141        
142        // Invalidate cache
143        self.stats_cache = None;
144        
145        Ok(internal_id)
146    }
147    
148    /// Add a node with metadata
149    pub fn add_node_with_metadata(&mut self, node_id: NodeId, metadata: NodeMetadata) -> Result<InternalNodeId> {
150        let internal_id = self.add_node(node_id)?;
151        self.node_metadata[internal_id] = Some(metadata);
152        Ok(internal_id)
153    }
154    
155    /// Add an import edge: from_file imports to_file
156    pub fn add_edge(&mut self, from_node: NodeId, to_node: NodeId) -> Result<()> {
157        // Ensure both nodes exist and get their internal IDs
158        let from_id = self.add_node(from_node)?;
159        let to_id = self.add_node(to_node)?;
160        
161        // Add forward edge: from_id -> to_id
162        self.forward_edges[from_id].insert(to_id);
163        
164        // Add reverse edge: to_id <- from_id
165        self.reverse_edges[to_id].insert(from_id);
166        
167        // Invalidate cache
168        self.stats_cache = None;
169        
170        Ok(())
171    }
172    
173    /// Add multiple edges efficiently (batch operation)
174    pub fn add_edges(&mut self, edges: &[(NodeId, NodeId)]) -> Result<()> {
175        for (from_node, to_node) in edges {
176            self.add_edge(from_node.clone(), to_node.clone())?;
177        }
178        Ok(())
179    }
180    
181    /// Remove a node and all its edges
182    pub fn remove_node(&mut self, node_id: &NodeId) -> Result<bool> {
183        let internal_id = match self.path_to_id.get(node_id) {
184            Some(&id) => id,
185            None => return Ok(false),
186        };
187        
188        // Get outgoing edges to clean up reverse references
189        let outgoing = self.forward_edges[internal_id].clone();
190        for target_id in &outgoing {
191            self.reverse_edges[*target_id].remove(&internal_id);
192        }
193        
194        // Get incoming edges to clean up forward references  
195        let incoming = self.reverse_edges[internal_id].clone();
196        for source_id in &incoming {
197            self.forward_edges[*source_id].remove(&internal_id);
198        }
199        
200        // Clear the adjacency lists for this node
201        self.forward_edges[internal_id].clear();
202        self.reverse_edges[internal_id].clear();
203        
204        // Remove metadata
205        self.node_metadata[internal_id] = None;
206        
207        // Remove from path mapping (but keep internal_id for consistency)
208        self.path_to_id.remove(node_id);
209        
210        // Note: We don't remove from id_to_path to maintain index consistency
211        // Instead, we'll need to handle None cases when iterating
212        
213        // Invalidate cache
214        self.stats_cache = None;
215        
216        Ok(true)
217    }
218    
219    /// Remove an edge between two nodes
220    pub fn remove_edge(&mut self, from_node: &NodeId, to_node: &NodeId) -> Result<bool> {
221        let from_id = match self.path_to_id.get(from_node) {
222            Some(&id) => id,
223            None => return Ok(false),
224        };
225        
226        let to_id = match self.path_to_id.get(to_node) {
227            Some(&id) => id,
228            None => return Ok(false),
229        };
230        
231        let forward_removed = self.forward_edges[from_id].remove(&to_id);
232        let reverse_removed = self.reverse_edges[to_id].remove(&from_id);
233        
234        if forward_removed || reverse_removed {
235            self.stats_cache = None;
236        }
237        
238        Ok(forward_removed || reverse_removed)
239    }
240    
241    /// Check if a node exists in the graph
242    pub fn contains_node(&self, node_id: &NodeId) -> bool {
243        self.path_to_id.contains_key(node_id)
244    }
245    
246    /// Check if an edge exists between two nodes
247    pub fn contains_edge(&self, from_node: &NodeId, to_node: &NodeId) -> bool {
248        match (self.path_to_id.get(from_node), self.path_to_id.get(to_node)) {
249            (Some(&from_id), Some(&to_id)) => {
250                self.forward_edges[from_id].contains(&to_id)
251            }
252            _ => false,
253        }
254    }
255    
256    /// Get the number of nodes in the graph
257    pub fn node_count(&self) -> usize {
258        self.path_to_id.len()
259    }
260    
261    /// Get the total number of edges in the graph
262    pub fn edge_count(&self) -> usize {
263        self.forward_edges.iter().map(|edges| edges.len()).sum()
264    }
265    
266    /// Get all nodes in the graph
267    pub fn nodes(&self) -> impl Iterator<Item = &NodeId> {
268        self.path_to_id.keys()
269    }
270    
271    /// Get all edges in the graph as (from, to) pairs
272    pub fn edges(&self) -> impl Iterator<Item = (String, String)> + '_ {
273        self.forward_edges.iter().enumerate()
274            .flat_map(move |(from_id, targets)| {
275                let from_path = self.id_to_path[from_id].clone();
276                targets.iter().map(move |&to_id| {
277                    let to_path = self.id_to_path[to_id].clone();
278                    (from_path.clone(), to_path)
279                })
280            })
281    }
282}
283
284/// Degree and neighbor query operations
285impl DependencyGraph {
286    /// Get in-degree of a node (number of files that import this node)
287    pub fn in_degree(&self, node_id: &NodeId) -> usize {
288        match self.path_to_id.get(node_id) {
289            Some(&internal_id) => self.reverse_edges[internal_id].len(),
290            None => 0,
291        }
292    }
293    
294    /// Get out-degree of a node (number of files this node imports)
295    pub fn out_degree(&self, node_id: &NodeId) -> usize {
296        match self.path_to_id.get(node_id) {
297            Some(&internal_id) => self.forward_edges[internal_id].len(),
298            None => 0,
299        }
300    }
301    
302    /// Get total degree of a node (in + out)
303    pub fn degree(&self, node_id: &NodeId) -> usize {
304        self.in_degree(node_id) + self.out_degree(node_id)
305    }
306    
307    /// Get nodes that this node imports (outgoing edges)
308    pub fn outgoing_neighbors(&self, node_id: &NodeId) -> Option<Vec<&NodeId>> {
309        match self.path_to_id.get(node_id) {
310            Some(&internal_id) => {
311                let neighbors: Vec<&NodeId> = self.forward_edges[internal_id]
312                    .iter()
313                    .map(|&target_id| &self.id_to_path[target_id])
314                    .collect();
315                Some(neighbors)
316            }
317            None => None,
318        }
319    }
320    
321    /// Get nodes that import this node (incoming edges) - important for PageRank
322    pub fn incoming_neighbors(&self, node_id: &NodeId) -> Option<Vec<&NodeId>> {
323        match self.path_to_id.get(node_id) {
324            Some(&internal_id) => {
325                let neighbors: Vec<&NodeId> = self.reverse_edges[internal_id]
326                    .iter()
327                    .map(|&source_id| &self.id_to_path[source_id])
328                    .collect();
329                Some(neighbors)
330            }
331            None => None,
332        }
333    }
334    
335    /// Get both incoming and outgoing neighbors
336    pub fn all_neighbors(&self, node_id: &NodeId) -> HashSet<&NodeId> {
337        let mut neighbors = HashSet::new();
338        
339        if let Some(&internal_id) = self.path_to_id.get(node_id) {
340            // Add outgoing neighbors
341            for &target_id in &self.forward_edges[internal_id] {
342                neighbors.insert(&self.id_to_path[target_id]);
343            }
344            
345            // Add incoming neighbors
346            for &source_id in &self.reverse_edges[internal_id] {
347                neighbors.insert(&self.id_to_path[source_id]);
348            }
349        }
350        
351        neighbors
352    }
353    
354    /// Get degree information for a node
355    pub fn get_degree_info(&self, node_id: &NodeId) -> Option<DegreeInfo> {
356        if !self.contains_node(node_id) {
357            return None;
358        }
359        
360        Some(DegreeInfo {
361            node_id: node_id.clone(),
362            in_degree: self.in_degree(node_id),
363            out_degree: self.out_degree(node_id),
364            total_degree: self.degree(node_id),
365        })
366    }
367    
368    /// Internal API: Get internal node ID for path (for PageRank optimization)
369    pub(crate) fn get_internal_id(&self, node_id: &NodeId) -> Option<InternalNodeId> {
370        self.path_to_id.get(node_id).copied()
371    }
372    
373    /// Internal API: Get path for internal ID
374    pub(crate) fn get_path(&self, internal_id: InternalNodeId) -> Option<&NodeId> {
375        self.id_to_path.get(internal_id)
376    }
377    
378    /// Internal API: Get incoming neighbors by internal ID (for PageRank)
379    pub(crate) fn incoming_neighbors_by_id(&self, internal_id: InternalNodeId) -> Option<&HashSet<InternalNodeId>> {
380        self.reverse_edges.get(internal_id)
381    }
382    
383    /// Internal API: Get out-degree by internal ID (for PageRank)
384    pub(crate) fn out_degree_by_id(&self, internal_id: InternalNodeId) -> usize {
385        self.forward_edges.get(internal_id).map_or(0, |edges| edges.len())
386    }
387    
388    /// Internal API: Get total number of active nodes (for PageRank)
389    pub(crate) fn internal_node_count(&self) -> usize {
390        self.path_to_id.len()
391    }
392    
393    /// Internal API: Iterator over all internal node IDs with their paths
394    pub(crate) fn internal_nodes(&self) -> impl Iterator<Item = (InternalNodeId, &NodeId)> {
395        self.path_to_id.iter().map(|(path, &id)| (id, path))
396    }
397}
398
399/// Node metadata and information queries
400impl DependencyGraph {
401    /// Get metadata for a node
402    pub fn node_metadata(&self, node_id: &NodeId) -> Option<&NodeMetadata> {
403        match self.path_to_id.get(node_id) {
404            Some(&internal_id) => self.node_metadata[internal_id].as_ref(),
405            None => None,
406        }
407    }
408    
409    /// Set metadata for a node
410    pub fn set_node_metadata(&mut self, node_id: NodeId, metadata: NodeMetadata) -> Result<()> {
411        match self.path_to_id.get(&node_id) {
412            Some(&internal_id) => {
413                self.node_metadata[internal_id] = Some(metadata);
414                Ok(())
415            }
416            None => Err(ScribeError::invalid_operation(
417                format!("Node {} does not exist in graph", node_id),
418                "set_node_metadata".to_string()
419            )),
420        }
421    }
422    
423    /// Get all entrypoint nodes
424    pub fn entrypoint_nodes(&self) -> Vec<&NodeId> {
425        self.node_metadata.iter().enumerate()
426            .filter_map(|(internal_id, meta_opt)| {
427                if let Some(meta) = meta_opt {
428                    if meta.is_entrypoint {
429                        return Some(&self.id_to_path[internal_id]);
430                    }
431                }
432                None
433            })
434            .collect()
435    }
436    
437    /// Get all test nodes
438    pub fn test_nodes(&self) -> Vec<&NodeId> {
439        self.node_metadata.iter().enumerate()
440            .filter_map(|(internal_id, meta_opt)| {
441                if let Some(meta) = meta_opt {
442                    if meta.is_test {
443                        return Some(&self.id_to_path[internal_id]);
444                    }
445                }
446                None
447            })
448            .collect()
449    }
450    
451    /// Get nodes by language
452    pub fn nodes_by_language(&self, language: &str) -> Vec<&NodeId> {
453        self.node_metadata.iter().enumerate()
454            .filter_map(|(internal_id, meta_opt)| {
455                if let Some(meta) = meta_opt {
456                    if meta.language.as_deref() == Some(language) {
457                        return Some(&self.id_to_path[internal_id]);
458                    }
459                }
460                None
461            })
462            .collect()
463    }
464}
465
466/// Specialized operations for PageRank computation
467impl DependencyGraph {
468    /// Get all nodes with their reverse edge neighbors (for PageRank iteration)
469    pub fn pagerank_iterator(&self) -> impl Iterator<Item = (&NodeId, Option<Vec<&NodeId>>)> + '_ {
470        self.path_to_id.iter().map(|(node_path, &internal_id)| {
471            let incoming: Option<Vec<&NodeId>> = if !self.reverse_edges[internal_id].is_empty() {
472                Some(self.reverse_edges[internal_id]
473                    .iter()
474                    .map(|&source_id| &self.id_to_path[source_id])
475                    .collect())
476            } else {
477                Some(Vec::new())
478            };
479            (node_path, incoming)
480        })
481    }
482    
483    /// Get dangling nodes (nodes with no outgoing edges)
484    pub fn dangling_nodes(&self) -> Vec<&NodeId> {
485        self.path_to_id.iter()
486            .filter(|(_, &internal_id)| self.forward_edges[internal_id].is_empty())
487            .map(|(node_path, _)| node_path)
488            .collect()
489    }
490    
491    /// Get strongly connected components (simplified estimation for statistics)
492    pub fn estimate_scc_count(&self) -> usize {
493        if self.path_to_id.is_empty() {
494            return 0;
495        }
496        
497        // Count nodes with both in and out edges (likely in cycles)
498        let potential_scc_nodes = self.path_to_id.iter()
499            .filter(|(_, &internal_id)| {
500                !self.reverse_edges[internal_id].is_empty() && 
501                !self.forward_edges[internal_id].is_empty()
502            })
503            .count();
504        
505        // Rough estimate: most SCCs are small, assume average size of 3
506        let estimated_scc = if potential_scc_nodes > 0 {
507            std::cmp::max(1, potential_scc_nodes / 3)
508        } else {
509            0
510        };
511        
512        // Add isolated nodes and simple chains
513        let isolated_nodes = self.path_to_id.len() - potential_scc_nodes;
514        estimated_scc + isolated_nodes
515    }
516    
517    /// Check if the graph is strongly connected (simplified check)
518    pub fn is_strongly_connected(&self) -> bool {
519        if self.path_to_id.is_empty() {
520            return true;
521        }
522        
523        // Simplified check: all nodes have both in and out edges
524        self.path_to_id.iter().all(|(_, &internal_id)| {
525            !self.reverse_edges[internal_id].is_empty() && 
526            !self.forward_edges[internal_id].is_empty()
527        })
528    }
529}
530
531/// Concurrent graph operations for performance
532impl DependencyGraph {
533    /// Create a thread-safe concurrent graph for parallel operations
534    pub fn into_concurrent(self) -> ConcurrentDependencyGraph {
535        // Convert Vec<HashSet> to DashMap representation for concurrency
536        let forward_edges = DashMap::new();
537        let reverse_edges = DashMap::new();
538        
539        for (internal_id, edge_set) in self.forward_edges.into_iter().enumerate() {
540            forward_edges.insert(internal_id, edge_set);
541        }
542        
543        for (internal_id, edge_set) in self.reverse_edges.into_iter().enumerate() {
544            reverse_edges.insert(internal_id, edge_set);
545        }
546        
547        ConcurrentDependencyGraph {
548            forward_edges,
549            reverse_edges,
550            path_to_id: DashMap::from_iter(self.path_to_id),
551            id_to_path: RwLock::new(self.id_to_path),
552            node_metadata: RwLock::new(self.node_metadata),
553            stats_cache: RwLock::new(self.stats_cache),
554            next_id: RwLock::new(self.next_id),
555        }
556    }
557}
558
559/// Thread-safe concurrent version of DependencyGraph
560#[derive(Debug)]
561pub struct ConcurrentDependencyGraph {
562    forward_edges: DashMap<InternalNodeId, HashSet<InternalNodeId>>,
563    reverse_edges: DashMap<InternalNodeId, HashSet<InternalNodeId>>,
564    path_to_id: DashMap<NodeId, InternalNodeId>,
565    id_to_path: RwLock<Vec<NodeId>>,
566    node_metadata: RwLock<Vec<Option<NodeMetadata>>>,
567    stats_cache: RwLock<Option<GraphStatistics>>,
568    next_id: RwLock<InternalNodeId>,
569}
570
571impl ConcurrentDependencyGraph {
572    /// Add a node concurrently
573    pub fn add_node(&self, node_id: NodeId) -> Result<InternalNodeId> {
574        // Check if node already exists
575        if let Some(existing_id) = self.path_to_id.get(&node_id) {
576            return Ok(*existing_id);
577        }
578        
579        let internal_id = {
580            let mut next_id = self.next_id.write();
581            let id = *next_id;
582            *next_id += 1;
583            id
584        };
585        
586        // Add to mappings
587        self.path_to_id.insert(node_id.clone(), internal_id);
588        {
589            let mut id_to_path = self.id_to_path.write();
590            id_to_path.push(node_id.clone());
591        }
592        
593        // Initialize empty adjacency lists
594        self.forward_edges.insert(internal_id, HashSet::new());
595        self.reverse_edges.insert(internal_id, HashSet::new());
596        
597        // Add default metadata
598        {
599            let mut metadata = self.node_metadata.write();
600            metadata.push(Some(NodeMetadata::new(node_id)));
601        }
602        
603        // Invalidate stats cache
604        *self.stats_cache.write() = None;
605        
606        Ok(internal_id)
607    }
608    
609    /// Get in-degree concurrently
610    pub fn in_degree(&self, node_id: &NodeId) -> usize {
611        match self.path_to_id.get(node_id) {
612            Some(internal_id) => {
613                self.reverse_edges.get(&internal_id).map_or(0, |entry| entry.len())
614            }
615            None => 0,
616        }
617    }
618    
619    /// Get out-degree concurrently  
620    pub fn out_degree(&self, node_id: &NodeId) -> usize {
621        match self.path_to_id.get(node_id) {
622            Some(internal_id) => {
623                self.forward_edges.get(&internal_id).map_or(0, |entry| entry.len())
624            }
625            None => 0,
626        }
627    }
628    
629    /// Convert back to single-threaded graph
630    pub fn into_sequential(self) -> DependencyGraph {
631        let id_to_path = self.id_to_path.into_inner();
632        let node_metadata = self.node_metadata.into_inner();
633        let stats_cache = self.stats_cache.into_inner();
634        let next_id = self.next_id.into_inner();
635        
636        // Convert DashMap back to Vec
637        let mut forward_edges = vec![HashSet::new(); next_id];
638        let mut reverse_edges = vec![HashSet::new(); next_id];
639        
640        for (internal_id, edge_set) in self.forward_edges.into_iter() {
641            if internal_id < forward_edges.len() {
642                forward_edges[internal_id] = edge_set;
643            }
644        }
645        
646        for (internal_id, edge_set) in self.reverse_edges.into_iter() {
647            if internal_id < reverse_edges.len() {
648                reverse_edges[internal_id] = edge_set;
649            }
650        }
651        
652        DependencyGraph {
653            forward_edges,
654            reverse_edges,
655            path_to_id: self.path_to_id.into_iter().collect(),
656            id_to_path,
657            node_metadata,
658            stats_cache,
659            next_id,
660        }
661    }
662}
663
664/// Degree information for a node
665#[derive(Debug, Clone, PartialEq)]
666pub struct DegreeInfo {
667    pub node_id: NodeId,
668    pub in_degree: usize,
669    pub out_degree: usize,
670    pub total_degree: usize,
671}
672
673/// Graph statistics computed lazily and cached
674#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
675pub struct GraphStatistics {
676    /// Total number of nodes
677    pub total_nodes: usize,
678    /// Total number of edges
679    pub total_edges: usize,
680    /// Average in-degree
681    pub in_degree_avg: f64,
682    /// Maximum in-degree
683    pub in_degree_max: usize,
684    /// Average out-degree
685    pub out_degree_avg: f64,
686    /// Maximum out-degree
687    pub out_degree_max: usize,
688    /// Estimated number of strongly connected components
689    pub strongly_connected_components: usize,
690    /// Graph density (actual_edges / possible_edges)
691    pub graph_density: f64,
692    /// Number of isolated nodes (no edges)
693    pub isolated_nodes: usize,
694    /// Number of dangling nodes (no outgoing edges)
695    pub dangling_nodes: usize,
696}
697
698impl GraphStatistics {
699    /// Create empty statistics
700    pub fn empty() -> Self {
701        Self {
702            total_nodes: 0,
703            total_edges: 0,
704            in_degree_avg: 0.0,
705            in_degree_max: 0,
706            out_degree_avg: 0.0,
707            out_degree_max: 0,
708            strongly_connected_components: 0,
709            graph_density: 0.0,
710            isolated_nodes: 0,
711            dangling_nodes: 0,
712        }
713    }
714}
715
716/// Utility functions for node classification
717fn detect_language_from_extension(file_path: &str) -> Option<String> {
718    let ext = std::path::Path::new(file_path)
719        .extension()
720        .and_then(|s| s.to_str())
721        .map(|s| s.to_lowercase())?;
722    
723    match ext.as_str() {
724        "py" => Some("python".to_string()),
725        "js" | "jsx" | "mjs" => Some("javascript".to_string()),
726        "ts" | "tsx" => Some("typescript".to_string()),
727        "rs" => Some("rust".to_string()),
728        "go" => Some("go".to_string()),
729        "java" | "kt" => Some("java".to_string()),
730        "cpp" | "cc" | "cxx" | "hpp" | "h" => Some("cpp".to_string()),
731        "c" => Some("c".to_string()),
732        "rb" => Some("ruby".to_string()),
733        "php" => Some("php".to_string()),
734        "cs" => Some("csharp".to_string()),
735        "swift" => Some("swift".to_string()),
736        _ => None,
737    }
738}
739
740fn is_entrypoint_file(file_path: &str) -> bool {
741    let file_name = std::path::Path::new(file_path)
742        .file_name()
743        .and_then(|s| s.to_str())
744        .unwrap_or("")
745        .to_lowercase();
746    
747    matches!(file_name.as_str(), "main.py" | "main.rs" | "main.go" | "main.js" | "main.ts" | 
748                                 "index.py" | "index.rs" | "index.go" | "index.js" | "index.ts" |
749                                 "app.py" | "app.rs" | "app.go" | "app.js" | "app.ts" |
750                                 "server.py" | "server.rs" | "server.go" | "server.js" | "server.ts" |
751                                 "lib.rs" | "__init__.py")
752}
753
754fn is_test_file(file_path: &str) -> bool {
755    let path_lower = file_path.to_lowercase();
756    path_lower.contains("test") || 
757    path_lower.contains("spec") ||
758    path_lower.contains("__tests__") ||
759    path_lower.ends_with("_test.py") ||
760    path_lower.ends_with("_test.rs") ||
761    path_lower.ends_with("_test.go") ||
762    path_lower.ends_with(".test.js") ||
763    path_lower.ends_with(".test.ts") ||
764    path_lower.ends_with("_spec.rb")
765}
766
767impl Default for DependencyGraph {
768    fn default() -> Self {
769        Self::new()
770    }
771}
772
773#[cfg(test)]
774mod tests {
775    use super::*;
776    
777    #[test]
778    fn test_graph_creation() {
779        let graph = DependencyGraph::new();
780        assert_eq!(graph.node_count(), 0);
781        assert_eq!(graph.edge_count(), 0);
782    }
783    
784    #[test]
785    fn test_node_operations() {
786        let mut graph = DependencyGraph::new();
787        
788        // Add nodes
789        graph.add_node("main.py".to_string()).unwrap();
790        graph.add_node("utils.py".to_string()).unwrap();
791        
792        assert_eq!(graph.node_count(), 2);
793        assert!(graph.contains_node(&"main.py".to_string()));
794        assert!(graph.contains_node(&"utils.py".to_string()));
795        
796        // Remove node
797        let removed = graph.remove_node(&"utils.py".to_string()).unwrap();
798        assert!(removed);
799        assert_eq!(graph.node_count(), 1);
800        assert!(!graph.contains_node(&"utils.py".to_string()));
801    }
802    
803    #[test]
804    fn test_edge_operations() {
805        let mut graph = DependencyGraph::new();
806        
807        // Add edge (automatically creates nodes)
808        graph.add_edge("main.py".to_string(), "utils.py".to_string()).unwrap();
809        
810        assert_eq!(graph.node_count(), 2);
811        assert_eq!(graph.edge_count(), 1);
812        assert!(graph.contains_edge(&"main.py".to_string(), &"utils.py".to_string()));
813        
814        // Check degrees
815        assert_eq!(graph.out_degree(&"main.py".to_string()), 1);
816        assert_eq!(graph.in_degree(&"utils.py".to_string()), 1);
817        assert_eq!(graph.in_degree(&"main.py".to_string()), 0);
818        assert_eq!(graph.out_degree(&"utils.py".to_string()), 0);
819    }
820    
821    #[test]
822    fn test_multiple_edges() {
823        let mut graph = DependencyGraph::new();
824        
825        let edges = vec![
826            ("main.py".to_string(), "utils.py".to_string()),
827            ("main.py".to_string(), "config.py".to_string()),
828            ("utils.py".to_string(), "config.py".to_string()),
829        ];
830        
831        graph.add_edges(&edges).unwrap();
832        
833        assert_eq!(graph.node_count(), 3);
834        assert_eq!(graph.edge_count(), 3);
835        
836        // main.py should have out-degree 2
837        assert_eq!(graph.out_degree(&"main.py".to_string()), 2);
838        
839        // config.py should have in-degree 2
840        assert_eq!(graph.in_degree(&"config.py".to_string()), 2);
841    }
842    
843    #[test]
844    fn test_node_metadata() {
845        let mut graph = DependencyGraph::new();
846        
847        let metadata = NodeMetadata::new("main.py".to_string()).with_size(1024);
848        graph.add_node_with_metadata("main.py".to_string(), metadata).unwrap();
849        
850        let retrieved = graph.node_metadata(&"main.py".to_string()).unwrap();
851        assert_eq!(retrieved.file_path, "main.py");
852        assert_eq!(retrieved.language, Some("python".to_string()));
853        assert!(retrieved.is_entrypoint);
854        assert!(!retrieved.is_test);
855        assert_eq!(retrieved.size_bytes, 1024);
856    }
857    
858    #[test]
859    fn test_language_detection() {
860        assert_eq!(detect_language_from_extension("main.py"), Some("python".to_string()));
861        assert_eq!(detect_language_from_extension("app.js"), Some("javascript".to_string()));
862        assert_eq!(detect_language_from_extension("lib.rs"), Some("rust".to_string()));
863        assert_eq!(detect_language_from_extension("server.go"), Some("go".to_string()));
864        assert_eq!(detect_language_from_extension("component.tsx"), Some("typescript".to_string()));
865        assert_eq!(detect_language_from_extension("unknown.xyz"), None);
866    }
867    
868    #[test]
869    fn test_file_classification() {
870        assert!(is_entrypoint_file("main.py"));
871        assert!(is_entrypoint_file("index.js"));
872        assert!(is_entrypoint_file("lib.rs"));
873        assert!(!is_entrypoint_file("utils.py"));
874        
875        assert!(is_test_file("test_utils.py"));
876        assert!(is_test_file("utils.test.js"));
877        assert!(is_test_file("integration_test.rs"));
878        assert!(!is_test_file("utils.py"));
879    }
880    
881    #[test]
882    fn test_pagerank_iterator() {
883        let mut graph = DependencyGraph::new();
884        
885        // Build a small graph: A -> B -> C, C -> A (creates cycle)
886        graph.add_edge("A".to_string(), "B".to_string()).unwrap();
887        graph.add_edge("B".to_string(), "C".to_string()).unwrap();
888        graph.add_edge("C".to_string(), "A".to_string()).unwrap();
889        
890        let pagerank_data: Vec<_> = graph.pagerank_iterator().collect();
891        assert_eq!(pagerank_data.len(), 3);
892        
893        // Each node should have incoming edges (reverse edges)
894        for (node, reverse_edges) in pagerank_data {
895            assert!(reverse_edges.is_some());
896            assert!(!reverse_edges.unwrap().is_empty());
897        }
898    }
899    
900    #[test]
901    fn test_dangling_nodes() {
902        let mut graph = DependencyGraph::new();
903        
904        // A -> B, C is isolated, D has no outgoing edges
905        graph.add_edge("A".to_string(), "B".to_string()).unwrap();
906        graph.add_node("C".to_string()).unwrap();
907        graph.add_edge("B".to_string(), "D".to_string()).unwrap();
908        
909        let dangling = graph.dangling_nodes();
910        
911        // C and D should be dangling (no outgoing edges)
912        assert_eq!(dangling.len(), 2);
913        assert!(dangling.contains(&&"C".to_string()));
914        assert!(dangling.contains(&&"D".to_string()));
915    }
916    
917    #[test]
918    fn test_concurrent_graph() {
919        let mut graph = DependencyGraph::new();
920        graph.add_edge("A".to_string(), "B".to_string()).unwrap();
921        graph.add_edge("B".to_string(), "C".to_string()).unwrap();
922        
923        let concurrent = graph.into_concurrent();
924        
925        // Test concurrent operations
926        assert_eq!(concurrent.in_degree(&"B".to_string()), 1);
927        assert_eq!(concurrent.out_degree(&"B".to_string()), 1);
928        
929        // Add node concurrently
930        concurrent.add_node("D".to_string()).unwrap();
931        
932        // Convert back to sequential
933        let sequential = concurrent.into_sequential();
934        assert_eq!(sequential.node_count(), 4); // A, B, C, D
935    }
936    
937    #[test]
938    fn test_scc_estimation() {
939        let mut graph = DependencyGraph::new();
940        
941        // Create a graph with potential cycles: A <-> B, C -> D
942        graph.add_edge("A".to_string(), "B".to_string()).unwrap();
943        graph.add_edge("B".to_string(), "A".to_string()).unwrap();
944        graph.add_edge("C".to_string(), "D".to_string()).unwrap();
945        graph.add_node("E".to_string()).unwrap(); // Isolated
946        
947        let scc_count = graph.estimate_scc_count();
948        
949        // Should estimate: 1 SCC (A,B have both in/out), plus isolated/chain nodes (C,D,E)
950        assert!(scc_count >= 3); // At least C, D, E as separate components
951    }
952    
953    #[test]
954    fn test_nodes_by_language() {
955        let mut graph = DependencyGraph::new();
956        
957        graph.add_node("main.py".to_string()).unwrap();
958        graph.add_node("utils.py".to_string()).unwrap();
959        graph.add_node("app.js".to_string()).unwrap();
960        graph.add_node("lib.rs".to_string()).unwrap();
961        
962        let python_nodes = graph.nodes_by_language("python");
963        let js_nodes = graph.nodes_by_language("javascript");
964        let rust_nodes = graph.nodes_by_language("rust");
965        
966        assert_eq!(python_nodes.len(), 2);
967        assert_eq!(js_nodes.len(), 1);
968        assert_eq!(rust_nodes.len(), 1);
969        
970        assert!(python_nodes.contains(&&"main.py".to_string()));
971        assert!(python_nodes.contains(&&"utils.py".to_string()));
972    }
973}