scribe_graph/
graph.rs

1//! # Graph Data Structures for PageRank Centrality
2//!
3//! Efficient graph representation optimized for dependency analysis and PageRank computation.
4//! This module implements the core graph structures used in the PageRank centrality algorithm
5//! with emphasis on reverse edges for code dependency analysis.
6//!
7//! ## Design Philosophy
8//! - **Forward edges**: A imports B (A -> B)  
9//! - **Reverse edges**: B is imported by A (B <- A)
10//! - **PageRank flows along reverse edges** (importance flows to imported files)
11//! - **Memory-efficient adjacency list representation** for large graphs (10k+ nodes)
12//! - **Fast degree queries** and statistics calculation
13
14use dashmap::DashMap;
15use parking_lot::RwLock;
16use scribe_core::{error::ScribeError, Result};
17use serde::{Deserialize, Serialize};
18use std::collections::{HashMap, HashSet};
19
20/// Internal node identifier type for efficient graph operations (usize for array indexing)
21pub type InternalNodeId = usize;
22
23/// External node identifier type for the dependency graph (file paths)
24pub type NodeId = String;
25
26/// Edge weight type (unused in unweighted PageRank, but reserved for extensions)
27pub type EdgeWeight = f64;
28
29/// Efficient dependency graph representation optimized for PageRank computation
30/// Uses integer-based internal representation for massive performance improvements
31#[derive(Debug, Clone)]
32pub struct DependencyGraph {
33    /// Forward adjacency list: internal_id -> set of internal_ids it imports
34    forward_edges: Vec<HashSet<InternalNodeId>>,
35
36    /// Reverse adjacency list: internal_id -> set of internal_ids that import it (for PageRank)
37    reverse_edges: Vec<HashSet<InternalNodeId>>,
38
39    /// Mapping from file path to internal node ID
40    path_to_id: HashMap<NodeId, InternalNodeId>,
41
42    /// Mapping from internal node ID to file path
43    id_to_path: Vec<NodeId>,
44
45    /// Node metadata cache (indexed by internal ID)
46    node_metadata: Vec<Option<NodeMetadata>>,
47
48    /// Graph statistics cache (invalidated on mutations)
49    stats_cache: Option<GraphStatistics>,
50
51    /// Next available internal node ID
52    next_id: InternalNodeId,
53}
54
55/// Metadata associated with each node in the graph
56#[derive(Debug, Clone, PartialEq)]
57pub struct NodeMetadata {
58    /// File path of the node
59    pub file_path: String,
60    /// Programming language detected
61    pub language: Option<String>,
62    /// Whether this is an entrypoint file
63    pub is_entrypoint: bool,
64    /// Whether this is a test file
65    pub is_test: bool,
66    /// File size in bytes (for statistics)
67    pub size_bytes: u64,
68}
69
70impl NodeMetadata {
71    /// Create new node metadata
72    pub fn new(file_path: String) -> Self {
73        let language = detect_language_from_extension(&file_path);
74        let is_entrypoint = is_entrypoint_file(&file_path);
75        let is_test = is_test_file(&file_path);
76
77        Self {
78            file_path,
79            language,
80            is_entrypoint,
81            is_test,
82            size_bytes: 0,
83        }
84    }
85
86    /// Create with size information
87    pub fn with_size(mut self, size_bytes: u64) -> Self {
88        self.size_bytes = size_bytes;
89        self
90    }
91}
92
93/// Graph construction and manipulation operations
94impl DependencyGraph {
95    /// Create a new empty dependency graph
96    pub fn new() -> Self {
97        Self {
98            forward_edges: Vec::new(),
99            reverse_edges: Vec::new(),
100            path_to_id: HashMap::new(),
101            id_to_path: Vec::new(),
102            node_metadata: Vec::new(),
103            stats_cache: None,
104            next_id: 0,
105        }
106    }
107
108    /// Create with initial capacity hint for performance optimization
109    pub fn with_capacity(capacity: usize) -> Self {
110        Self {
111            forward_edges: Vec::with_capacity(capacity),
112            reverse_edges: Vec::with_capacity(capacity),
113            path_to_id: HashMap::with_capacity(capacity),
114            id_to_path: Vec::with_capacity(capacity),
115            node_metadata: Vec::with_capacity(capacity),
116            stats_cache: None,
117            next_id: 0,
118        }
119    }
120
121    /// Add a node to the graph (can exist without edges)
122    pub fn add_node(&mut self, node_id: NodeId) -> Result<InternalNodeId> {
123        // Check if node already exists
124        if let Some(&existing_id) = self.path_to_id.get(&node_id) {
125            return Ok(existing_id);
126        }
127
128        let internal_id = self.next_id;
129        self.next_id += 1;
130
131        // Add to mappings
132        self.path_to_id.insert(node_id.clone(), internal_id);
133        self.id_to_path.push(node_id.clone());
134
135        // Initialize empty adjacency lists
136        self.forward_edges.push(HashSet::new());
137        self.reverse_edges.push(HashSet::new());
138
139        // Add default metadata
140        self.node_metadata.push(Some(NodeMetadata::new(node_id)));
141
142        // Invalidate cache
143        self.stats_cache = None;
144
145        Ok(internal_id)
146    }
147
148    /// Add a node with metadata
149    pub fn add_node_with_metadata(
150        &mut self,
151        node_id: NodeId,
152        metadata: NodeMetadata,
153    ) -> Result<InternalNodeId> {
154        let internal_id = self.add_node(node_id)?;
155        self.node_metadata[internal_id] = Some(metadata);
156        Ok(internal_id)
157    }
158
159    /// Add an import edge: from_file imports to_file
160    pub fn add_edge(&mut self, from_node: NodeId, to_node: NodeId) -> Result<()> {
161        // Ensure both nodes exist and get their internal IDs
162        let from_id = self.add_node(from_node)?;
163        let to_id = self.add_node(to_node)?;
164
165        // Add forward edge: from_id -> to_id
166        self.forward_edges[from_id].insert(to_id);
167
168        // Add reverse edge: to_id <- from_id
169        self.reverse_edges[to_id].insert(from_id);
170
171        // Invalidate cache
172        self.stats_cache = None;
173
174        Ok(())
175    }
176
177    /// Add multiple edges efficiently (batch operation)
178    pub fn add_edges(&mut self, edges: &[(NodeId, NodeId)]) -> Result<()> {
179        for (from_node, to_node) in edges {
180            self.add_edge(from_node.clone(), to_node.clone())?;
181        }
182        Ok(())
183    }
184
185    /// Remove a node and all its edges
186    pub fn remove_node(&mut self, node_id: &NodeId) -> Result<bool> {
187        let internal_id = match self.path_to_id.get(node_id) {
188            Some(&id) => id,
189            None => return Ok(false),
190        };
191
192        // Get outgoing edges to clean up reverse references
193        let outgoing = self.forward_edges[internal_id].clone();
194        for target_id in &outgoing {
195            self.reverse_edges[*target_id].remove(&internal_id);
196        }
197
198        // Get incoming edges to clean up forward references
199        let incoming = self.reverse_edges[internal_id].clone();
200        for source_id in &incoming {
201            self.forward_edges[*source_id].remove(&internal_id);
202        }
203
204        // Clear the adjacency lists for this node
205        self.forward_edges[internal_id].clear();
206        self.reverse_edges[internal_id].clear();
207
208        // Remove metadata
209        self.node_metadata[internal_id] = None;
210
211        // Remove from path mapping (but keep internal_id for consistency)
212        self.path_to_id.remove(node_id);
213
214        // Note: We don't remove from id_to_path to maintain index consistency
215        // Instead, we'll need to handle None cases when iterating
216
217        // Invalidate cache
218        self.stats_cache = None;
219
220        Ok(true)
221    }
222
223    /// Remove an edge between two nodes
224    pub fn remove_edge(&mut self, from_node: &NodeId, to_node: &NodeId) -> Result<bool> {
225        let from_id = match self.path_to_id.get(from_node) {
226            Some(&id) => id,
227            None => return Ok(false),
228        };
229
230        let to_id = match self.path_to_id.get(to_node) {
231            Some(&id) => id,
232            None => return Ok(false),
233        };
234
235        let forward_removed = self.forward_edges[from_id].remove(&to_id);
236        let reverse_removed = self.reverse_edges[to_id].remove(&from_id);
237
238        if forward_removed || reverse_removed {
239            self.stats_cache = None;
240        }
241
242        Ok(forward_removed || reverse_removed)
243    }
244
245    /// Check if a node exists in the graph
246    pub fn contains_node(&self, node_id: &NodeId) -> bool {
247        self.path_to_id.contains_key(node_id)
248    }
249
250    /// Check if an edge exists between two nodes
251    pub fn contains_edge(&self, from_node: &NodeId, to_node: &NodeId) -> bool {
252        match (self.path_to_id.get(from_node), self.path_to_id.get(to_node)) {
253            (Some(&from_id), Some(&to_id)) => self.forward_edges[from_id].contains(&to_id),
254            _ => false,
255        }
256    }
257
258    /// Get the number of nodes in the graph
259    pub fn node_count(&self) -> usize {
260        self.path_to_id.len()
261    }
262
263    /// Get the total number of edges in the graph
264    pub fn edge_count(&self) -> usize {
265        self.forward_edges.iter().map(|edges| edges.len()).sum()
266    }
267
268    /// Get all nodes in the graph
269    pub fn nodes(&self) -> impl Iterator<Item = &NodeId> {
270        self.path_to_id.keys()
271    }
272
273    /// Get all edges in the graph as (from, to) pairs
274    pub fn edges(&self) -> impl Iterator<Item = (String, String)> + '_ {
275        self.forward_edges
276            .iter()
277            .enumerate()
278            .flat_map(move |(from_id, targets)| {
279                let from_path = self.id_to_path[from_id].clone();
280                targets.iter().map(move |&to_id| {
281                    let to_path = self.id_to_path[to_id].clone();
282                    (from_path.clone(), to_path)
283                })
284            })
285    }
286}
287
288/// Degree and neighbor query operations
289impl DependencyGraph {
290    /// Get in-degree of a node (number of files that import this node)
291    pub fn in_degree(&self, node_id: &NodeId) -> usize {
292        match self.path_to_id.get(node_id) {
293            Some(&internal_id) => self.reverse_edges[internal_id].len(),
294            None => 0,
295        }
296    }
297
298    /// Get out-degree of a node (number of files this node imports)
299    pub fn out_degree(&self, node_id: &NodeId) -> usize {
300        match self.path_to_id.get(node_id) {
301            Some(&internal_id) => self.forward_edges[internal_id].len(),
302            None => 0,
303        }
304    }
305
306    /// Get total degree of a node (in + out)
307    pub fn degree(&self, node_id: &NodeId) -> usize {
308        self.in_degree(node_id) + self.out_degree(node_id)
309    }
310
311    /// Get nodes that this node imports (outgoing edges)
312    pub fn outgoing_neighbors(&self, node_id: &NodeId) -> Option<Vec<&NodeId>> {
313        match self.path_to_id.get(node_id) {
314            Some(&internal_id) => {
315                let neighbors: Vec<&NodeId> = self.forward_edges[internal_id]
316                    .iter()
317                    .map(|&target_id| &self.id_to_path[target_id])
318                    .collect();
319                Some(neighbors)
320            }
321            None => None,
322        }
323    }
324
325    /// Get nodes that import this node (incoming edges) - important for PageRank
326    pub fn incoming_neighbors(&self, node_id: &NodeId) -> Option<Vec<&NodeId>> {
327        match self.path_to_id.get(node_id) {
328            Some(&internal_id) => {
329                let neighbors: Vec<&NodeId> = self.reverse_edges[internal_id]
330                    .iter()
331                    .map(|&source_id| &self.id_to_path[source_id])
332                    .collect();
333                Some(neighbors)
334            }
335            None => None,
336        }
337    }
338
339    /// Get both incoming and outgoing neighbors
340    pub fn all_neighbors(&self, node_id: &NodeId) -> HashSet<&NodeId> {
341        let mut neighbors = HashSet::new();
342
343        if let Some(&internal_id) = self.path_to_id.get(node_id) {
344            // Add outgoing neighbors
345            for &target_id in &self.forward_edges[internal_id] {
346                neighbors.insert(&self.id_to_path[target_id]);
347            }
348
349            // Add incoming neighbors
350            for &source_id in &self.reverse_edges[internal_id] {
351                neighbors.insert(&self.id_to_path[source_id]);
352            }
353        }
354
355        neighbors
356    }
357
358    /// Get degree information for a node
359    pub fn get_degree_info(&self, node_id: &NodeId) -> Option<DegreeInfo> {
360        if !self.contains_node(node_id) {
361            return None;
362        }
363
364        Some(DegreeInfo {
365            node_id: node_id.clone(),
366            in_degree: self.in_degree(node_id),
367            out_degree: self.out_degree(node_id),
368            total_degree: self.degree(node_id),
369        })
370    }
371
372    /// Internal API: Get internal node ID for path (for PageRank optimization)
373    pub(crate) fn get_internal_id(&self, node_id: &NodeId) -> Option<InternalNodeId> {
374        self.path_to_id.get(node_id).copied()
375    }
376
377    /// Internal API: Get path for internal ID
378    pub(crate) fn get_path(&self, internal_id: InternalNodeId) -> Option<&NodeId> {
379        self.id_to_path.get(internal_id)
380    }
381
382    /// Internal API: Get incoming neighbors by internal ID (for PageRank)
383    pub(crate) fn incoming_neighbors_by_id(
384        &self,
385        internal_id: InternalNodeId,
386    ) -> Option<&HashSet<InternalNodeId>> {
387        self.reverse_edges.get(internal_id)
388    }
389
390    /// Internal API: Get out-degree by internal ID (for PageRank)
391    pub(crate) fn out_degree_by_id(&self, internal_id: InternalNodeId) -> usize {
392        self.forward_edges
393            .get(internal_id)
394            .map_or(0, |edges| edges.len())
395    }
396
397    /// Internal API: Get total number of active nodes (for PageRank)
398    pub(crate) fn internal_node_count(&self) -> usize {
399        self.path_to_id.len()
400    }
401
402    /// Internal API: Iterator over all internal node IDs with their paths
403    pub(crate) fn internal_nodes(&self) -> impl Iterator<Item = (InternalNodeId, &NodeId)> {
404        self.path_to_id.iter().map(|(path, &id)| (id, path))
405    }
406}
407
408/// Node metadata and information queries
409impl DependencyGraph {
410    /// Get metadata for a node
411    pub fn node_metadata(&self, node_id: &NodeId) -> Option<&NodeMetadata> {
412        match self.path_to_id.get(node_id) {
413            Some(&internal_id) => self.node_metadata[internal_id].as_ref(),
414            None => None,
415        }
416    }
417
418    /// Set metadata for a node
419    pub fn set_node_metadata(&mut self, node_id: NodeId, metadata: NodeMetadata) -> Result<()> {
420        match self.path_to_id.get(&node_id) {
421            Some(&internal_id) => {
422                self.node_metadata[internal_id] = Some(metadata);
423                Ok(())
424            }
425            None => Err(ScribeError::invalid_operation(
426                format!("Node {} does not exist in graph", node_id),
427                "set_node_metadata".to_string(),
428            )),
429        }
430    }
431
432    /// Get all entrypoint nodes
433    pub fn entrypoint_nodes(&self) -> Vec<&NodeId> {
434        self.node_metadata
435            .iter()
436            .enumerate()
437            .filter_map(|(internal_id, meta_opt)| {
438                if let Some(meta) = meta_opt {
439                    if meta.is_entrypoint {
440                        return Some(&self.id_to_path[internal_id]);
441                    }
442                }
443                None
444            })
445            .collect()
446    }
447
448    /// Get all test nodes
449    pub fn test_nodes(&self) -> Vec<&NodeId> {
450        self.node_metadata
451            .iter()
452            .enumerate()
453            .filter_map(|(internal_id, meta_opt)| {
454                if let Some(meta) = meta_opt {
455                    if meta.is_test {
456                        return Some(&self.id_to_path[internal_id]);
457                    }
458                }
459                None
460            })
461            .collect()
462    }
463
464    /// Get nodes by language
465    pub fn nodes_by_language(&self, language: &str) -> Vec<&NodeId> {
466        self.node_metadata
467            .iter()
468            .enumerate()
469            .filter_map(|(internal_id, meta_opt)| {
470                if let Some(meta) = meta_opt {
471                    if meta.language.as_deref() == Some(language) {
472                        return Some(&self.id_to_path[internal_id]);
473                    }
474                }
475                None
476            })
477            .collect()
478    }
479}
480
481/// Specialized operations for PageRank computation
482impl DependencyGraph {
483    /// Get all nodes with their reverse edge neighbors (for PageRank iteration)
484    pub fn pagerank_iterator(&self) -> impl Iterator<Item = (&NodeId, Option<Vec<&NodeId>>)> + '_ {
485        self.path_to_id.iter().map(|(node_path, &internal_id)| {
486            let incoming: Option<Vec<&NodeId>> = if !self.reverse_edges[internal_id].is_empty() {
487                Some(
488                    self.reverse_edges[internal_id]
489                        .iter()
490                        .map(|&source_id| &self.id_to_path[source_id])
491                        .collect(),
492                )
493            } else {
494                Some(Vec::new())
495            };
496            (node_path, incoming)
497        })
498    }
499
500    /// Get dangling nodes (nodes with no outgoing edges)
501    pub fn dangling_nodes(&self) -> Vec<&NodeId> {
502        self.path_to_id
503            .iter()
504            .filter(|(_, &internal_id)| self.forward_edges[internal_id].is_empty())
505            .map(|(node_path, _)| node_path)
506            .collect()
507    }
508
509    /// Get strongly connected components (simplified estimation for statistics)
510    pub fn estimate_scc_count(&self) -> usize {
511        if self.path_to_id.is_empty() {
512            return 0;
513        }
514
515        // Count nodes with both in and out edges (likely in cycles)
516        let potential_scc_nodes = self
517            .path_to_id
518            .iter()
519            .filter(|(_, &internal_id)| {
520                !self.reverse_edges[internal_id].is_empty()
521                    && !self.forward_edges[internal_id].is_empty()
522            })
523            .count();
524
525        // Rough estimate: most SCCs are small, assume average size of 3
526        let estimated_scc = if potential_scc_nodes > 0 {
527            std::cmp::max(1, potential_scc_nodes / 3)
528        } else {
529            0
530        };
531
532        // Add isolated nodes and simple chains
533        let isolated_nodes = self.path_to_id.len() - potential_scc_nodes;
534        estimated_scc + isolated_nodes
535    }
536
537    /// Check if the graph is strongly connected (simplified check)
538    pub fn is_strongly_connected(&self) -> bool {
539        if self.path_to_id.is_empty() {
540            return true;
541        }
542
543        // Simplified check: all nodes have both in and out edges
544        self.path_to_id.iter().all(|(_, &internal_id)| {
545            !self.reverse_edges[internal_id].is_empty()
546                && !self.forward_edges[internal_id].is_empty()
547        })
548    }
549}
550
551/// Concurrent graph operations for performance
552impl DependencyGraph {
553    /// Create a thread-safe concurrent graph for parallel operations
554    pub fn into_concurrent(self) -> ConcurrentDependencyGraph {
555        // Convert Vec<HashSet> to DashMap representation for concurrency
556        let forward_edges = DashMap::new();
557        let reverse_edges = DashMap::new();
558
559        for (internal_id, edge_set) in self.forward_edges.into_iter().enumerate() {
560            forward_edges.insert(internal_id, edge_set);
561        }
562
563        for (internal_id, edge_set) in self.reverse_edges.into_iter().enumerate() {
564            reverse_edges.insert(internal_id, edge_set);
565        }
566
567        ConcurrentDependencyGraph {
568            forward_edges,
569            reverse_edges,
570            path_to_id: DashMap::from_iter(self.path_to_id),
571            id_to_path: RwLock::new(self.id_to_path),
572            node_metadata: RwLock::new(self.node_metadata),
573            stats_cache: RwLock::new(self.stats_cache),
574            next_id: RwLock::new(self.next_id),
575        }
576    }
577}
578
579/// Thread-safe concurrent version of DependencyGraph
580#[derive(Debug)]
581pub struct ConcurrentDependencyGraph {
582    forward_edges: DashMap<InternalNodeId, HashSet<InternalNodeId>>,
583    reverse_edges: DashMap<InternalNodeId, HashSet<InternalNodeId>>,
584    path_to_id: DashMap<NodeId, InternalNodeId>,
585    id_to_path: RwLock<Vec<NodeId>>,
586    node_metadata: RwLock<Vec<Option<NodeMetadata>>>,
587    stats_cache: RwLock<Option<GraphStatistics>>,
588    next_id: RwLock<InternalNodeId>,
589}
590
591impl ConcurrentDependencyGraph {
592    /// Add a node concurrently
593    pub fn add_node(&self, node_id: NodeId) -> Result<InternalNodeId> {
594        // Check if node already exists
595        if let Some(existing_id) = self.path_to_id.get(&node_id) {
596            return Ok(*existing_id);
597        }
598
599        let internal_id = {
600            let mut next_id = self.next_id.write();
601            let id = *next_id;
602            *next_id += 1;
603            id
604        };
605
606        // Add to mappings
607        self.path_to_id.insert(node_id.clone(), internal_id);
608        {
609            let mut id_to_path = self.id_to_path.write();
610            id_to_path.push(node_id.clone());
611        }
612
613        // Initialize empty adjacency lists
614        self.forward_edges.insert(internal_id, HashSet::new());
615        self.reverse_edges.insert(internal_id, HashSet::new());
616
617        // Add default metadata
618        {
619            let mut metadata = self.node_metadata.write();
620            metadata.push(Some(NodeMetadata::new(node_id)));
621        }
622
623        // Invalidate stats cache
624        *self.stats_cache.write() = None;
625
626        Ok(internal_id)
627    }
628
629    /// Get in-degree concurrently
630    pub fn in_degree(&self, node_id: &NodeId) -> usize {
631        match self.path_to_id.get(node_id) {
632            Some(internal_id) => self
633                .reverse_edges
634                .get(&internal_id)
635                .map_or(0, |entry| entry.len()),
636            None => 0,
637        }
638    }
639
640    /// Get out-degree concurrently  
641    pub fn out_degree(&self, node_id: &NodeId) -> usize {
642        match self.path_to_id.get(node_id) {
643            Some(internal_id) => self
644                .forward_edges
645                .get(&internal_id)
646                .map_or(0, |entry| entry.len()),
647            None => 0,
648        }
649    }
650
651    /// Convert back to single-threaded graph
652    pub fn into_sequential(self) -> DependencyGraph {
653        let id_to_path = self.id_to_path.into_inner();
654        let node_metadata = self.node_metadata.into_inner();
655        let stats_cache = self.stats_cache.into_inner();
656        let next_id = self.next_id.into_inner();
657
658        // Convert DashMap back to Vec
659        let mut forward_edges = vec![HashSet::new(); next_id];
660        let mut reverse_edges = vec![HashSet::new(); next_id];
661
662        for (internal_id, edge_set) in self.forward_edges.into_iter() {
663            if internal_id < forward_edges.len() {
664                forward_edges[internal_id] = edge_set;
665            }
666        }
667
668        for (internal_id, edge_set) in self.reverse_edges.into_iter() {
669            if internal_id < reverse_edges.len() {
670                reverse_edges[internal_id] = edge_set;
671            }
672        }
673
674        DependencyGraph {
675            forward_edges,
676            reverse_edges,
677            path_to_id: self.path_to_id.into_iter().collect(),
678            id_to_path,
679            node_metadata,
680            stats_cache,
681            next_id,
682        }
683    }
684}
685
686/// Degree information for a node
687#[derive(Debug, Clone, PartialEq)]
688pub struct DegreeInfo {
689    pub node_id: NodeId,
690    pub in_degree: usize,
691    pub out_degree: usize,
692    pub total_degree: usize,
693}
694
695/// Graph statistics computed lazily and cached
696#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
697pub struct GraphStatistics {
698    /// Total number of nodes
699    pub total_nodes: usize,
700    /// Total number of edges
701    pub total_edges: usize,
702    /// Average in-degree
703    pub in_degree_avg: f64,
704    /// Maximum in-degree
705    pub in_degree_max: usize,
706    /// Average out-degree
707    pub out_degree_avg: f64,
708    /// Maximum out-degree
709    pub out_degree_max: usize,
710    /// Estimated number of strongly connected components
711    pub strongly_connected_components: usize,
712    /// Graph density (actual_edges / possible_edges)
713    pub graph_density: f64,
714    /// Number of isolated nodes (no edges)
715    pub isolated_nodes: usize,
716    /// Number of dangling nodes (no outgoing edges)
717    pub dangling_nodes: usize,
718}
719
720impl GraphStatistics {
721    /// Create empty statistics
722    pub fn empty() -> Self {
723        Self {
724            total_nodes: 0,
725            total_edges: 0,
726            in_degree_avg: 0.0,
727            in_degree_max: 0,
728            out_degree_avg: 0.0,
729            out_degree_max: 0,
730            strongly_connected_components: 0,
731            graph_density: 0.0,
732            isolated_nodes: 0,
733            dangling_nodes: 0,
734        }
735    }
736}
737
738/// Utility functions for node classification
739fn detect_language_from_extension(file_path: &str) -> Option<String> {
740    let ext = std::path::Path::new(file_path)
741        .extension()
742        .and_then(|s| s.to_str())
743        .map(|s| s.to_lowercase())?;
744
745    match ext.as_str() {
746        "py" => Some("python".to_string()),
747        "js" | "jsx" | "mjs" => Some("javascript".to_string()),
748        "ts" | "tsx" => Some("typescript".to_string()),
749        "rs" => Some("rust".to_string()),
750        "go" => Some("go".to_string()),
751        "java" | "kt" => Some("java".to_string()),
752        "cpp" | "cc" | "cxx" | "hpp" | "h" => Some("cpp".to_string()),
753        "c" => Some("c".to_string()),
754        "rb" => Some("ruby".to_string()),
755        "php" => Some("php".to_string()),
756        "cs" => Some("csharp".to_string()),
757        "swift" => Some("swift".to_string()),
758        _ => None,
759    }
760}
761
762fn is_entrypoint_file(file_path: &str) -> bool {
763    let file_name = std::path::Path::new(file_path)
764        .file_name()
765        .and_then(|s| s.to_str())
766        .unwrap_or("")
767        .to_lowercase();
768
769    matches!(
770        file_name.as_str(),
771        "main.py"
772            | "main.rs"
773            | "main.go"
774            | "main.js"
775            | "main.ts"
776            | "index.py"
777            | "index.rs"
778            | "index.go"
779            | "index.js"
780            | "index.ts"
781            | "app.py"
782            | "app.rs"
783            | "app.go"
784            | "app.js"
785            | "app.ts"
786            | "server.py"
787            | "server.rs"
788            | "server.go"
789            | "server.js"
790            | "server.ts"
791            | "lib.rs"
792            | "__init__.py"
793    )
794}
795
796fn is_test_file(file_path: &str) -> bool {
797    let path_lower = file_path.to_lowercase();
798    path_lower.contains("test")
799        || path_lower.contains("spec")
800        || path_lower.contains("__tests__")
801        || path_lower.ends_with("_test.py")
802        || path_lower.ends_with("_test.rs")
803        || path_lower.ends_with("_test.go")
804        || path_lower.ends_with(".test.js")
805        || path_lower.ends_with(".test.ts")
806        || path_lower.ends_with("_spec.rb")
807}
808
809impl Default for DependencyGraph {
810    fn default() -> Self {
811        Self::new()
812    }
813}
814
815#[cfg(test)]
816mod tests {
817    use super::*;
818
819    #[test]
820    fn test_graph_creation() {
821        let graph = DependencyGraph::new();
822        assert_eq!(graph.node_count(), 0);
823        assert_eq!(graph.edge_count(), 0);
824    }
825
826    #[test]
827    fn test_node_operations() {
828        let mut graph = DependencyGraph::new();
829
830        // Add nodes
831        graph.add_node("main.py".to_string()).unwrap();
832        graph.add_node("utils.py".to_string()).unwrap();
833
834        assert_eq!(graph.node_count(), 2);
835        assert!(graph.contains_node(&"main.py".to_string()));
836        assert!(graph.contains_node(&"utils.py".to_string()));
837
838        // Remove node
839        let removed = graph.remove_node(&"utils.py".to_string()).unwrap();
840        assert!(removed);
841        assert_eq!(graph.node_count(), 1);
842        assert!(!graph.contains_node(&"utils.py".to_string()));
843    }
844
845    #[test]
846    fn test_edge_operations() {
847        let mut graph = DependencyGraph::new();
848
849        // Add edge (automatically creates nodes)
850        graph
851            .add_edge("main.py".to_string(), "utils.py".to_string())
852            .unwrap();
853
854        assert_eq!(graph.node_count(), 2);
855        assert_eq!(graph.edge_count(), 1);
856        assert!(graph.contains_edge(&"main.py".to_string(), &"utils.py".to_string()));
857
858        // Check degrees
859        assert_eq!(graph.out_degree(&"main.py".to_string()), 1);
860        assert_eq!(graph.in_degree(&"utils.py".to_string()), 1);
861        assert_eq!(graph.in_degree(&"main.py".to_string()), 0);
862        assert_eq!(graph.out_degree(&"utils.py".to_string()), 0);
863    }
864
865    #[test]
866    fn test_multiple_edges() {
867        let mut graph = DependencyGraph::new();
868
869        let edges = vec![
870            ("main.py".to_string(), "utils.py".to_string()),
871            ("main.py".to_string(), "config.py".to_string()),
872            ("utils.py".to_string(), "config.py".to_string()),
873        ];
874
875        graph.add_edges(&edges).unwrap();
876
877        assert_eq!(graph.node_count(), 3);
878        assert_eq!(graph.edge_count(), 3);
879
880        // main.py should have out-degree 2
881        assert_eq!(graph.out_degree(&"main.py".to_string()), 2);
882
883        // config.py should have in-degree 2
884        assert_eq!(graph.in_degree(&"config.py".to_string()), 2);
885    }
886
887    #[test]
888    fn test_node_metadata() {
889        let mut graph = DependencyGraph::new();
890
891        let metadata = NodeMetadata::new("main.py".to_string()).with_size(1024);
892        graph
893            .add_node_with_metadata("main.py".to_string(), metadata)
894            .unwrap();
895
896        let retrieved = graph.node_metadata(&"main.py".to_string()).unwrap();
897        assert_eq!(retrieved.file_path, "main.py");
898        assert_eq!(retrieved.language, Some("python".to_string()));
899        assert!(retrieved.is_entrypoint);
900        assert!(!retrieved.is_test);
901        assert_eq!(retrieved.size_bytes, 1024);
902    }
903
904    #[test]
905    fn test_language_detection() {
906        assert_eq!(
907            detect_language_from_extension("main.py"),
908            Some("python".to_string())
909        );
910        assert_eq!(
911            detect_language_from_extension("app.js"),
912            Some("javascript".to_string())
913        );
914        assert_eq!(
915            detect_language_from_extension("lib.rs"),
916            Some("rust".to_string())
917        );
918        assert_eq!(
919            detect_language_from_extension("server.go"),
920            Some("go".to_string())
921        );
922        assert_eq!(
923            detect_language_from_extension("component.tsx"),
924            Some("typescript".to_string())
925        );
926        assert_eq!(detect_language_from_extension("unknown.xyz"), None);
927    }
928
929    #[test]
930    fn test_file_classification() {
931        assert!(is_entrypoint_file("main.py"));
932        assert!(is_entrypoint_file("index.js"));
933        assert!(is_entrypoint_file("lib.rs"));
934        assert!(!is_entrypoint_file("utils.py"));
935
936        assert!(is_test_file("test_utils.py"));
937        assert!(is_test_file("utils.test.js"));
938        assert!(is_test_file("integration_test.rs"));
939        assert!(!is_test_file("utils.py"));
940    }
941
942    #[test]
943    fn test_pagerank_iterator() {
944        let mut graph = DependencyGraph::new();
945
946        // Build a small graph: A -> B -> C, C -> A (creates cycle)
947        graph.add_edge("A".to_string(), "B".to_string()).unwrap();
948        graph.add_edge("B".to_string(), "C".to_string()).unwrap();
949        graph.add_edge("C".to_string(), "A".to_string()).unwrap();
950
951        let pagerank_data: Vec<_> = graph.pagerank_iterator().collect();
952        assert_eq!(pagerank_data.len(), 3);
953
954        // Each node should have incoming edges (reverse edges)
955        for (node, reverse_edges) in pagerank_data {
956            assert!(reverse_edges.is_some());
957            assert!(!reverse_edges.unwrap().is_empty());
958        }
959    }
960
961    #[test]
962    fn test_dangling_nodes() {
963        let mut graph = DependencyGraph::new();
964
965        // A -> B, C is isolated, D has no outgoing edges
966        graph.add_edge("A".to_string(), "B".to_string()).unwrap();
967        graph.add_node("C".to_string()).unwrap();
968        graph.add_edge("B".to_string(), "D".to_string()).unwrap();
969
970        let dangling = graph.dangling_nodes();
971
972        // C and D should be dangling (no outgoing edges)
973        assert_eq!(dangling.len(), 2);
974        assert!(dangling.contains(&&"C".to_string()));
975        assert!(dangling.contains(&&"D".to_string()));
976    }
977
978    #[test]
979    fn test_concurrent_graph() {
980        let mut graph = DependencyGraph::new();
981        graph.add_edge("A".to_string(), "B".to_string()).unwrap();
982        graph.add_edge("B".to_string(), "C".to_string()).unwrap();
983
984        let concurrent = graph.into_concurrent();
985
986        // Test concurrent operations
987        assert_eq!(concurrent.in_degree(&"B".to_string()), 1);
988        assert_eq!(concurrent.out_degree(&"B".to_string()), 1);
989
990        // Add node concurrently
991        concurrent.add_node("D".to_string()).unwrap();
992
993        // Convert back to sequential
994        let sequential = concurrent.into_sequential();
995        assert_eq!(sequential.node_count(), 4); // A, B, C, D
996    }
997
998    #[test]
999    fn test_scc_estimation() {
1000        let mut graph = DependencyGraph::new();
1001
1002        // Create a graph with potential cycles: A <-> B, C -> D
1003        graph.add_edge("A".to_string(), "B".to_string()).unwrap();
1004        graph.add_edge("B".to_string(), "A".to_string()).unwrap();
1005        graph.add_edge("C".to_string(), "D".to_string()).unwrap();
1006        graph.add_node("E".to_string()).unwrap(); // Isolated
1007
1008        let scc_count = graph.estimate_scc_count();
1009
1010        // Should estimate: 1 SCC (A,B have both in/out), plus isolated/chain nodes (C,D,E)
1011        assert!(scc_count >= 3); // At least C, D, E as separate components
1012    }
1013
1014    #[test]
1015    fn test_nodes_by_language() {
1016        let mut graph = DependencyGraph::new();
1017
1018        graph.add_node("main.py".to_string()).unwrap();
1019        graph.add_node("utils.py".to_string()).unwrap();
1020        graph.add_node("app.js".to_string()).unwrap();
1021        graph.add_node("lib.rs".to_string()).unwrap();
1022
1023        let python_nodes = graph.nodes_by_language("python");
1024        let js_nodes = graph.nodes_by_language("javascript");
1025        let rust_nodes = graph.nodes_by_language("rust");
1026
1027        assert_eq!(python_nodes.len(), 2);
1028        assert_eq!(js_nodes.len(), 1);
1029        assert_eq!(rust_nodes.len(), 1);
1030
1031        assert!(python_nodes.contains(&&"main.py".to_string()));
1032        assert!(python_nodes.contains(&&"utils.py".to_string()));
1033    }
1034}