Skip to main content

tsz_core/
module_graph.rs

1//! Module Dependency Graph
2//!
3//! This module provides a graph data structure for tracking module dependencies,
4//! enabling:
5//! - Circular dependency detection
6//! - Topological sorting for build ordering
7//! - Dependency analysis and tree-shaking support
8//! - Change impact analysis
9
10use crate::exports::ExportTracker;
11use crate::imports::ImportTracker;
12use crate::module_resolver::{ResolutionFailure, ResolvedModule};
13use rustc_hash::{FxHashMap, FxHashSet};
14use std::collections::VecDeque;
15use std::path::{Path, PathBuf};
16
17/// Unique identifier for a module in the graph
18#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
19pub struct ModuleId(pub u32);
20
21impl ModuleId {
22    pub const NONE: Self = Self(u32::MAX);
23
24    pub const fn is_none(&self) -> bool {
25        self.0 == u32::MAX
26    }
27}
28
29/// Information about a module in the dependency graph
30#[derive(Debug)]
31pub struct ModuleInfo {
32    /// Unique identifier
33    pub id: ModuleId,
34    /// Resolved file path
35    pub path: PathBuf,
36    /// Original specifier (for external modules)
37    pub specifier: Option<String>,
38    /// Whether this is an external module (from `node_modules`)
39    pub is_external: bool,
40    /// Import tracking for this module
41    pub imports: ImportTracker,
42    /// Export tracking for this module
43    pub exports: ExportTracker,
44    /// Modules this module imports from
45    pub dependencies: FxHashSet<ModuleId>,
46    /// Modules that import this module
47    pub dependents: FxHashSet<ModuleId>,
48    /// Whether this module has been fully processed
49    pub is_processed: bool,
50    /// Resolution errors encountered
51    pub resolution_errors: Vec<ResolutionFailure>,
52}
53
54impl ModuleInfo {
55    /// Create a new module info
56    pub fn new(id: ModuleId, path: PathBuf) -> Self {
57        Self {
58            id,
59            path,
60            specifier: None,
61            is_external: false,
62            imports: ImportTracker::new(),
63            exports: ExportTracker::new(),
64            dependencies: FxHashSet::default(),
65            dependents: FxHashSet::default(),
66            is_processed: false,
67            resolution_errors: Vec::new(),
68        }
69    }
70
71    /// Mark as external module
72    pub fn external(mut self, specifier: String) -> Self {
73        self.is_external = true;
74        self.specifier = Some(specifier);
75        self
76    }
77}
78
79/// A dependency edge in the module graph
80#[derive(Debug, Clone)]
81pub struct DependencyEdge {
82    /// Source module (the importer)
83    pub from: ModuleId,
84    /// Target module (the importee)
85    pub to: ModuleId,
86    /// Import specifier used
87    pub specifier: String,
88    /// Whether this is a type-only import
89    pub is_type_only: bool,
90    /// Whether this is a dynamic import
91    pub is_dynamic: bool,
92    /// Whether this is a side-effect import
93    pub is_side_effect: bool,
94}
95
96/// Circular dependency information
97#[derive(Debug, Clone)]
98pub struct CircularDependency {
99    /// Modules forming the cycle
100    pub cycle: Vec<ModuleId>,
101    /// File paths for display
102    pub paths: Vec<PathBuf>,
103}
104
105/// Module dependency graph
106#[derive(Debug)]
107pub struct ModuleGraph {
108    /// All modules in the graph
109    modules: FxHashMap<ModuleId, ModuleInfo>,
110    /// Path to module ID mapping
111    path_to_id: FxHashMap<PathBuf, ModuleId>,
112    /// Next available module ID
113    next_id: u32,
114    /// All dependency edges
115    edges: Vec<DependencyEdge>,
116    /// Entry points
117    entry_points: Vec<ModuleId>,
118    /// Detected circular dependencies
119    circular_dependencies: Vec<CircularDependency>,
120}
121
122impl ModuleGraph {
123    /// Create a new empty module graph
124    pub fn new() -> Self {
125        Self {
126            modules: FxHashMap::default(),
127            path_to_id: FxHashMap::default(),
128            next_id: 0,
129            edges: Vec::new(),
130            entry_points: Vec::new(),
131            circular_dependencies: Vec::new(),
132        }
133    }
134
135    /// Add or get a module by path
136    pub fn add_module(&mut self, path: &Path) -> ModuleId {
137        // Canonicalize path
138        let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
139
140        if let Some(&id) = self.path_to_id.get(&canonical) {
141            return id;
142        }
143
144        let id = ModuleId(self.next_id);
145        self.next_id += 1;
146
147        let info = ModuleInfo::new(id, canonical.clone());
148        self.modules.insert(id, info);
149        self.path_to_id.insert(canonical, id);
150
151        id
152    }
153
154    /// Add an external module
155    pub fn add_external_module(&mut self, specifier: &str, resolved: &ResolvedModule) -> ModuleId {
156        let canonical = resolved
157            .resolved_path
158            .canonicalize()
159            .unwrap_or_else(|_| resolved.resolved_path.clone());
160
161        if let Some(&id) = self.path_to_id.get(&canonical) {
162            return id;
163        }
164
165        let id = ModuleId(self.next_id);
166        self.next_id += 1;
167
168        let info = ModuleInfo::new(id, canonical.clone()).external(specifier.to_string());
169        self.modules.insert(id, info);
170        self.path_to_id.insert(canonical, id);
171
172        id
173    }
174
175    /// Get a module by ID
176    pub fn get_module(&self, id: ModuleId) -> Option<&ModuleInfo> {
177        self.modules.get(&id)
178    }
179
180    /// Get a mutable module by ID
181    pub fn get_module_mut(&mut self, id: ModuleId) -> Option<&mut ModuleInfo> {
182        self.modules.get_mut(&id)
183    }
184
185    /// Get a module by path
186    pub fn get_module_by_path(&self, path: &Path) -> Option<&ModuleInfo> {
187        let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
188
189        self.path_to_id
190            .get(&canonical)
191            .and_then(|id| self.modules.get(id))
192    }
193
194    /// Get module ID by path
195    pub fn get_module_id(&self, path: &Path) -> Option<ModuleId> {
196        let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
197
198        self.path_to_id.get(&canonical).copied()
199    }
200
201    /// Add an entry point
202    pub fn add_entry_point(&mut self, module_id: ModuleId) {
203        if !self.entry_points.contains(&module_id) {
204            self.entry_points.push(module_id);
205        }
206    }
207
208    /// Add a dependency edge
209    pub fn add_dependency(&mut self, edge: DependencyEdge) {
210        // Update module dependencies
211        if let Some(from_module) = self.modules.get_mut(&edge.from) {
212            from_module.dependencies.insert(edge.to);
213        }
214
215        // Update module dependents
216        if let Some(to_module) = self.modules.get_mut(&edge.to) {
217            to_module.dependents.insert(edge.from);
218        }
219
220        self.edges.push(edge);
221    }
222
223    /// Add a dependency between two modules
224    pub fn add_simple_dependency(&mut self, from: ModuleId, to: ModuleId, specifier: &str) {
225        self.add_dependency(DependencyEdge {
226            from,
227            to,
228            specifier: specifier.to_string(),
229            is_type_only: false,
230            is_dynamic: false,
231            is_side_effect: false,
232        });
233    }
234
235    /// Detect circular dependencies using Tarjan's algorithm
236    pub fn detect_circular_dependencies(&mut self) -> &[CircularDependency] {
237        self.circular_dependencies.clear();
238
239        let mut index_counter = 0u32;
240        let mut stack: Vec<ModuleId> = Vec::new();
241        let mut on_stack: FxHashSet<ModuleId> = FxHashSet::default();
242        let mut indices: FxHashMap<ModuleId, u32> = FxHashMap::default();
243        let mut lowlinks: FxHashMap<ModuleId, u32> = FxHashMap::default();
244
245        let module_ids: Vec<ModuleId> = self.modules.keys().copied().collect();
246
247        for module_id in module_ids {
248            if !indices.contains_key(&module_id) {
249                self.strongconnect(
250                    module_id,
251                    &mut index_counter,
252                    &mut stack,
253                    &mut on_stack,
254                    &mut indices,
255                    &mut lowlinks,
256                );
257            }
258        }
259
260        &self.circular_dependencies
261    }
262
263    /// Tarjan's strongconnect helper
264    fn strongconnect(
265        &mut self,
266        v: ModuleId,
267        index_counter: &mut u32,
268        stack: &mut Vec<ModuleId>,
269        on_stack: &mut FxHashSet<ModuleId>,
270        indices: &mut FxHashMap<ModuleId, u32>,
271        lowlinks: &mut FxHashMap<ModuleId, u32>,
272    ) {
273        indices.insert(v, *index_counter);
274        lowlinks.insert(v, *index_counter);
275        *index_counter += 1;
276
277        stack.push(v);
278        on_stack.insert(v);
279
280        // Get dependencies
281        let deps: Vec<ModuleId> = self
282            .modules
283            .get(&v)
284            .map(|m| m.dependencies.iter().copied().collect())
285            .unwrap_or_default();
286
287        for w in deps {
288            if !indices.contains_key(&w) {
289                self.strongconnect(w, index_counter, stack, on_stack, indices, lowlinks);
290                let w_lowlink = *lowlinks.get(&w).unwrap();
291                let v_lowlink = lowlinks.get_mut(&v).unwrap();
292                *v_lowlink = (*v_lowlink).min(w_lowlink);
293            } else if on_stack.contains(&w) {
294                let w_index = *indices.get(&w).unwrap();
295                let v_lowlink = lowlinks.get_mut(&v).unwrap();
296                *v_lowlink = (*v_lowlink).min(w_index);
297            }
298        }
299
300        // Root of SCC
301        if lowlinks.get(&v) == indices.get(&v) {
302            let mut scc = Vec::new();
303            loop {
304                let w = stack.pop().unwrap();
305                on_stack.remove(&w);
306                scc.push(w);
307                if w == v {
308                    break;
309                }
310            }
311
312            // Only report cycles (SCC with more than one node or self-loop)
313            if scc.len() > 1 {
314                let paths: Vec<PathBuf> = scc
315                    .iter()
316                    .filter_map(|id| self.modules.get(id).map(|m| m.path.clone()))
317                    .collect();
318
319                self.circular_dependencies
320                    .push(CircularDependency { cycle: scc, paths });
321            } else if scc.len() == 1 {
322                // Check for self-loop
323                let id = scc[0];
324                if let Some(m) = self.modules.get(&id)
325                    && m.dependencies.contains(&id)
326                {
327                    self.circular_dependencies.push(CircularDependency {
328                        cycle: scc,
329                        paths: vec![m.path.clone()],
330                    });
331                }
332            }
333        }
334    }
335
336    /// Get topological sort of modules (for build ordering)
337    ///
338    /// Returns modules in dependency order: modules with no dependencies come first,
339    /// followed by modules that depend only on already-listed modules.
340    /// This is the correct order for building/processing modules.
341    pub fn topological_sort(&self) -> Result<Vec<ModuleId>, CircularDependencyError> {
342        let mut result = Vec::new();
343        let mut visited = FxHashSet::default();
344        let mut temp_visited = FxHashSet::default();
345        let mut cycle_path = Vec::new();
346
347        for &id in self.modules.keys() {
348            if !visited.contains(&id)
349                && !self.visit_topological(
350                    id,
351                    &mut visited,
352                    &mut temp_visited,
353                    &mut result,
354                    &mut cycle_path,
355                )
356            {
357                return Err(CircularDependencyError { cycle: cycle_path });
358            }
359        }
360
361        // Post-order DFS naturally produces dependencies before dependents,
362        // which is the correct build order (no reverse needed)
363        Ok(result)
364    }
365
366    /// DFS helper for topological sort
367    fn visit_topological(
368        &self,
369        id: ModuleId,
370        visited: &mut FxHashSet<ModuleId>,
371        temp_visited: &mut FxHashSet<ModuleId>,
372        result: &mut Vec<ModuleId>,
373        cycle_path: &mut Vec<ModuleId>,
374    ) -> bool {
375        if temp_visited.contains(&id) {
376            // Cycle detected
377            cycle_path.push(id);
378            return false;
379        }
380
381        if visited.contains(&id) {
382            return true;
383        }
384
385        temp_visited.insert(id);
386
387        if let Some(module) = self.modules.get(&id) {
388            for &dep in &module.dependencies {
389                if !self.visit_topological(dep, visited, temp_visited, result, cycle_path) {
390                    cycle_path.push(id);
391                    return false;
392                }
393            }
394        }
395
396        temp_visited.remove(&id);
397        visited.insert(id);
398        result.push(id);
399
400        true
401    }
402
403    /// Get all modules that depend on a given module (transitive)
404    pub fn get_dependents(&self, id: ModuleId) -> FxHashSet<ModuleId> {
405        let mut result = FxHashSet::default();
406        let mut queue = VecDeque::new();
407
408        if let Some(module) = self.modules.get(&id) {
409            for &dep in &module.dependents {
410                queue.push_back(dep);
411            }
412        }
413
414        while let Some(current) = queue.pop_front() {
415            if result.insert(current)
416                && let Some(module) = self.modules.get(&current)
417            {
418                for &dep in &module.dependents {
419                    if !result.contains(&dep) {
420                        queue.push_back(dep);
421                    }
422                }
423            }
424        }
425
426        result
427    }
428
429    /// Get all dependencies of a module (transitive)
430    pub fn get_dependencies(&self, id: ModuleId) -> FxHashSet<ModuleId> {
431        let mut result = FxHashSet::default();
432        let mut queue = VecDeque::new();
433
434        if let Some(module) = self.modules.get(&id) {
435            for &dep in &module.dependencies {
436                queue.push_back(dep);
437            }
438        }
439
440        while let Some(current) = queue.pop_front() {
441            if result.insert(current)
442                && let Some(module) = self.modules.get(&current)
443            {
444                for &dep in &module.dependencies {
445                    if !result.contains(&dep) {
446                        queue.push_back(dep);
447                    }
448                }
449            }
450        }
451
452        result
453    }
454
455    /// Check if a module depends on another (directly or transitively)
456    pub fn depends_on(&self, from: ModuleId, to: ModuleId) -> bool {
457        self.get_dependencies(from).contains(&to)
458    }
459
460    /// Get statistics about the module graph
461    pub fn stats(&self) -> ModuleGraphStats {
462        let mut internal_modules = 0;
463        let mut external_modules = 0;
464        let mut total_dependencies = 0;
465        let mut max_depth = 0;
466
467        for module in self.modules.values() {
468            if module.is_external {
469                external_modules += 1;
470            } else {
471                internal_modules += 1;
472            }
473            total_dependencies += module.dependencies.len();
474        }
475
476        // Calculate max depth from entry points
477        for &entry in &self.entry_points {
478            let depth = self.calculate_depth(entry, &mut FxHashSet::default());
479            max_depth = max_depth.max(depth);
480        }
481
482        ModuleGraphStats {
483            total_modules: self.modules.len(),
484            internal_modules,
485            external_modules,
486            total_edges: self.edges.len(),
487            entry_points: self.entry_points.len(),
488            circular_dependencies: self.circular_dependencies.len(),
489            max_depth,
490            average_dependencies: if self.modules.is_empty() {
491                0.0
492            } else {
493                total_dependencies as f64 / self.modules.len() as f64
494            },
495        }
496    }
497
498    /// Calculate depth of module from entry points
499    fn calculate_depth(&self, id: ModuleId, visited: &mut FxHashSet<ModuleId>) -> usize {
500        if visited.contains(&id) {
501            return 0;
502        }
503        visited.insert(id);
504
505        let mut max_child_depth = 0;
506        if let Some(module) = self.modules.get(&id) {
507            for &dep in &module.dependencies {
508                let child_depth = self.calculate_depth(dep, visited);
509                max_child_depth = max_child_depth.max(child_depth);
510            }
511        }
512
513        max_child_depth + 1
514    }
515
516    /// Get all modules in the graph
517    pub fn modules(&self) -> impl Iterator<Item = &ModuleInfo> {
518        self.modules.values()
519    }
520
521    /// Get number of modules
522    pub fn len(&self) -> usize {
523        self.modules.len()
524    }
525
526    /// Check if graph is empty
527    pub fn is_empty(&self) -> bool {
528        self.modules.is_empty()
529    }
530
531    /// Get all edges
532    pub fn edges(&self) -> &[DependencyEdge] {
533        &self.edges
534    }
535
536    /// Get entry points
537    pub fn entry_points(&self) -> &[ModuleId] {
538        &self.entry_points
539    }
540
541    /// Get circular dependencies
542    pub fn circular_deps(&self) -> &[CircularDependency] {
543        &self.circular_dependencies
544    }
545
546    /// Clear the graph
547    pub fn clear(&mut self) {
548        self.modules.clear();
549        self.path_to_id.clear();
550        self.next_id = 0;
551        self.edges.clear();
552        self.entry_points.clear();
553        self.circular_dependencies.clear();
554    }
555}
556
557impl Default for ModuleGraph {
558    fn default() -> Self {
559        Self::new()
560    }
561}
562
563/// Error returned when topological sort fails due to circular dependency
564#[derive(Debug)]
565pub struct CircularDependencyError {
566    pub cycle: Vec<ModuleId>,
567}
568
569impl std::fmt::Display for CircularDependencyError {
570    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
571        write!(
572            f,
573            "Circular dependency detected involving {} modules",
574            self.cycle.len()
575        )
576    }
577}
578
579impl std::error::Error for CircularDependencyError {}
580
581/// Statistics about the module graph
582#[derive(Debug, Clone, Default)]
583pub struct ModuleGraphStats {
584    pub total_modules: usize,
585    pub internal_modules: usize,
586    pub external_modules: usize,
587    pub total_edges: usize,
588    pub entry_points: usize,
589    pub circular_dependencies: usize,
590    pub max_depth: usize,
591    pub average_dependencies: f64,
592}
593
594#[cfg(test)]
595mod tests {
596    use super::*;
597    use std::path::PathBuf;
598
599    fn create_test_path(name: &str) -> PathBuf {
600        // Use a temp directory structure for testing
601        PathBuf::from(format!("/tmp/test_modules/{name}"))
602    }
603
604    #[test]
605    fn test_module_graph_basic() {
606        let mut graph = ModuleGraph::new();
607
608        let a = graph.add_module(&create_test_path("a.ts"));
609        let b = graph.add_module(&create_test_path("b.ts"));
610
611        assert_ne!(a, b);
612        assert_eq!(graph.len(), 2);
613    }
614
615    #[test]
616    fn test_module_graph_dedup() {
617        let mut graph = ModuleGraph::new();
618
619        let path = create_test_path("a.ts");
620        let a1 = graph.add_module(&path);
621        let a2 = graph.add_module(&path);
622
623        assert_eq!(a1, a2);
624        assert_eq!(graph.len(), 1);
625    }
626
627    #[test]
628    fn test_add_dependency() {
629        let mut graph = ModuleGraph::new();
630
631        let a = graph.add_module(&create_test_path("a.ts"));
632        let b = graph.add_module(&create_test_path("b.ts"));
633
634        graph.add_simple_dependency(a, b, "./b");
635
636        let module_a = graph.get_module(a).unwrap();
637        assert!(module_a.dependencies.contains(&b));
638
639        let module_b = graph.get_module(b).unwrap();
640        assert!(module_b.dependents.contains(&a));
641    }
642
643    #[test]
644    fn test_circular_dependency_detection() {
645        let mut graph = ModuleGraph::new();
646
647        let a = graph.add_module(&create_test_path("a.ts"));
648        let b = graph.add_module(&create_test_path("b.ts"));
649        let c = graph.add_module(&create_test_path("c.ts"));
650
651        // Create cycle: a -> b -> c -> a
652        graph.add_simple_dependency(a, b, "./b");
653        graph.add_simple_dependency(b, c, "./c");
654        graph.add_simple_dependency(c, a, "./a");
655
656        let cycles = graph.detect_circular_dependencies();
657        assert!(!cycles.is_empty());
658    }
659
660    #[test]
661    fn test_topological_sort_no_cycles() {
662        let mut graph = ModuleGraph::new();
663
664        let a = graph.add_module(&create_test_path("a.ts"));
665        let b = graph.add_module(&create_test_path("b.ts"));
666        let c = graph.add_module(&create_test_path("c.ts"));
667
668        // a -> b -> c (a depends on b, b depends on c)
669        graph.add_simple_dependency(a, b, "./b");
670        graph.add_simple_dependency(b, c, "./c");
671
672        let sorted = graph.topological_sort().unwrap();
673
674        // All modules should be present
675        assert_eq!(sorted.len(), 3);
676        assert!(sorted.contains(&a));
677        assert!(sorted.contains(&b));
678        assert!(sorted.contains(&c));
679
680        // In a valid topological order, a module appears before its dependents.
681        // Since a depends on b, and b depends on c:
682        // - c should appear before b (c has no dependencies, b depends on c)
683        // - b should appear before a (a depends on b)
684        let pos_a = sorted.iter().position(|&x| x == a).unwrap();
685        let pos_b = sorted.iter().position(|&x| x == b).unwrap();
686        let pos_c = sorted.iter().position(|&x| x == c).unwrap();
687
688        // Verify: dependencies appear before their dependents in topological order
689        // c comes before b (since b depends on c)
690        // b comes before a (since a depends on b)
691        assert!(
692            pos_c < pos_b,
693            "c (pos {pos_c}) should come before b (pos {pos_b}) since b depends on c"
694        );
695        assert!(
696            pos_b < pos_a,
697            "b (pos {pos_b}) should come before a (pos {pos_a}) since a depends on b"
698        );
699    }
700
701    #[test]
702    fn test_topological_sort_with_cycle() {
703        let mut graph = ModuleGraph::new();
704
705        let a = graph.add_module(&create_test_path("a.ts"));
706        let b = graph.add_module(&create_test_path("b.ts"));
707
708        // Create cycle: a -> b -> a
709        graph.add_simple_dependency(a, b, "./b");
710        graph.add_simple_dependency(b, a, "./a");
711
712        let result = graph.topological_sort();
713        assert!(result.is_err());
714    }
715
716    #[test]
717    fn test_get_dependents() {
718        let mut graph = ModuleGraph::new();
719
720        let a = graph.add_module(&create_test_path("a.ts"));
721        let b = graph.add_module(&create_test_path("b.ts"));
722        let c = graph.add_module(&create_test_path("c.ts"));
723
724        // a -> b, c -> b (both depend on b)
725        graph.add_simple_dependency(a, b, "./b");
726        graph.add_simple_dependency(c, b, "./b");
727
728        let dependents = graph.get_dependents(b);
729        assert!(dependents.contains(&a));
730        assert!(dependents.contains(&c));
731        assert!(!dependents.contains(&b));
732    }
733
734    #[test]
735    fn test_get_dependencies() {
736        let mut graph = ModuleGraph::new();
737
738        let a = graph.add_module(&create_test_path("a.ts"));
739        let b = graph.add_module(&create_test_path("b.ts"));
740        let c = graph.add_module(&create_test_path("c.ts"));
741
742        // a -> b -> c
743        graph.add_simple_dependency(a, b, "./b");
744        graph.add_simple_dependency(b, c, "./c");
745
746        let deps = graph.get_dependencies(a);
747        assert!(deps.contains(&b));
748        assert!(deps.contains(&c));
749        assert!(!deps.contains(&a));
750    }
751
752    #[test]
753    fn test_depends_on() {
754        let mut graph = ModuleGraph::new();
755
756        let a = graph.add_module(&create_test_path("a.ts"));
757        let b = graph.add_module(&create_test_path("b.ts"));
758        let c = graph.add_module(&create_test_path("c.ts"));
759
760        graph.add_simple_dependency(a, b, "./b");
761        graph.add_simple_dependency(b, c, "./c");
762
763        assert!(graph.depends_on(a, b));
764        assert!(graph.depends_on(a, c)); // transitive
765        assert!(!graph.depends_on(c, a));
766    }
767
768    #[test]
769    fn test_module_graph_stats() {
770        let mut graph = ModuleGraph::new();
771
772        let a = graph.add_module(&create_test_path("a.ts"));
773        let b = graph.add_module(&create_test_path("b.ts"));
774
775        graph.add_entry_point(a);
776        graph.add_simple_dependency(a, b, "./b");
777
778        let stats = graph.stats();
779        assert_eq!(stats.total_modules, 2);
780        assert_eq!(stats.internal_modules, 2);
781        assert_eq!(stats.total_edges, 1);
782        assert_eq!(stats.entry_points, 1);
783    }
784}