Skip to main content

chainsaw/
walker.rs

1//! Concurrent dependency graph construction.
2//!
3//! Starting from an entry file, discovers all reachable modules by parsing
4//! imports and resolving them against the filesystem in parallel using a
5//! lock-free work queue and rayon thread pool.
6
7use std::collections::HashSet;
8use std::path::{Path, PathBuf};
9use std::sync::Mutex;
10use std::sync::atomic::{AtomicUsize, Ordering};
11
12use crossbeam_queue::SegQueue;
13use dashmap::DashSet;
14use rayon::slice::ParallelSliceMut;
15
16use crate::cache::ParseCache;
17use crate::graph::ModuleGraph;
18use crate::lang::{LanguageSupport, RawImport};
19use crate::vfs::Vfs;
20
21fn is_parseable(path: &Path, extensions: &[&str]) -> bool {
22    path.extension()
23        .and_then(|e| e.to_str())
24        .is_some_and(|ext| extensions.contains(&ext))
25}
26
27/// Result of discovering a single file during concurrent traversal.
28struct FileResult {
29    path: PathBuf,
30    size: u64,
31    /// File modification time captured during read (avoids re-stat in cache insert).
32    mtime_nanos: Option<u128>,
33    package: Option<String>,
34    imports: Vec<(RawImport, Option<PathBuf>)>,
35    unresolvable_dynamic: usize,
36}
37
38struct DiscoverResult {
39    files: Vec<FileResult>,
40    warnings: Vec<String>,
41}
42
43/// Phase 1: Concurrent file discovery using a lock-free work queue.
44/// Returns all discovered files with their parsed imports and resolved paths.
45#[allow(clippy::too_many_lines)]
46fn concurrent_discover(
47    entry: &Path,
48    root: &Path,
49    lang: &dyn LanguageSupport,
50    vfs: &dyn Vfs,
51) -> DiscoverResult {
52    let queue: SegQueue<PathBuf> = SegQueue::new();
53    let seen: DashSet<PathBuf> = DashSet::new();
54    let results: Mutex<Vec<FileResult>> = Mutex::new(Vec::new());
55    let warnings: SegQueue<String> = SegQueue::new();
56    let active = AtomicUsize::new(1); // entry file is active
57    let extensions = lang.extensions();
58
59    queue.push(entry.to_path_buf());
60    seen.insert(entry.to_path_buf());
61
62    rayon::scope(|s| {
63        for _ in 0..rayon::current_num_threads() {
64            s.spawn(|_| {
65                let mut spin_count: u32 = 0;
66                loop {
67                    if let Some(path) = queue.pop() {
68                        spin_count = 0;
69                        let (source, meta) = match vfs.read_with_metadata(&path) {
70                            Ok(r) => r,
71                            Err(e) => {
72                                warnings.push(format!("{}: {e}", path.display()));
73                                active.fetch_sub(1, Ordering::AcqRel);
74                                continue;
75                            }
76                        };
77                        let mtime_nanos = meta.mtime_nanos;
78                        let size = meta.len;
79
80                        let result = match lang.parse(&path, &source) {
81                            Ok(r) => r,
82                            Err(e) => {
83                                warnings.push(e.to_string());
84                                active.fetch_sub(1, Ordering::AcqRel);
85                                continue;
86                            }
87                        };
88                        let package = if path == entry {
89                            None
90                        } else {
91                            lang.package_name(&path)
92                                .or_else(|| lang.workspace_package_name(&path, root))
93                        };
94
95                        // Resolve imports and discover new files
96                        #[allow(clippy::or_fun_call)]
97                        let dir = path.parent().unwrap_or(Path::new("."));
98                        let imports: Vec<(RawImport, Option<PathBuf>)> = result
99                            .imports
100                            .into_iter()
101                            .map(|imp| {
102                                let resolved = lang.resolve(dir, &imp.specifier);
103                                if let Some(ref p) = resolved
104                                    && is_parseable(p, extensions)
105                                    && seen.insert(p.clone())
106                                {
107                                    active.fetch_add(1, Ordering::AcqRel);
108                                    queue.push(p.clone());
109                                }
110                                (imp, resolved)
111                            })
112                            .collect();
113
114                        let file_result = FileResult {
115                            path,
116                            size,
117                            mtime_nanos,
118                            package,
119                            imports,
120                            unresolvable_dynamic: result.unresolvable_dynamic,
121                        };
122                        results.lock().unwrap().push(file_result);
123
124                        if active.fetch_sub(1, Ordering::AcqRel) == 1 {
125                            // This was the last active item; all work is done
126                            return;
127                        }
128                    } else if active.load(Ordering::Acquire) == 0 {
129                        return;
130                    } else if spin_count < 64 {
131                        spin_count += 1;
132                        std::hint::spin_loop();
133                    } else {
134                        spin_count = 0;
135                        std::thread::yield_now();
136                    }
137                }
138            });
139        }
140    });
141
142    let mut files = results.into_inner().unwrap();
143    files.par_sort_unstable_by(|a, b| a.path.cmp(&b.path));
144    let warnings = std::iter::from_fn(|| warnings.pop()).collect();
145    DiscoverResult { files, warnings }
146}
147
148/// Result of building a module graph.
149#[derive(Debug)]
150#[non_exhaustive]
151pub struct BuildResult {
152    pub graph: ModuleGraph,
153    /// Files containing dynamic imports with non-literal arguments, with counts.
154    pub unresolvable_dynamic: Vec<(PathBuf, usize)>,
155    /// Import specifiers that failed to resolve (for cache invalidation).
156    pub unresolved_specifiers: Vec<String>,
157    /// Warnings from files that could not be opened, read, or parsed.
158    pub file_warnings: Vec<String>,
159}
160
161/// Build a complete `ModuleGraph` from the given entry point.
162/// Phase 1 concurrently discovers files using a lock-free work queue.
163/// Phase 2 serially constructs the graph from sorted discovery results.
164pub fn build_graph(
165    entry: &Path,
166    root: &Path,
167    lang: &dyn LanguageSupport,
168    cache: &mut ParseCache,
169    vfs: &dyn Vfs,
170) -> BuildResult {
171    // Phase 1: Concurrent discovery (lock-free work queue)
172    let discovered = concurrent_discover(entry, root, lang, vfs);
173    let file_results = discovered.files;
174
175    // Phase 2: Serial graph construction from sorted results
176    let mut graph = ModuleGraph::new();
177    let mut unresolvable_files: Vec<(PathBuf, usize)> = Vec::new();
178    let mut unresolved: HashSet<String> = HashSet::new();
179
180    // First pass: add all modules (deterministic order from sorted results)
181    for fr in &file_results {
182        graph.add_module(fr.path.clone(), fr.size, fr.package.clone());
183    }
184
185    // Second pass: add edges, collect diagnostics, and populate parse cache.
186    // Consumes file_results by value to avoid redundant clones.
187    for fr in file_results {
188        let source_id = graph.path_to_id[&fr.path];
189
190        if fr.unresolvable_dynamic > 0 {
191            unresolvable_files.push((fr.path.clone(), fr.unresolvable_dynamic));
192        }
193
194        // Separate imports into raw imports and resolved paths.
195        // Edge processing borrows from these; cache takes ownership.
196        let (raw_imports, resolved_paths): (Vec<RawImport>, Vec<Option<PathBuf>>) =
197            fr.imports.into_iter().unzip();
198
199        for (raw_import, resolved_path) in raw_imports.iter().zip(resolved_paths.iter()) {
200            match resolved_path {
201                Some(p) => {
202                    if let Some(&target_id) = graph.path_to_id.get(p) {
203                        graph.add_edge(
204                            source_id,
205                            target_id,
206                            raw_import.kind,
207                            &raw_import.specifier,
208                        );
209                    }
210                    // Target not in graph = unparseable leaf (e.g. .json, .css)
211                    // Add it as a leaf module
212                    else {
213                        let size = vfs.metadata(p).map(|m| m.len).unwrap_or(0);
214                        let package = lang
215                            .package_name(p)
216                            .or_else(|| lang.workspace_package_name(p, root));
217                        let target_id = graph.add_module(p.clone(), size, package);
218                        graph.add_edge(
219                            source_id,
220                            target_id,
221                            raw_import.kind,
222                            &raw_import.specifier,
223                        );
224                    }
225                }
226                None => {
227                    unresolved.insert(raw_import.specifier.clone());
228                }
229            }
230        }
231
232        let result = crate::lang::ParseResult {
233            imports: raw_imports,
234            unresolvable_dynamic: fr.unresolvable_dynamic,
235        };
236        if let Some(mtime) = fr.mtime_nanos {
237            cache.insert(fr.path, fr.size, mtime, result, resolved_paths);
238        }
239    }
240
241    graph.compute_package_info();
242    BuildResult {
243        graph,
244        unresolvable_dynamic: unresolvable_files,
245        unresolved_specifiers: unresolved.into_iter().collect(),
246        file_warnings: discovered.warnings,
247    }
248}
249
250#[cfg(test)]
251mod tests {
252    use super::*;
253    use crate::lang::typescript::TypeScriptSupport;
254    use crate::vfs::OsVfs;
255    use std::fs;
256
257    #[test]
258    fn parse_failure_not_retried() {
259        let tmp = tempfile::tempdir().unwrap();
260        let root = tmp.path().canonicalize().unwrap();
261
262        fs::write(root.join("entry.ts"), r#"import { x } from "./broken";"#).unwrap();
263
264        fs::write(root.join("broken.ts"), [0xFF, 0xFE, 0x00, 0x01]).unwrap();
265
266        let lang = TypeScriptSupport::new(&root);
267        let mut cache = ParseCache::new();
268        let result = build_graph(&root.join("entry.ts"), &root, &lang, &mut cache, &OsVfs);
269        let graph = result.graph;
270
271        let entry_count = graph
272            .path_to_id
273            .keys()
274            .filter(|p| p.file_name().is_some_and(|n| n == "broken.ts"))
275            .count();
276        assert!(
277            entry_count <= 1,
278            "broken.ts should appear at most once, found {entry_count}"
279        );
280    }
281}