Skip to main content

fleetreach_cli/
npm_reach.rs

1//! Build-free npm reachability via a **module import graph** (the spec's R2).
2//!
3//! Unlike the grep heuristic in [`crate::reach`], this resolves *transitive* reachability: it
4//! parses every `require`/`import` specifier in the repo's own source and in each installed
5//! `node_modules` package, builds a name-level package→package graph, and asks whether a
6//! vulnerable package is reachable from the first-party code. A reached package gets a
7//! sound-positive [`ReachVerdict::Reachable`] with a witness import-chain (`your-dep → … →
8//! vuln`), exactly like the Go/govulncheck path.
9//!
10//! **Soundness of the negative.** A `NotReachable` is only emitted under an explicit opt-in
11//! (`prune`) AND only when `node_modules` is present (so the transitive graph is complete).
12//! Even then it is *best-effort sound*: JavaScript can `require(variableExpr)` or autoload via
13//! a framework, which this cannot see, so a `NotReachable` can be wrong for such code — the
14//! flag is the caller's acknowledgement of that risk. To stay safe by default:
15//!
16//! - entry points are **over-approximated** to every first-party file (any file may run), so a
17//!   dependency used only by a test/script is still `Reachable`, never falsely pruned;
18//! - package→package edges are taken from *actual parsed import specifiers* (precise, so the
19//!   prune has teeth) — the documented trade for the dynamic-import blind spot.
20//!
21//! Without `prune`, an unreached package stays [`ReachVerdict::Unknown`].
22
23use std::collections::{BTreeMap, BTreeSet};
24use std::path::Path;
25
26use fleetreach_core::{
27    DepGraph, Ecosystem, FleetReport, Occurrence, ReachVerdict, Reachability, VulnFinding,
28};
29use walkdir::WalkDir;
30
31use crate::config::Config;
32
33/// The synthetic graph root whose edges are the first-party imports, so a witness
34/// `chain_to(pkg)` reads `[(entry), direct-dep, …, pkg]` and the witness is that minus the root.
35const ENTRY: &str = "(entry)";
36
37/// First-party source extensions (the repo's own code).
38const SRC_EXTS: &[&str] = &["js", "mjs", "cjs", "ts", "tsx", "jsx"];
39/// Extensions parsed inside `node_modules` packages (published packages are usually JS).
40const DEP_EXTS: &[&str] = &["js", "mjs", "cjs"];
41
42/// The reachability verdict for one package.
43#[derive(Debug, Clone, PartialEq, Eq)]
44pub enum Reach {
45    /// Reached from first-party code; the witness is the package import-chain
46    /// `[direct-dep, …, package]`.
47    Reachable { witness: Vec<String> },
48    /// `node_modules` was present and no import path reached the package (prune mode only).
49    NotReachable,
50    /// Not decided (no `node_modules`, or unreached without prune).
51    Unknown,
52}
53
54/// Options for [`assess`].
55pub struct Options {
56    /// Emit `NotReachable` for unreached packages (requires `node_modules`). The explicit
57    /// opt-in for the best-effort-sound negative verdict.
58    pub prune: bool,
59}
60
61/// Annotate every npm finding in `report` with import-graph reachability. Repos are analyzed
62/// once and cached. Non-npm findings are left untouched.
63pub fn assess(report: &mut FleetReport, config: &Config, opts: &Options) {
64    let mut cache: BTreeMap<String, Analysis> = BTreeMap::new();
65
66    for finding in &mut report.vulnerabilities {
67        if finding.ecosystem != Ecosystem::Npm {
68            continue;
69        }
70        let Some(verdict) = best_verdict(finding, config, opts, &mut cache) else {
71            continue;
72        };
73        finding.reachable = match &verdict {
74            ReachVerdict::Reachable { .. } => Some(true),
75            ReachVerdict::NotReachable => Some(false),
76            ReachVerdict::Unknown { .. } => None,
77        };
78        finding.reachability = Some(Reachability {
79            verdict,
80            config: "import-graph".to_string(),
81            engine: "fleetreach-npm-imports".to_string(),
82            targets: Vec::new(),
83            witness: None,
84        });
85    }
86}
87
88/// The best (most reachable) verdict for a finding across its repos: `Reachable` wins, then
89/// `Unknown`, then `NotReachable` (a finding is only pruned if unreached in *every* repo).
90fn best_verdict(
91    finding: &VulnFinding,
92    config: &Config,
93    opts: &Options,
94    cache: &mut BTreeMap<String, Analysis>,
95) -> Option<ReachVerdict> {
96    let mut best: Option<Reach> = None;
97    for occ in &finding.occurrences {
98        let Occurrence::InRepo { repo, package, .. } = occ else {
99            continue;
100        };
101        let Some(repo_cfg) = config.repos.iter().find(|r| r.id.0 == repo.0) else {
102            continue;
103        };
104        let analysis = cache
105            .entry(repo.0.clone())
106            .or_insert_with(|| analyze(&repo_cfg.path));
107        best = Some(merge(best.take(), analysis.reach(package, opts)));
108    }
109    best.map(|reach| match reach {
110        Reach::Reachable { witness } => ReachVerdict::Reachable { witness },
111        Reach::NotReachable => ReachVerdict::NotReachable,
112        Reach::Unknown => ReachVerdict::Unknown {
113            reason: "import-graph: package not reached from first-party source".into(),
114        },
115    })
116}
117
118/// `Reachable` dominates, then `Unknown`, then `NotReachable`.
119fn merge(a: Option<Reach>, b: Reach) -> Reach {
120    match (a, b) {
121        (Some(Reach::Reachable { witness }), _) | (_, Reach::Reachable { witness }) => {
122            Reach::Reachable { witness }
123        }
124        (Some(Reach::Unknown), _) | (_, Reach::Unknown) => Reach::Unknown,
125        (Some(Reach::NotReachable), Reach::NotReachable) | (None, Reach::NotReachable) => {
126            Reach::NotReachable
127        }
128    }
129}
130
131/// One repo's import graph: a [`DepGraph`] rooted at the synthetic [`ENTRY`] node (its edges are
132/// the first-party imports; the rest are the `node_modules` package→package edges), plus whether
133/// `node_modules` was present (required to assert `NotReachable`).
134struct Analysis {
135    graph: DepGraph,
136    has_node_modules: bool,
137}
138
139impl Analysis {
140    /// The reachability of `package`: a non-empty `chain_to` (via the shared BFS) is `Reachable`
141    /// with the witness import-chain (the chain minus the synthetic entry root); otherwise
142    /// `NotReachable` under prune + `node_modules`, else `Unknown`.
143    fn reach(&self, package: &str, opts: &Options) -> Reach {
144        let chain = self.graph.chain_to(package);
145        if !chain.is_empty() {
146            Reach::Reachable {
147                witness: chain.into_iter().skip(1).collect(),
148            }
149        } else if opts.prune && self.has_node_modules {
150            Reach::NotReachable
151        } else {
152            Reach::Unknown
153        }
154    }
155}
156
157/// Build one repo's import graph: the synthetic [`ENTRY`] root → first-party imports, plus each
158/// installed package → the packages it imports (from `node_modules` source).
159fn analyze(repo_dir: &Path) -> Analysis {
160    let mut graph = DepGraph::new(ENTRY);
161    // Over-approximated entry set = every package the repo's own source imports.
162    graph.add_edges(ENTRY, first_party_imports(repo_dir));
163
164    let node_modules = repo_dir.join("node_modules");
165    let has_node_modules = node_modules.is_dir();
166    if has_node_modules {
167        for (pkg, deps) in package_graph(&node_modules) {
168            graph.add_edges(&pkg, deps);
169        }
170    }
171    Analysis {
172        graph,
173        has_node_modules,
174    }
175}
176
177/// The set of package names directly imported by the repo's own source (node_modules excluded).
178fn first_party_imports(repo_dir: &Path) -> BTreeSet<String> {
179    let mut set = BTreeSet::new();
180    for entry in WalkDir::new(repo_dir)
181        .into_iter()
182        .filter_entry(|e| e.file_name() != "node_modules" && e.file_name() != ".git")
183        .filter_map(Result::ok)
184        .filter(|e| e.file_type().is_file())
185    {
186        if has_ext(entry.path(), SRC_EXTS) {
187            if let Ok(text) = std::fs::read_to_string(entry.path()) {
188                for spec in import_packages(&text) {
189                    set.insert(spec);
190                }
191            }
192        }
193    }
194    set
195}
196
197/// Build the name-level package→package import graph by scanning each installed package's source.
198fn package_graph(node_modules: &Path) -> BTreeMap<String, BTreeSet<String>> {
199    let mut graph: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
200    for pkg_dir in package_dirs(node_modules) {
201        let Some(name) = package_name_of(node_modules, &pkg_dir) else {
202            continue;
203        };
204        let mut deps = BTreeSet::new();
205        for entry in WalkDir::new(&pkg_dir)
206            .into_iter()
207            .filter_entry(|e| e.file_name() != "node_modules") // its own nested deps handled as their own nodes
208            .filter_map(Result::ok)
209            .filter(|e| e.file_type().is_file())
210        {
211            if has_ext(entry.path(), DEP_EXTS) {
212                if let Ok(text) = std::fs::read_to_string(entry.path()) {
213                    for spec in import_packages(&text) {
214                        if spec != name {
215                            deps.insert(spec);
216                        }
217                    }
218                }
219            }
220        }
221        graph.entry(name).or_default().extend(deps);
222    }
223    graph
224}
225
226/// The top-level package directories under `node_modules` (`foo`, `@scope/bar`), skipping
227/// `.bin` and dotfiles.
228fn package_dirs(node_modules: &Path) -> Vec<std::path::PathBuf> {
229    let mut dirs = Vec::new();
230    let Ok(entries) = std::fs::read_dir(node_modules) else {
231        return dirs;
232    };
233    for entry in entries.flatten() {
234        let name = entry.file_name();
235        let name = name.to_string_lossy();
236        if name.starts_with('.') {
237            continue;
238        }
239        let path = entry.path();
240        if !path.is_dir() {
241            continue;
242        }
243        if let Some(scope) = name.strip_prefix('@') {
244            let _ = scope;
245            // scoped: each subdirectory is a package
246            if let Ok(inner) = std::fs::read_dir(&path) {
247                for sub in inner.flatten() {
248                    if sub.path().is_dir() {
249                        dirs.push(sub.path());
250                    }
251                }
252            }
253        } else {
254            dirs.push(path);
255        }
256    }
257    dirs
258}
259
260/// The package name for a directory under `node_modules` (`@scope/name` for scoped).
261fn package_name_of(node_modules: &Path, pkg_dir: &Path) -> Option<String> {
262    let rel = pkg_dir.strip_prefix(node_modules).ok()?;
263    let s = rel.to_string_lossy().replace('\\', "/");
264    if s.is_empty() {
265        None
266    } else {
267        Some(s)
268    }
269}
270
271/// Extract the **bare** package names imported by a JS/TS source text: the module specifier of
272/// every `require('x')`, `import … from 'x'`, `import('x')`, `export … from 'x'`, reduced to its
273/// package name (`lodash/fp` → `lodash`, `@scope/p/sub` → `@scope/p`). Relative/absolute
274/// specifiers are skipped. Loose by design (a spurious match only adds a graph edge, which over-
275/// approximates reachability — safe).
276fn import_packages(text: &str) -> BTreeSet<String> {
277    let mut out = BTreeSet::new();
278    let bytes = text.as_bytes();
279    for (kw, off) in keyword_hits(text) {
280        // Find the next single- or double-quoted string after the keyword.
281        if let Some(spec) = quoted_after(bytes, off + kw) {
282            if let Some(pkg) = bare_package(&spec) {
283                out.insert(pkg);
284            }
285        }
286    }
287    out
288}
289
290/// Offsets just past each import-introducing keyword occurrence (`require(`, `from `, `import(`).
291fn keyword_hits(text: &str) -> Vec<(usize, usize)> {
292    let mut hits = Vec::new();
293    for kw in ["require(", "from ", "import(", "from\t"] {
294        let mut from = 0;
295        while let Some(i) = text[from..].find(kw) {
296            let at = from + i;
297            hits.push((kw.len(), at));
298            from = at + kw.len();
299        }
300    }
301    hits
302}
303
304/// The contents of the first `'…'`/`"…"` string starting within a few bytes of `start`
305/// (skipping whitespace/`(`), or `None`.
306fn quoted_after(bytes: &[u8], start: usize) -> Option<String> {
307    let mut i = start;
308    while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t' || bytes[i] == b'(') {
309        i += 1;
310    }
311    if i >= bytes.len() {
312        return None;
313    }
314    let quote = bytes[i];
315    if quote != b'\'' && quote != b'"' {
316        return None;
317    }
318    let mut j = i + 1;
319    while j < bytes.len() && bytes[j] != quote {
320        j += 1;
321    }
322    if j >= bytes.len() {
323        return None;
324    }
325    std::str::from_utf8(&bytes[i + 1..j])
326        .ok()
327        .map(str::to_string)
328}
329
330/// The package name of a module specifier, or `None` for a relative/absolute path.
331fn bare_package(spec: &str) -> Option<String> {
332    if spec.is_empty() || spec.starts_with('.') || spec.starts_with('/') {
333        return None;
334    }
335    if let Some(scoped) = spec.strip_prefix('@') {
336        let mut parts = scoped.splitn(3, '/');
337        let scope = parts.next()?;
338        let name = parts.next()?;
339        if scope.is_empty() || name.is_empty() {
340            return None;
341        }
342        Some(format!("@{scope}/{name}"))
343    } else {
344        spec.split('/')
345            .next()
346            .filter(|s| !s.is_empty())
347            .map(str::to_string)
348    }
349}
350
351fn has_ext(path: &Path, exts: &[&str]) -> bool {
352    path.extension()
353        .and_then(|x| x.to_str())
354        .is_some_and(|x| exts.contains(&x))
355}
356
357#[cfg(test)]
358mod tests {
359    #![allow(clippy::unwrap_used)]
360    use super::*;
361
362    #[test]
363    fn import_packages_extracts_bare_specifiers() {
364        let src = r#"
365            const _ = require('lodash');
366            import x from "react";
367            import { a } from 'lodash/fp';
368            const d = await import('@scope/pkg/sub');
369            export { y } from './local';      // relative, skipped
370            const e = require('./util');      // relative, skipped
371        "#;
372        let pkgs = import_packages(src);
373        assert!(pkgs.contains("lodash"));
374        assert!(pkgs.contains("react"));
375        assert!(pkgs.contains("@scope/pkg"));
376        assert!(!pkgs
377            .iter()
378            .any(|p| p.contains("local") || p.contains("util")));
379    }
380
381    #[test]
382    fn bare_package_reduces_subpaths_and_scopes() {
383        assert_eq!(bare_package("lodash"), Some("lodash".into()));
384        assert_eq!(bare_package("lodash/fp"), Some("lodash".into()));
385        assert_eq!(bare_package("@scope/pkg"), Some("@scope/pkg".into()));
386        assert_eq!(bare_package("@scope/pkg/sub"), Some("@scope/pkg".into()));
387        assert_eq!(bare_package("./rel"), None);
388        assert_eq!(bare_package("/abs"), None);
389    }
390
391    #[test]
392    fn analysis_reach_drops_synthetic_root_from_witness() {
393        // entry -> express -> body-parser -> qs ; entry -> express (direct).
394        let mut graph = DepGraph::new(ENTRY);
395        graph.add_edges(ENTRY, ["express".to_string()]);
396        graph.add_edges("express", ["body-parser".to_string()]);
397        graph.add_edges("body-parser", ["qs".to_string()]);
398        let a = Analysis {
399            graph,
400            has_node_modules: true,
401        };
402        let opts = Options { prune: true };
403        assert_eq!(
404            a.reach("qs", &opts),
405            Reach::Reachable {
406                witness: vec!["express".into(), "body-parser".into(), "qs".into()]
407            }
408        );
409        assert_eq!(
410            a.reach("express", &opts),
411            Reach::Reachable {
412                witness: vec!["express".into()]
413            }
414        );
415        // unreached + prune + node_modules -> NotReachable
416        assert_eq!(a.reach("lodash", &opts), Reach::NotReachable);
417        // unreached without prune -> Unknown
418        assert_eq!(a.reach("lodash", &Options { prune: false }), Reach::Unknown);
419    }
420
421    #[test]
422    fn merge_prefers_reachable_then_unknown() {
423        let r = || Reach::Reachable {
424            witness: vec!["a".into()],
425        };
426        assert!(matches!(
427            merge(Some(Reach::NotReachable), r()),
428            Reach::Reachable { .. }
429        ));
430        assert!(matches!(
431            merge(Some(r()), Reach::NotReachable),
432            Reach::Reachable { .. }
433        ));
434        assert!(matches!(
435            merge(Some(Reach::NotReachable), Reach::Unknown),
436            Reach::Unknown
437        ));
438        assert!(matches!(
439            merge(Some(Reach::NotReachable), Reach::NotReachable),
440            Reach::NotReachable
441        ));
442    }
443}