Skip to main content

fleetreach_cli/
orchestrate.rs

//! The multi-repo scan loop (§10, step 4).
//!
//! Repos are scanned **in parallel** across cores (rayon; no async in v1) and their
//! results are merged serially in config order, so the assembled output matches a
//! serial scan. Each repo degrades independently: a missing or unreadable lockfile
//! becomes an `Errored` [`RepoOutcome`] and the run continues — but that gap is what
//! later forces a non-clean exit (§8), since we cannot claim a repo is clean without
//! reading it.
//!
//! Output here is **pre-correlation**: every finding carries a single
//! occurrence. Grouping across the fleet happens in `correlate` (M4).
10
11use std::collections::BTreeSet;
12use std::path::{Path, PathBuf};
13
14use fleetreach_core::semver::Version;
15use fleetreach_core::{
16    Ecosystem, Occurrence, RepoId, RepoOutcome, ScanStatus, VulnFinding, WarnFinding,
17};
18use fleetreach_ghactions::{ghactions_db_path, GhActionsDb, GhaError};
19use fleetreach_go::{GoDb, GoError, SandboxPolicy};
20use fleetreach_hex::{hex_db_path, HexDb, HexError};
21use fleetreach_julia::{julia_db_path, JuliaDb, JuliaError};
22use fleetreach_maven::{maven_db_path, MavenDb, MavenError};
23use fleetreach_npm::{npm_db_path, NpmDb, NpmError};
24use fleetreach_nuget::{nuget_db_path, NuGetDb, NuGetError};
25use fleetreach_packagist::{packagist_db_path, PackagistDb, PackagistError};
26use fleetreach_pypi::{pypi_db_path, PyPiDb, PyPiError};
27use fleetreach_rubygems::{rubygems_db_path, RubyGemsDb, RubyGemsError};
28use fleetreach_scan::{scan_lockfile, scan_toolchain, AdvisoryDb, RepoScan};
29use fleetreach_swift::{swift_db_path, SwiftDb, SwiftError};
30use rayon::prelude::*;
31use walkdir::WalkDir;
32
33use crate::config::{Config, Repo};
34use crate::resolve;
35
36/// The aggregated, pre-correlation result of scanning a fleet.
37#[derive(Debug, Default, Clone)]
38pub struct ScanData {
39    pub vulnerabilities: Vec<VulnFinding>,
40    pub warnings: Vec<WarnFinding>,
41    pub outcomes: Vec<RepoOutcome>,
42    /// Total installed packages skipped across all toolchain-free repos because their
43    /// version string did not parse (see [`fleetreach_core::osv::TierCScan`]). Surfaced as a
44    /// diagnostic so the skip is visible; never an error.
45    pub skipped_unparseable: u32,
46}
47
48/// An installed toolchain to additionally scan against `Collection::Rust`.
49#[derive(Debug, Clone)]
50pub struct Toolchain {
51    pub channel: String,
52    pub version: Version,
53}
54
55/// Everything the Go scan path needs, bundled so it threads through the fleet walk as
56/// one argument. `govulncheck` is `None` when consent/binary are absent, which routes
57/// the repo to the toolchain-free Tier-C matcher (or an errored gap); the rest mirror
58/// [`fleetreach_go::GoScanOptions`].
59#[derive(Debug, Clone, Copy)]
60pub struct GoScan<'a> {
61    pub govulncheck: Option<&'a Path>,
62    pub sandbox: SandboxPolicy,
63    pub vuln_db: Option<&'a str>,
64    pub offline: bool,
65}
66
/// The npm scan inputs. npm is **toolchain-free only**: the Tier-C matcher reads
/// `package-lock.json` and an OSV mirror and builds nothing. Unlike [`GoScan`] there is
/// therefore no binary, sandbox, or online mode to configure.
#[derive(Clone, Copy, Debug)]
pub struct NpmScan<'a> {
    /// The `file://<dir>` OSV mirror; when absent, every npm repo is an honest gap.
    pub vuln_db: Option<&'a str>,
}
75
/// The PyPI scan inputs. Like npm, PyPI is **toolchain-free only**: the Tier-C matcher
/// reads a Python lockfile and an OSV mirror and builds nothing.
#[derive(Clone, Copy, Debug)]
pub struct PyPiScan<'a> {
    /// The `file://` OSV mirror; when absent, every PyPI repo is an honest gap.
    pub vuln_db: Option<&'a str>,
}
83
/// The RubyGems scan inputs. Like npm/PyPI, RubyGems is **toolchain-free only**: the
/// Tier-C matcher reads `Gemfile.lock` and an OSV mirror and builds nothing.
#[derive(Clone, Copy, Debug)]
pub struct RubyGemsScan<'a> {
    /// The `file://` OSV mirror; when absent, every Ruby repo is an honest gap.
    pub vuln_db: Option<&'a str>,
}
91
/// The Packagist scan inputs. Like npm/PyPI/RubyGems, Packagist is **toolchain-free
/// only**: the Tier-C matcher reads `composer.lock` and an OSV mirror and builds nothing.
#[derive(Clone, Copy, Debug)]
pub struct PackagistScan<'a> {
    /// The `file://` OSV mirror; when absent, every PHP repo is an honest gap.
    pub vuln_db: Option<&'a str>,
}
99
/// The NuGet scan inputs. Like the other Tier-C feeders, NuGet is **toolchain-free
/// only**: the matcher reads `packages.lock.json` and an OSV mirror and builds nothing.
#[derive(Clone, Copy, Debug)]
pub struct NuGetScan<'a> {
    /// The `file://` OSV mirror; when absent, every .NET repo is an honest gap.
    pub vuln_db: Option<&'a str>,
}
107
/// The Julia scan inputs. Like the other Tier-C feeders, Julia is **toolchain-free
/// only**: the matcher reads `Manifest.toml` and an OSV mirror and builds nothing.
#[derive(Clone, Copy, Debug)]
pub struct JuliaScan<'a> {
    /// The `file://` OSV mirror; when absent, every Julia repo is an honest gap.
    pub vuln_db: Option<&'a str>,
}
115
/// The Swift scan inputs. Like the other Tier-C feeders, Swift is **toolchain-free
/// only**: the matcher reads `Package.resolved` and an OSV mirror and builds nothing.
#[derive(Clone, Copy, Debug)]
pub struct SwiftScan<'a> {
    /// The `file://` OSV mirror; when absent, every Swift repo is an honest gap.
    pub vuln_db: Option<&'a str>,
}
123
/// The Hex scan inputs. Like the other Tier-C feeders, Hex is **toolchain-free only**:
/// the matcher reads `mix.lock` and an OSV mirror and builds nothing.
#[derive(Clone, Copy, Debug)]
pub struct HexScan<'a> {
    /// The `file://` OSV mirror; when absent, every Elixir repo is an honest gap.
    pub vuln_db: Option<&'a str>,
}
131
/// The GitHub Actions scan inputs. Like the other Tier-C feeders, it is **toolchain-free
/// only**: the matcher reads `.github/workflows/*.yml` and an OSV mirror and builds
/// nothing.
#[derive(Clone, Copy, Debug)]
pub struct GhActionsScan<'a> {
    /// The `file://` OSV mirror; when absent, every workflow repo is an honest gap.
    pub vuln_db: Option<&'a str>,
}
140
/// The Maven scan inputs. Like the other Tier-C feeders, Maven is **toolchain-free
/// only**: the matcher reads `gradle.lockfile`/`pom.xml` and an OSV mirror and builds
/// nothing.
#[derive(Clone, Copy, Debug)]
pub struct MavenScan<'a> {
    /// The `file://` OSV mirror; when absent, every Java repo is an honest gap.
    pub vuln_db: Option<&'a str>,
}
148
149// Each ecosystem threads its own scan config / once-loaded DB through the fleet walk as an
150// independent, clearly-named argument; bundling them into a catch-all struct would obscure
151// more than it simplifies (they are configured and loaded separately). The count grows by
152// one per ecosystem, so the lint is allowed here rather than chased.
153#[allow(clippy::too_many_arguments)]
154/// Scan every repo in `config`, plus the toolchain if provided. When
155/// `host_triple` is `Some`, each finding is additionally annotated (via
156/// `cargo tree`) with whether the package is actually built — see
157/// [`crate::resolve`].
158pub fn scan_fleet(
159    db: &AdvisoryDb,
160    config: &Config,
161    toolchain: Option<&Toolchain>,
162    host_triple: Option<&str>,
163    go: &GoScan,
164    npm: &NpmScan,
165    pypi: &PyPiScan,
166    rubygems: &RubyGemsScan,
167    packagist: &PackagistScan,
168    nuget: &NuGetScan,
169    julia: &JuliaScan,
170    swift: &SwiftScan,
171    hex: &HexScan,
172    ghactions: &GhActionsScan,
173    maven: &MavenScan,
174) -> ScanData {
175    let mut data = ScanData::default();
176
177    // The npm OSV DB has no prebuilt index, so it is loaded ONCE here (not per repo)
178    // and shared read-only across the parallel walk. Only loaded when the fleet
179    // actually has an npm repo and a `file://` mirror was given; a load failure is
180    // carried as the `Err` so every npm repo degrades to an honest gap with the reason.
181    let npm_db: Option<Result<NpmDb, NpmError>> = if config
182        .repos
183        .iter()
184        .any(|r| effective_ecosystem(r) == Ecosystem::Npm)
185    {
186        npm.vuln_db
187            .and_then(npm_db_path)
188            .map(|root| NpmDb::load(&root))
189    } else {
190        None
191    };
192
193    // The Go Tier-C mirror is likewise loaded ONCE (the 434 KB module index + the
194    // advisories it references) and shared, instead of being re-read and re-parsed per
195    // repo — profiling showed that re-parse dominated a large Go fleet. Only when Tier-C
196    // will actually run: no govulncheck (toolchain path), a `file://` mirror, a Go repo.
197    let go_db: Option<Result<GoDb, GoError>> = if go.govulncheck.is_none()
198        && config
199            .repos
200            .iter()
201            .any(|r| effective_ecosystem(r) == Ecosystem::Go)
202    {
203        go.vuln_db
204            .and_then(fleetreach_go::offline_db_path)
205            .map(|root| GoDb::load(&root))
206    } else {
207        None
208    };
209
210    // The PyPI OSV DB, like npm, has no prebuilt index, so it is loaded ONCE and shared
211    // read-only across the walk. Only when the fleet has a PyPI repo and a `file://`
212    // mirror was given; a load failure is carried as the `Err` so every PyPI repo
213    // degrades to an honest gap with the reason.
214    let pypi_db: Option<Result<PyPiDb, PyPiError>> = if config
215        .repos
216        .iter()
217        .any(|r| effective_ecosystem(r) == Ecosystem::Pypi)
218    {
219        pypi.vuln_db
220            .and_then(pypi_db_path)
221            .map(|root| PyPiDb::load(&root))
222    } else {
223        None
224    };
225
226    // The RubyGems OSV DB, like npm/PyPI, has no prebuilt index, so it is loaded ONCE and
227    // shared read-only across the walk. Only when the fleet has a RubyGems repo and a
228    // `file://` mirror was given; a load failure is carried as the `Err` so every RubyGems
229    // repo degrades to an honest gap with the reason.
230    let rubygems_db: Option<Result<RubyGemsDb, RubyGemsError>> = if config
231        .repos
232        .iter()
233        .any(|r| effective_ecosystem(r) == Ecosystem::RubyGems)
234    {
235        rubygems
236            .vuln_db
237            .and_then(rubygems_db_path)
238            .map(|root| RubyGemsDb::load(&root))
239    } else {
240        None
241    };
242
243    // The Packagist OSV DB, like npm/PyPI/RubyGems, has no prebuilt index, so it is loaded
244    // ONCE and shared read-only across the walk. Only when the fleet has a Packagist repo and
245    // a `file://` mirror was given; a load failure is carried as the `Err` so every Packagist
246    // repo degrades to an honest gap with the reason.
247    let packagist_db: Option<Result<PackagistDb, PackagistError>> = if config
248        .repos
249        .iter()
250        .any(|r| effective_ecosystem(r) == Ecosystem::Packagist)
251    {
252        packagist
253            .vuln_db
254            .and_then(packagist_db_path)
255            .map(|root| PackagistDb::load(&root))
256    } else {
257        None
258    };
259
260    // The NuGet OSV DB, like the other Tier-C feeders, has no prebuilt index, so it is loaded
261    // ONCE and shared read-only across the walk. Only when the fleet has a NuGet repo and a
262    // `file://` mirror was given; a load failure is carried as the `Err` so every NuGet repo
263    // degrades to an honest gap with the reason.
264    let nuget_db: Option<Result<NuGetDb, NuGetError>> = if config
265        .repos
266        .iter()
267        .any(|r| effective_ecosystem(r) == Ecosystem::NuGet)
268    {
269        nuget
270            .vuln_db
271            .and_then(nuget_db_path)
272            .map(|root| NuGetDb::load(&root))
273    } else {
274        None
275    };
276
277    // The Julia OSV DB, like the other Tier-C feeders, has no prebuilt index, so it is loaded
278    // ONCE and shared read-only across the walk. Only when the fleet has a Julia repo and a
279    // `file://` mirror was given; a load failure is carried as the `Err` so every Julia repo
280    // degrades to an honest gap with the reason.
281    let julia_db: Option<Result<JuliaDb, JuliaError>> = if config
282        .repos
283        .iter()
284        .any(|r| effective_ecosystem(r) == Ecosystem::Julia)
285    {
286        julia
287            .vuln_db
288            .and_then(julia_db_path)
289            .map(|root| JuliaDb::load(&root))
290    } else {
291        None
292    };
293
294    // The Swift OSV DB, like the other Tier-C feeders, has no prebuilt index, so it is loaded
295    // ONCE and shared read-only across the walk. Only when the fleet has a Swift repo and a
296    // `file://` mirror was given; a load failure is carried as the `Err` so every Swift repo
297    // degrades to an honest gap with the reason.
298    let swift_db: Option<Result<SwiftDb, SwiftError>> = if config
299        .repos
300        .iter()
301        .any(|r| effective_ecosystem(r) == Ecosystem::Swift)
302    {
303        swift
304            .vuln_db
305            .and_then(swift_db_path)
306            .map(|root| SwiftDb::load(&root))
307    } else {
308        None
309    };
310
311    // The Hex OSV DB, like the other Tier-C feeders, has no prebuilt index, so it is loaded
312    // ONCE and shared read-only across the walk. Only when the fleet has a Hex repo and a
313    // `file://` mirror was given; a load failure is carried as the `Err` so every Hex repo
314    // degrades to an honest gap with the reason.
315    let hex_db: Option<Result<HexDb, HexError>> = if config
316        .repos
317        .iter()
318        .any(|r| effective_ecosystem(r) == Ecosystem::Hex)
319    {
320        hex.vuln_db
321            .and_then(hex_db_path)
322            .map(|root| HexDb::load(&root))
323    } else {
324        None
325    };
326
327    // The GitHub Actions OSV DB, like the other Tier-C feeders, has no prebuilt index, so it is
328    // loaded ONCE and shared read-only across the walk. Only when the fleet has a workflow repo
329    // and a `file://` mirror was given; a load failure is carried as the `Err` so every such
330    // repo degrades to an honest gap with the reason.
331    let ghactions_db: Option<Result<GhActionsDb, GhaError>> = if config
332        .repos
333        .iter()
334        .any(|r| effective_ecosystem(r) == Ecosystem::GitHubActions)
335    {
336        ghactions
337            .vuln_db
338            .and_then(ghactions_db_path)
339            .map(|root| GhActionsDb::load(&root))
340    } else {
341        None
342    };
343
344    // The Maven OSV DB, like the other Tier-C feeders, has no prebuilt index, so it is loaded
345    // ONCE and shared read-only across the walk. Only when the fleet has a Java repo and a
346    // `file://` mirror was given; a load failure is carried as the `Err` so every Maven repo
347    // degrades to an honest gap with the reason.
348    let maven_db: Option<Result<MavenDb, MavenError>> = if config
349        .repos
350        .iter()
351        .any(|r| effective_ecosystem(r) == Ecosystem::Maven)
352    {
353        maven
354            .vuln_db
355            .and_then(maven_db_path)
356            .map(|root| MavenDb::load(&root))
357    } else {
358        None
359    };
360
361    // Each repo is independent and shares only the read-only advisory DBs, so the
362    // per-repo scan (dominated by lockfile parsing) fans out across cores. Results
363    // are collected in config order and merged serially, so the assembled output
364    // is byte-identical to a serial scan regardless of completion order.
365    let per_repo: Vec<RepoResult> = config
366        .repos
367        .par_iter()
368        .map(|repo| {
369            scan_one_repo(
370                db,
371                repo,
372                host_triple,
373                go,
374                npm_db.as_ref(),
375                go_db.as_ref(),
376                pypi_db.as_ref(),
377                rubygems_db.as_ref(),
378                packagist_db.as_ref(),
379                nuget_db.as_ref(),
380                julia_db.as_ref(),
381                swift_db.as_ref(),
382                hex_db.as_ref(),
383                ghactions_db.as_ref(),
384                maven_db.as_ref(),
385            )
386        })
387        .collect();
388
389    for result in per_repo {
390        data.vulnerabilities.extend(result.vulnerabilities);
391        data.warnings.extend(result.warnings);
392        data.outcomes.push(result.outcome);
393        data.skipped_unparseable += result.skipped_unparseable;
394    }
395
396    // Toolchain advisories are global — no repo, scanned once.
397    if let Some(tc) = toolchain {
398        let ts = scan_toolchain(db, &tc.channel, &tc.version);
399        data.vulnerabilities.extend(ts.vulnerabilities);
400        data.warnings.extend(ts.warnings);
401    }
402
403    data
404}
405
406/// The findings and outcome for a single repo, returned by the parallel map so
407/// the caller can merge them in deterministic (config) order.
408struct RepoResult {
409    vulnerabilities: Vec<VulnFinding>,
410    warnings: Vec<WarnFinding>,
411    outcome: RepoOutcome,
412    /// Installed packages skipped because their version did not parse (Tier-C only; 0
413    /// otherwise). Summed across the fleet and surfaced as a diagnostic.
414    skipped_unparseable: u32,
415}
416
417impl RepoResult {
418    /// A repo that scanned cleanly to these findings.
419    fn scanned(
420        repo: &RepoId,
421        vulnerabilities: Vec<VulnFinding>,
422        warnings: Vec<WarnFinding>,
423    ) -> Self {
424        let outcome = RepoOutcome {
425            repo: repo.clone(),
426            status: ScanStatus::Scanned {
427                vulns: vulnerabilities.len(),
428                warnings: warnings.len(),
429            },
430        };
431        RepoResult {
432            vulnerabilities,
433            warnings,
434            outcome,
435            skipped_unparseable: 0,
436        }
437    }
438
439    /// Record the count of packages skipped for an unparseable version (Tier-C feeders).
440    fn with_skipped(mut self, skipped_unparseable: u32) -> Self {
441        self.skipped_unparseable = skipped_unparseable;
442        self
443    }
444
445    /// A repo that could not be fully scanned — an honest gap, never reported clean.
446    fn errored(repo: &RepoId, reason: String) -> Self {
447        RepoResult {
448            vulnerabilities: Vec::new(),
449            warnings: Vec::new(),
450            outcome: RepoOutcome {
451                repo: repo.clone(),
452                status: ScanStatus::Errored { reason },
453            },
454            skipped_unparseable: 0,
455        }
456    }
457}
458
459/// The ecosystem a repo is scanned as: an explicit `fleet.toml` override, else
460/// auto-detected from its manifests. Rust-first — a `Cargo.lock` wins, so only a
461/// `go.mod`-without-`Cargo.lock` repo auto-detects as Go.
462fn effective_ecosystem(repo: &Repo) -> Ecosystem {
463    if let Some(eco) = repo.ecosystem {
464        return eco;
465    }
466    if repo.path.join("Cargo.lock").is_file() {
467        return Ecosystem::Cargo;
468    }
469    if repo.path.join("go.mod").is_file() {
470        return Ecosystem::Go;
471    }
472    if repo.path.join("package-lock.json").is_file() {
473        return Ecosystem::Npm;
474    }
475    if fleetreach_pypi::detect(&repo.path).is_some() {
476        return Ecosystem::Pypi;
477    }
478    if repo.path.join("Gemfile.lock").is_file() {
479        return Ecosystem::RubyGems;
480    }
481    if repo.path.join("composer.lock").is_file() {
482        return Ecosystem::Packagist;
483    }
484    if repo.path.join("packages.lock.json").is_file() {
485        return Ecosystem::NuGet;
486    }
487    if repo.path.join("Manifest.toml").is_file() {
488        return Ecosystem::Julia;
489    }
490    if repo.path.join("Package.resolved").is_file() {
491        return Ecosystem::Swift;
492    }
493    if repo.path.join("mix.lock").is_file() {
494        return Ecosystem::Hex;
495    }
496    if repo.path.join("gradle.lockfile").is_file() || repo.path.join("pom.xml").is_file() {
497        return Ecosystem::Maven;
498    }
499    // GitHub Actions is checked LAST: a package repo with a `.github/workflows/` dir is scanned
500    // for its package ecosystem above; only a workflow-only repo (or an explicit
501    // `ecosystem = "githubactions"`) routes here.
502    if repo.path.join(".github").join("workflows").is_dir() {
503        return Ecosystem::GitHubActions;
504    }
505    Ecosystem::Cargo
506}
507
508/// Scan one repo, dispatching on its ecosystem. Pure (no shared mutable state) so
509/// it runs safely in parallel across repos; the only shared input is the
510/// read-only `db`.
511// One once-loaded OSV DB per Tier-C ecosystem is threaded in as its own argument; see the
512// note on `scan_fleet` for why the lint is allowed rather than bundled away.
513#[allow(clippy::too_many_arguments)]
514fn scan_one_repo(
515    db: &AdvisoryDb,
516    repo: &Repo,
517    host_triple: Option<&str>,
518    go: &GoScan,
519    npm_db: Option<&Result<NpmDb, NpmError>>,
520    go_db: Option<&Result<GoDb, GoError>>,
521    pypi_db: Option<&Result<PyPiDb, PyPiError>>,
522    rubygems_db: Option<&Result<RubyGemsDb, RubyGemsError>>,
523    packagist_db: Option<&Result<PackagistDb, PackagistError>>,
524    nuget_db: Option<&Result<NuGetDb, NuGetError>>,
525    julia_db: Option<&Result<JuliaDb, JuliaError>>,
526    swift_db: Option<&Result<SwiftDb, SwiftError>>,
527    hex_db: Option<&Result<HexDb, HexError>>,
528    ghactions_db: Option<&Result<GhActionsDb, GhaError>>,
529    maven_db: Option<&Result<MavenDb, MavenError>>,
530) -> RepoResult {
531    match effective_ecosystem(repo) {
532        Ecosystem::Go => return scan_go_repo(repo, go, go_db),
533        Ecosystem::Npm => return scan_npm_repo(repo, npm_db),
534        Ecosystem::Pypi => return scan_pypi_repo(repo, pypi_db),
535        Ecosystem::RubyGems => return scan_rubygems_repo(repo, rubygems_db),
536        Ecosystem::Packagist => return scan_packagist_repo(repo, packagist_db),
537        Ecosystem::NuGet => return scan_nuget_repo(repo, nuget_db),
538        Ecosystem::Julia => return scan_julia_repo(repo, julia_db),
539        Ecosystem::Swift => return scan_swift_repo(repo, swift_db),
540        Ecosystem::Hex => return scan_hex_repo(repo, hex_db),
541        Ecosystem::Maven => return scan_maven_repo(repo, maven_db),
542        Ecosystem::GitHubActions => return scan_ghactions_repo(repo, ghactions_db),
543        Ecosystem::Cargo => {}
544    }
545    let (lockfiles, walk_errors) = discover_lockfiles(repo);
546    let mut vulnerabilities: Vec<VulnFinding> = Vec::new();
547    let mut warnings: Vec<WarnFinding> = Vec::new();
548    let mut error: Option<String> = None;
549
550    // A directory under a glob root we could not read might hide a Cargo.lock, so the
551    // repo cannot be called clean (fail closed) even if other lockfiles were found.
552    if !walk_errors.is_empty() {
553        error.get_or_insert_with(|| {
554            format!(
555                "could not fully walk {}: {}",
556                repo.path.display(),
557                walk_errors.join("; ")
558            )
559        });
560    }
561    if lockfiles.is_empty() {
562        error.get_or_insert_with(|| format!("no Cargo.lock found under {}", repo.path.display()));
563    }
564
565    for lockfile in &lockfiles {
566        match scan_lockfile(db, &repo.id, lockfile) {
567            Ok(mut scan) => {
568                // Opt-in: mark each occurrence built/phantom for the host's
569                // default build. Best-effort — failures leave it unannotated.
570                if let (Some(host), Some(dir)) = (host_triple, lockfile.parent()) {
571                    if let Ok(built) = resolve::built_package_set(dir, host) {
572                        annotate_built(&mut scan, &built);
573                    }
574                }
575                vulnerabilities.extend(scan.vulnerabilities);
576                warnings.extend(scan.warnings);
577            }
578            // Any unreadable lockfile marks the whole repo as a gap; keep the
579            // findings we did get, but the repo can no longer be called clean.
580            Err(e) => {
581                error.get_or_insert_with(|| e.to_string());
582            }
583        }
584    }
585
586    match error {
587        // A gap keeps the findings gathered before it (still useful), but the repo can
588        // no longer be called clean — so build the Errored outcome around them directly
589        // rather than through `RepoResult::errored` (which is for the empty case).
590        Some(reason) => RepoResult {
591            outcome: RepoOutcome {
592                repo: repo.id.clone(),
593                status: ScanStatus::Errored { reason },
594            },
595            vulnerabilities,
596            warnings,
597            skipped_unparseable: 0,
598        },
599        None => RepoResult::scanned(&repo.id, vulnerabilities, warnings),
600    }
601}
602
603/// Scan a Go repo by running govulncheck as a sidecar. govulncheck **compiles**
604/// the module, so it is gated on the same untrusted-build consent as static
605/// reachability (when consent/binary are absent — `go_govulncheck` is `None` — the
606/// repo is an honest gap (`Errored`), never silently skipped) *and* confined under
607/// the same `--build-sandbox` policy.
608fn scan_go_repo(repo: &Repo, go: &GoScan, go_db: Option<&Result<GoDb, GoError>>) -> RepoResult {
609    let errored = |reason: String| RepoResult::errored(&repo.id, reason);
610    let scanned = |vulnerabilities| RepoResult::scanned(&repo.id, vulnerabilities, Vec::new());
611
612    let Some(govulncheck) = go.govulncheck else {
613        // No govulncheck (no --allow-untrusted-builds, or no binary). Fall back to
614        // the Tier-C offline matcher against the once-loaded mirror (`go_db`): it reads
615        // go.mod and matches versions against the OSV DB, compiling nothing, so it needs
616        // no untrusted-build consent. Without a mirror it stays an honest gap.
617        return match go_db {
618            Some(Ok(db)) => match fleetreach_go::scan_offline(&repo.path, db, &repo.id) {
619                Ok(vulnerabilities) => scanned(vulnerabilities),
620                Err(e) => errored(format!("tier-c offline scan: {e}")),
621            },
622            Some(Err(e)) => errored(format!("tier-c offline DB: {e}")),
623            None => errored(
624                "Go repo (go.mod): no govulncheck available (needs --allow-untrusted-builds \
625                 and a govulncheck binary via --govulncheck <path> or PATH), and no offline \
626                 DB mirror for the toolchain-free Tier-C fallback (pass \
627                 --go-vuln-db=file://<mirror>)"
628                    .to_string(),
629            ),
630        };
631    };
632
633    let opts = fleetreach_go::GoScanOptions {
634        govulncheck,
635        sandbox: go.sandbox,
636        vuln_db: go.vuln_db,
637        offline: go.offline,
638    };
639    match fleetreach_go::scan_module(&repo.path, &repo.id, &opts) {
640        Ok(vulnerabilities) => scanned(vulnerabilities),
641        Err(e) => errored(format!("govulncheck: {e}")),
642    }
643}
644
645/// Scan an npm repo with the toolchain-free Tier-C matcher: read `package-lock.json`
646/// and match each package version against the preloaded OSV DB. Builds nothing, so it
647/// needs no consent and no sandbox. `npm_db` is the once-loaded DB: `None` means no
648/// `file://` mirror was given (an honest gap), `Some(Err)` a mirror that failed to
649/// load (the reason is surfaced, never a false-clean).
650fn scan_npm_repo(repo: &Repo, npm_db: Option<&Result<NpmDb, NpmError>>) -> RepoResult {
651    let errored = |reason: String| RepoResult::errored(&repo.id, reason);
652
653    let db = match npm_db {
654        Some(Ok(db)) => db,
655        Some(Err(e)) => return errored(format!("npm OSV DB: {e}")),
656        None => {
657            return errored(
658                "npm repo (package-lock.json): no OSV DB mirror for the toolchain-free \
659                 matcher (pass --npm-vuln-db=file://<dir>, e.g. an unzipped osv.dev npm export)"
660                    .to_string(),
661            )
662        }
663    };
664    match fleetreach_npm::scan_offline(&repo.path, db, &repo.id) {
665        Ok(scan) => RepoResult::scanned(&repo.id, scan.findings, Vec::new())
666            .with_skipped(scan.skipped_unparseable),
667        Err(e) => errored(format!("npm tier-c scan: {e}")),
668    }
669}
670
671/// Scan a PyPI repo with the toolchain-free Tier-C matcher: read the Python lockfile
672/// (`uv.lock`/`poetry.lock`/`Pipfile.lock`) and match each package version against the
673/// preloaded OSV DB. Builds nothing, so it needs no consent and no sandbox. `pypi_db` is
674/// the once-loaded DB: `None` means no `file://` mirror was given (an honest gap),
675/// `Some(Err)` a mirror that failed to load (the reason is surfaced, never a false-clean).
676fn scan_pypi_repo(repo: &Repo, pypi_db: Option<&Result<PyPiDb, PyPiError>>) -> RepoResult {
677    let errored = |reason: String| RepoResult::errored(&repo.id, reason);
678
679    let db = match pypi_db {
680        Some(Ok(db)) => db,
681        Some(Err(e)) => return errored(format!("PyPI OSV DB: {e}")),
682        None => {
683            return errored(
684                "PyPI repo (uv.lock/poetry.lock/Pipfile.lock): no OSV DB mirror for the \
685                 toolchain-free matcher (pass --pypi-vuln-db=file://<path>, e.g. the osv.dev \
686                 PyPI export all.zip or an unzipped directory)"
687                    .to_string(),
688            )
689        }
690    };
691    match fleetreach_pypi::scan_offline(&repo.path, db, &repo.id) {
692        Ok(scan) => RepoResult::scanned(&repo.id, scan.findings, Vec::new())
693            .with_skipped(scan.skipped_unparseable),
694        Err(e) => errored(format!("pypi tier-c scan: {e}")),
695    }
696}
697
698/// Scan a RubyGems repo with the toolchain-free Tier-C matcher: read `Gemfile.lock` and
699/// match each gem version against the preloaded OSV DB. Builds nothing, so it needs no
700/// consent and no sandbox. `rubygems_db` is the once-loaded DB: `None` means no `file://`
701/// mirror was given (an honest gap), `Some(Err)` a mirror that failed to load (the reason
702/// is surfaced, never a false-clean).
703fn scan_rubygems_repo(
704    repo: &Repo,
705    rubygems_db: Option<&Result<RubyGemsDb, RubyGemsError>>,
706) -> RepoResult {
707    let errored = |reason: String| RepoResult::errored(&repo.id, reason);
708
709    let db = match rubygems_db {
710        Some(Ok(db)) => db,
711        Some(Err(e)) => return errored(format!("RubyGems OSV DB: {e}")),
712        None => {
713            return errored(
714                "RubyGems repo (Gemfile.lock): no OSV DB mirror for the toolchain-free \
715                 matcher (pass --rubygems-vuln-db=file://<path>, e.g. the osv.dev RubyGems \
716                 export all.zip or an unzipped directory)"
717                    .to_string(),
718            )
719        }
720    };
721    match fleetreach_rubygems::scan_offline(&repo.path, db, &repo.id) {
722        Ok(scan) => RepoResult::scanned(&repo.id, scan.findings, Vec::new())
723            .with_skipped(scan.skipped_unparseable),
724        Err(e) => errored(format!("rubygems tier-c scan: {e}")),
725    }
726}
727
728/// Scan a Packagist (Composer/PHP) repo with the toolchain-free Tier-C matcher: read
729/// `composer.lock` and match each package version against the preloaded OSV DB. Builds
730/// nothing, so it needs no consent and no sandbox. `packagist_db` is the once-loaded DB:
731/// `None` means no `file://` mirror was given (an honest gap), `Some(Err)` a mirror that
732/// failed to load (the reason is surfaced, never a false-clean).
733fn scan_packagist_repo(
734    repo: &Repo,
735    packagist_db: Option<&Result<PackagistDb, PackagistError>>,
736) -> RepoResult {
737    let errored = |reason: String| RepoResult::errored(&repo.id, reason);
738
739    let db = match packagist_db {
740        Some(Ok(db)) => db,
741        Some(Err(e)) => return errored(format!("Packagist OSV DB: {e}")),
742        None => {
743            return errored(
744                "Packagist repo (composer.lock): no OSV DB mirror for the toolchain-free \
745                 matcher (pass --packagist-vuln-db=file://<path>, e.g. the osv.dev Packagist \
746                 export all.zip or an unzipped directory)"
747                    .to_string(),
748            )
749        }
750    };
751    match fleetreach_packagist::scan_offline(&repo.path, db, &repo.id) {
752        Ok(scan) => RepoResult::scanned(&repo.id, scan.findings, Vec::new())
753            .with_skipped(scan.skipped_unparseable),
754        Err(e) => errored(format!("packagist tier-c scan: {e}")),
755    }
756}
757
758/// Scan a NuGet (.NET) repo with the toolchain-free Tier-C matcher: read
759/// `packages.lock.json` and match each package version against the preloaded OSV DB. Builds
760/// nothing, so it needs no consent and no sandbox. `nuget_db` is the once-loaded DB: `None`
761/// means no `file://` mirror was given (an honest gap), `Some(Err)` a mirror that failed to
762/// load (the reason is surfaced, never a false-clean).
763fn scan_nuget_repo(repo: &Repo, nuget_db: Option<&Result<NuGetDb, NuGetError>>) -> RepoResult {
764    let errored = |reason: String| RepoResult::errored(&repo.id, reason);
765
766    let db = match nuget_db {
767        Some(Ok(db)) => db,
768        Some(Err(e)) => return errored(format!("NuGet OSV DB: {e}")),
769        None => {
770            return errored(
771                "NuGet repo (packages.lock.json): no OSV DB mirror for the toolchain-free \
772                 matcher (pass --nuget-vuln-db=file://<path>, e.g. the osv.dev NuGet export \
773                 all.zip or an unzipped directory)"
774                    .to_string(),
775            )
776        }
777    };
778    match fleetreach_nuget::scan_offline(&repo.path, db, &repo.id) {
779        Ok(scan) => RepoResult::scanned(&repo.id, scan.findings, Vec::new())
780            .with_skipped(scan.skipped_unparseable),
781        Err(e) => errored(format!("nuget tier-c scan: {e}")),
782    }
783}
784
785/// Scan a Julia repo with the toolchain-free Tier-C matcher: read `Manifest.toml` and match
786/// each package version against the preloaded OSV DB. Builds nothing, so it needs no consent
787/// and no sandbox. `julia_db` is the once-loaded DB: `None` means no `file://` mirror was
788/// given (an honest gap), `Some(Err)` a mirror that failed to load (the reason is surfaced).
789fn scan_julia_repo(repo: &Repo, julia_db: Option<&Result<JuliaDb, JuliaError>>) -> RepoResult {
790    let errored = |reason: String| RepoResult::errored(&repo.id, reason);
791
792    let db = match julia_db {
793        Some(Ok(db)) => db,
794        Some(Err(e)) => return errored(format!("Julia OSV DB: {e}")),
795        None => {
796            return errored(
797                "Julia repo (Manifest.toml): no OSV DB mirror for the toolchain-free matcher \
798                 (pass --julia-vuln-db=file://<path>, e.g. the osv.dev Julia export all.zip or \
799                 an unzipped directory)"
800                    .to_string(),
801            )
802        }
803    };
804    match fleetreach_julia::scan_offline(&repo.path, db, &repo.id) {
805        Ok(scan) => RepoResult::scanned(&repo.id, scan.findings, Vec::new())
806            .with_skipped(scan.skipped_unparseable),
807        Err(e) => errored(format!("julia tier-c scan: {e}")),
808    }
809}
810
811/// Scan a Swift repo with the toolchain-free Tier-C matcher: read `Package.resolved` and match
812/// each package version against the preloaded OSV DB. Builds nothing, so it needs no consent
813/// and no sandbox. `swift_db` is the once-loaded DB: `None` means no `file://` mirror was given
814/// (an honest gap), `Some(Err)` a mirror that failed to load (the reason is surfaced).
815fn scan_swift_repo(repo: &Repo, swift_db: Option<&Result<SwiftDb, SwiftError>>) -> RepoResult {
816    let errored = |reason: String| RepoResult::errored(&repo.id, reason);
817
818    let db =
819        match swift_db {
820            Some(Ok(db)) => db,
821            Some(Err(e)) => return errored(format!("Swift OSV DB: {e}")),
822            None => return errored(
823                "Swift repo (Package.resolved): no OSV DB mirror for the toolchain-free matcher \
824                 (pass --swift-vuln-db=file://<path>, e.g. the osv.dev SwiftURL export all.zip \
825                 or an unzipped directory)"
826                    .to_string(),
827            ),
828        };
829    match fleetreach_swift::scan_offline(&repo.path, db, &repo.id) {
830        Ok(scan) => RepoResult::scanned(&repo.id, scan.findings, Vec::new())
831            .with_skipped(scan.skipped_unparseable),
832        Err(e) => errored(format!("swift tier-c scan: {e}")),
833    }
834}
835
836/// Scan a Hex (Elixir) repo with the toolchain-free Tier-C matcher: read `mix.lock` and match
837/// each package version against the preloaded OSV DB. Builds nothing, so it needs no consent
838/// and no sandbox. `hex_db` is the once-loaded DB: `None` means no `file://` mirror was given
839/// (an honest gap), `Some(Err)` a mirror that failed to load (the reason is surfaced).
840fn scan_hex_repo(repo: &Repo, hex_db: Option<&Result<HexDb, HexError>>) -> RepoResult {
841    let errored = |reason: String| RepoResult::errored(&repo.id, reason);
842
843    let db = match hex_db {
844        Some(Ok(db)) => db,
845        Some(Err(e)) => return errored(format!("Hex OSV DB: {e}")),
846        None => {
847            return errored(
848                "Hex repo (mix.lock): no OSV DB mirror for the toolchain-free matcher (pass \
849                 --hex-vuln-db=file://<path>, e.g. the osv.dev Hex export all.zip or an \
850                 unzipped directory)"
851                    .to_string(),
852            )
853        }
854    };
855    match fleetreach_hex::scan_offline(&repo.path, db, &repo.id) {
856        Ok(scan) => RepoResult::scanned(&repo.id, scan.findings, Vec::new())
857            .with_skipped(scan.skipped_unparseable),
858        Err(e) => errored(format!("hex tier-c scan: {e}")),
859    }
860}
861
862/// Scan a Maven (Java) repo with the toolchain-free Tier-C matcher: read `gradle.lockfile` or
863/// `pom.xml` and match each dependency against the preloaded OSV DB. Builds nothing, so it
864/// needs no consent and no sandbox. `maven_db` is the once-loaded DB: `None` means no `file://`
865/// mirror was given (an honest gap), `Some(Err)` a mirror that failed to load (surfaced).
866fn scan_maven_repo(repo: &Repo, maven_db: Option<&Result<MavenDb, MavenError>>) -> RepoResult {
867    let errored = |reason: String| RepoResult::errored(&repo.id, reason);
868
869    let db =
870        match maven_db {
871            Some(Ok(db)) => db,
872            Some(Err(e)) => return errored(format!("Maven OSV DB: {e}")),
873            None => return errored(
874                "Maven repo (gradle.lockfile/pom.xml): no OSV DB mirror for the toolchain-free \
875                 matcher (pass --maven-vuln-db=file://<path>, e.g. the osv.dev Maven export \
876                 all.zip or an unzipped directory)"
877                    .to_string(),
878            ),
879        };
880    match fleetreach_maven::scan_offline(&repo.path, db, &repo.id) {
881        Ok(scan) => RepoResult::scanned(&repo.id, scan.findings, Vec::new())
882            .with_skipped(scan.skipped_unparseable),
883        Err(e) => errored(format!("maven tier-c scan: {e}")),
884    }
885}
886
887/// Scan a GitHub Actions repo with the toolchain-free Tier-C matcher: read
888/// `.github/workflows/*.yml` and match each pinned `uses:` action against the preloaded OSV
889/// DB. Builds nothing, so it needs no consent and no sandbox. `ghactions_db` is the
890/// once-loaded DB: `None` means no `file://` mirror was given (an honest gap), `Some(Err)` a
891/// mirror that failed to load (the reason is surfaced).
892fn scan_ghactions_repo(
893    repo: &Repo,
894    ghactions_db: Option<&Result<GhActionsDb, GhaError>>,
895) -> RepoResult {
896    let errored = |reason: String| RepoResult::errored(&repo.id, reason);
897
898    let db = match ghactions_db {
899        Some(Ok(db)) => db,
900        Some(Err(e)) => return errored(format!("GitHub Actions OSV DB: {e}")),
901        None => {
902            return errored(
903                "GitHub Actions repo (.github/workflows): no OSV DB mirror for the \
904                 toolchain-free matcher (pass --ghactions-vuln-db=file://<path>, e.g. the \
905                 osv.dev GitHub Actions export all.zip or an unzipped directory)"
906                    .to_string(),
907            )
908        }
909    };
910    match fleetreach_ghactions::scan_offline(&repo.path, db, &repo.id) {
911        Ok(scan) => RepoResult::scanned(&repo.id, scan.findings, Vec::new())
912            .with_skipped(scan.skipped_unparseable),
913        Err(e) => errored(format!("github-actions tier-c scan: {e}")),
914    }
915}
916
917/// Stamp each in-repo occurrence with whether its package is in the host's
918/// built set. Per-occurrence, since the same advisory may be built in one repo
919/// and a phantom optional in another.
920fn annotate_built(scan: &mut RepoScan, built: &BTreeSet<(String, Version)>) {
921    let occurrences = scan
922        .vulnerabilities
923        .iter_mut()
924        .flat_map(|v| v.occurrences.iter_mut())
925        .chain(
926            scan.warnings
927                .iter_mut()
928                .flat_map(|w| w.occurrences.iter_mut()),
929        );
930    for occurrence in occurrences {
931        if let Occurrence::InRepo {
932            package,
933            installed,
934            active,
935            ..
936        } = occurrence
937        {
938            *active = Some(built.contains(&(package.clone(), installed.clone())));
939        }
940    }
941}
942
943/// Resolve the lockfile(s) for a repo: a single `Cargo.lock` at the root, or —
944/// when `glob = true` — every `Cargo.lock` within `glob_max_depth` of the root.
945///
946/// Returns the discovered paths plus any directory-walk errors. A swallowed walk
947/// error (e.g. an unreadable subdir) could hide a `Cargo.lock`, so the caller treats
948/// a non-empty error list as a gap (fail closed) rather than reporting the repo clean.
949pub fn discover_lockfiles(repo: &Repo) -> (Vec<PathBuf>, Vec<String>) {
950    if !repo.glob {
951        let lock = repo.path.join("Cargo.lock");
952        return (if lock.is_file() { vec![lock] } else { vec![] }, Vec::new());
953    }
954
955    let mut paths = Vec::new();
956    let mut errors = Vec::new();
957    for entry in WalkDir::new(&repo.path).max_depth(repo.glob_max_depth) {
958        match entry {
959            Ok(e) if e.file_type().is_file() && e.file_name() == "Cargo.lock" => {
960                paths.push(e.into_path());
961            }
962            Ok(_) => {}
963            Err(e) => errors.push(e.to_string()),
964        }
965    }
966    (paths, errors)
967}