Skip to main content

fleetreach_core/
osv.rs

1//! OSV advisory version-range matching plus the shared toolchain-free feeder scaffolding
2//! (wire schema, parallel dir/zip loader, by-package index, severity), used by every
3//! ecosystem Tier-C feeder. The per-ecosystem knobs are passed in via [`Spec`]; this matcher
4//! is the algorithmic core.
5//!
6//! The event-walking skeleton — a matchable range with no `introduced` is affected from
7//! 0, an unparseable `introduced` fails **loud** (treated as affected, never silently
8//! clean), the reported fix is the *smallest* `fixed` above the version, and a
9//! `last_affected` closes an open interval — is identical across ecosystems. It is also
10//! exactly the logic where a missed case is a *false-clean*, the worst bug, so it lives
11//! here once rather than copied per feeder where the two could drift apart.
12//!
13//! What differs per ecosystem is only the version type and its handling, both passed in:
14//! the matcher is generic over the version type `V` (SemVer for Go/npm, PEP 440 for
15//! PyPI), and the bound handling comes in as closures — how a raw bound string parses
16//! (Go's pseudo-versions vs plain SemVer vs PEP 440, and each ecosystem's `"0"`
17//! lower-bound convention) and whether a version is at/after an `introduced` bound (Go
18//! pseudo-versions order below their release, so they need a release-tuple compare; the
19//! others just use `>=`). The `fixed`/`last_affected` upper bounds always use `V`'s own
20//! ordering.
21
22/// One advisory range reduced to what the matcher needs: whether its `type` is one this
23/// ecosystem evaluates (`matchable` — `SEMVER` for Go/npm, `ECOSYSTEM` for PyPI; other
24/// types are skipped) and its events.
25#[derive(Debug, Clone)]
26pub struct Range {
27    pub matchable: bool,
28    pub events: Vec<Event>,
29}
30
/// A single event of an OSV range, with its bounds kept as raw strings.
///
/// An event is expected to carry at most one of the three bounds; the others are `None`.
#[derive(Debug, Clone)]
pub struct Event {
    /// Raw `introduced` lower bound, when this event is one.
    pub introduced: Option<String>,
    /// Raw `fixed` upper bound, when this event is one.
    pub fixed: Option<String>,
    /// Raw `last_affected` upper bound, when this event is one.
    pub last_affected: Option<String>,
}
38
/// The outcome of parsing one bound string at DB-load time.
///
/// `Unparseable` exists so that "a bound was present but the ecosystem's version parser
/// rejected it" (a malformed / poisoned DB) stays distinguishable from "no bound at all".
/// That lets the pre-parsed matcher fail **loud** in the same way the string-based
/// [`affected_fixed`] does when an `introduced` bound does not parse: by treating the
/// range as affected.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParsedBound<V> {
    /// The bound string parsed to this version.
    Version(V),
    /// A bound string was present, but it could not be parsed.
    Unparseable,
}
51
52/// One OSV range event with its bounds pre-parsed into the version type `V`. Built once at
53/// DB load (see [`parse_range`]) so the per-scan matcher never re-parses a bound string.
54#[derive(Debug, Clone)]
55pub struct ParsedEvent<V> {
56    /// `None` = no `introduced` event; `Some(_)` = an `introduced` bound (parsed or not).
57    pub introduced: Option<ParsedBound<V>>,
58    /// A parseable `fixed` bound, or `None` (absent OR unparseable — both simply skip, the
59    /// same as the string matcher's `and_then(parse)`).
60    pub fixed: Option<V>,
61    /// A parseable `last_affected` bound, or `None` (absent OR unparseable).
62    pub last_affected: Option<V>,
63}
64
65/// An advisory range with its event bounds pre-parsed (see [`ParsedEvent`]).
66#[derive(Debug, Clone)]
67pub struct ParsedRange<V> {
68    pub matchable: bool,
69    pub events: Vec<ParsedEvent<V>>,
70}
71
72/// Pre-parse a string-bound [`Range`] into a [`ParsedRange`] once, using the ecosystem's
73/// bound parser (the same `parse_bound` closure the string matcher takes, including its
74/// `"0"` convention). Do this at DB load so a popular advisory's bounds are parsed once per
75/// load rather than once per repo scanned.
76pub fn parse_range<V>(range: &Range, parse_bound: impl Fn(&str) -> Option<V>) -> ParsedRange<V> {
77    ParsedRange {
78        matchable: range.matchable,
79        events: range
80            .events
81            .iter()
82            .map(|e| ParsedEvent {
83                introduced: e.introduced.as_deref().map(|raw| {
84                    parse_bound(raw).map_or(ParsedBound::Unparseable, ParsedBound::Version)
85                }),
86                fixed: e.fixed.as_deref().and_then(&parse_bound),
87                last_affected: e.last_affected.as_deref().and_then(&parse_bound),
88            })
89            .collect(),
90    }
91}
92
93/// The pre-parsed counterpart of [`affected_fixed`]: identical semantics, but the bounds
94/// were parsed once at DB load (see [`parse_range`]) so this hot per-scan path does no
95/// string parsing. `at_or_after_introduced` is unchanged (plain `>=` for SemVer/PEP 440, a
96/// pseudo-version-aware compare for Go).
97pub fn affected_fixed_parsed<V: Ord + Clone>(
98    version: &V,
99    ranges: &[ParsedRange<V>],
100    at_or_after_introduced: impl Fn(&V, &V) -> bool,
101) -> Match<V> {
102    for range in ranges {
103        if !range.matchable {
104            continue;
105        }
106        // OSV convention: a matchable range with no `introduced` event is affected from 0.
107        let mut affected = !range.events.iter().any(|e| e.introduced.is_some());
108        let mut patch: Option<V> = None;
109        for event in &range.events {
110            match &event.introduced {
111                Some(ParsedBound::Version(v)) if at_or_after_introduced(version, v) => {
112                    affected = true
113                }
114                Some(ParsedBound::Version(_)) => {}
115                // A present-but-unparseable lower bound must fail LOUD — treat as affected.
116                Some(ParsedBound::Unparseable) => affected = true,
117                None => {}
118            }
119            if let Some(v) = &event.fixed {
120                if version >= v {
121                    affected = false;
122                } else {
123                    patch = Some(patch.map_or_else(|| v.clone(), |p| p.min(v.clone())));
124                }
125            }
126            if let Some(v) = &event.last_affected {
127                if version > v {
128                    affected = false;
129                }
130            }
131        }
132        if affected {
133            return Match::Affected { fixed: patch };
134        }
135    }
136    Match::NotAffected
137}
138
/// The verdict of matching an installed version against an advisory's ranges.
///
/// Generic over the version type `V`, so a reported fix is handed back in the
/// ecosystem's own version type (SemVer or PEP 440) verbatim.
#[derive(Debug, PartialEq, Eq)]
pub enum Match<V> {
    /// No affected range covers the version.
    NotAffected,
    /// The version is affected. `fixed` is the patch that closes its interval when the
    /// DB names one; it is `None` for an open `last_affected`-only range, or when there
    /// is no fix yet.
    Affected { fixed: Option<V> },
}
149
150/// Evaluate the matchable `ranges` for `version`. Ranges whose `type` this ecosystem does
151/// not evaluate (`matchable == false`) are skipped.
152///
153/// Generic over the version type `V`: `parse_bound` parses a raw bound string with the
154/// ecosystem's version adapter (and its `"0"` convention), and `at_or_after_introduced`
155/// decides whether `version` is at/after an `introduced` lower bound — plain `>=` for
156/// SemVer/PEP 440, a pseudo-version-aware compare for Go. Both are called many times, so
157/// they are taken by reference internally.
158pub fn affected_fixed<V: Ord + Clone>(
159    version: &V,
160    ranges: &[Range],
161    parse_bound: impl Fn(&str) -> Option<V>,
162    at_or_after_introduced: impl Fn(&V, &V) -> bool,
163) -> Match<V> {
164    for range in ranges {
165        if !range.matchable {
166            continue;
167        }
168        // OSV convention: a matchable range with no `introduced` event is introduced at 0,
169        // i.e. affected from the start. Defaulting to `false` would make such a range
170        // (and a poisoned advisory that simply omits `introduced`) silently read clean.
171        let mut affected = !range.events.iter().any(|e| e.introduced.is_some());
172        let mut patch: Option<V> = None;
173        for event in &range.events {
174            if let Some(raw) = event.introduced.as_deref() {
175                match parse_bound(raw) {
176                    Some(v) if at_or_after_introduced(version, &v) => affected = true,
177                    Some(_) => {}
178                    // An unparseable lower bound (malformed / poisoned DB) must fail
179                    // LOUD — treat as affected — never silently clean.
180                    None => affected = true,
181                }
182            }
183            if let Some(v) = event.fixed.as_deref().and_then(&parse_bound) {
184                if *version >= v {
185                    affected = false;
186                } else {
187                    // The patch is the *smallest* fixed above `version` (the fix that
188                    // closes its interval), not merely the last fixed event seen — a
189                    // later interval's fix must not overwrite an earlier one.
190                    patch = Some(patch.map_or(v.clone(), |p| p.min(v)));
191                }
192            }
193            if let Some(v) = event.last_affected.as_deref().and_then(&parse_bound) {
194                if *version > v {
195                    affected = false;
196                }
197            }
198        }
199        if affected {
200            return Match::Affected { fixed: patch };
201        }
202    }
203    Match::NotAffected
204}
205
206// =============================================================================
207// Shared toolchain-free OSV feeder scaffolding
208// =============================================================================
209//
210// Every ecosystem Tier-C feeder (npm, PyPI, RubyGems, Packagist, NuGet, Julia, Swift, Hex,
211// GitHub Actions, Maven) reads the same osv.dev export shape — a directory of OSV JSON
212// records, or the `all.zip` — and indexes it by package name. The only per-ecosystem
213// differences are captured in [`Spec`]: the version type, how a bound/version string parses,
214// how a package name normalizes, which OSV `ecosystem`/range `type` strings this feeder
215// consumes, whether enumerated `versions` are used, and how severity is derived. The load,
216// the wire schema, the parallel dir/zip reader, the by-package index, and the parse-once
217// range building all live here, once — the place where a missed case is a false-clean.
218
219use std::collections::BTreeMap;
220use std::io::{self, Cursor, Read as _};
221use std::path::{Path, PathBuf};
222use std::str::FromStr;
223use std::sync::Arc;
224
225use rayon::prelude::*;
226use serde::Deserialize;
227use thiserror::Error;
228
229use crate::semver::{Version, VersionReq};
230use crate::{DependencyKind, Ecosystem, Occurrence, Reachability, RepoId, Severity, VulnFinding};
231
232/// One advisory's facts for a single affected package, version type `V`. The range bounds
233/// and enumerated `versions` are parsed once at load (see [`load`]).
234#[derive(Debug, Clone)]
235pub struct Advisory<V> {
236    pub id: String,
237    pub aliases: Vec<String>,
238    pub summary: Option<String>,
239    pub severity: Severity,
240    /// A CVSS base score when the record carries a parseable CVSS_V3 vector (always `None`
241    /// for feeders whose [`Spec::severity`] does not extract one, e.g. npm).
242    pub cvss_score: Option<f32>,
243    /// `matchable` ranges with bounds pre-parsed (see [`parse_range`]).
244    pub ranges: Vec<ParsedRange<V>>,
245    /// Enumerated affected versions, sorted+deduped for `binary_search` (empty when
246    /// [`Spec::use_versions`] is false).
247    pub versions: Vec<V>,
248}
249
250/// The offline OSV DB: `normalized package name -> advisories`, built once at load.
251#[derive(Debug)]
252pub struct OsvDb<V> {
253    by_package: BTreeMap<String, Vec<Advisory<V>>>,
254}
255
256impl<V> Default for OsvDb<V> {
257    fn default() -> Self {
258        OsvDb {
259            by_package: BTreeMap::new(),
260        }
261    }
262}
263
264impl<V> OsvDb<V> {
265    /// The advisories indexed under `key` (a name already run through
266    /// [`Spec::normalize_name`]), or an empty slice if none.
267    pub fn advisories_for(&self, key: &str) -> &[Advisory<V>] {
268        self.by_package.get(key).map_or(&[], Vec::as_slice)
269    }
270    /// Total advisory-package index entries — for diagnostics/tests.
271    pub fn len(&self) -> usize {
272        self.by_package.values().map(Vec::len).sum()
273    }
274    /// Whether the DB indexed no advisories at all.
275    pub fn is_empty(&self) -> bool {
276        self.by_package.is_empty()
277    }
278}
279
280/// The per-ecosystem knobs the generic [`load`]/[`advisories_from`] need. Everything else
281/// (wire schema, parallel reader, indexing, parse-once) is shared.
282pub struct Spec<V: 'static> {
283    /// The OSV `affected[].package.ecosystem` string this feeder consumes (e.g. `"Maven"`).
284    pub ecosystem: &'static str,
285    /// The OSV range `type` this feeder evaluates (`"ECOSYSTEM"` or `"SEMVER"`); other types
286    /// are not `matchable`.
287    pub range_type: &'static str,
288    /// Parse a bound/version string with this ecosystem's version adapter (incl. its `"0"`
289    /// convention). Used at load to pre-parse range bounds and the enumerated `versions`.
290    pub parse_version: fn(&str) -> Option<V>,
291    /// Normalize a package name into the index key (identity, lowercase, PEP 503, URL, ...).
292    pub normalize_name: fn(&str) -> String,
293    /// Whether to collect the enumerated `versions` list (false for npm, which is range-only).
294    pub use_versions: bool,
295    /// Derive `(severity, cvss_score)` from a record. [`default_severity`] suits every feeder
296    /// whose records carry the GHSA band / CVSS_V3 vector; npm passes its own.
297    pub severity: fn(&OsvRecord) -> (Severity, Option<f32>),
298}
299
300/// Why an OSV mirror file could not be read or parsed — shared by every feeder's error type.
301/// A present-but-broken input fails **closed** (an honest gap, never a false-clean).
302#[derive(Debug, Error)]
303#[non_exhaustive]
304pub enum DbError {
305    /// The file could not be read.
306    #[error("read failed: {0}")]
307    Read(#[from] io::Error),
308    /// An OSV record was not valid JSON.
309    #[error("invalid JSON: {0}")]
310    Parse(#[from] serde_json::Error),
311    /// The OSV mirror `.zip` could not be opened or decompressed.
312    #[error("invalid zip archive: {0}")]
313    Archive(String),
314}
315
316impl From<zip::result::ZipError> for DbError {
317    fn from(e: zip::result::ZipError) -> Self {
318        match e {
319            zip::result::ZipError::Io(io) => DbError::Read(io),
320            other => DbError::Archive(other.to_string()),
321        }
322    }
323}
324
325/// A load failure carrying the offending `path` and its cause, so a feeder can wrap it into
326/// its own `Db { path, source }` error verbatim.
327#[derive(Debug)]
328pub struct LoadError {
329    pub path: PathBuf,
330    pub source: DbError,
331}
332
333impl LoadError {
334    fn new(path: impl Into<PathBuf>, source: impl Into<DbError>) -> Self {
335        LoadError {
336            path: path.into(),
337            source: source.into(),
338        }
339    }
340}
341
342/// Load the OSV mirror at `root` — a directory of `*.json` records (what the osv.dev
343/// `<Ecosystem>/all.zip` unzips to) or the `.zip` itself — into an indexed [`OsvDb`],
344/// parsing every range bound and enumerated version once.
345///
346/// # Errors
347///
348/// Returns [`LoadError`] if `root` cannot be read, the archive cannot be decompressed, or a
349/// record is not valid JSON — failing closed.
350pub fn load<V>(root: &Path, spec: &Spec<V>) -> Result<OsvDb<V>, LoadError>
351where
352    V: Ord + Clone + Send,
353{
354    if root.is_dir() {
355        load_dir(root, spec)
356    } else {
357        load_zip(root, spec)
358    }
359}
360
361fn load_dir<V>(root: &Path, spec: &Spec<V>) -> Result<OsvDb<V>, LoadError>
362where
363    V: Ord + Clone + Send,
364{
365    let mut paths: Vec<PathBuf> = std::fs::read_dir(root)
366        .map_err(|e| LoadError::new(root, e))?
367        .map(|entry| entry.map(|e| e.path()).map_err(|e| LoadError::new(root, e)))
368        .collect::<Result<_, _>>()?;
369    paths.retain(|p| p.extension().and_then(|e| e.to_str()) == Some("json"));
370    paths.sort();
371
372    let per_file: Vec<Vec<(String, Advisory<V>)>> = paths
373        .par_iter()
374        .map(|path| {
375            let body = std::fs::read_to_string(path).map_err(|e| LoadError::new(path, e))?;
376            let osv: OsvRecord =
377                serde_json::from_str(&body).map_err(|e| LoadError::new(path, e))?;
378            Ok(advisories_from(osv, spec))
379        })
380        .collect::<Result<_, LoadError>>()?;
381
382    Ok(OsvDb {
383        by_package: merge(per_file),
384    })
385}
386
387fn load_zip<V>(path: &Path, spec: &Spec<V>) -> Result<OsvDb<V>, LoadError>
388where
389    V: Ord + Clone + Send,
390{
391    let bytes: Arc<[u8]> = std::fs::read(path)
392        .map_err(|e| LoadError::new(path, e))?
393        .into();
394    let archive = zip::ZipArchive::new(Cursor::new(bytes)).map_err(|e| LoadError::new(path, e))?;
395
396    let per_entry: Vec<Vec<(String, Advisory<V>)>> = (0..archive.len())
397        .into_par_iter()
398        .map_init(
399            || archive.clone(),
400            |archive, i| {
401                let mut entry = archive.by_index(i).map_err(|e| LoadError::new(path, e))?;
402                if !entry.name().ends_with(".json") {
403                    return Ok(Vec::new());
404                }
405                let mut body = String::new();
406                entry
407                    .read_to_string(&mut body)
408                    .map_err(|e| LoadError::new(path, e))?;
409                let osv: OsvRecord =
410                    serde_json::from_str(&body).map_err(|e| LoadError::new(path, e))?;
411                Ok(advisories_from(osv, spec))
412            },
413        )
414        .collect::<Result<_, LoadError>>()?;
415
416    Ok(OsvDb {
417        by_package: merge(per_entry),
418    })
419}
420
421/// Fold per-record advisory lists (in their original `affected[]` order) into the by-package
422/// index.
423fn merge<V>(per_file: Vec<Vec<(String, Advisory<V>)>>) -> BTreeMap<String, Vec<Advisory<V>>> {
424    let mut by_package: BTreeMap<String, Vec<Advisory<V>>> = BTreeMap::new();
425    for record in per_file {
426        for (name, advisory) in record {
427            by_package.entry(name).or_default().push(advisory);
428        }
429    }
430    by_package
431}
432
433/// The `(normalized name, advisory)` pairs one OSV record contributes: one per `affected[]`
434/// entry whose `package.ecosystem` matches [`Spec::ecosystem`]. Bounds and versions are
435/// parsed once here. Exposed for feeder unit tests.
436pub fn advisories_from<V>(osv: OsvRecord, spec: &Spec<V>) -> Vec<(String, Advisory<V>)>
437where
438    V: Ord + Clone,
439{
440    let (severity, cvss_score) = (spec.severity)(&osv);
441    osv.affected
442        .iter()
443        .filter(|a| a.package.ecosystem.as_deref() == Some(spec.ecosystem))
444        .filter_map(|affected| {
445            let name = (spec.normalize_name)(affected.package.name.as_deref()?);
446            let ranges = affected
447                .ranges
448                .iter()
449                .map(|r| {
450                    let range = Range {
451                        matchable: r.kind.as_deref() == Some(spec.range_type),
452                        events: r
453                            .events
454                            .iter()
455                            .map(|e| Event {
456                                introduced: e.introduced.clone(),
457                                fixed: e.fixed.clone(),
458                                last_affected: e.last_affected.clone(),
459                            })
460                            .collect(),
461                    };
462                    parse_range(&range, spec.parse_version)
463                })
464                .collect();
465            let mut versions: Vec<V> = if spec.use_versions {
466                affected
467                    .versions
468                    .iter()
469                    .flatten()
470                    .filter_map(|v| (spec.parse_version)(v))
471                    .collect()
472            } else {
473                Vec::new()
474            };
475            versions.sort();
476            versions.dedup();
477            Some((
478                name,
479                Advisory {
480                    id: osv.id.clone(),
481                    aliases: osv.aliases.clone().unwrap_or_default(),
482                    summary: osv.summary.clone(),
483                    severity,
484                    cvss_score,
485                    ranges,
486                    versions,
487                },
488            ))
489        })
490        .collect()
491}
492
493/// The default `(severity, cvss_score)` derivation: prefer the curated GHSA band
494/// (`database_specific.severity`), and take the best CVSS_V3 base score across the record's
495/// `severity[]` vectors (deriving the band from it when no GHSA band is present).
496pub fn default_severity(osv: &OsvRecord) -> (Severity, Option<f32>) {
497    let band = osv
498        .database_specific
499        .as_ref()
500        .and_then(|d| d.severity.as_deref())
501        .map(band_from_label)
502        .unwrap_or(Severity::Unknown);
503
504    let scored: Option<(Severity, f32)> = osv
505        .severity
506        .iter()
507        .filter(|s| s.kind.as_deref() == Some("CVSS_V3"))
508        .filter_map(|s| cvss::v3::Base::from_str(s.score.as_deref()?).ok())
509        .map(|base| {
510            let score = base.score();
511            (band_from_cvss(score.severity()), score.value() as f32)
512        })
513        .max_by(|a, b| a.1.total_cmp(&b.1));
514
515    let cvss_score = scored.map(|(_, v)| v);
516    let severity = if band != Severity::Unknown {
517        band
518    } else {
519        scored.map(|(b, _)| b).unwrap_or(Severity::Unknown)
520    };
521    (severity, cvss_score)
522}
523
524/// Map a GHSA/OSV severity band string onto [`Severity`].
525pub fn band_from_label(label: &str) -> Severity {
526    match label.to_ascii_uppercase().as_str() {
527        "LOW" => Severity::Low,
528        "MODERATE" | "MEDIUM" => Severity::Medium,
529        "HIGH" => Severity::High,
530        "CRITICAL" => Severity::Critical,
531        _ => Severity::Unknown,
532    }
533}
534
535/// Map a parsed CVSS v3 severity band onto [`Severity`].
536fn band_from_cvss(sev: cvss::Severity) -> Severity {
537    match sev {
538        cvss::Severity::None => Severity::Unknown,
539        cvss::Severity::Low => Severity::Low,
540        cvss::Severity::Medium => Severity::Medium,
541        cvss::Severity::High => Severity::High,
542        cvss::Severity::Critical => Severity::Critical,
543    }
544}
545
546// --- Shared Tier-C finding construction ---
547//
// Every toolchain-free feeder turns a "this advisory affects this installed package at this
549// version" decision into the same `VulnFinding` shape, then sorts and dedups the same way.
550// Only the *decision* (range vs versions-list match, the per-ecosystem version type and its
551// coercion to the stored SemVer form) differs per feeder; the construction does not. Owning it
552// here keeps the finding contract — the `osv.dev` URL, the per-occurrence skeleton, the
553// `(advisory, package)` sort/dedup — in one place instead of copy-pasted ten times, where a
554// field added in one feeder and forgotten in another would be a silent inconsistency.
555
556/// The outcome of a toolchain-free `scan_offline`: the findings, plus how many installed
557/// packages were skipped because their version string did not parse.
558///
559/// A skip is benign for a non-registry pin (a VCS/URL/path-pinned dependency has no registry
560/// release, so no registry advisory can apply). But surfacing the *count* keeps the skip
561/// visible rather than silent — so a malformed-but-real version the parser wrongly rejects is
562/// not indistinguishable from a clean result. The orchestrator sums these and reports a nonzero
563/// total; it is never an error (the scan is still as complete as the lockfile allows).
564#[derive(Debug, Default, Clone)]
565pub struct TierCScan {
566    /// The deduplicated, sorted findings.
567    pub findings: Vec<VulnFinding>,
568    /// Count of installed packages skipped because their version did not parse.
569    pub skipped_unparseable: u32,
570}
571
/// Build the canonical `osv.dev` URL for an advisory id — every id (GHSA, CVE, PYSEC,
/// MAL, …) resolves there. The id is appended as-is, without escaping.
#[must_use]
pub fn advisory_url(id: &str) -> String {
    ["https://osv.dev/vulnerability/", id].concat()
}
577
578/// The package name of a finding's first occurrence, the sort/dedup key alongside the advisory
579/// id. Tier-C findings always carry exactly one `InRepo` occurrence.
580#[must_use]
581pub fn occ_package(v: &VulnFinding) -> &str {
582    match v.occurrences.first() {
583        Some(Occurrence::InRepo { package, .. }) => package,
584        _ => "",
585    }
586}
587
588/// Sort by `(advisory id, package)` and dedup on that pair — the deterministic ordering every
589/// feeder emits. The same package+version can resolve many times in a lockfile, but a
590/// multi-package advisory legitimately yields one finding per *distinct* package, so the dedup
591/// key is the pair, not the advisory alone.
592pub fn sort_dedup_findings(out: &mut Vec<VulnFinding>) {
593    out.sort_by(|a, b| {
594        a.advisory_id
595            .cmp(&b.advisory_id)
596            .then_with(|| occ_package(a).cmp(occ_package(b)))
597    });
598    out.dedup_by(|a, b| a.advisory_id == b.advisory_id && occ_package(a) == occ_package(b));
599}
600
601/// The inputs a feeder supplies to build one Tier-C [`VulnFinding`]. The version-matching
602/// decision (and any `to_semver` coercion of the installed/patched versions) happens in the
603/// feeder; everything here is already in the shared model's types.
604pub struct TierCFinding<'a> {
605    /// The ecosystem this finding came from.
606    pub ecosystem: Ecosystem,
607    /// The advisory id (the `osv.dev` URL is derived from it).
608    pub advisory_id: String,
609    /// CVE/GHSA/… cross-reference aliases.
610    pub aliases: Vec<String>,
611    /// Display title — typically the advisory summary, falling back to the id.
612    pub title: String,
613    /// Severity band.
614    pub severity: Severity,
615    /// CVSS base score when the advisory carries one (`None` where the feeder does not
616    /// extract it).
617    pub cvss_score: Option<f32>,
618    /// The affected package name.
619    pub package: String,
620    /// The installed version, in the shared SemVer model (already coerced by the feeder).
621    pub installed: Version,
622    /// Versions that fix the advisory; empty means "no fix available".
623    pub patched: Vec<VersionReq>,
624    /// Whether the package is a direct dependency.
625    pub direct: bool,
626    /// A representative introducer chain `[root, …, package]`; empty when the feeder cannot
627    /// compute a dependency graph.
628    pub dependency_path: Vec<String>,
629    /// The repo the package was found in.
630    pub repo: &'a RepoId,
631    /// The reason string for the Tier-C `Unknown` reachability verdict (the feeder names the
632    /// fidelity, e.g. "package-level scan (no toolchain): version match only").
633    pub reach_reason: &'static str,
634}
635
636impl TierCFinding<'_> {
637    /// Assemble the [`VulnFinding`]. Package-level only: `affected_functions` is empty,
638    /// `reachable` is `None`, and reachability is the Tier-C `Unknown` contract (never
639    /// `NotReachable` — see [`Reachability::tier_c_unknown`]).
640    #[must_use]
641    pub fn build(self) -> VulnFinding {
642        VulnFinding {
643            advisory_id: self.advisory_id.clone(),
644            aliases: self.aliases,
645            ecosystem: self.ecosystem,
646            title: self.title,
647            severity: self.severity,
648            cvss_score: self.cvss_score,
649            url: Some(advisory_url(&self.advisory_id)),
650            occurrences: vec![Occurrence::InRepo {
651                repo: self.repo.clone(),
652                package: self.package,
653                installed: self.installed,
654                patched: self.patched,
655                dependency_kind: if self.direct {
656                    DependencyKind::Direct
657                } else {
658                    DependencyKind::Transitive
659                },
660                dependency_path: self.dependency_path,
661                active: None,
662                source: Default::default(),
663            }],
664            affected_functions: Vec::new(),
665            reachable: None,
666            reachability: Some(Reachability::tier_c_unknown(self.reach_reason)),
667            exploit: Default::default(),
668        }
669    }
670}
671
672// --- OSV wire schema (the subset every feeder reads). ---
673
674/// One OSV advisory record, deserialized from a single `*.json` export file.
675#[derive(Debug, Deserialize)]
676pub struct OsvRecord {
677    pub id: String,
678    pub aliases: Option<Vec<String>>,
679    pub summary: Option<String>,
680    #[serde(default)]
681    pub affected: Vec<Affected>,
682    #[serde(default)]
683    pub severity: Vec<SeverityEntry>,
684    pub database_specific: Option<DatabaseSpecific>,
685}
686
687/// One `severity[]` vector (e.g. a `CVSS_V3` base-score string).
688#[derive(Debug, Deserialize)]
689pub struct SeverityEntry {
690    #[serde(rename = "type")]
691    pub kind: Option<String>,
692    pub score: Option<String>,
693}
694
695/// The `database_specific` block, read for its curated GHSA severity band.
696#[derive(Debug, Deserialize)]
697pub struct DatabaseSpecific {
698    pub severity: Option<String>,
699}
700
701/// One `affected[]` entry: a package, its ranges, and any enumerated versions.
702#[derive(Debug, Deserialize)]
703pub struct Affected {
704    #[serde(default)]
705    pub package: Package,
706    #[serde(default)]
707    pub ranges: Vec<RawRange>,
708    pub versions: Option<Vec<String>>,
709}
710
711/// The `affected[].package` identity.
712#[derive(Debug, Deserialize, Default)]
713pub struct Package {
714    pub name: Option<String>,
715    pub ecosystem: Option<String>,
716}
717
718/// One `affected[].ranges[]` entry (raw string bounds; parsed once by [`advisories_from`]).
719#[derive(Debug, Deserialize)]
720pub struct RawRange {
721    #[serde(rename = "type")]
722    pub kind: Option<String>,
723    #[serde(default)]
724    pub events: Vec<RawEvent>,
725}
726
727/// One `ranges[].events[]` entry: at most one bound set.
728#[derive(Debug, Deserialize)]
729pub struct RawEvent {
730    pub introduced: Option<String>,
731    pub fixed: Option<String>,
732    pub last_affected: Option<String>,
733}
734
#[cfg(test)]
mod tests {
    // Tests unwrap freely: a failed unwrap here is a broken fixture, not a runtime error.
    #![allow(clippy::unwrap_used)]
    use super::*;
    use semver::Version;

    // Parses a SemVer literal; panics on a malformed fixture.
    fn v(s: &str) -> Version {
        Version::parse(s).unwrap()
    }

    // Plain-SemVer bound parser (the npm shape): the `"0"` lower-bound shorthand maps to
    // `0.0.0`, anything else must be strict SemVer or the bound is unparseable (`None`).
    // Shared by both matchers under test so they can never be fed different parsers.
    fn parse_plain(raw: &str) -> Option<Version> {
        if raw == "0" {
            Some(Version::new(0, 0, 0))
        } else {
            Version::parse(raw).ok()
        }
    }

    // Plain-SemVer bounds, the simplest instantiation (the npm shape): `introduced` is
    // compared with the version type's own `>=`.
    fn plain(version: &Version, ranges: &[Range]) -> Match<Version> {
        affected_fixed(version, ranges, parse_plain, |ver, bound| ver >= bound)
    }

    // Builds a matchable range from `(kind, bound)` pairs, where `kind` is one of
    // `"introduced"`, `"fixed"` or `"last_affected"`.
    fn range(events: &[(&str, &str)]) -> Range {
        Range {
            matchable: true,
            events: events
                .iter()
                .map(|&(kind, bound)| {
                    // A misspelled kind would otherwise produce an all-`None` event and
                    // quietly change what the fixture exercises.
                    assert!(
                        matches!(kind, "introduced" | "fixed" | "last_affected"),
                        "unknown event kind {kind:?} in test fixture"
                    );
                    Event {
                        introduced: (kind == "introduced").then(|| bound.to_owned()),
                        fixed: (kind == "fixed").then(|| bound.to_owned()),
                        last_affected: (kind == "last_affected").then(|| bound.to_owned()),
                    }
                })
                .collect(),
        }
    }

    #[test]
    fn affected_below_fix_reports_smallest_patch() {
        let r = [
            range(&[("introduced", "0"), ("fixed", "1.0.1")]),
            range(&[("introduced", "2.0.0"), ("fixed", "2.0.3")]),
        ];
        // 0.9.0 is in [0,1.0.1): fix is 1.0.1, not 2.0.3.
        assert_eq!(
            plain(&v("0.9.0"), &r),
            Match::Affected {
                fixed: Some(v("1.0.1"))
            }
        );
        assert_eq!(
            plain(&v("1.5.0"), &r),
            Match::NotAffected,
            "between windows"
        );
    }

    #[test]
    fn no_introduced_event_is_affected_from_zero() {
        let r = [Range {
            matchable: true,
            events: vec![Event {
                introduced: None,
                fixed: Some("2.0.0".into()),
                last_affected: None,
            }],
        }];
        assert_eq!(
            plain(&v("1.5.0"), &r),
            Match::Affected {
                fixed: Some(v("2.0.0"))
            }
        );
        // The fix version itself is outside the affected window.
        assert_eq!(plain(&v("2.0.0"), &r), Match::NotAffected);
    }

    #[test]
    fn unparseable_introduced_fails_loud() {
        let r = [range(&[("introduced", "garbage"), ("fixed", "99.0.0")])];
        assert_eq!(
            plain(&v("1.0.0"), &r),
            Match::Affected {
                fixed: Some(v("99.0.0"))
            },
            "a malformed lower bound must read affected, never clean"
        );
    }

    #[test]
    fn non_semver_ranges_are_skipped() {
        // `matchable: false` stands in for a range whose `type` this ecosystem ignores.
        let r = [Range {
            matchable: false,
            events: vec![Event {
                introduced: Some("0".into()),
                fixed: None,
                last_affected: None,
            }],
        }];
        assert_eq!(plain(&v("1.0.0"), &r), Match::NotAffected);
    }

    #[test]
    fn last_affected_closes_an_open_interval() {
        let r = [range(&[
            ("introduced", "1.0.0"),
            ("last_affected", "1.4.0"),
        ])];
        assert_eq!(plain(&v("1.3.0"), &r), Match::Affected { fixed: None });
        assert_eq!(plain(&v("1.5.0"), &r), Match::NotAffected);
    }

    // The pre-parsed matcher must agree with the string matcher on every shape: that is the
    // whole safety contract of the parse-once optimization (a divergence = a false-clean).
    //
    // NOTE(review): the `garbage` range presumably marks every sampled version below
    // 99.0.0 as affected, so agreement on `NotAffected` may never be exercised here —
    // consider a sample at or above 99.0.0 once confirmed against `affected_fixed`.
    #[test]
    fn parsed_matcher_agrees_with_string_matcher() {
        let ranges = [
            range(&[("introduced", "0"), ("fixed", "1.0.1")]),
            range(&[("introduced", "2.0.0"), ("fixed", "2.0.3")]),
            range(&[("introduced", "1.0.0"), ("last_affected", "1.4.0")]),
            range(&[("introduced", "garbage"), ("fixed", "99.0.0")]),
            Range {
                matchable: false,
                events: vec![Event {
                    introduced: Some("0".into()),
                    fixed: None,
                    last_affected: None,
                }],
            },
        ];
        let parsed: Vec<ParsedRange<Version>> =
            ranges.iter().map(|r| parse_range(r, parse_plain)).collect();
        for s in [
            "0.0.1", "0.9.0", "1.0.0", "1.0.1", "1.3.0", "1.5.0", "2.0.0", "2.0.3", "5.0.0",
        ] {
            let ver = v(s);
            let want = plain(&ver, &ranges);
            let got = affected_fixed_parsed(&ver, &parsed, |a, b| a >= b);
            assert_eq!(want, got, "disagreement at {s}");
        }
    }

    #[test]
    fn custom_introduced_comparator_is_honored() {
        // A comparator that treats nothing as at/after `introduced` ⇒ never affected
        // via an introduced bound (proves the closure actually drives the decision).
        let r = [range(&[("introduced", "1.0.0"), ("fixed", "2.0.0")])];
        let never = affected_fixed(
            &v("1.5.0"),
            &r,
            |raw| Version::parse(raw).ok(),
            |_, _| false,
        );
        assert_eq!(never, Match::NotAffected);
    }
}