fleetreach_core/osv.rs
1//! OSV advisory version-range matching plus the shared toolchain-free feeder scaffolding
2//! (wire schema, parallel dir/zip loader, by-package index, severity), used by every
3//! ecosystem Tier-C feeder. The per-ecosystem knobs are passed in via [`Spec`]; this matcher
4//! is the algorithmic core.
5//!
6//! The event-walking skeleton — a matchable range with no `introduced` is affected from
7//! 0, an unparseable `introduced` fails **loud** (treated as affected, never silently
8//! clean), the reported fix is the *smallest* `fixed` above the version, and a
9//! `last_affected` closes an open interval — is identical across ecosystems. It is also
10//! exactly the logic where a missed case is a *false-clean*, the worst bug, so it lives
11//! here once rather than copied per feeder where the two could drift apart.
12//!
13//! What differs per ecosystem is only the version type and its handling, both passed in:
14//! the matcher is generic over the version type `V` (SemVer for Go/npm, PEP 440 for
15//! PyPI), and the bound handling comes in as closures — how a raw bound string parses
16//! (Go's pseudo-versions vs plain SemVer vs PEP 440, and each ecosystem's `"0"`
17//! lower-bound convention) and whether a version is at/after an `introduced` bound (Go
18//! pseudo-versions order below their release, so they need a release-tuple compare; the
19//! others just use `>=`). The `fixed`/`last_affected` upper bounds always use `V`'s own
20//! ordering.
21
22/// One advisory range reduced to what the matcher needs: whether its `type` is one this
23/// ecosystem evaluates (`matchable` — `SEMVER` for Go/npm, `ECOSYSTEM` for PyPI; other
24/// types are skipped) and its events.
25#[derive(Debug, Clone)]
26pub struct Range {
27 pub matchable: bool,
28 pub events: Vec<Event>,
29}
30
/// A raw OSV range event. Each event is expected to carry at most one of the three
/// bounds; the other two are `None`.
#[derive(Debug, Clone)]
pub struct Event {
    /// Raw `introduced` lower bound, if this is an `introduced` event.
    pub introduced: Option<String>,
    /// Raw `fixed` upper bound (exclusive), if this is a `fixed` event.
    pub fixed: Option<String>,
    /// Raw `last_affected` upper bound (inclusive), if this is a `last_affected` event.
    pub last_affected: Option<String>,
}
38
/// The result of parsing one bound string at DB-load time.
///
/// The distinction matters for `introduced` bounds: a string that was present but that
/// the ecosystem's version parser rejected (a malformed / poisoned DB) is kept as
/// [`ParsedBound::Unparseable`] instead of being dropped, so the matcher can fail
/// **loud** and treat it as affected — mirroring what the string-based
/// [`affected_fixed`] does for an unparseable `introduced`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParsedBound<V> {
    /// The bound string parsed successfully into this version.
    Version(V),
    /// A bound string existed but the parser could not make a version of it.
    Unparseable,
}
51
52/// One OSV range event with its bounds pre-parsed into the version type `V`. Built once at
53/// DB load (see [`parse_range`]) so the per-scan matcher never re-parses a bound string.
54#[derive(Debug, Clone)]
55pub struct ParsedEvent<V> {
56 /// `None` = no `introduced` event; `Some(_)` = an `introduced` bound (parsed or not).
57 pub introduced: Option<ParsedBound<V>>,
58 /// A parseable `fixed` bound, or `None` (absent OR unparseable — both simply skip, the
59 /// same as the string matcher's `and_then(parse)`).
60 pub fixed: Option<V>,
61 /// A parseable `last_affected` bound, or `None` (absent OR unparseable).
62 pub last_affected: Option<V>,
63}
64
65/// An advisory range with its event bounds pre-parsed (see [`ParsedEvent`]).
66#[derive(Debug, Clone)]
67pub struct ParsedRange<V> {
68 pub matchable: bool,
69 pub events: Vec<ParsedEvent<V>>,
70}
71
72/// Pre-parse a string-bound [`Range`] into a [`ParsedRange`] once, using the ecosystem's
73/// bound parser (the same `parse_bound` closure the string matcher takes, including its
74/// `"0"` convention). Do this at DB load so a popular advisory's bounds are parsed once per
75/// load rather than once per repo scanned.
76pub fn parse_range<V>(range: &Range, parse_bound: impl Fn(&str) -> Option<V>) -> ParsedRange<V> {
77 ParsedRange {
78 matchable: range.matchable,
79 events: range
80 .events
81 .iter()
82 .map(|e| ParsedEvent {
83 introduced: e.introduced.as_deref().map(|raw| {
84 parse_bound(raw).map_or(ParsedBound::Unparseable, ParsedBound::Version)
85 }),
86 fixed: e.fixed.as_deref().and_then(&parse_bound),
87 last_affected: e.last_affected.as_deref().and_then(&parse_bound),
88 })
89 .collect(),
90 }
91}
92
93/// The pre-parsed counterpart of [`affected_fixed`]: identical semantics, but the bounds
94/// were parsed once at DB load (see [`parse_range`]) so this hot per-scan path does no
95/// string parsing. `at_or_after_introduced` is unchanged (plain `>=` for SemVer/PEP 440, a
96/// pseudo-version-aware compare for Go).
97pub fn affected_fixed_parsed<V: Ord + Clone>(
98 version: &V,
99 ranges: &[ParsedRange<V>],
100 at_or_after_introduced: impl Fn(&V, &V) -> bool,
101) -> Match<V> {
102 for range in ranges {
103 if !range.matchable {
104 continue;
105 }
106 // OSV convention: a matchable range with no `introduced` event is affected from 0.
107 let mut affected = !range.events.iter().any(|e| e.introduced.is_some());
108 let mut patch: Option<V> = None;
109 for event in &range.events {
110 match &event.introduced {
111 Some(ParsedBound::Version(v)) if at_or_after_introduced(version, v) => {
112 affected = true
113 }
114 Some(ParsedBound::Version(_)) => {}
115 // A present-but-unparseable lower bound must fail LOUD — treat as affected.
116 Some(ParsedBound::Unparseable) => affected = true,
117 None => {}
118 }
119 if let Some(v) = &event.fixed {
120 if version >= v {
121 affected = false;
122 } else {
123 patch = Some(patch.map_or_else(|| v.clone(), |p| p.min(v.clone())));
124 }
125 }
126 if let Some(v) = &event.last_affected {
127 if version > v {
128 affected = false;
129 }
130 }
131 }
132 if affected {
133 return Match::Affected { fixed: patch };
134 }
135 }
136 Match::NotAffected
137}
138
/// The matcher's verdict for one installed version against an advisory's ranges.
///
/// Generic over the version type `V`, so a SemVer or PEP 440 fix is handed back
/// verbatim rather than stringified.
#[derive(Debug, PartialEq, Eq)]
pub enum Match<V> {
    /// No affected range covers the version.
    NotAffected,
    /// The version is affected. `fixed` is the patch that closes its interval when the
    /// DB names one; it is `None` for an open `last_affected`-only range or when no fix
    /// exists yet.
    Affected { fixed: Option<V> },
}
149
150/// Evaluate the matchable `ranges` for `version`. Ranges whose `type` this ecosystem does
151/// not evaluate (`matchable == false`) are skipped.
152///
153/// Generic over the version type `V`: `parse_bound` parses a raw bound string with the
154/// ecosystem's version adapter (and its `"0"` convention), and `at_or_after_introduced`
155/// decides whether `version` is at/after an `introduced` lower bound — plain `>=` for
156/// SemVer/PEP 440, a pseudo-version-aware compare for Go. Both are called many times, so
157/// they are taken by reference internally.
158pub fn affected_fixed<V: Ord + Clone>(
159 version: &V,
160 ranges: &[Range],
161 parse_bound: impl Fn(&str) -> Option<V>,
162 at_or_after_introduced: impl Fn(&V, &V) -> bool,
163) -> Match<V> {
164 for range in ranges {
165 if !range.matchable {
166 continue;
167 }
168 // OSV convention: a matchable range with no `introduced` event is introduced at 0,
169 // i.e. affected from the start. Defaulting to `false` would make such a range
170 // (and a poisoned advisory that simply omits `introduced`) silently read clean.
171 let mut affected = !range.events.iter().any(|e| e.introduced.is_some());
172 let mut patch: Option<V> = None;
173 for event in &range.events {
174 if let Some(raw) = event.introduced.as_deref() {
175 match parse_bound(raw) {
176 Some(v) if at_or_after_introduced(version, &v) => affected = true,
177 Some(_) => {}
178 // An unparseable lower bound (malformed / poisoned DB) must fail
179 // LOUD — treat as affected — never silently clean.
180 None => affected = true,
181 }
182 }
183 if let Some(v) = event.fixed.as_deref().and_then(&parse_bound) {
184 if *version >= v {
185 affected = false;
186 } else {
187 // The patch is the *smallest* fixed above `version` (the fix that
188 // closes its interval), not merely the last fixed event seen — a
189 // later interval's fix must not overwrite an earlier one.
190 patch = Some(patch.map_or(v.clone(), |p| p.min(v)));
191 }
192 }
193 if let Some(v) = event.last_affected.as_deref().and_then(&parse_bound) {
194 if *version > v {
195 affected = false;
196 }
197 }
198 }
199 if affected {
200 return Match::Affected { fixed: patch };
201 }
202 }
203 Match::NotAffected
204}
205
206// =============================================================================
207// Shared toolchain-free OSV feeder scaffolding
208// =============================================================================
209//
210// Every ecosystem Tier-C feeder (npm, PyPI, RubyGems, Packagist, NuGet, Julia, Swift, Hex,
211// GitHub Actions, Maven) reads the same osv.dev export shape — a directory of OSV JSON
212// records, or the `all.zip` — and indexes it by package name. The only per-ecosystem
213// differences are captured in [`Spec`]: the version type, how a bound/version string parses,
214// how a package name normalizes, which OSV `ecosystem`/range `type` strings this feeder
215// consumes, whether enumerated `versions` are used, and how severity is derived. The load,
216// the wire schema, the parallel dir/zip reader, the by-package index, and the parse-once
217// range building all live here, once — the place where a missed case is a false-clean.
218
219use std::collections::BTreeMap;
220use std::io::{self, Cursor, Read as _};
221use std::path::{Path, PathBuf};
222use std::str::FromStr;
223use std::sync::Arc;
224
225use rayon::prelude::*;
226use serde::Deserialize;
227use thiserror::Error;
228
229use crate::semver::{Version, VersionReq};
230use crate::{DependencyKind, Ecosystem, Occurrence, Reachability, RepoId, Severity, VulnFinding};
231
232/// One advisory's facts for a single affected package, version type `V`. The range bounds
233/// and enumerated `versions` are parsed once at load (see [`load`]).
234#[derive(Debug, Clone)]
235pub struct Advisory<V> {
236 pub id: String,
237 pub aliases: Vec<String>,
238 pub summary: Option<String>,
239 pub severity: Severity,
240 /// A CVSS base score when the record carries a parseable CVSS_V3 vector (always `None`
241 /// for feeders whose [`Spec::severity`] does not extract one, e.g. npm).
242 pub cvss_score: Option<f32>,
243 /// `matchable` ranges with bounds pre-parsed (see [`parse_range`]).
244 pub ranges: Vec<ParsedRange<V>>,
245 /// Enumerated affected versions, sorted+deduped for `binary_search` (empty when
246 /// [`Spec::use_versions`] is false).
247 pub versions: Vec<V>,
248}
249
250/// The offline OSV DB: `normalized package name -> advisories`, built once at load.
251#[derive(Debug)]
252pub struct OsvDb<V> {
253 by_package: BTreeMap<String, Vec<Advisory<V>>>,
254}
255
256impl<V> Default for OsvDb<V> {
257 fn default() -> Self {
258 OsvDb {
259 by_package: BTreeMap::new(),
260 }
261 }
262}
263
264impl<V> OsvDb<V> {
265 /// The advisories indexed under `key` (a name already run through
266 /// [`Spec::normalize_name`]), or an empty slice if none.
267 pub fn advisories_for(&self, key: &str) -> &[Advisory<V>] {
268 self.by_package.get(key).map_or(&[], Vec::as_slice)
269 }
270 /// Total advisory-package index entries — for diagnostics/tests.
271 pub fn len(&self) -> usize {
272 self.by_package.values().map(Vec::len).sum()
273 }
274 /// Whether the DB indexed no advisories at all.
275 pub fn is_empty(&self) -> bool {
276 self.by_package.is_empty()
277 }
278}
279
280/// The per-ecosystem knobs the generic [`load`]/[`advisories_from`] need. Everything else
281/// (wire schema, parallel reader, indexing, parse-once) is shared.
282pub struct Spec<V: 'static> {
283 /// The OSV `affected[].package.ecosystem` string this feeder consumes (e.g. `"Maven"`).
284 pub ecosystem: &'static str,
285 /// The OSV range `type` this feeder evaluates (`"ECOSYSTEM"` or `"SEMVER"`); other types
286 /// are not `matchable`.
287 pub range_type: &'static str,
288 /// Parse a bound/version string with this ecosystem's version adapter (incl. its `"0"`
289 /// convention). Used at load to pre-parse range bounds and the enumerated `versions`.
290 pub parse_version: fn(&str) -> Option<V>,
291 /// Normalize a package name into the index key (identity, lowercase, PEP 503, URL, ...).
292 pub normalize_name: fn(&str) -> String,
293 /// Whether to collect the enumerated `versions` list (false for npm, which is range-only).
294 pub use_versions: bool,
295 /// Derive `(severity, cvss_score)` from a record. [`default_severity`] suits every feeder
296 /// whose records carry the GHSA band / CVSS_V3 vector; npm passes its own.
297 pub severity: fn(&OsvRecord) -> (Severity, Option<f32>),
298}
299
300/// Why an OSV mirror file could not be read or parsed — shared by every feeder's error type.
301/// A present-but-broken input fails **closed** (an honest gap, never a false-clean).
302#[derive(Debug, Error)]
303#[non_exhaustive]
304pub enum DbError {
305 /// The file could not be read.
306 #[error("read failed: {0}")]
307 Read(#[from] io::Error),
308 /// An OSV record was not valid JSON.
309 #[error("invalid JSON: {0}")]
310 Parse(#[from] serde_json::Error),
311 /// The OSV mirror `.zip` could not be opened or decompressed.
312 #[error("invalid zip archive: {0}")]
313 Archive(String),
314}
315
316impl From<zip::result::ZipError> for DbError {
317 fn from(e: zip::result::ZipError) -> Self {
318 match e {
319 zip::result::ZipError::Io(io) => DbError::Read(io),
320 other => DbError::Archive(other.to_string()),
321 }
322 }
323}
324
325/// A load failure carrying the offending `path` and its cause, so a feeder can wrap it into
326/// its own `Db { path, source }` error verbatim.
327#[derive(Debug)]
328pub struct LoadError {
329 pub path: PathBuf,
330 pub source: DbError,
331}
332
333impl LoadError {
334 fn new(path: impl Into<PathBuf>, source: impl Into<DbError>) -> Self {
335 LoadError {
336 path: path.into(),
337 source: source.into(),
338 }
339 }
340}
341
342/// Load the OSV mirror at `root` — a directory of `*.json` records (what the osv.dev
343/// `<Ecosystem>/all.zip` unzips to) or the `.zip` itself — into an indexed [`OsvDb`],
344/// parsing every range bound and enumerated version once.
345///
346/// # Errors
347///
348/// Returns [`LoadError`] if `root` cannot be read, the archive cannot be decompressed, or a
349/// record is not valid JSON — failing closed.
350pub fn load<V>(root: &Path, spec: &Spec<V>) -> Result<OsvDb<V>, LoadError>
351where
352 V: Ord + Clone + Send,
353{
354 if root.is_dir() {
355 load_dir(root, spec)
356 } else {
357 load_zip(root, spec)
358 }
359}
360
361fn load_dir<V>(root: &Path, spec: &Spec<V>) -> Result<OsvDb<V>, LoadError>
362where
363 V: Ord + Clone + Send,
364{
365 let mut paths: Vec<PathBuf> = std::fs::read_dir(root)
366 .map_err(|e| LoadError::new(root, e))?
367 .map(|entry| entry.map(|e| e.path()).map_err(|e| LoadError::new(root, e)))
368 .collect::<Result<_, _>>()?;
369 paths.retain(|p| p.extension().and_then(|e| e.to_str()) == Some("json"));
370 paths.sort();
371
372 let per_file: Vec<Vec<(String, Advisory<V>)>> = paths
373 .par_iter()
374 .map(|path| {
375 let body = std::fs::read_to_string(path).map_err(|e| LoadError::new(path, e))?;
376 let osv: OsvRecord =
377 serde_json::from_str(&body).map_err(|e| LoadError::new(path, e))?;
378 Ok(advisories_from(osv, spec))
379 })
380 .collect::<Result<_, LoadError>>()?;
381
382 Ok(OsvDb {
383 by_package: merge(per_file),
384 })
385}
386
387fn load_zip<V>(path: &Path, spec: &Spec<V>) -> Result<OsvDb<V>, LoadError>
388where
389 V: Ord + Clone + Send,
390{
391 let bytes: Arc<[u8]> = std::fs::read(path)
392 .map_err(|e| LoadError::new(path, e))?
393 .into();
394 let archive = zip::ZipArchive::new(Cursor::new(bytes)).map_err(|e| LoadError::new(path, e))?;
395
396 let per_entry: Vec<Vec<(String, Advisory<V>)>> = (0..archive.len())
397 .into_par_iter()
398 .map_init(
399 || archive.clone(),
400 |archive, i| {
401 let mut entry = archive.by_index(i).map_err(|e| LoadError::new(path, e))?;
402 if !entry.name().ends_with(".json") {
403 return Ok(Vec::new());
404 }
405 let mut body = String::new();
406 entry
407 .read_to_string(&mut body)
408 .map_err(|e| LoadError::new(path, e))?;
409 let osv: OsvRecord =
410 serde_json::from_str(&body).map_err(|e| LoadError::new(path, e))?;
411 Ok(advisories_from(osv, spec))
412 },
413 )
414 .collect::<Result<_, LoadError>>()?;
415
416 Ok(OsvDb {
417 by_package: merge(per_entry),
418 })
419}
420
421/// Fold per-record advisory lists (in their original `affected[]` order) into the by-package
422/// index.
423fn merge<V>(per_file: Vec<Vec<(String, Advisory<V>)>>) -> BTreeMap<String, Vec<Advisory<V>>> {
424 let mut by_package: BTreeMap<String, Vec<Advisory<V>>> = BTreeMap::new();
425 for record in per_file {
426 for (name, advisory) in record {
427 by_package.entry(name).or_default().push(advisory);
428 }
429 }
430 by_package
431}
432
433/// The `(normalized name, advisory)` pairs one OSV record contributes: one per `affected[]`
434/// entry whose `package.ecosystem` matches [`Spec::ecosystem`]. Bounds and versions are
435/// parsed once here. Exposed for feeder unit tests.
436pub fn advisories_from<V>(osv: OsvRecord, spec: &Spec<V>) -> Vec<(String, Advisory<V>)>
437where
438 V: Ord + Clone,
439{
440 let (severity, cvss_score) = (spec.severity)(&osv);
441 osv.affected
442 .iter()
443 .filter(|a| a.package.ecosystem.as_deref() == Some(spec.ecosystem))
444 .filter_map(|affected| {
445 let name = (spec.normalize_name)(affected.package.name.as_deref()?);
446 let ranges = affected
447 .ranges
448 .iter()
449 .map(|r| {
450 let range = Range {
451 matchable: r.kind.as_deref() == Some(spec.range_type),
452 events: r
453 .events
454 .iter()
455 .map(|e| Event {
456 introduced: e.introduced.clone(),
457 fixed: e.fixed.clone(),
458 last_affected: e.last_affected.clone(),
459 })
460 .collect(),
461 };
462 parse_range(&range, spec.parse_version)
463 })
464 .collect();
465 let mut versions: Vec<V> = if spec.use_versions {
466 affected
467 .versions
468 .iter()
469 .flatten()
470 .filter_map(|v| (spec.parse_version)(v))
471 .collect()
472 } else {
473 Vec::new()
474 };
475 versions.sort();
476 versions.dedup();
477 Some((
478 name,
479 Advisory {
480 id: osv.id.clone(),
481 aliases: osv.aliases.clone().unwrap_or_default(),
482 summary: osv.summary.clone(),
483 severity,
484 cvss_score,
485 ranges,
486 versions,
487 },
488 ))
489 })
490 .collect()
491}
492
493/// The default `(severity, cvss_score)` derivation: prefer the curated GHSA band
494/// (`database_specific.severity`), and take the best CVSS_V3 base score across the record's
495/// `severity[]` vectors (deriving the band from it when no GHSA band is present).
496pub fn default_severity(osv: &OsvRecord) -> (Severity, Option<f32>) {
497 let band = osv
498 .database_specific
499 .as_ref()
500 .and_then(|d| d.severity.as_deref())
501 .map(band_from_label)
502 .unwrap_or(Severity::Unknown);
503
504 let scored: Option<(Severity, f32)> = osv
505 .severity
506 .iter()
507 .filter(|s| s.kind.as_deref() == Some("CVSS_V3"))
508 .filter_map(|s| cvss::v3::Base::from_str(s.score.as_deref()?).ok())
509 .map(|base| {
510 let score = base.score();
511 (band_from_cvss(score.severity()), score.value() as f32)
512 })
513 .max_by(|a, b| a.1.total_cmp(&b.1));
514
515 let cvss_score = scored.map(|(_, v)| v);
516 let severity = if band != Severity::Unknown {
517 band
518 } else {
519 scored.map(|(b, _)| b).unwrap_or(Severity::Unknown)
520 };
521 (severity, cvss_score)
522}
523
524/// Map a GHSA/OSV severity band string onto [`Severity`].
525pub fn band_from_label(label: &str) -> Severity {
526 match label.to_ascii_uppercase().as_str() {
527 "LOW" => Severity::Low,
528 "MODERATE" | "MEDIUM" => Severity::Medium,
529 "HIGH" => Severity::High,
530 "CRITICAL" => Severity::Critical,
531 _ => Severity::Unknown,
532 }
533}
534
535/// Map a parsed CVSS v3 severity band onto [`Severity`].
536fn band_from_cvss(sev: cvss::Severity) -> Severity {
537 match sev {
538 cvss::Severity::None => Severity::Unknown,
539 cvss::Severity::Low => Severity::Low,
540 cvss::Severity::Medium => Severity::Medium,
541 cvss::Severity::High => Severity::High,
542 cvss::Severity::Critical => Severity::Critical,
543 }
544}
545
546// --- Shared Tier-C finding construction ---
547//
548// Every toolchain-free feeder turns an "this advisory affects this installed package at this
549// version" decision into the same `VulnFinding` shape, then sorts and dedups the same way.
550// Only the *decision* (range vs versions-list match, the per-ecosystem version type and its
551// coercion to the stored SemVer form) differs per feeder; the construction does not. Owning it
552// here keeps the finding contract — the `osv.dev` URL, the per-occurrence skeleton, the
553// `(advisory, package)` sort/dedup — in one place instead of copy-pasted ten times, where a
554// field added in one feeder and forgotten in another would be a silent inconsistency.
555
556/// The outcome of a toolchain-free `scan_offline`: the findings, plus how many installed
557/// packages were skipped because their version string did not parse.
558///
559/// A skip is benign for a non-registry pin (a VCS/URL/path-pinned dependency has no registry
560/// release, so no registry advisory can apply). But surfacing the *count* keeps the skip
561/// visible rather than silent — so a malformed-but-real version the parser wrongly rejects is
562/// not indistinguishable from a clean result. The orchestrator sums these and reports a nonzero
563/// total; it is never an error (the scan is still as complete as the lockfile allows).
564#[derive(Debug, Default, Clone)]
565pub struct TierCScan {
566 /// The deduplicated, sorted findings.
567 pub findings: Vec<VulnFinding>,
568 /// Count of installed packages skipped because their version did not parse.
569 pub skipped_unparseable: u32,
570}
571
/// Build the canonical `osv.dev` page URL for an advisory id. Every id family (GHSA,
/// CVE, PYSEC, MAL, …) resolves there. The id is appended verbatim, without escaping.
#[must_use]
pub fn advisory_url(id: &str) -> String {
    ["https://osv.dev/vulnerability/", id].concat()
}
577
578/// The package name of a finding's first occurrence, the sort/dedup key alongside the advisory
579/// id. Tier-C findings always carry exactly one `InRepo` occurrence.
580#[must_use]
581pub fn occ_package(v: &VulnFinding) -> &str {
582 match v.occurrences.first() {
583 Some(Occurrence::InRepo { package, .. }) => package,
584 _ => "",
585 }
586}
587
588/// Sort by `(advisory id, package)` and dedup on that pair — the deterministic ordering every
589/// feeder emits. The same package+version can resolve many times in a lockfile, but a
590/// multi-package advisory legitimately yields one finding per *distinct* package, so the dedup
591/// key is the pair, not the advisory alone.
592pub fn sort_dedup_findings(out: &mut Vec<VulnFinding>) {
593 out.sort_by(|a, b| {
594 a.advisory_id
595 .cmp(&b.advisory_id)
596 .then_with(|| occ_package(a).cmp(occ_package(b)))
597 });
598 out.dedup_by(|a, b| a.advisory_id == b.advisory_id && occ_package(a) == occ_package(b));
599}
600
601/// The inputs a feeder supplies to build one Tier-C [`VulnFinding`]. The version-matching
602/// decision (and any `to_semver` coercion of the installed/patched versions) happens in the
603/// feeder; everything here is already in the shared model's types.
604pub struct TierCFinding<'a> {
605 /// The ecosystem this finding came from.
606 pub ecosystem: Ecosystem,
607 /// The advisory id (the `osv.dev` URL is derived from it).
608 pub advisory_id: String,
609 /// CVE/GHSA/… cross-reference aliases.
610 pub aliases: Vec<String>,
611 /// Display title — typically the advisory summary, falling back to the id.
612 pub title: String,
613 /// Severity band.
614 pub severity: Severity,
615 /// CVSS base score when the advisory carries one (`None` where the feeder does not
616 /// extract it).
617 pub cvss_score: Option<f32>,
618 /// The affected package name.
619 pub package: String,
620 /// The installed version, in the shared SemVer model (already coerced by the feeder).
621 pub installed: Version,
622 /// Versions that fix the advisory; empty means "no fix available".
623 pub patched: Vec<VersionReq>,
624 /// Whether the package is a direct dependency.
625 pub direct: bool,
626 /// A representative introducer chain `[root, …, package]`; empty when the feeder cannot
627 /// compute a dependency graph.
628 pub dependency_path: Vec<String>,
629 /// The repo the package was found in.
630 pub repo: &'a RepoId,
631 /// The reason string for the Tier-C `Unknown` reachability verdict (the feeder names the
632 /// fidelity, e.g. "package-level scan (no toolchain): version match only").
633 pub reach_reason: &'static str,
634}
635
636impl TierCFinding<'_> {
637 /// Assemble the [`VulnFinding`]. Package-level only: `affected_functions` is empty,
638 /// `reachable` is `None`, and reachability is the Tier-C `Unknown` contract (never
639 /// `NotReachable` — see [`Reachability::tier_c_unknown`]).
640 #[must_use]
641 pub fn build(self) -> VulnFinding {
642 VulnFinding {
643 advisory_id: self.advisory_id.clone(),
644 aliases: self.aliases,
645 ecosystem: self.ecosystem,
646 title: self.title,
647 severity: self.severity,
648 cvss_score: self.cvss_score,
649 url: Some(advisory_url(&self.advisory_id)),
650 occurrences: vec![Occurrence::InRepo {
651 repo: self.repo.clone(),
652 package: self.package,
653 installed: self.installed,
654 patched: self.patched,
655 dependency_kind: if self.direct {
656 DependencyKind::Direct
657 } else {
658 DependencyKind::Transitive
659 },
660 dependency_path: self.dependency_path,
661 active: None,
662 source: Default::default(),
663 }],
664 affected_functions: Vec::new(),
665 reachable: None,
666 reachability: Some(Reachability::tier_c_unknown(self.reach_reason)),
667 exploit: Default::default(),
668 }
669 }
670}
671
672// --- OSV wire schema (the subset every feeder reads). ---
673
674/// One OSV advisory record, deserialized from a single `*.json` export file.
675#[derive(Debug, Deserialize)]
676pub struct OsvRecord {
677 pub id: String,
678 pub aliases: Option<Vec<String>>,
679 pub summary: Option<String>,
680 #[serde(default)]
681 pub affected: Vec<Affected>,
682 #[serde(default)]
683 pub severity: Vec<SeverityEntry>,
684 pub database_specific: Option<DatabaseSpecific>,
685}
686
687/// One `severity[]` vector (e.g. a `CVSS_V3` base-score string).
688#[derive(Debug, Deserialize)]
689pub struct SeverityEntry {
690 #[serde(rename = "type")]
691 pub kind: Option<String>,
692 pub score: Option<String>,
693}
694
695/// The `database_specific` block, read for its curated GHSA severity band.
696#[derive(Debug, Deserialize)]
697pub struct DatabaseSpecific {
698 pub severity: Option<String>,
699}
700
701/// One `affected[]` entry: a package, its ranges, and any enumerated versions.
702#[derive(Debug, Deserialize)]
703pub struct Affected {
704 #[serde(default)]
705 pub package: Package,
706 #[serde(default)]
707 pub ranges: Vec<RawRange>,
708 pub versions: Option<Vec<String>>,
709}
710
711/// The `affected[].package` identity.
712#[derive(Debug, Deserialize, Default)]
713pub struct Package {
714 pub name: Option<String>,
715 pub ecosystem: Option<String>,
716}
717
718/// One `affected[].ranges[]` entry (raw string bounds; parsed once by [`advisories_from`]).
719#[derive(Debug, Deserialize)]
720pub struct RawRange {
721 #[serde(rename = "type")]
722 pub kind: Option<String>,
723 #[serde(default)]
724 pub events: Vec<RawEvent>,
725}
726
727/// One `ranges[].events[]` entry: at most one bound set.
728#[derive(Debug, Deserialize)]
729pub struct RawEvent {
730 pub introduced: Option<String>,
731 pub fixed: Option<String>,
732 pub last_affected: Option<String>,
733}
734
#[cfg(test)]
mod tests {
    #![allow(clippy::unwrap_used)]
    use super::*;
    use semver::Version;

    /// Parse a SemVer literal; a malformed fixture is a test bug, so panic.
    fn v(s: &str) -> Version {
        Version::parse(s).unwrap()
    }

    /// The npm-shaped bound parser: `"0"` means 0.0.0, anything else is plain SemVer.
    fn parse_plain(raw: &str) -> Option<Version> {
        match raw {
            "0" => Some(Version::new(0, 0, 0)),
            other => Version::parse(other).ok(),
        }
    }

    /// Run the string matcher with plain-SemVer bounds and a plain `>=` lower-bound
    /// compare — the simplest instantiation.
    fn plain(version: &Version, ranges: &[Range]) -> Match<Version> {
        affected_fixed(version, ranges, parse_plain, |ver, bound| ver >= bound)
    }

    /// Build a matchable range from `(kind, value)` pairs, one event per pair, where
    /// `kind` is `"introduced"`, `"fixed"`, or `"last_affected"`.
    fn range(events: &[(&str, &str)]) -> Range {
        let mut built = Vec::with_capacity(events.len());
        for &(kind, value) in events {
            let pick = |wanted: &str| (kind == wanted).then(|| value.to_string());
            built.push(Event {
                introduced: pick("introduced"),
                fixed: pick("fixed"),
                last_affected: pick("last_affected"),
            });
        }
        Range {
            matchable: true,
            events: built,
        }
    }

    /// A non-matchable range that would otherwise cover everything from 0.
    fn unmatchable_from_zero() -> Range {
        Range {
            matchable: false,
            events: vec![Event {
                introduced: Some("0".into()),
                fixed: None,
                last_affected: None,
            }],
        }
    }

    #[test]
    fn affected_below_fix_reports_smallest_patch() {
        let windows = [
            range(&[("introduced", "0"), ("fixed", "1.0.1")]),
            range(&[("introduced", "2.0.0"), ("fixed", "2.0.3")]),
        ];
        // 0.9.0 sits in [0, 1.0.1), so its fix is 1.0.1 and not 2.0.3.
        let expected = Match::Affected {
            fixed: Some(v("1.0.1")),
        };
        assert_eq!(plain(&v("0.9.0"), &windows), expected);
        assert_eq!(
            plain(&v("1.5.0"), &windows),
            Match::NotAffected,
            "between windows"
        );
    }

    #[test]
    fn no_introduced_event_is_affected_from_zero() {
        let only_fixed = [Range {
            matchable: true,
            events: vec![Event {
                introduced: None,
                fixed: Some("2.0.0".into()),
                last_affected: None,
            }],
        }];
        let expected = Match::Affected {
            fixed: Some(v("2.0.0")),
        };
        assert_eq!(plain(&v("1.5.0"), &only_fixed), expected);
        assert_eq!(plain(&v("2.0.0"), &only_fixed), Match::NotAffected);
    }

    #[test]
    fn unparseable_introduced_fails_loud() {
        let poisoned = [range(&[("introduced", "garbage"), ("fixed", "99.0.0")])];
        assert_eq!(
            plain(&v("1.0.0"), &poisoned),
            Match::Affected {
                fixed: Some(v("99.0.0"))
            },
            "a malformed lower bound must read affected, never clean"
        );
    }

    #[test]
    fn non_semver_ranges_are_skipped() {
        let skipped = [unmatchable_from_zero()];
        assert_eq!(plain(&v("1.0.0"), &skipped), Match::NotAffected);
    }

    #[test]
    fn last_affected_closes_an_open_interval() {
        let closed = [range(&[
            ("introduced", "1.0.0"),
            ("last_affected", "1.4.0"),
        ])];
        assert_eq!(plain(&v("1.3.0"), &closed), Match::Affected { fixed: None });
        assert_eq!(plain(&v("1.5.0"), &closed), Match::NotAffected);
    }

    // The safety contract of the parse-once optimization: the pre-parsed matcher must
    // give the same answer as the string matcher on every shape (a divergence would be
    // a false-clean).
    #[test]
    fn parsed_matcher_agrees_with_string_matcher() {
        let ranges = [
            range(&[("introduced", "0"), ("fixed", "1.0.1")]),
            range(&[("introduced", "2.0.0"), ("fixed", "2.0.3")]),
            range(&[("introduced", "1.0.0"), ("last_affected", "1.4.0")]),
            range(&[("introduced", "garbage"), ("fixed", "99.0.0")]),
            unmatchable_from_zero(),
        ];
        let mut parsed: Vec<ParsedRange<Version>> = Vec::with_capacity(ranges.len());
        for raw in &ranges {
            parsed.push(parse_range(raw, parse_plain));
        }
        let probes = [
            "0.0.1", "0.9.0", "1.0.0", "1.0.1", "1.3.0", "1.5.0", "2.0.0", "2.0.3", "5.0.0",
        ];
        for s in probes {
            let ver = v(s);
            let want = plain(&ver, &ranges);
            let got = affected_fixed_parsed(&ver, &parsed, |a, b| a >= b);
            assert_eq!(want, got, "disagreement at {s}");
        }
    }

    #[test]
    fn custom_introduced_comparator_is_honored() {
        // With a comparator that never reports "at/after introduced", an introduced
        // bound can never mark the version affected — proof that the closure actually
        // drives the decision.
        let window = [range(&[("introduced", "1.0.0"), ("fixed", "2.0.0")])];
        let never = affected_fixed(
            &v("1.5.0"),
            &window,
            |raw| Version::parse(raw).ok(),
            |_, _| false,
        );
        assert_eq!(never, Match::NotAffected);
    }
}