pub mod cyclonedx;
pub mod spdx;
pub mod syft;
use serde_json::Value;
use thiserror::Error;
use crate::model::{Ecosystem, HashAlg, Sbom, SbomFormat};
/// Errors produced while detecting the format of, or parsing, an SBOM document.
#[derive(Error, Debug)]
pub enum ParseError {
/// A `serde_json` error; `#[from]` lets `?` convert a `serde_json::Error`
/// into this variant automatically.
#[error("invalid JSON: {0}")]
Json(#[from] serde_json::Error),
/// None of the format markers probed by `detect_format` were found in the document.
#[error("unknown SBOM format — no `bomFormat`, `spdxVersion`, or Syft `schema` marker found")]
UnknownFormat,
/// The document's format was identified, but parsing it is not implemented yet.
/// `format` names the affected format and is interpolated into the message.
#[error("{format} parsing not implemented yet (tracking issue: v0.1.0)")]
NotImplemented { format: SbomFormat },
}
/// Common entry point for a format-specific SBOM parser.
///
/// NOTE(review): presumably implemented by `cyclonedx::CycloneDxParser`,
/// `spdx::SpdxParser` and `syft::SyftParser`, whose `parse` associated
/// functions are called by `parse_with_format` — confirm in the submodules.
pub trait SbomParser {
/// Converts an already-deserialized JSON document into an [`Sbom`].
///
/// This is an associated function (no `self`), so implementors are used as
/// stateless namespaces: `SomeParser::parse(value)`.
///
/// # Errors
///
/// Returns a [`ParseError`] when the implementor cannot build an [`Sbom`]
/// from `value`.
fn parse(value: Value) -> Result<Sbom, ParseError>;
}
/// Identifies which SBOM format a JSON document uses by probing for
/// format-specific marker fields.
///
/// The probes run in a fixed order and the first match wins:
///
/// 1. CycloneDX — a string `bomFormat` equal to `cyclonedx`, ignoring ASCII case.
/// 2. SPDX — an `spdxVersion` key is present (its value is not inspected).
/// 3. Syft — a string at `schema.url` containing `anchore.io/schema/syft`.
///
/// # Errors
///
/// Returns [`ParseError::UnknownFormat`] when none of the markers is found.
pub fn detect_format(value: &Value) -> Result<SbomFormat, ParseError> {
    // Each probe is a closure so that later markers are only inspected when
    // the earlier ones did not match.
    let looks_like_cyclonedx = || {
        value
            .get("bomFormat")
            .and_then(Value::as_str)
            .is_some_and(|marker| marker.eq_ignore_ascii_case("cyclonedx"))
    };
    let looks_like_spdx = || value.get("spdxVersion").is_some();
    let looks_like_syft = || {
        value
            .get("schema")
            .and_then(|schema| schema.get("url"))
            .and_then(Value::as_str)
            .is_some_and(|schema_url| schema_url.contains("anchore.io/schema/syft"))
    };

    if looks_like_cyclonedx() {
        Ok(SbomFormat::CycloneDx)
    } else if looks_like_spdx() {
        Ok(SbomFormat::Spdx)
    } else if looks_like_syft() {
        Ok(SbomFormat::Syft)
    } else {
        Err(ParseError::UnknownFormat)
    }
}
pub fn parse(value: Value) -> Result<Sbom, ParseError> {
parse_with_format(value, None)
}
/// Parses an SBOM document with the parser for `hint`, or with the parser for
/// the auto-detected format when `hint` is `None`.
///
/// A `Some` hint skips detection entirely: the chosen parser is invoked even
/// if the document carries no marker, or the marker of a different format.
///
/// # Errors
///
/// Returns the error from [`detect_format`] when no hint is given and
/// detection fails, and otherwise whatever the selected parser returns.
pub fn parse_with_format(value: Value, hint: Option<SbomFormat>) -> Result<Sbom, ParseError> {
    // An explicit hint is taken as-is; detection only runs in its absence.
    let chosen = hint.map_or_else(|| detect_format(&value), Ok)?;

    match chosen {
        SbomFormat::CycloneDx => cyclonedx::CycloneDxParser::parse(value),
        SbomFormat::Spdx => spdx::SpdxParser::parse(value),
        SbomFormat::Syft => syft::SyftParser::parse(value),
    }
}
/// Infers the package [`Ecosystem`] from the `type` component of a package
/// URL (`pkg:type/namespace/name@version?qualifiers#subpath`).
///
/// Handling follows the purl specification in two respects:
///
/// * The type is case-insensitive with lowercase as its canonical form, so
///   it is lowercased before matching. `pkg:NPM/axios` therefore maps to
///   [`Ecosystem::Npm`], and unrecognised types are stored lowercased in
///   [`Ecosystem::Other`].
/// * Slashes directly after the scheme are ignored, so `pkg://npm/axios` is
///   treated like `pkg:npm/axios`.
///
/// Returns `None` when `purl` does not start with `pkg:` or when the type
/// component is empty.
pub(crate) fn ecosystem_from_purl(purl: &str) -> Option<Ecosystem> {
    let after_scheme = purl.strip_prefix("pkg:")?;
    // Parsers must accept `pkg://type/...` and ignore the extra slashes.
    let remainder = after_scheme.trim_start_matches('/');
    // The type runs up to the first `/`; `@` is also treated as a terminator
    // so a namespace-less `pkg:type@version` string still yields a type.
    let raw_type = remainder.split(['/', '@']).next()?;
    if raw_type.is_empty() {
        return None;
    }
    // Canonicalise once so both the known-type match and the `Other`
    // fallback see the lowercase form.
    let ty = raw_type.to_ascii_lowercase();
    Some(match ty.as_str() {
        "npm" => Ecosystem::Npm,
        "pypi" => Ecosystem::PyPI,
        "cargo" => Ecosystem::Cargo,
        "maven" => Ecosystem::Maven,
        "golang" => Ecosystem::Go,
        "gem" => Ecosystem::Gem,
        "nuget" => Ecosystem::NuGet,
        "composer" => Ecosystem::Composer,
        _ => Ecosystem::Other(ty),
    })
}
/// Removes, in place, every component whose ecosystem is exactly
/// `Ecosystem::Other("file")`.
///
/// The comparison is case-sensitive, and all remaining components keep their
/// relative order.
pub fn filter_file_components(sbom: &mut Sbom) {
    sbom.components.retain(|component| match &component.ecosystem {
        // Only the literal "file" pseudo-ecosystem is dropped.
        Ecosystem::Other(kind) => kind.as_str() != "file",
        _ => true,
    });
}
/// Maps a hash-algorithm label to a [`HashAlg`], ignoring ASCII case.
///
/// Both the hyphenated and compact spellings of the SHA family are accepted
/// (`SHA-256` and `sha256`, for example). Any label that is not recognised
/// is returned as [`HashAlg::Other`] holding the input exactly as given.
pub(crate) fn hash_alg(s: &str) -> HashAlg {
    // True when `s` equals any of the given spellings, ASCII-case-insensitively.
    let spelled_as = |spellings: &[&str]| {
        spellings
            .iter()
            .any(|candidate| s.eq_ignore_ascii_case(candidate))
    };

    if spelled_as(&["SHA-1", "SHA1"]) {
        HashAlg::Sha1
    } else if spelled_as(&["SHA-256", "SHA256"]) {
        HashAlg::Sha256
    } else if spelled_as(&["SHA-512", "SHA512"]) {
        HashAlg::Sha512
    } else if spelled_as(&["MD5"]) {
        HashAlg::Md5
    } else {
        HashAlg::Other(s.to_string())
    }
}
// Unit and property tests for format detection, parser dispatch, and the
// purl / hash-algorithm helpers defined in this module.
#[cfg(test)]
mod tests {
// Panicking shortcuts are acceptable in test code, so the clippy lints that
// flag them are silenced for this module only.
#![allow(
clippy::unwrap_used,
clippy::expect_used,
clippy::panic,
clippy::todo,
clippy::unimplemented
)]
use super::*;
use serde_json::json;
// --- detect_format -------------------------------------------------------
// A canonical `bomFormat` marker is recognised as CycloneDX.
#[test]
fn detects_cyclonedx() {
let v = json!({"bomFormat": "CycloneDX", "specVersion": "1.5"});
assert_eq!(detect_format(&v).unwrap(), SbomFormat::CycloneDx);
}
// The `bomFormat` comparison ignores ASCII case.
#[test]
fn detects_cyclonedx_case_insensitive() {
let v = json!({"bomFormat": "cyclonedx"});
assert_eq!(detect_format(&v).unwrap(), SbomFormat::CycloneDx);
}
// The presence of `spdxVersion` marks the document as SPDX.
#[test]
fn detects_spdx() {
let v = json!({"spdxVersion": "SPDX-2.3", "SPDXID": "SPDXRef-DOCUMENT"});
assert_eq!(detect_format(&v).unwrap(), SbomFormat::Spdx);
}
// A `schema.url` containing the Syft schema path marks the document as Syft.
#[test]
fn detects_syft() {
let v = json!({
"schema": {"version": "16.0.0", "url": "https://raw.githubusercontent.com/anchore/syft/main/internal/jsonschema/anchore.io/schema/syft/json/16.0.0/document.json"},
"artifacts": []
});
assert_eq!(detect_format(&v).unwrap(), SbomFormat::Syft);
}
// A document without any known marker is rejected with `UnknownFormat`.
#[test]
fn rejects_unknown() {
let v = json!({"foo": "bar"});
assert!(matches!(detect_format(&v), Err(ParseError::UnknownFormat)));
}
// --- parse_with_format ---------------------------------------------------
// With no hint, dispatch relies on auto-detection.
#[test]
fn parse_with_format_none_falls_back_to_detection() {
let v = json!({
"bomFormat": "CycloneDX",
"specVersion": "1.5",
"components": []
});
let sbom = parse_with_format(v, None).expect("auto-detect succeeds");
assert_eq!(sbom.format, SbomFormat::CycloneDx);
}
// The body carries no format marker: auto-detection fails, while an explicit
// SPDX hint still routes the same body to the SPDX parser.
#[test]
fn parse_with_format_hint_bypasses_detection() {
let v = json!({"foo": "bar"});
let auto = parse_with_format(v.clone(), None);
assert!(matches!(auto, Err(ParseError::UnknownFormat)));
let hinted = parse_with_format(v, Some(SbomFormat::Spdx))
.expect("SPDX parser tolerates an empty document");
assert_eq!(
hinted.format,
SbomFormat::Spdx,
"hint must steer dispatch into the SPDX parser regardless of the body"
);
}
// The body has a CycloneDX marker (and a `schema.url` that does not contain
// the Syft marker), yet a Syft hint must win over what detection would pick.
#[test]
fn parse_with_format_steers_to_chosen_parser_even_when_body_matches_a_different_format() {
let v = json!({
"bomFormat": "CycloneDX",
"specVersion": "1.5",
"components": [],
"schema": {"version": "16.0.0", "url": "https://example.invalid/"}
});
let hinted = parse_with_format(v, Some(SbomFormat::Syft))
.expect("Syft parser accepts an artifacts-less document");
assert_eq!(hinted.format, SbomFormat::Syft);
}
// --- ecosystem_from_purl -------------------------------------------------
// One purl per known type, one unrecognised type (`hex`), and one string
// that is not a purl at all.
#[test]
fn purl_ecosystem_inference() {
assert_eq!(
ecosystem_from_purl("pkg:npm/axios@1.14.0"),
Some(Ecosystem::Npm)
);
assert_eq!(
ecosystem_from_purl("pkg:pypi/requests@2.31.0"),
Some(Ecosystem::PyPI)
);
assert_eq!(
ecosystem_from_purl("pkg:cargo/serde@1.0.0"),
Some(Ecosystem::Cargo)
);
assert_eq!(
ecosystem_from_purl("pkg:maven/org.apache.commons/commons-lang3@3.12.0"),
Some(Ecosystem::Maven)
);
assert_eq!(
ecosystem_from_purl("pkg:golang/github.com/spf13/cobra@v1.8.0"),
Some(Ecosystem::Go)
);
assert_eq!(
ecosystem_from_purl("pkg:gem/rails@7.1.0"),
Some(Ecosystem::Gem)
);
assert_eq!(
ecosystem_from_purl("pkg:nuget/Newtonsoft.Json@13.0.3"),
Some(Ecosystem::NuGet)
);
assert_eq!(
ecosystem_from_purl("pkg:composer/symfony/console@v6.4.0"),
Some(Ecosystem::Composer)
);
assert_eq!(
ecosystem_from_purl("pkg:hex/phoenix@1.7.0"),
Some(Ecosystem::Other("hex".to_string()))
);
assert_eq!(ecosystem_from_purl("not-a-purl"), None);
}
// --- filter_file_components ----------------------------------------------
// Only `Other("file")` components are removed; other `Other(..)` ecosystems
// stay, and the survivors keep their original order.
#[test]
fn filter_file_components_drops_only_file_pseudo_components() {
use crate::model::{Component, Relationship};
// Builds a component with the given name and ecosystem and neutral
// values for every other field.
fn comp(name: &str, eco: Ecosystem) -> Component {
Component {
name: name.to_string(),
version: "1.0.0".to_string(),
ecosystem: eco,
purl: None,
licenses: Vec::new(),
supplier: None,
hashes: Vec::new(),
relationship: Relationship::Unknown,
source_url: None,
bom_ref: None,
}
}
let mut sbom = Sbom {
format: SbomFormat::Syft,
serial: None,
components: vec![
comp("axios", Ecosystem::Npm),
comp(".github/workflows/ci.yml", Ecosystem::Other("file".into())),
comp("requests", Ecosystem::PyPI),
comp("phoenix", Ecosystem::Other("hex".into())),
comp("Cargo.lock", Ecosystem::Other("file".into())),
],
};
filter_file_components(&mut sbom);
assert_eq!(
sbom.components.len(),
3,
"only the two file: components should be dropped"
);
let names: Vec<&str> = sbom.components.iter().map(|c| c.name.as_str()).collect();
assert_eq!(names, vec!["axios", "requests", "phoenix"]);
}
// Filtering an SBOM without file components must leave it equal to a
// snapshot taken beforehand.
#[test]
fn filter_file_components_is_a_noop_when_none_present() {
use crate::model::{Component, Relationship};
let mut sbom = Sbom {
format: SbomFormat::CycloneDx,
serial: None,
components: vec![Component {
name: "axios".into(),
version: "1.14.0".into(),
ecosystem: Ecosystem::Npm,
purl: Some("pkg:npm/axios@1.14.0".into()),
licenses: Vec::new(),
supplier: None,
hashes: Vec::new(),
relationship: Relationship::Unknown,
source_url: None,
bom_ref: None,
}],
};
let snapshot = sbom.clone();
filter_file_components(&mut sbom);
assert_eq!(sbom, snapshot);
}
// --- hash_alg ------------------------------------------------------------
// Known labels are matched regardless of case and hyphenation; unknown
// labels come back verbatim inside `HashAlg::Other`.
#[test]
fn hash_alg_normalization() {
assert_eq!(hash_alg("SHA-256"), HashAlg::Sha256);
assert_eq!(hash_alg("sha256"), HashAlg::Sha256);
assert_eq!(hash_alg("MD5"), HashAlg::Md5);
assert_eq!(hash_alg("BLAKE3"), HashAlg::Other("BLAKE3".to_string()));
}
// --- property tests ------------------------------------------------------
// These only assert the absence of panics; every result is discarded.
use proptest::prelude::*;
proptest! {
// Each property below runs with 1024 generated cases.
#![proptest_config(ProptestConfig::with_cases(1024))]
// Random byte strings: anything that happens to deserialize as JSON is
// pushed through the auto-detecting parse path.
#[test]
fn parse_pipeline_does_not_panic_on_arbitrary_bytes(bytes in proptest::collection::vec(any::<u8>(), 0..2048)) {
let _ = serde_json::from_slice::<serde_json::Value>(&bytes)
.ok()
.and_then(|v| parse_with_format(v, None).ok());
}
// Structurally valid but arbitrary JSON through the auto-detecting path.
#[test]
fn parse_pipeline_does_not_panic_on_arbitrary_json(v in arb_json()) {
let _ = parse_with_format(v, None);
}
// Arbitrary JSON forced into each of the three parsers via a format hint.
#[test]
fn parse_pipeline_does_not_panic_with_format_hint(v in arb_json(), hint_idx in 0u8..3) {
let hint = match hint_idx {
0 => Some(SbomFormat::CycloneDx),
1 => Some(SbomFormat::Spdx),
_ => Some(SbomFormat::Syft),
};
let _ = parse_with_format(v, hint);
}
// Arbitrary strings must never panic the purl helper.
#[test]
fn ecosystem_from_purl_does_not_panic(s in any::<String>()) {
let _ = ecosystem_from_purl(&s);
}
// Arbitrary strings must never panic the hash-label helper.
#[test]
fn hash_alg_does_not_panic(s in any::<String>()) {
let _ = hash_alg(&s);
}
}
// Strategy producing arbitrary JSON values. Leaves are null, booleans, i64
// numbers and arbitrary strings; containers are arrays and objects with
// 0..6 entries each. The `prop_recursive` arguments (3, 32, 8) are
// proptest's depth, desired size and expected branch size.
fn arb_json() -> impl Strategy<Value = serde_json::Value> {
let leaf = prop_oneof![
Just(serde_json::Value::Null),
any::<bool>().prop_map(serde_json::Value::Bool),
any::<i64>().prop_map(|n| serde_json::Value::Number(n.into())),
".*".prop_map(serde_json::Value::String),
];
leaf.prop_recursive(3, 32, 8, |inner| {
prop_oneof![
proptest::collection::vec(inner.clone(), 0..6).prop_map(serde_json::Value::Array),
proptest::collection::hash_map(".*", inner, 0..6)
.prop_map(|m| serde_json::Value::Object(m.into_iter().collect())),
]
})
}
}