use super::traits::{FormatConfidence, FormatDetection, ParseError, SbomParser};
use super::{CycloneDxParser, Spdx3Parser, SpdxParser};
use crate::model::NormalizedSbom;
use std::io::BufRead;
/// Default minimum confidence a detection must reach to be considered usable.
///
/// This is the threshold installed by `FormatDetector::new` and the value
/// `DetectionResult::can_parse` compares against.
pub const MIN_CONFIDENCE_THRESHOLD: f32 = 0.25;
/// Identifies which of the available SBOM parsers a detection selected.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParserKind {
    /// The CycloneDX parser.
    CycloneDx,
    /// The SPDX parser.
    Spdx,
    /// The SPDX 3 parser.
    Spdx3,
}

impl ParserKind {
    /// Returns the display name of the format this parser kind stands for.
    ///
    /// [`ParserKind::Spdx`] and [`ParserKind::Spdx3`] both report `"SPDX"`.
    #[must_use]
    pub const fn name(&self) -> &'static str {
        match *self {
            Self::CycloneDx => "CycloneDX",
            Self::Spdx => "SPDX",
            Self::Spdx3 => "SPDX",
        }
    }
}
/// Outcome of a format detection: the selected parser (if any) together with
/// the details reported by the detector that produced it.
#[derive(Debug, Clone)]
pub struct DetectionResult {
/// Parser selected for the content, or `None` when no format was recognised.
pub parser: Option<ParserKind>,
/// Confidence reported for the detection.
pub confidence: FormatConfidence,
/// Format variant reported by the detector, if any. Code in this file
/// checks it for the values `"XML"`, `"tag-value"` and `"RDF"`.
pub variant: Option<String>,
/// Format version reported by the detector, if any.
pub version: Option<String>,
/// Warning messages collected during detection.
pub warnings: Vec<String>,
}
impl DetectionResult {
#[must_use]
pub fn unknown(reason: &str) -> Self {
Self {
parser: None,
confidence: FormatConfidence::NONE,
variant: None,
version: None,
warnings: vec![reason.to_string()],
}
}
#[must_use]
pub fn cyclonedx(detection: FormatDetection) -> Self {
Self {
parser: Some(ParserKind::CycloneDx),
confidence: detection.confidence,
variant: detection.variant,
version: detection.version,
warnings: detection.warnings,
}
}
#[must_use]
pub fn spdx(detection: FormatDetection) -> Self {
Self {
parser: Some(ParserKind::Spdx),
confidence: detection.confidence,
variant: detection.variant,
version: detection.version,
warnings: detection.warnings,
}
}
#[must_use]
pub fn spdx3(detection: FormatDetection) -> Self {
Self {
parser: Some(ParserKind::Spdx3),
confidence: detection.confidence,
variant: detection.variant,
version: detection.version,
warnings: detection.warnings,
}
}
#[must_use]
pub fn can_parse(&self) -> bool {
self.parser.is_some() && self.confidence.value() >= MIN_CONFIDENCE_THRESHOLD
}
}
/// Detects which SBOM format a piece of content is in and dispatches it to
/// the matching parser.
pub struct FormatDetector {
/// Parser used to detect and parse CycloneDX content.
cyclonedx: CycloneDxParser,
/// Parser used to detect and parse SPDX content.
spdx: SpdxParser,
/// Parser used to detect and parse SPDX 3 content.
spdx3: Spdx3Parser,
/// Minimum confidence a detection must reach before its parser is selected.
min_confidence: f32,
}
impl Default for FormatDetector {
fn default() -> Self {
Self::new()
}
}
impl FormatDetector {
#[must_use]
pub const fn new() -> Self {
Self {
cyclonedx: CycloneDxParser::new(),
spdx: SpdxParser::new(),
spdx3: Spdx3Parser::new(),
min_confidence: MIN_CONFIDENCE_THRESHOLD,
}
}
#[must_use]
pub const fn with_threshold(min_confidence: f32) -> Self {
Self {
cyclonedx: CycloneDxParser::new(),
spdx: SpdxParser::new(),
spdx3: Spdx3Parser::new(),
min_confidence: min_confidence.clamp(0.0, 1.0),
}
}
/// Detects the SBOM format of `content`.
///
/// The SPDX 3 detector runs first. If its confidence reaches both
/// [`FormatConfidence::HIGH`] and the configured minimum, SPDX 3 is selected
/// without consulting the other detectors. Otherwise the CycloneDX and SPDX
/// detectors run as well; SPDX 3 is still selected when it meets the
/// configured minimum and strictly beats both of them, and in every other
/// case the choice falls to the CycloneDX/SPDX comparison.
#[must_use]
pub fn detect_from_content(&self, content: &str) -> DetectionResult {
    let spdx3_detection = self.spdx3.detect(content);
    let spdx3_conf = spdx3_detection.confidence.value();
    // The configured threshold also gates the high-confidence shortcut;
    // without this, a detector configured stricter than HIGH would still
    // accept an SPDX 3 detection below its own minimum.
    let spdx3_meets_threshold = spdx3_conf >= self.min_confidence;

    if spdx3_meets_threshold && spdx3_conf >= FormatConfidence::HIGH.value() {
        return DetectionResult::spdx3(spdx3_detection);
    }

    let cdx_detection = self.cyclonedx.detect(content);
    let spdx_detection = self.spdx.detect(content);

    // Ties go to the CycloneDX/SPDX comparison, hence the strict `>`.
    if spdx3_meets_threshold
        && spdx3_conf > cdx_detection.confidence.value()
        && spdx3_conf > spdx_detection.confidence.value()
    {
        return DetectionResult::spdx3(spdx3_detection);
    }

    self.select_best_parser(cdx_detection, spdx_detection)
}
/// Detects the SBOM format from the leading bytes of a stream.
///
/// The first non-whitespace byte decides how `peek` is examined:
///
/// * `{` or `<`: the bytes are decoded lossily as UTF-8 and run through
///   [`Self::detect_from_content`].
/// * an ASCII letter: only the CycloneDX and SPDX detectors are consulted.
/// * any other byte: the result is unknown ("Unrecognized content format").
/// * no such byte at all: the result is unknown ("Empty content").
#[must_use]
pub fn detect_from_peek(&self, peek: &[u8]) -> DetectionResult {
    let leading = peek
        .iter()
        .copied()
        .find(|byte| !byte.is_ascii_whitespace());
    let Some(first) = leading else {
        return DetectionResult::unknown("Empty content");
    };

    match first {
        b'{' | b'<' => self.detect_from_content(&String::from_utf8_lossy(peek)),
        byte if byte.is_ascii_alphabetic() => {
            let text = String::from_utf8_lossy(peek);
            let cdx_detection = self.cyclonedx.detect(&text);
            let spdx_detection = self.spdx.detect(&text);
            self.select_best_parser(cdx_detection, spdx_detection)
        }
        _ => DetectionResult::unknown("Unrecognized content format"),
    }
}
/// Chooses between a CycloneDX and an SPDX detection.
///
/// CycloneDX wins when it meets the configured minimum and is strictly more
/// confident than SPDX; otherwise SPDX wins when it meets the minimum. When
/// neither qualifies the result is unknown, with one extra warning for each
/// detector that reported a non-zero confidence.
fn select_best_parser(
    &self,
    cdx_detection: FormatDetection,
    spdx_detection: FormatDetection,
) -> DetectionResult {
    let threshold = self.min_confidence;
    let cdx_conf = cdx_detection.confidence.value();
    let spdx_conf = spdx_detection.confidence.value();

    tracing::debug!(
        "Format detection: CycloneDX={:.2}, SPDX={:.2}, threshold={:.2}",
        cdx_conf,
        spdx_conf,
        threshold
    );

    if cdx_conf >= threshold && cdx_conf > spdx_conf {
        return DetectionResult::cyclonedx(cdx_detection);
    }
    if spdx_conf >= threshold {
        return DetectionResult::spdx(spdx_detection);
    }

    // Neither detector qualified: report how close each one came.
    let cdx_note = (cdx_conf > 0.0).then(|| {
        format!(
            "CycloneDX detection: {:.0}% confidence (threshold: {:.0}%)",
            cdx_conf * 100.0,
            threshold * 100.0
        )
    });
    let spdx_note = (spdx_conf > 0.0).then(|| {
        format!(
            "SPDX detection: {:.0}% confidence (threshold: {:.0}%)",
            spdx_conf * 100.0,
            threshold * 100.0
        )
    });

    let mut result =
        DetectionResult::unknown("Could not detect SBOM format with sufficient confidence");
    result.warnings.extend(cdx_note.into_iter().chain(spdx_note));
    result
}
/// Detects the format of `content` and parses it with the matching parser.
///
/// Every warning produced by detection is logged through `tracing::warn!`
/// before parsing starts.
///
/// # Errors
///
/// Returns [`ParseError::UnknownFormat`] when no parser was selected or the
/// detection confidence is below this detector's configured minimum.
/// Otherwise returns whatever the selected parser returns.
pub fn parse_str(&self, content: &str) -> Result<NormalizedSbom, ParseError> {
    let detection = self.detect_from_content(content);
    for warning in &detection.warnings {
        tracing::warn!("{}", warning);
    }

    // Gate on this detector's own threshold. `DetectionResult::can_parse`
    // compares against the global default, which made a detector built with
    // a lower custom threshold detect a format and then refuse to parse it.
    let accepted = detection.confidence.value() >= self.min_confidence;

    match detection.parser.filter(|_| accepted) {
        Some(ParserKind::CycloneDx) => self.cyclonedx.parse_str(content),
        Some(ParserKind::Spdx) => self.spdx.parse_str(content),
        Some(ParserKind::Spdx3) => self.spdx3.parse_str(content),
        None => Err(ParseError::UnknownFormat(
            "Could not detect SBOM format. Expected CycloneDX or SPDX.".to_string(),
        )),
    }
}
pub fn parse_reader<R: BufRead>(&self, mut reader: R) -> Result<NormalizedSbom, ParseError> {
let peek = reader
.fill_buf()
.map_err(|e| ParseError::IoError(e.to_string()))?;
if peek.is_empty() {
return Err(ParseError::IoError("Empty content".to_string()));
}
let detection = self.detect_from_peek(peek);
for warning in &detection.warnings {
tracing::warn!("{}", warning);
}
match detection.parser {
Some(ParserKind::CycloneDx) if detection.can_parse() => {
let is_xml = detection.variant.as_deref() == Some("XML");
if is_xml {
let mut content = String::new();
reader
.read_to_string(&mut content)
.map_err(|e| ParseError::IoError(e.to_string()))?;
self.cyclonedx.parse_str(&content)
} else {
self.cyclonedx.parse_json_reader(reader)
}
}
Some(ParserKind::Spdx) if detection.can_parse() => {
let needs_string =
matches!(detection.variant.as_deref(), Some("tag-value" | "RDF"));
if needs_string {
let mut content = String::new();
reader
.read_to_string(&mut content)
.map_err(|e| ParseError::IoError(e.to_string()))?;
self.spdx.parse_str(&content)
} else {
self.spdx.parse_json_reader(reader)
}
}
Some(ParserKind::Spdx3) if detection.can_parse() => {
let mut content = String::new();
reader
.read_to_string(&mut content)
.map_err(|e| ParseError::IoError(e.to_string()))?;
self.spdx3.parse_str(&content)
}
_ => Err(ParseError::UnknownFormat(
"Could not detect SBOM format. Expected CycloneDX or SPDX.".to_string(),
)),
}
}
#[must_use]
pub const fn cyclonedx_parser(&self) -> &CycloneDxParser {
&self.cyclonedx
}
#[must_use]
pub const fn spdx_parser(&self) -> &SpdxParser {
&self.spdx
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// JSON carrying the CycloneDX `bomFormat` marker selects the CycloneDX parser.
    #[test]
    fn test_detect_cyclonedx_json() {
        let input = r#"{"bomFormat": "CycloneDX", "specVersion": "1.5"}"#;
        let detection = FormatDetector::new().detect_from_content(input);

        assert_eq!(detection.parser, Some(ParserKind::CycloneDx));
        assert!(detection.can_parse());
        assert_eq!(detection.variant.as_deref(), Some("JSON"));
    }

    /// JSON carrying `spdxVersion` and `SPDXID` selects the SPDX parser.
    #[test]
    fn test_detect_spdx_json() {
        let input = r#"{"spdxVersion": "SPDX-2.3", "SPDXID": "SPDXRef-DOCUMENT"}"#;
        let detection = FormatDetector::new().detect_from_content(input);

        assert_eq!(detection.parser, Some(ParserKind::Spdx));
        assert!(detection.can_parse());
        assert_eq!(detection.variant.as_deref(), Some("JSON"));
    }

    /// Detection from raw leading bytes recognises CycloneDX as well.
    #[test]
    fn test_detect_from_peek_cyclonedx() {
        let bytes = br#"{"bomFormat": "CycloneDX", "specVersion": "1.5", "components": []}"#;
        let detection = FormatDetector::new().detect_from_peek(bytes);

        assert_eq!(detection.parser, Some(ParserKind::CycloneDx));
        assert!(detection.can_parse());
    }

    /// JSON without any SBOM markers selects no parser.
    #[test]
    fn test_detect_unknown_format() {
        let input = r#"{"some": "random", "json": "content"}"#;
        let detection = FormatDetector::new().detect_from_content(input);

        assert_eq!(detection.parser, None);
        assert!(!detection.can_parse());
    }

    /// Unrecognised content must not fall back to some default parser.
    #[test]
    fn test_no_default_bias() {
        let input = r#"{"data": "test"}"#;
        let detection = FormatDetector::new().detect_from_content(input);

        assert_eq!(detection.parser, None);
        assert!(!detection.can_parse());
    }

    /// With a 0.5 threshold, a detection below 0.5 must not be parseable.
    #[test]
    fn test_threshold_enforcement() {
        let strict = FormatDetector::with_threshold(0.5);
        let input = r#"{"specVersion": "1.5", "components": []}"#;
        let detection = strict.detect_from_content(input);

        assert!(detection.confidence.value() >= 0.5 || !detection.can_parse());
    }
}