Skip to main content

sbom_tools/parsers/
mod.rs

1//! SBOM format parsers.
2//!
3//! This module provides parsers for `CycloneDX` and SPDX SBOM formats,
4//! converting them to the normalized intermediate representation.
5//!
6//! ## Format Detection
7//!
8//! The module uses a confidence-based detection system to identify SBOM formats:
9//! - Each parser reports a confidence score (0.0-1.0) for handling content
10//! - The parser with the highest confidence is selected
11//! - Detection includes format variant (JSON, XML, tag-value) and version information
12//!
13//! ## Usage
14//!
15//! ```no_run
16//! use sbom_tools::parsers::{parse_sbom, detect_format};
17//! use std::path::Path;
18//!
19//! // Auto-detect and parse
20//! let sbom = parse_sbom(Path::new("sbom.json")).unwrap();
21//!
22//! // Check format before parsing
23//! let content = std::fs::read_to_string("sbom.json").unwrap();
24//! if let Some(detection) = detect_format(&content) {
25//!     println!("Detected: {} ({})", detection.format_name, detection.confidence);
26//! }
27//! ```
28
29mod cyclonedx;
30mod detection;
31mod spdx;
32pub mod streaming;
33mod traits;
34
35pub use cyclonedx::CycloneDxParser;
36pub use detection::{DetectionResult, FormatDetector, MIN_CONFIDENCE_THRESHOLD, ParserKind};
37pub use spdx::SpdxParser;
38pub use streaming::{ParseEvent, ParseProgress, StreamingConfig, StreamingParser};
39pub use traits::{FormatConfidence, FormatDetection, ParseError, SbomParser};
40
41use crate::model::NormalizedSbom;
42use std::path::Path;
43
/// Summary of a successful SBOM format detection.
///
/// Values of this type are produced by [`detect_format`]; every field is
/// public so callers can inspect the outcome directly.
#[derive(Clone, Debug)]
pub struct DetectedFormat {
    /// Human-readable name of the format that was detected.
    pub format_name: String,
    /// How confident the detector is, on a 0.0-1.0 scale.
    pub confidence: f32,
    /// Encoding variant of the format (e.g., "JSON", "XML", "tag-value"), when known.
    pub variant: Option<String>,
    /// Format version, when it could be determined.
    pub version: Option<String>,
    /// Warnings raised while detecting the format.
    pub warnings: Vec<String>,
}
58
59/// Detect SBOM format from content without parsing
60///
61/// Returns None if no format could be detected with sufficient confidence.
62#[must_use]
63pub fn detect_format(content: &str) -> Option<DetectedFormat> {
64    let detector = FormatDetector::new();
65    let result = detector.detect_from_content(content);
66
67    if result.can_parse() {
68        Some(DetectedFormat {
69            format_name: result
70                .parser
71                .map(|p| p.name().to_string())
72                .unwrap_or_default(),
73            confidence: result.confidence.value(),
74            variant: result.variant,
75            version: result.version,
76            warnings: result.warnings,
77        })
78    } else {
79        None
80    }
81}
82
/// Upper bound, in bytes, on the size of an SBOM file: 512 MB (512 x 2^20 bytes).
///
/// Files larger than this should use the streaming parser.
const MAX_SBOM_FILE_SIZE: u64 = 512 << 20;
85
86/// Detect SBOM format from file content and parse accordingly
87///
88/// Uses confidence-based detection to select the best parser.
89/// Returns an error if the file exceeds [`MAX_SBOM_FILE_SIZE`] to prevent OOM.
90/// For very large files, use the streaming parser instead.
91pub fn parse_sbom(path: &Path) -> Result<NormalizedSbom, ParseError> {
92    let metadata = std::fs::metadata(path).map_err(|e| ParseError::IoError(e.to_string()))?;
93    if metadata.len() > MAX_SBOM_FILE_SIZE {
94        return Err(ParseError::IoError(format!(
95            "SBOM file is {} MB, exceeding the {} MB limit. Use the streaming parser for large files.",
96            metadata.len() / (1024 * 1024),
97            MAX_SBOM_FILE_SIZE / (1024 * 1024),
98        )));
99    }
100    let content = std::fs::read_to_string(path).map_err(|e| ParseError::IoError(e.to_string()))?;
101    parse_sbom_str(&content)
102}
103
104/// Parse SBOM from string content
105///
106/// Uses confidence-based detection to select the best parser.
107pub fn parse_sbom_str(content: &str) -> Result<NormalizedSbom, ParseError> {
108    let detector = FormatDetector::new();
109    detector.parse_str(content)
110}
111
112// Legacy detection functions - kept for backwards compatibility but deprecated
113
114/// Check if content looks like `CycloneDX`
115#[deprecated(
116    since = "0.2.0",
117    note = "Use detect_format() or CycloneDxParser::detect() instead"
118)]
119#[must_use]
120pub fn is_cyclonedx(content: &str) -> bool {
121    CycloneDxParser::new().can_parse(content)
122}
123
124/// Check if content looks like SPDX
125#[deprecated(
126    since = "0.2.0",
127    note = "Use detect_format() or SpdxParser::detect() instead"
128)]
129#[must_use]
130pub fn is_spdx(content: &str) -> bool {
131    SpdxParser::new().can_parse(content)
132}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// Run detection on `content`, panicking if no format is detected.
    fn detect(content: &str) -> DetectedFormat {
        detect_format(content).expect("Should detect format")
    }

    #[test]
    fn test_detect_cyclonedx_json() {
        let found = detect(r#"{"bomFormat": "CycloneDX", "specVersion": "1.5"}"#);

        assert_eq!(found.format_name, "CycloneDX");
        assert!(found.confidence >= 0.75);
        assert_eq!(found.variant.as_deref(), Some("JSON"));
        assert_eq!(found.version.as_deref(), Some("1.5"));
    }

    #[test]
    fn test_detect_spdx_json() {
        let found = detect(r#"{"spdxVersion": "SPDX-2.3", "SPDXID": "SPDXRef-DOCUMENT"}"#);

        assert_eq!(found.format_name, "SPDX");
        assert!(found.confidence >= 0.75);
        assert_eq!(found.variant.as_deref(), Some("JSON"));
        assert_eq!(found.version.as_deref(), Some("2.3"));
    }

    #[test]
    fn test_detect_spdx_tag_value() {
        let found =
            detect("SPDXVersion: SPDX-2.3\nDataLicense: CC0-1.0\nSPDXID: SPDXRef-DOCUMENT");

        assert_eq!(found.format_name, "SPDX");
        assert!(found.confidence >= 0.75);
        assert_eq!(found.variant.as_deref(), Some("tag-value"));
        assert_eq!(found.version.as_deref(), Some("2.3"));
    }

    #[test]
    fn test_detect_unknown_format() {
        // Valid JSON that carries no SBOM markers must not be detected.
        let unrelated = r#"{"some": "random", "json": "content"}"#;
        assert!(detect_format(unrelated).is_none());
    }

    #[test]
    fn test_confidence_based_selection() {
        // For CycloneDX content, the CycloneDX parser must score strictly higher than SPDX.
        let sample = r#"{"bomFormat": "CycloneDX", "specVersion": "1.6", "components": []}"#;

        let cdx_score = CycloneDxParser::new().confidence(sample).value();
        let spdx_score = SpdxParser::new().confidence(sample).value();

        assert!(cdx_score > spdx_score);
    }
}