sbom_tools/parsers/
detection.rs1use super::traits::{FormatConfidence, FormatDetection, ParseError, SbomParser};
8use super::{CycloneDxParser, SpdxParser};
9use crate::model::NormalizedSbom;
10use std::io::BufRead;
11
/// Default confidence cut-off for format detection.
///
/// [`FormatDetector::new`] uses this value as its threshold, and
/// [`DetectionResult::can_parse`] compares a detection's confidence against it.
pub const MIN_CONFIDENCE_THRESHOLD: f32 = 0.25;
15
/// Identifies which of the two supported SBOM parsers a detection selected.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParserKind {
    /// The CycloneDX parser.
    CycloneDx,
    /// The SPDX parser.
    Spdx,
}

impl ParserKind {
    /// Returns the display name of the format: `"CycloneDX"` or `"SPDX"`.
    #[must_use]
    pub const fn name(&self) -> &'static str {
        match *self {
            Self::Spdx => "SPDX",
            Self::CycloneDx => "CycloneDX",
        }
    }
}
33
34#[derive(Debug, Clone)]
36pub struct DetectionResult {
37 pub parser: Option<ParserKind>,
39 pub confidence: FormatConfidence,
41 pub variant: Option<String>,
43 pub version: Option<String>,
45 pub warnings: Vec<String>,
47}
48
49impl DetectionResult {
50 #[must_use]
52 pub fn unknown(reason: &str) -> Self {
53 Self {
54 parser: None,
55 confidence: FormatConfidence::NONE,
56 variant: None,
57 version: None,
58 warnings: vec![reason.to_string()],
59 }
60 }
61
62 #[must_use]
64 pub fn cyclonedx(detection: FormatDetection) -> Self {
65 Self {
66 parser: Some(ParserKind::CycloneDx),
67 confidence: detection.confidence,
68 variant: detection.variant,
69 version: detection.version,
70 warnings: detection.warnings,
71 }
72 }
73
74 #[must_use]
76 pub fn spdx(detection: FormatDetection) -> Self {
77 Self {
78 parser: Some(ParserKind::Spdx),
79 confidence: detection.confidence,
80 variant: detection.variant,
81 version: detection.version,
82 warnings: detection.warnings,
83 }
84 }
85
86 #[must_use]
88 pub fn can_parse(&self) -> bool {
89 self.parser.is_some() && self.confidence.value() >= MIN_CONFIDENCE_THRESHOLD
90 }
91}
92
93pub struct FormatDetector {
97 cyclonedx: CycloneDxParser,
98 spdx: SpdxParser,
99 min_confidence: f32,
100}
101
102impl Default for FormatDetector {
103 fn default() -> Self {
104 Self::new()
105 }
106}
107
108impl FormatDetector {
109 #[must_use]
111 pub const fn new() -> Self {
112 Self {
113 cyclonedx: CycloneDxParser::new(),
114 spdx: SpdxParser::new(),
115 min_confidence: MIN_CONFIDENCE_THRESHOLD,
116 }
117 }
118
119 #[must_use]
121 pub const fn with_threshold(min_confidence: f32) -> Self {
122 Self {
123 cyclonedx: CycloneDxParser::new(),
124 spdx: SpdxParser::new(),
125 min_confidence: min_confidence.clamp(0.0, 1.0),
126 }
127 }
128
129 #[must_use]
133 pub fn detect_from_content(&self, content: &str) -> DetectionResult {
134 let cdx_detection = self.cyclonedx.detect(content);
135 let spdx_detection = self.spdx.detect(content);
136
137 self.select_best_parser(cdx_detection, spdx_detection)
138 }
139
140 #[must_use]
145 pub fn detect_from_peek(&self, peek: &[u8]) -> DetectionResult {
146 let first_char = peek.iter().find(|&&b| !b.is_ascii_whitespace());
148
149 match first_char {
150 Some(b'{' | b'<') => {
151 let preview = String::from_utf8_lossy(peek);
153
154 let cdx_detection = self.cyclonedx.detect(&preview);
156 let spdx_detection = self.spdx.detect(&preview);
157
158 self.select_best_parser(cdx_detection, spdx_detection)
159 }
160 Some(c) if c.is_ascii_alphabetic() => {
161 let preview = String::from_utf8_lossy(peek);
163 let cdx_detection = self.cyclonedx.detect(&preview);
164 let spdx_detection = self.spdx.detect(&preview);
165
166 self.select_best_parser(cdx_detection, spdx_detection)
167 }
168 Some(_) => DetectionResult::unknown("Unrecognized content format"),
169 None => DetectionResult::unknown("Empty content"),
170 }
171 }
172
173 fn select_best_parser(
178 &self,
179 cdx_detection: FormatDetection,
180 spdx_detection: FormatDetection,
181 ) -> DetectionResult {
182 let cdx_conf = cdx_detection.confidence.value();
183 let spdx_conf = spdx_detection.confidence.value();
184
185 tracing::debug!(
187 "Format detection: CycloneDX={:.2}, SPDX={:.2}, threshold={:.2}",
188 cdx_conf,
189 spdx_conf,
190 self.min_confidence
191 );
192
193 if cdx_conf >= self.min_confidence && cdx_conf > spdx_conf {
195 DetectionResult::cyclonedx(cdx_detection)
196 } else if spdx_conf >= self.min_confidence {
197 DetectionResult::spdx(spdx_detection)
198 } else {
199 let mut result =
201 DetectionResult::unknown("Could not detect SBOM format with sufficient confidence");
202
203 if cdx_conf > 0.0 {
205 result.warnings.push(format!(
206 "CycloneDX detection: {:.0}% confidence (threshold: {:.0}%)",
207 cdx_conf * 100.0,
208 self.min_confidence * 100.0
209 ));
210 }
211 if spdx_conf > 0.0 {
212 result.warnings.push(format!(
213 "SPDX detection: {:.0}% confidence (threshold: {:.0}%)",
214 spdx_conf * 100.0,
215 self.min_confidence * 100.0
216 ));
217 }
218
219 result
220 }
221 }
222
223 pub fn parse_str(&self, content: &str) -> Result<NormalizedSbom, ParseError> {
227 let detection = self.detect_from_content(content);
228
229 for warning in &detection.warnings {
231 tracing::warn!("{}", warning);
232 }
233
234 match detection.parser {
235 Some(ParserKind::CycloneDx) if detection.can_parse() => {
236 self.cyclonedx.parse_str(content)
237 }
238 Some(ParserKind::Spdx) if detection.can_parse() => self.spdx.parse_str(content),
239 _ => Err(ParseError::UnknownFormat(
240 "Could not detect SBOM format. Expected CycloneDX or SPDX.".to_string(),
241 )),
242 }
243 }
244
245 pub fn parse_reader<R: BufRead>(&self, mut reader: R) -> Result<NormalizedSbom, ParseError> {
250 let peek = reader
252 .fill_buf()
253 .map_err(|e| ParseError::IoError(e.to_string()))?;
254
255 if peek.is_empty() {
256 return Err(ParseError::IoError("Empty content".to_string()));
257 }
258
259 let detection = self.detect_from_peek(peek);
260
261 for warning in &detection.warnings {
263 tracing::warn!("{}", warning);
264 }
265
266 match detection.parser {
267 Some(ParserKind::CycloneDx) if detection.can_parse() => {
268 let is_xml = detection.variant.as_deref() == Some("XML");
270 if is_xml {
271 let mut content = String::new();
272 reader
273 .read_to_string(&mut content)
274 .map_err(|e| ParseError::IoError(e.to_string()))?;
275 self.cyclonedx.parse_str(&content)
276 } else {
277 self.cyclonedx.parse_json_reader(reader)
278 }
279 }
280 Some(ParserKind::Spdx) if detection.can_parse() => {
281 let needs_string =
283 matches!(detection.variant.as_deref(), Some("tag-value" | "RDF"));
284 if needs_string {
285 let mut content = String::new();
286 reader
287 .read_to_string(&mut content)
288 .map_err(|e| ParseError::IoError(e.to_string()))?;
289 self.spdx.parse_str(&content)
290 } else {
291 self.spdx.parse_json_reader(reader)
292 }
293 }
294 _ => Err(ParseError::UnknownFormat(
295 "Could not detect SBOM format. Expected CycloneDX or SPDX.".to_string(),
296 )),
297 }
298 }
299
300 #[must_use]
302 pub const fn cyclonedx_parser(&self) -> &CycloneDxParser {
303 &self.cyclonedx
304 }
305
306 #[must_use]
308 pub const fn spdx_parser(&self) -> &SpdxParser {
309 &self.spdx
310 }
311}
312
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs content-based detection with a default-configured detector.
    fn detect(content: &str) -> DetectionResult {
        FormatDetector::new().detect_from_content(content)
    }

    /// Asserts that detection neither picked a parser nor allows parsing.
    fn assert_undetected(result: &DetectionResult) {
        assert!(result.parser.is_none());
        assert!(!result.can_parse());
    }

    #[test]
    fn test_detect_cyclonedx_json() {
        let result = detect(r#"{"bomFormat": "CycloneDX", "specVersion": "1.5"}"#);

        assert_eq!(result.parser, Some(ParserKind::CycloneDx));
        assert!(result.can_parse());
        assert_eq!(result.variant.as_deref(), Some("JSON"));
    }

    #[test]
    fn test_detect_spdx_json() {
        let result = detect(r#"{"spdxVersion": "SPDX-2.3", "SPDXID": "SPDXRef-DOCUMENT"}"#);

        assert_eq!(result.parser, Some(ParserKind::Spdx));
        assert!(result.can_parse());
        assert_eq!(result.variant.as_deref(), Some("JSON"));
    }

    #[test]
    fn test_detect_from_peek_cyclonedx() {
        let peek = br#"{"bomFormat": "CycloneDX", "specVersion": "1.5", "components": []}"#;
        let result = FormatDetector::new().detect_from_peek(peek);

        assert_eq!(result.parser, Some(ParserKind::CycloneDx));
        assert!(result.can_parse());
    }

    #[test]
    fn test_detect_unknown_format() {
        // Valid JSON that carries no SBOM markers.
        let result = detect(r#"{"some": "random", "json": "content"}"#);

        assert_undetected(&result);
    }

    #[test]
    fn test_no_default_bias() {
        // Ambiguous content must not fall back to either format.
        let result = detect(r#"{"data": "test"}"#);

        assert_undetected(&result);
    }

    #[test]
    fn test_threshold_enforcement() {
        let strict = FormatDetector::with_threshold(0.5);
        let result = strict.detect_from_content(r#"{"specVersion": "1.5", "components": []}"#);

        // Only checked when the reported confidence is below the custom
        // threshold; such a result must not be parseable.
        let below_threshold = result.confidence.value() < 0.5;
        if below_threshold {
            assert!(!result.can_parse());
        }
    }
}