// sbom_tools/quality/metrics.rs
1//! Quality metrics for SBOM assessment.
2//!
3//! Provides detailed metrics for different aspects of SBOM quality.
4
5use std::collections::{BTreeMap, HashMap, HashSet};
6
7use crate::model::{
8    CompletenessDeclaration, CreatorType, EolStatus, ExternalRefType, HashAlgorithm,
9    NormalizedSbom, StalenessLevel,
10};
11use serde::{Deserialize, Serialize};
12
/// Overall completeness metrics for an SBOM
///
/// Percentage fields are computed over `total_components` and fall in the
/// 0-100 range; see [`CompletenessMetrics::from_sbom`] for derivation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompletenessMetrics {
    /// Percentage of components with versions (0-100)
    pub components_with_version: f32,
    /// Percentage of components with PURLs (0-100)
    pub components_with_purl: f32,
    /// Percentage of components with CPEs (0-100)
    pub components_with_cpe: f32,
    /// Percentage of components with suppliers (0-100)
    pub components_with_supplier: f32,
    /// Percentage of components with hashes (0-100)
    pub components_with_hashes: f32,
    /// Percentage of components with declared or concluded licenses (0-100)
    pub components_with_licenses: f32,
    /// Percentage of components with descriptions (0-100)
    pub components_with_description: f32,
    /// Whether document has creator information
    pub has_creator_info: bool,
    /// Whether document has timestamp (`from_sbom` always sets `true` on a
    /// non-empty SBOM — the normalized model guarantees a timestamp)
    pub has_timestamp: bool,
    /// Whether document has serial number/ID
    pub has_serial_number: bool,
    /// Total component count
    pub total_components: usize,
}
39
40impl CompletenessMetrics {
41    /// Calculate completeness metrics from an SBOM
42    #[must_use]
43    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
44        let total = sbom.components.len();
45        if total == 0 {
46            return Self::empty();
47        }
48
49        let mut with_version = 0;
50        let mut with_purl = 0;
51        let mut with_cpe = 0;
52        let mut with_supplier = 0;
53        let mut with_hashes = 0;
54        let mut with_licenses = 0;
55        let mut with_description = 0;
56
57        for comp in sbom.components.values() {
58            if comp.version.is_some() {
59                with_version += 1;
60            }
61            if comp.identifiers.purl.is_some() {
62                with_purl += 1;
63            }
64            if !comp.identifiers.cpe.is_empty() {
65                with_cpe += 1;
66            }
67            if comp.supplier.is_some() {
68                with_supplier += 1;
69            }
70            if !comp.hashes.is_empty() {
71                with_hashes += 1;
72            }
73            if !comp.licenses.declared.is_empty() || comp.licenses.concluded.is_some() {
74                with_licenses += 1;
75            }
76            if comp.description.is_some() {
77                with_description += 1;
78            }
79        }
80
81        let pct = |count: usize| (count as f32 / total as f32) * 100.0;
82
83        Self {
84            components_with_version: pct(with_version),
85            components_with_purl: pct(with_purl),
86            components_with_cpe: pct(with_cpe),
87            components_with_supplier: pct(with_supplier),
88            components_with_hashes: pct(with_hashes),
89            components_with_licenses: pct(with_licenses),
90            components_with_description: pct(with_description),
91            has_creator_info: !sbom.document.creators.is_empty(),
92            has_timestamp: true, // Always set in our model
93            has_serial_number: sbom.document.serial_number.is_some(),
94            total_components: total,
95        }
96    }
97
98    /// Create empty metrics
99    #[must_use]
100    pub const fn empty() -> Self {
101        Self {
102            components_with_version: 0.0,
103            components_with_purl: 0.0,
104            components_with_cpe: 0.0,
105            components_with_supplier: 0.0,
106            components_with_hashes: 0.0,
107            components_with_licenses: 0.0,
108            components_with_description: 0.0,
109            has_creator_info: false,
110            has_timestamp: false,
111            has_serial_number: false,
112            total_components: 0,
113        }
114    }
115
116    /// Calculate overall completeness score (0-100)
117    #[must_use]
118    pub fn overall_score(&self, weights: &CompletenessWeights) -> f32 {
119        let mut score = 0.0;
120        let mut total_weight = 0.0;
121
122        // Component field scores
123        score += self.components_with_version * weights.version;
124        total_weight += weights.version * 100.0;
125
126        score += self.components_with_purl * weights.purl;
127        total_weight += weights.purl * 100.0;
128
129        score += self.components_with_cpe * weights.cpe;
130        total_weight += weights.cpe * 100.0;
131
132        score += self.components_with_supplier * weights.supplier;
133        total_weight += weights.supplier * 100.0;
134
135        score += self.components_with_hashes * weights.hashes;
136        total_weight += weights.hashes * 100.0;
137
138        score += self.components_with_licenses * weights.licenses;
139        total_weight += weights.licenses * 100.0;
140
141        // Document metadata scores
142        if self.has_creator_info {
143            score += 100.0 * weights.creator_info;
144        }
145        total_weight += weights.creator_info * 100.0;
146
147        if self.has_serial_number {
148            score += 100.0 * weights.serial_number;
149        }
150        total_weight += weights.serial_number * 100.0;
151
152        if total_weight > 0.0 {
153            (score / total_weight) * 100.0
154        } else {
155            0.0
156        }
157    }
158}
159
/// Relative weights used by `CompletenessMetrics::overall_score`.
///
/// Larger values make the corresponding field count for more of the final
/// 0-100 completeness score.
#[derive(Debug, Clone)]
pub struct CompletenessWeights {
    pub version: f32,
    pub purl: f32,
    pub cpe: f32,
    pub supplier: f32,
    pub hashes: f32,
    pub licenses: f32,
    pub creator_info: f32,
    pub serial_number: f32,
}

impl Default for CompletenessWeights {
    /// Opinionated defaults: PURLs and licenses weighted up, document
    /// metadata (creator/serial) weighted down.
    fn default() -> Self {
        Self {
            version: 1.0,
            purl: 1.5, // primary machine-readable identifier
            cpe: 0.5,  // nice to have, often absent
            supplier: 1.0,
            hashes: 1.0,
            licenses: 1.2, // compliance-relevant
            creator_info: 0.3,
            serial_number: 0.2,
        }
    }
}
187
188// ============================================================================
189// Hash quality metrics
190// ============================================================================
191
/// Hash/integrity quality metrics
///
/// Component-level counts except `algorithm_distribution` and
/// `total_hashes`, which count individual hash entries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HashQualityMetrics {
    /// Components with any hash
    pub components_with_any_hash: usize,
    /// Components with at least one strong hash (SHA-256+, SHA-3, BLAKE2b, BLAKE3)
    pub components_with_strong_hash: usize,
    /// Components with only weak hashes (MD5, SHA-1) and no strong backup
    pub components_with_weak_only: usize,
    /// Distribution of hash algorithms across all components
    /// (label -> entry count; `BTreeMap` keeps serialized key order stable)
    pub algorithm_distribution: BTreeMap<String, usize>,
    /// Total hash entries across all components
    pub total_hashes: usize,
}
206
207impl HashQualityMetrics {
208    /// Calculate hash quality metrics from an SBOM
209    #[must_use]
210    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
211        let mut with_any = 0;
212        let mut with_strong = 0;
213        let mut with_weak_only = 0;
214        let mut distribution: BTreeMap<String, usize> = BTreeMap::new();
215        let mut total_hashes = 0;
216
217        for comp in sbom.components.values() {
218            if comp.hashes.is_empty() {
219                continue;
220            }
221            with_any += 1;
222            total_hashes += comp.hashes.len();
223
224            let mut has_strong = false;
225            let mut has_weak = false;
226
227            for hash in &comp.hashes {
228                let label = hash_algorithm_label(&hash.algorithm);
229                *distribution.entry(label).or_insert(0) += 1;
230
231                if is_strong_hash(&hash.algorithm) {
232                    has_strong = true;
233                } else {
234                    has_weak = true;
235                }
236            }
237
238            if has_strong {
239                with_strong += 1;
240            } else if has_weak {
241                with_weak_only += 1;
242            }
243        }
244
245        Self {
246            components_with_any_hash: with_any,
247            components_with_strong_hash: with_strong,
248            components_with_weak_only: with_weak_only,
249            algorithm_distribution: distribution,
250            total_hashes,
251        }
252    }
253
254    /// Calculate integrity quality score (0-100)
255    ///
256    /// Base 60% for any-hash coverage + 40% bonus for strong-hash coverage,
257    /// with a penalty for weak-only components.
258    #[must_use]
259    pub fn quality_score(&self, total_components: usize) -> f32 {
260        if total_components == 0 {
261            return 0.0;
262        }
263
264        let any_coverage = self.components_with_any_hash as f32 / total_components as f32;
265        let strong_coverage = self.components_with_strong_hash as f32 / total_components as f32;
266        let weak_only_ratio = self.components_with_weak_only as f32 / total_components as f32;
267
268        let base = any_coverage * 60.0;
269        let strong_bonus = strong_coverage * 40.0;
270        let weak_penalty = weak_only_ratio * 10.0;
271
272        (base + strong_bonus - weak_penalty).clamp(0.0, 100.0)
273    }
274}
275
/// Whether a hash algorithm is considered cryptographically strong.
///
/// Strong = SHA-2 (256 bits and up), SHA-3, BLAKE2b, and BLAKE3. Everything
/// else — MD5, SHA-1, and unrecognized `Other(_)` algorithms — is treated as
/// weak. Kept as an explicit allowlist so unknown algorithms default to weak.
fn is_strong_hash(algo: &HashAlgorithm) -> bool {
    matches!(
        algo,
        HashAlgorithm::Sha256
            | HashAlgorithm::Sha384
            | HashAlgorithm::Sha512
            | HashAlgorithm::Sha3_256
            | HashAlgorithm::Sha3_384
            | HashAlgorithm::Sha3_512
            | HashAlgorithm::Blake2b256
            | HashAlgorithm::Blake2b384
            | HashAlgorithm::Blake2b512
            | HashAlgorithm::Blake3
    )
}
292
293/// Human-readable label for a hash algorithm
294fn hash_algorithm_label(algo: &HashAlgorithm) -> String {
295    match algo {
296        HashAlgorithm::Md5 => "MD5".to_string(),
297        HashAlgorithm::Sha1 => "SHA-1".to_string(),
298        HashAlgorithm::Sha256 => "SHA-256".to_string(),
299        HashAlgorithm::Sha384 => "SHA-384".to_string(),
300        HashAlgorithm::Sha512 => "SHA-512".to_string(),
301        HashAlgorithm::Sha3_256 => "SHA3-256".to_string(),
302        HashAlgorithm::Sha3_384 => "SHA3-384".to_string(),
303        HashAlgorithm::Sha3_512 => "SHA3-512".to_string(),
304        HashAlgorithm::Blake2b256 => "BLAKE2b-256".to_string(),
305        HashAlgorithm::Blake2b384 => "BLAKE2b-384".to_string(),
306        HashAlgorithm::Blake2b512 => "BLAKE2b-512".to_string(),
307        HashAlgorithm::Blake3 => "BLAKE3".to_string(),
308        HashAlgorithm::Other(s) => s.clone(),
309    }
310}
311
312// ============================================================================
313// Identifier quality metrics
314// ============================================================================
315
/// Identifier quality metrics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IdentifierMetrics {
    /// Components with valid PURLs (at most one PURL per component)
    pub valid_purls: usize,
    /// Components with invalid/malformed PURLs
    pub invalid_purls: usize,
    /// Valid CPE entries (a component may contribute several)
    pub valid_cpes: usize,
    /// Invalid/malformed CPE entries
    pub invalid_cpes: usize,
    /// Components with SWID tags
    pub with_swid: usize,
    /// Unique ecosystems identified (extracted from valid PURLs; sorted)
    pub ecosystems: Vec<String>,
    /// Components missing all identifiers (only name)
    pub missing_all_identifiers: usize,
}
334
335impl IdentifierMetrics {
336    /// Calculate identifier metrics from an SBOM
337    #[must_use]
338    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
339        let mut valid_purls = 0;
340        let mut invalid_purls = 0;
341        let mut valid_cpes = 0;
342        let mut invalid_cpes = 0;
343        let mut with_swid = 0;
344        let mut missing_all = 0;
345        let mut ecosystems = std::collections::HashSet::new();
346
347        for comp in sbom.components.values() {
348            let has_purl = comp.identifiers.purl.is_some();
349            let has_cpe = !comp.identifiers.cpe.is_empty();
350            let has_swid = comp.identifiers.swid.is_some();
351
352            if let Some(ref purl) = comp.identifiers.purl {
353                if is_valid_purl(purl) {
354                    valid_purls += 1;
355                    // Extract ecosystem from PURL
356                    if let Some(eco) = extract_ecosystem_from_purl(purl) {
357                        ecosystems.insert(eco);
358                    }
359                } else {
360                    invalid_purls += 1;
361                }
362            }
363
364            for cpe in &comp.identifiers.cpe {
365                if is_valid_cpe(cpe) {
366                    valid_cpes += 1;
367                } else {
368                    invalid_cpes += 1;
369                }
370            }
371
372            if has_swid {
373                with_swid += 1;
374            }
375
376            if !has_purl && !has_cpe && !has_swid {
377                missing_all += 1;
378            }
379        }
380
381        let mut ecosystem_list: Vec<String> = ecosystems.into_iter().collect();
382        ecosystem_list.sort();
383
384        Self {
385            valid_purls,
386            invalid_purls,
387            valid_cpes,
388            invalid_cpes,
389            with_swid,
390            ecosystems: ecosystem_list,
391            missing_all_identifiers: missing_all,
392        }
393    }
394
395    /// Calculate identifier quality score (0-100)
396    #[must_use]
397    pub fn quality_score(&self, total_components: usize) -> f32 {
398        if total_components == 0 {
399            return 0.0;
400        }
401
402        let with_valid_id = self.valid_purls + self.valid_cpes + self.with_swid;
403        let coverage =
404            (with_valid_id.min(total_components) as f32 / total_components as f32) * 100.0;
405
406        // Penalize invalid identifiers
407        let invalid_count = self.invalid_purls + self.invalid_cpes;
408        let penalty = (invalid_count as f32 / total_components as f32) * 20.0;
409
410        (coverage - penalty).clamp(0.0, 100.0)
411    }
412}
413
/// License quality metrics
///
/// `with_declared` / `with_concluded` count components; the expression-level
/// fields (`valid_spdx_expressions`, `non_standard_licenses`,
/// `noassertion_count`, `deprecated_licenses`, `restrictive_licenses`) count
/// individual declared license entries, so a component with several declared
/// licenses contributes several.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LicenseMetrics {
    /// Components with at least one declared license
    pub with_declared: usize,
    /// Components with a concluded license
    pub with_concluded: usize,
    /// Declared license entries that are valid SPDX expressions
    pub valid_spdx_expressions: usize,
    /// Declared license entries with non-standard license names
    pub non_standard_licenses: usize,
    /// Declared license entries equal to NOASSERTION
    pub noassertion_count: usize,
    /// Declared license entries using deprecated SPDX identifiers
    pub deprecated_licenses: usize,
    /// Declared license entries with restrictive/copyleft licenses (GPL family)
    pub restrictive_licenses: usize,
    /// Specific copyleft license identifiers found (sorted, deduplicated)
    pub copyleft_license_ids: Vec<String>,
    /// Unique licenses found (sorted)
    pub unique_licenses: Vec<String>,
}
436
437impl LicenseMetrics {
438    /// Calculate license metrics from an SBOM
439    #[must_use]
440    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
441        let mut with_declared = 0;
442        let mut with_concluded = 0;
443        let mut valid_spdx = 0;
444        let mut non_standard = 0;
445        let mut noassertion = 0;
446        let mut deprecated = 0;
447        let mut restrictive = 0;
448        let mut licenses = HashSet::new();
449        let mut copyleft_ids = HashSet::new();
450
451        for comp in sbom.components.values() {
452            if !comp.licenses.declared.is_empty() {
453                with_declared += 1;
454                for lic in &comp.licenses.declared {
455                    let expr = &lic.expression;
456                    licenses.insert(expr.clone());
457
458                    if expr == "NOASSERTION" {
459                        noassertion += 1;
460                    } else if is_valid_spdx_license(expr) {
461                        valid_spdx += 1;
462                    } else {
463                        non_standard += 1;
464                    }
465
466                    if is_deprecated_spdx_license(expr) {
467                        deprecated += 1;
468                    }
469                    if is_restrictive_license(expr) {
470                        restrictive += 1;
471                        copyleft_ids.insert(expr.clone());
472                    }
473                }
474            }
475
476            if comp.licenses.concluded.is_some() {
477                with_concluded += 1;
478            }
479        }
480
481        let mut license_list: Vec<String> = licenses.into_iter().collect();
482        license_list.sort();
483
484        let mut copyleft_list: Vec<String> = copyleft_ids.into_iter().collect();
485        copyleft_list.sort();
486
487        Self {
488            with_declared,
489            with_concluded,
490            valid_spdx_expressions: valid_spdx,
491            non_standard_licenses: non_standard,
492            noassertion_count: noassertion,
493            deprecated_licenses: deprecated,
494            restrictive_licenses: restrictive,
495            copyleft_license_ids: copyleft_list,
496            unique_licenses: license_list,
497        }
498    }
499
500    /// Calculate license quality score (0-100)
501    #[must_use]
502    pub fn quality_score(&self, total_components: usize) -> f32 {
503        if total_components == 0 {
504            return 0.0;
505        }
506
507        let coverage = (self.with_declared as f32 / total_components as f32) * 60.0;
508
509        // Bonus for SPDX compliance
510        let spdx_ratio = if self.with_declared > 0 {
511            self.valid_spdx_expressions as f32 / self.with_declared as f32
512        } else {
513            0.0
514        };
515        let spdx_bonus = spdx_ratio * 30.0;
516
517        // Penalty for NOASSERTION
518        let noassertion_penalty =
519            (self.noassertion_count as f32 / total_components.max(1) as f32) * 10.0;
520
521        // Penalty for deprecated licenses (2 points each, capped)
522        let deprecated_penalty = (self.deprecated_licenses as f32 * 2.0).min(10.0);
523
524        (coverage + spdx_bonus - noassertion_penalty - deprecated_penalty).clamp(0.0, 100.0)
525    }
526}
527
/// Vulnerability information quality metrics
///
/// `with_cvss` / `with_cwe` / `with_remediation` count vulnerability entries
/// (out of `total_vulnerabilities`); the other fields count components.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnerabilityMetrics {
    /// Components with vulnerability information
    pub components_with_vulns: usize,
    /// Total vulnerabilities reported across all components
    pub total_vulnerabilities: usize,
    /// Vulnerabilities with CVSS scores
    pub with_cvss: usize,
    /// Vulnerabilities with CWE information
    pub with_cwe: usize,
    /// Vulnerabilities with remediation info
    pub with_remediation: usize,
    /// Components with VEX status (either on the component itself or on any
    /// of its vulnerabilities)
    pub with_vex_status: usize,
}
544
545impl VulnerabilityMetrics {
546    /// Calculate vulnerability metrics from an SBOM
547    #[must_use]
548    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
549        let mut components_with_vulns = 0;
550        let mut total_vulns = 0;
551        let mut with_cvss = 0;
552        let mut with_cwe = 0;
553        let mut with_remediation = 0;
554        let mut with_vex = 0;
555
556        for comp in sbom.components.values() {
557            if !comp.vulnerabilities.is_empty() {
558                components_with_vulns += 1;
559            }
560
561            for vuln in &comp.vulnerabilities {
562                total_vulns += 1;
563
564                if !vuln.cvss.is_empty() {
565                    with_cvss += 1;
566                }
567                if !vuln.cwes.is_empty() {
568                    with_cwe += 1;
569                }
570                if vuln.remediation.is_some() {
571                    with_remediation += 1;
572                }
573            }
574
575            if comp.vex_status.is_some()
576                || comp.vulnerabilities.iter().any(|v| v.vex_status.is_some())
577            {
578                with_vex += 1;
579            }
580        }
581
582        Self {
583            components_with_vulns,
584            total_vulnerabilities: total_vulns,
585            with_cvss,
586            with_cwe,
587            with_remediation,
588            with_vex_status: with_vex,
589        }
590    }
591
592    /// Calculate vulnerability documentation quality score (0-100)
593    ///
594    /// Returns `None` when no vulnerability data exists, signaling that this
595    /// category should be excluded from the weighted score (N/A-aware).
596    /// This prevents inflating the overall score when vulnerability assessment
597    /// was not performed.
598    #[must_use]
599    pub fn documentation_score(&self) -> Option<f32> {
600        if self.total_vulnerabilities == 0 {
601            return None; // No vulnerability data — treat as N/A
602        }
603
604        let cvss_ratio = self.with_cvss as f32 / self.total_vulnerabilities as f32;
605        let cwe_ratio = self.with_cwe as f32 / self.total_vulnerabilities as f32;
606        let remediation_ratio = self.with_remediation as f32 / self.total_vulnerabilities as f32;
607
608        Some(
609            remediation_ratio
610                .mul_add(30.0, cvss_ratio.mul_add(40.0, cwe_ratio * 30.0))
611                .min(100.0),
612        )
613    }
614}
615
616// ============================================================================
617// Dependency graph quality metrics
618// ============================================================================
619
/// Maximum edge count before skipping expensive graph analysis.
///
/// Above this threshold `DependencyMetrics::from_sbom` records basic counts
/// only and sets `graph_analysis_skipped`, avoiding the depth BFS, cycle
/// detection, and island-counting passes on very large graphs.
const MAX_EDGES_FOR_GRAPH_ANALYSIS: usize = 50_000;
622
623// ============================================================================
624// Software complexity index
625// ============================================================================
626
/// Complexity level bands for the software complexity index.
///
/// Derived from the 0-100 simplicity score via [`ComplexityLevel::from_score`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum ComplexityLevel {
    /// Simplicity 75–100 (raw complexity 0–0.25)
    Low,
    /// Simplicity 50–74 (raw complexity 0.26–0.50)
    Moderate,
    /// Simplicity 25–49 (raw complexity 0.51–0.75)
    High,
    /// Simplicity 0–24 (raw complexity 0.76–1.00)
    VeryHigh,
}
640
641impl ComplexityLevel {
642    /// Determine complexity level from a simplicity score (0–100)
643    #[must_use]
644    pub const fn from_score(simplicity: f32) -> Self {
645        match simplicity as u32 {
646            75..=100 => Self::Low,
647            50..=74 => Self::Moderate,
648            25..=49 => Self::High,
649            _ => Self::VeryHigh,
650        }
651    }
652
653    /// Human-readable label
654    #[must_use]
655    pub const fn label(&self) -> &'static str {
656        match self {
657            Self::Low => "Low",
658            Self::Moderate => "Moderate",
659            Self::High => "High",
660            Self::VeryHigh => "Very High",
661        }
662    }
663}
664
665impl std::fmt::Display for ComplexityLevel {
666    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
667        f.write_str(self.label())
668    }
669}
670
/// Breakdown of the five factors that compose the software complexity index,
/// as produced by `compute_complexity`.
/// Each factor is normalized to 0.0–1.0 where higher = more complex.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityFactors {
    /// Log-scaled edge density: `min(1.0, ln(1 + edges/components) / ln(20))`
    pub dependency_volume: f32,
    /// Depth ratio: `min(1.0, max_depth / 15.0)`
    pub normalized_depth: f32,
    /// Hub dominance: `min(1.0, max_out_degree / max(components * 0.25, 4))`
    pub fanout_concentration: f32,
    /// Cycle density: `min(1.0, cycle_count / max(1, components * 0.05))`
    pub cycle_ratio: f32,
    /// Extra disconnected subgraphs: `(islands - 1) / max(1, components - 1)`
    pub fragmentation: f32,
}
686
/// Dependency graph quality metrics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DependencyMetrics {
    /// Total dependency relationships (edge count)
    pub total_dependencies: usize,
    /// Components with at least one outgoing dependency edge
    pub components_with_deps: usize,
    /// Maximum dependency depth (computed via BFS from roots); `None` when
    /// analysis was skipped or the graph has no roots (e.g. fully cyclic)
    pub max_depth: Option<usize>,
    /// Average dependency depth across all reachable components; `None`
    /// under the same conditions as `max_depth`
    pub avg_depth: Option<f32>,
    /// Orphan components (no incoming or outgoing deps)
    pub orphan_components: usize,
    /// Root components (no incoming deps, but has outgoing)
    pub root_components: usize,
    /// Number of dependency cycles detected (DFS back edges); 0 when skipped
    pub cycle_count: usize,
    /// Number of disconnected subgraphs (islands); 0 when skipped
    pub island_count: usize,
    /// Whether graph analysis was skipped due to size
    pub graph_analysis_skipped: bool,
    /// Maximum out-degree (most dependencies from a single component)
    pub max_out_degree: usize,
    /// Software complexity index (0–100, higher = simpler). `None` when graph analysis skipped.
    pub software_complexity_index: Option<f32>,
    /// Complexity level band. `None` when graph analysis skipped.
    pub complexity_level: Option<ComplexityLevel>,
    /// Factor breakdown. `None` when graph analysis skipped.
    pub complexity_factors: Option<ComplexityFactors>,
}
717
impl DependencyMetrics {
    /// Calculate dependency metrics from an SBOM
    ///
    /// Builds an adjacency list from the edge list, derives basic counts
    /// (roots, orphans, max out-degree), and — unless the edge count exceeds
    /// `MAX_EDGES_FOR_GRAPH_ANALYSIS` — runs depth BFS, cycle detection,
    /// island counting, and the software complexity index.
    #[must_use]
    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
        use crate::model::CanonicalId;

        let total_deps = sbom.edges.len();

        // Build adjacency lists using CanonicalId.value() for string keys
        let mut children: HashMap<&str, Vec<&str>> = HashMap::new();
        let mut has_outgoing: HashSet<&str> = HashSet::new();
        let mut has_incoming: HashSet<&str> = HashSet::new();

        for edge in &sbom.edges {
            children
                .entry(edge.from.value())
                .or_default()
                .push(edge.to.value());
            has_outgoing.insert(edge.from.value());
            has_incoming.insert(edge.to.value());
        }

        let all_ids: Vec<&str> = sbom.components.keys().map(CanonicalId::value).collect();

        // Orphan = component that appears in no edge at all.
        let orphans = all_ids
            .iter()
            .filter(|c| !has_outgoing.contains(*c) && !has_incoming.contains(*c))
            .count();

        // Root = has outgoing edges but nothing depends on it.
        let roots: Vec<&str> = has_outgoing
            .iter()
            .filter(|c| !has_incoming.contains(*c))
            .copied()
            .collect();
        let root_count = roots.len();

        // Compute max out-degree (single pass over adjacency, O(V))
        let max_out_degree = children.values().map(Vec::len).max().unwrap_or(0);

        // Skip expensive graph analysis for very large graphs; callers can
        // detect this case via `graph_analysis_skipped`.
        if total_deps > MAX_EDGES_FOR_GRAPH_ANALYSIS {
            return Self {
                total_dependencies: total_deps,
                components_with_deps: has_outgoing.len(),
                max_depth: None,
                avg_depth: None,
                orphan_components: orphans,
                root_components: root_count,
                cycle_count: 0,
                island_count: 0,
                graph_analysis_skipped: true,
                max_out_degree,
                software_complexity_index: None,
                complexity_level: None,
                complexity_factors: None,
            };
        }

        // BFS from roots to compute depth
        let (max_depth, avg_depth) = compute_depth(&roots, &children);

        // DFS cycle detection
        let cycle_count = detect_cycles(&all_ids, &children);

        // Union-Find for island/subgraph detection
        let island_count = count_islands(&all_ids, &sbom.edges);

        // Compute software complexity index (see `compute_complexity` for
        // how the factor breakdown is derived from these raw measurements)
        let component_count = all_ids.len();
        let (complexity_index, complexity_lvl, factors) = compute_complexity(
            total_deps,
            component_count,
            max_depth.unwrap_or(0),
            max_out_degree,
            cycle_count,
            orphans,
            island_count,
        );

        Self {
            total_dependencies: total_deps,
            components_with_deps: has_outgoing.len(),
            max_depth,
            avg_depth,
            orphan_components: orphans,
            root_components: root_count,
            cycle_count,
            island_count,
            graph_analysis_skipped: false,
            max_out_degree,
            software_complexity_index: Some(complexity_index),
            complexity_level: Some(complexity_lvl),
            complexity_factors: Some(factors),
        }
    }

    /// Calculate dependency graph quality score (0-100)
    #[must_use]
    pub fn quality_score(&self, total_components: usize) -> f32 {
        if total_components == 0 {
            return 0.0;
        }

        // Score based on how many components have dependency info
        // NOTE(review): the denominator excludes one component — presumably
        // the root, which has no parent needing a declared edge; confirm.
        let coverage = if total_components > 1 {
            (self.components_with_deps as f32 / (total_components - 1) as f32) * 100.0
        } else {
            100.0 // Single component SBOM
        };

        // Slight penalty for orphan components (up to 10 points)
        let orphan_ratio = self.orphan_components as f32 / total_components as f32;
        let orphan_penalty = orphan_ratio * 10.0;

        // Penalty for cycles (5 points each, capped at 20)
        let cycle_penalty = (self.cycle_count as f32 * 5.0).min(20.0);

        // Penalty for excessive islands (>3 in multi-component SBOMs)
        let island_penalty = if total_components > 5 && self.island_count > 3 {
            ((self.island_count - 3) as f32 * 3.0).min(15.0)
        } else {
            0.0
        };

        (coverage - orphan_penalty - cycle_penalty - island_penalty).clamp(0.0, 100.0)
    }
}
845
/// Breadth-first traversal from `roots`, returning the maximum node depth
/// and the mean depth over all reachable nodes.
///
/// Roots sit at depth 0; nodes unreachable from any root are ignored.
/// Returns `(None, None)` when `roots` is empty (e.g. a fully cyclic graph).
fn compute_depth(
    roots: &[&str],
    children: &HashMap<&str, Vec<&str>>,
) -> (Option<usize>, Option<f32>) {
    use std::collections::VecDeque;

    if roots.is_empty() {
        return (None, None);
    }

    let mut seen: HashSet<&str> = HashSet::new();
    let mut frontier: VecDeque<(&str, usize)> = VecDeque::new();

    // Seed the queue with every distinct root at depth 0.
    for &root in roots {
        if seen.insert(root) {
            frontier.push_back((root, 0));
        }
    }

    let mut deepest = 0usize;
    let mut depth_sum = 0usize;
    let mut visited_count = 0usize;

    while let Some((node, depth)) = frontier.pop_front() {
        deepest = deepest.max(depth);
        depth_sum += depth;
        visited_count += 1;

        // Enqueue unvisited children one level deeper.
        for &kid in children.get(node).into_iter().flatten() {
            if seen.insert(kid) {
                frontier.push_back((kid, depth + 1));
            }
        }
    }

    let mean = if visited_count > 0 {
        Some(depth_sum as f32 / visited_count as f32)
    } else {
        None
    };

    (Some(deepest), mean)
}
891
/// DFS-based cycle detection (white/gray/black coloring)
///
/// Counts back edges — edges into a node currently on the DFS path — and
/// reports each as one cycle. Uses an explicit stack instead of recursion
/// so that very deep dependency chains cannot overflow the call stack.
fn detect_cycles(all_nodes: &[&str], children: &HashMap<&str, Vec<&str>>) -> usize {
    // WHITE = unvisited, GRAY = on the current DFS path, BLACK = done.
    const WHITE: u8 = 0;
    const GRAY: u8 = 1;
    const BLACK: u8 = 2;

    let mut color: HashMap<&str, u8> = HashMap::with_capacity(all_nodes.len());
    for &node in all_nodes {
        color.insert(node, WHITE);
    }

    let mut cycles = 0;

    for &start in all_nodes {
        if color.get(start).copied().unwrap_or(WHITE) != WHITE {
            continue;
        }

        // Explicit DFS stack of (node, index of next child to examine).
        color.insert(start, GRAY);
        let mut stack: Vec<(&str, usize)> = vec![(start, 0)];

        while let Some(&(node, idx)) = stack.last() {
            let next = children.get(node).and_then(|kids| kids.get(idx)).copied();
            match next {
                Some(kid) => {
                    // Advance this frame past the child we are handling.
                    stack.last_mut().expect("stack is non-empty").1 = idx + 1;
                    match color.get(kid).copied().unwrap_or(WHITE) {
                        GRAY => cycles += 1, // back edge = cycle
                        WHITE => {
                            color.insert(kid, GRAY);
                            stack.push((kid, 0));
                        }
                        _ => {} // BLACK: already fully explored
                    }
                }
                None => {
                    // All children examined: retire the node.
                    color.insert(node, BLACK);
                    stack.pop();
                }
            }
        }
    }

    cycles
}
934
935/// Union-Find to count disconnected subgraphs (islands)
936fn count_islands(all_nodes: &[&str], edges: &[crate::model::DependencyEdge]) -> usize {
937    if all_nodes.is_empty() {
938        return 0;
939    }
940
941    // Map node IDs to indices
942    let node_idx: HashMap<&str, usize> =
943        all_nodes.iter().enumerate().map(|(i, &n)| (n, i)).collect();
944
945    let mut parent: Vec<usize> = (0..all_nodes.len()).collect();
946    let mut rank: Vec<u8> = vec![0; all_nodes.len()];
947
948    fn find(parent: &mut Vec<usize>, x: usize) -> usize {
949        if parent[x] != x {
950            parent[x] = find(parent, parent[x]); // path compression
951        }
952        parent[x]
953    }
954
955    fn union(parent: &mut Vec<usize>, rank: &mut [u8], a: usize, b: usize) {
956        let ra = find(parent, a);
957        let rb = find(parent, b);
958        if ra != rb {
959            if rank[ra] < rank[rb] {
960                parent[ra] = rb;
961            } else if rank[ra] > rank[rb] {
962                parent[rb] = ra;
963            } else {
964                parent[rb] = ra;
965                rank[ra] += 1;
966            }
967        }
968    }
969
970    for edge in edges {
971        if let (Some(&a), Some(&b)) = (
972            node_idx.get(edge.from.value()),
973            node_idx.get(edge.to.value()),
974        ) {
975            union(&mut parent, &mut rank, a, b);
976        }
977    }
978
979    // Count unique roots
980    let mut roots = HashSet::new();
981    for i in 0..all_nodes.len() {
982        roots.insert(find(&mut parent, i));
983    }
984
985    roots.len()
986}
987
988/// Compute the software complexity index and factor breakdown.
989///
990/// Returns `(simplicity_index, complexity_level, factors)`.
991/// `simplicity_index` is 0–100 where 100 = simplest.
992fn compute_complexity(
993    edges: usize,
994    components: usize,
995    max_depth: usize,
996    max_out_degree: usize,
997    cycle_count: usize,
998    _orphans: usize,
999    islands: usize,
1000) -> (f32, ComplexityLevel, ComplexityFactors) {
1001    if components == 0 {
1002        let factors = ComplexityFactors {
1003            dependency_volume: 0.0,
1004            normalized_depth: 0.0,
1005            fanout_concentration: 0.0,
1006            cycle_ratio: 0.0,
1007            fragmentation: 0.0,
1008        };
1009        return (100.0, ComplexityLevel::Low, factors);
1010    }
1011
1012    // Factor 1: dependency volume — log-scaled edge density
1013    let edge_ratio = edges as f64 / components as f64;
1014    let dependency_volume = ((1.0 + edge_ratio).ln() / 20.0_f64.ln()).min(1.0) as f32;
1015
1016    // Factor 2: normalized depth
1017    let normalized_depth = (max_depth as f32 / 15.0).min(1.0);
1018
1019    // Factor 3: fanout concentration — hub dominance
1020    // Floor of 4.0 prevents small graphs from being penalized for max_out_degree of 1
1021    let fanout_denom = (components as f32 * 0.25).max(4.0);
1022    let fanout_concentration = (max_out_degree as f32 / fanout_denom).min(1.0);
1023
1024    // Factor 4: cycle ratio
1025    let cycle_threshold = (components as f32 * 0.05).max(1.0);
1026    let cycle_ratio = (cycle_count as f32 / cycle_threshold).min(1.0);
1027
1028    // Factor 5: fragmentation — extra disconnected subgraphs beyond the ideal of 1
1029    // Uses (islands - 1) because orphans are already counted as individual islands.
1030    let extra_islands = islands.saturating_sub(1);
1031    let fragmentation = if components > 1 {
1032        (extra_islands as f32 / (components - 1) as f32).min(1.0)
1033    } else {
1034        0.0
1035    };
1036
1037    let factors = ComplexityFactors {
1038        dependency_volume,
1039        normalized_depth,
1040        fanout_concentration,
1041        cycle_ratio,
1042        fragmentation,
1043    };
1044
1045    let raw_complexity = 0.30 * dependency_volume
1046        + 0.20 * normalized_depth
1047        + 0.20 * fanout_concentration
1048        + 0.20 * cycle_ratio
1049        + 0.10 * fragmentation;
1050
1051    let simplicity_index = (100.0 - raw_complexity * 100.0).clamp(0.0, 100.0);
1052    let level = ComplexityLevel::from_score(simplicity_index);
1053
1054    (simplicity_index, level, factors)
1055}
1056
1057// ============================================================================
1058// Provenance metrics
1059// ============================================================================
1060
/// Document provenance and authorship quality metrics
///
/// Populated by [`ProvenanceMetrics::from_sbom`] and scored by
/// [`ProvenanceMetrics::quality_score`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProvenanceMetrics {
    /// Whether the SBOM was created by an identified tool
    pub has_tool_creator: bool,
    /// Whether the tool creator includes version information
    /// (heuristic: the tool name contains a space, `/`, or `@`)
    pub has_tool_version: bool,
    /// Whether an organization is identified as creator
    pub has_org_creator: bool,
    /// Whether any creator has a contact email
    pub has_contact_email: bool,
    /// Whether the document has a serial number / namespace
    pub has_serial_number: bool,
    /// Whether the document has a name
    pub has_document_name: bool,
    /// Age of the SBOM in days (since creation timestamp);
    /// clamped to zero for future-dated documents
    pub timestamp_age_days: u32,
    /// Whether the SBOM is considered fresh (< 90 days old,
    /// see `FRESHNESS_THRESHOLD_DAYS`)
    pub is_fresh: bool,
    /// Whether a primary/described component is identified
    pub has_primary_component: bool,
    /// SBOM lifecycle phase (from CycloneDX 1.5+ metadata)
    pub lifecycle_phase: Option<String>,
    /// Self-declared completeness level of the SBOM
    pub completeness_declaration: CompletenessDeclaration,
    /// Whether the SBOM has a digital signature
    pub has_signature: bool,
}
1089
/// Freshness threshold in days: documents younger than this count as
/// fresh in [`ProvenanceMetrics::is_fresh`]
const FRESHNESS_THRESHOLD_DAYS: u32 = 90;
1092
1093impl ProvenanceMetrics {
1094    /// Calculate provenance metrics from an SBOM
1095    #[must_use]
1096    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
1097        let doc = &sbom.document;
1098
1099        let has_tool_creator = doc
1100            .creators
1101            .iter()
1102            .any(|c| c.creator_type == CreatorType::Tool);
1103        let has_tool_version = doc.creators.iter().any(|c| {
1104            c.creator_type == CreatorType::Tool
1105                && (c.name.contains(' ') || c.name.contains('/') || c.name.contains('@'))
1106        });
1107        let has_org_creator = doc
1108            .creators
1109            .iter()
1110            .any(|c| c.creator_type == CreatorType::Organization);
1111        let has_contact_email = doc.creators.iter().any(|c| c.email.is_some());
1112
1113        let age_days = (chrono::Utc::now() - doc.created).num_days().max(0) as u32;
1114
1115        Self {
1116            has_tool_creator,
1117            has_tool_version,
1118            has_org_creator,
1119            has_contact_email,
1120            has_serial_number: doc.serial_number.is_some(),
1121            has_document_name: doc.name.is_some(),
1122            timestamp_age_days: age_days,
1123            is_fresh: age_days < FRESHNESS_THRESHOLD_DAYS,
1124            has_primary_component: sbom.primary_component_id.is_some(),
1125            lifecycle_phase: doc.lifecycle_phase.clone(),
1126            completeness_declaration: doc.completeness_declaration.clone(),
1127            has_signature: doc.signature.is_some(),
1128        }
1129    }
1130
1131    /// Calculate provenance quality score (0-100)
1132    ///
1133    /// Weighted checklist: tool creator (15%), tool version (5%), org creator (12%),
1134    /// contact email (8%), serial number (8%), document name (5%), freshness (12%),
1135    /// primary component (12%), completeness declaration (8%), signature (5%),
1136    /// lifecycle phase (10% CDX-only).
1137    #[must_use]
1138    pub fn quality_score(&self, is_cyclonedx: bool) -> f32 {
1139        let mut score = 0.0;
1140        let mut total_weight = 0.0;
1141
1142        let completeness_declared =
1143            self.completeness_declaration != CompletenessDeclaration::Unknown;
1144
1145        let checks: &[(bool, f32)] = &[
1146            (self.has_tool_creator, 15.0),
1147            (self.has_tool_version, 5.0),
1148            (self.has_org_creator, 12.0),
1149            (self.has_contact_email, 8.0),
1150            (self.has_serial_number, 8.0),
1151            (self.has_document_name, 5.0),
1152            (self.is_fresh, 12.0),
1153            (self.has_primary_component, 12.0),
1154            (completeness_declared, 8.0),
1155            (self.has_signature, 5.0),
1156        ];
1157
1158        for &(present, weight) in checks {
1159            if present {
1160                score += weight;
1161            }
1162            total_weight += weight;
1163        }
1164
1165        // Lifecycle phase: only applicable for CycloneDX 1.5+
1166        if is_cyclonedx {
1167            let weight = 10.0;
1168            if self.lifecycle_phase.is_some() {
1169                score += weight;
1170            }
1171            total_weight += weight;
1172        }
1173
1174        if total_weight > 0.0 {
1175            (score / total_weight) * 100.0
1176        } else {
1177            0.0
1178        }
1179    }
1180}
1181
1182// ============================================================================
1183// Auditability metrics
1184// ============================================================================
1185
/// External reference and auditability quality metrics
///
/// Component counts are absolute (not percentages); they are compared to
/// the total component count in [`AuditabilityMetrics::quality_score`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditabilityMetrics {
    /// Components with VCS (version control) references
    pub components_with_vcs: usize,
    /// Components with website references
    pub components_with_website: usize,
    /// Components with security advisory references
    pub components_with_advisories: usize,
    /// Components with any external reference
    pub components_with_any_external_ref: usize,
    /// Whether the document has a security contact
    pub has_security_contact: bool,
    /// Whether the document has a vulnerability disclosure URL
    pub has_vuln_disclosure_url: bool,
}
1202
1203impl AuditabilityMetrics {
1204    /// Calculate auditability metrics from an SBOM
1205    #[must_use]
1206    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
1207        let mut with_vcs = 0;
1208        let mut with_website = 0;
1209        let mut with_advisories = 0;
1210        let mut with_any = 0;
1211
1212        for comp in sbom.components.values() {
1213            if comp.external_refs.is_empty() {
1214                continue;
1215            }
1216            with_any += 1;
1217
1218            let has_vcs = comp
1219                .external_refs
1220                .iter()
1221                .any(|r| r.ref_type == ExternalRefType::Vcs);
1222            let has_website = comp
1223                .external_refs
1224                .iter()
1225                .any(|r| r.ref_type == ExternalRefType::Website);
1226            let has_advisories = comp
1227                .external_refs
1228                .iter()
1229                .any(|r| r.ref_type == ExternalRefType::Advisories);
1230
1231            if has_vcs {
1232                with_vcs += 1;
1233            }
1234            if has_website {
1235                with_website += 1;
1236            }
1237            if has_advisories {
1238                with_advisories += 1;
1239            }
1240        }
1241
1242        Self {
1243            components_with_vcs: with_vcs,
1244            components_with_website: with_website,
1245            components_with_advisories: with_advisories,
1246            components_with_any_external_ref: with_any,
1247            has_security_contact: sbom.document.security_contact.is_some(),
1248            has_vuln_disclosure_url: sbom.document.vulnerability_disclosure_url.is_some(),
1249        }
1250    }
1251
1252    /// Calculate auditability quality score (0-100)
1253    ///
1254    /// Component-level coverage (60%) + document-level security metadata (40%).
1255    #[must_use]
1256    pub fn quality_score(&self, total_components: usize) -> f32 {
1257        if total_components == 0 {
1258            return 0.0;
1259        }
1260
1261        // Component-level: external ref coverage
1262        let ref_coverage =
1263            (self.components_with_any_external_ref as f32 / total_components as f32) * 40.0;
1264        let vcs_coverage = (self.components_with_vcs as f32 / total_components as f32) * 20.0;
1265
1266        // Document-level security metadata
1267        let security_contact_score = if self.has_security_contact { 20.0 } else { 0.0 };
1268        let disclosure_score = if self.has_vuln_disclosure_url {
1269            20.0
1270        } else {
1271            0.0
1272        };
1273
1274        (ref_coverage + vcs_coverage + security_contact_score + disclosure_score).min(100.0)
1275    }
1276}
1277
1278// ============================================================================
1279// Lifecycle metrics
1280// ============================================================================
1281
/// Component lifecycle quality metrics (requires enrichment data)
///
/// Populated by [`LifecycleMetrics::from_sbom`]; when no component carries
/// enrichment data, [`LifecycleMetrics::quality_score`] returns `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LifecycleMetrics {
    /// Components that have reached end-of-life
    pub eol_components: usize,
    /// Components classified as stale (no updates for 1+ years)
    pub stale_components: usize,
    /// Components explicitly marked as deprecated
    pub deprecated_components: usize,
    /// Components with archived repositories
    pub archived_components: usize,
    /// Components with a newer version available
    pub outdated_components: usize,
    /// Components that had lifecycle enrichment data
    pub enriched_components: usize,
    /// Enrichment coverage percentage (0-100)
    pub enrichment_coverage: f32,
}
1300
1301impl LifecycleMetrics {
1302    /// Calculate lifecycle metrics from an SBOM
1303    ///
1304    /// These metrics are only meaningful after enrichment. When
1305    /// `enrichment_coverage == 0`, the lifecycle score should be
1306    /// treated as N/A and excluded from the weighted total.
1307    #[must_use]
1308    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
1309        let total = sbom.components.len();
1310        let mut eol = 0;
1311        let mut stale = 0;
1312        let mut deprecated = 0;
1313        let mut archived = 0;
1314        let mut outdated = 0;
1315        let mut enriched = 0;
1316
1317        for comp in sbom.components.values() {
1318            let has_lifecycle_data = comp.eol.is_some() || comp.staleness.is_some();
1319            if has_lifecycle_data {
1320                enriched += 1;
1321            }
1322
1323            if let Some(ref eol_info) = comp.eol
1324                && eol_info.status == EolStatus::EndOfLife
1325            {
1326                eol += 1;
1327            }
1328
1329            if let Some(ref stale_info) = comp.staleness {
1330                match stale_info.level {
1331                    StalenessLevel::Stale | StalenessLevel::Abandoned => stale += 1,
1332                    StalenessLevel::Deprecated => deprecated += 1,
1333                    StalenessLevel::Archived => archived += 1,
1334                    _ => {}
1335                }
1336                if stale_info.is_deprecated {
1337                    deprecated += 1;
1338                }
1339                if stale_info.is_archived {
1340                    archived += 1;
1341                }
1342                if stale_info.latest_version.is_some() {
1343                    outdated += 1;
1344                }
1345            }
1346        }
1347
1348        let coverage = if total > 0 {
1349            (enriched as f32 / total as f32) * 100.0
1350        } else {
1351            0.0
1352        };
1353
1354        Self {
1355            eol_components: eol,
1356            stale_components: stale,
1357            deprecated_components: deprecated,
1358            archived_components: archived,
1359            outdated_components: outdated,
1360            enriched_components: enriched,
1361            enrichment_coverage: coverage,
1362        }
1363    }
1364
1365    /// Whether enrichment data is available for scoring
1366    #[must_use]
1367    pub fn has_data(&self) -> bool {
1368        self.enriched_components > 0
1369    }
1370
1371    /// Calculate lifecycle quality score (0-100)
1372    ///
1373    /// Starts at 100, subtracts penalties for problematic components.
1374    /// Returns `None` if no enrichment data is available.
1375    #[must_use]
1376    pub fn quality_score(&self) -> Option<f32> {
1377        if !self.has_data() {
1378            return None;
1379        }
1380
1381        let mut score = 100.0_f32;
1382
1383        // EOL: severe penalty (15 points each, capped at 60)
1384        score -= (self.eol_components as f32 * 15.0).min(60.0);
1385        // Stale: moderate penalty (5 points each, capped at 30)
1386        score -= (self.stale_components as f32 * 5.0).min(30.0);
1387        // Deprecated/archived: moderate penalty (3 points each, capped at 20)
1388        score -= ((self.deprecated_components + self.archived_components) as f32 * 3.0).min(20.0);
1389        // Outdated: mild penalty (1 point each, capped at 10)
1390        score -= (self.outdated_components as f32 * 1.0).min(10.0);
1391
1392        Some(score.clamp(0.0, 100.0))
1393    }
1394}
1395
1396// ============================================================================
1397// Helper functions
1398// ============================================================================
1399
/// Basic PURL validation: `pkg:type/namespace/name@version`.
///
/// Requires the `pkg:` scheme plus a non-empty type and a non-empty path
/// after the first `/` (rejects e.g. `pkg:/name` and `pkg:npm/`). This is a
/// plausibility check, not a full purl-spec parse.
fn is_valid_purl(purl: &str) -> bool {
    match purl.strip_prefix("pkg:").and_then(|rest| rest.split_once('/')) {
        Some((ptype, remainder)) => !ptype.is_empty() && !remainder.is_empty(),
        None => false,
    }
}
1404
/// Extract the ecosystem (the `type` part of `pkg:type/...`) from a PURL.
///
/// Returns `None` when the `pkg:` prefix or the `/` separator is missing.
fn extract_ecosystem_from_purl(purl: &str) -> Option<String> {
    purl.strip_prefix("pkg:")
        .and_then(|rest| rest.split_once('/'))
        .map(|(ecosystem, _)| ecosystem.to_string())
}
1414
/// Basic CPE validation: accepts CPE 2.3 formatted strings (`cpe:2.3:`)
/// and legacy 2.2 URIs (`cpe:/`).
fn is_valid_cpe(cpe: &str) -> bool {
    ["cpe:2.3:", "cpe:/"]
        .iter()
        .any(|prefix| cpe.starts_with(prefix))
}
1419
/// Heuristic SPDX license check: accepts well-known single identifiers
/// or anything that looks like a compound SPDX expression.
fn is_valid_spdx_license(expr: &str) -> bool {
    // Common SPDX license identifiers
    const COMMON_SPDX: &[&str] = &[
        "MIT",
        "Apache-2.0",
        "GPL-2.0",
        "GPL-3.0",
        "BSD-2-Clause",
        "BSD-3-Clause",
        "ISC",
        "MPL-2.0",
        "LGPL-2.1",
        "LGPL-3.0",
        "AGPL-3.0",
        "Unlicense",
        "CC0-1.0",
        "0BSD",
        "EPL-2.0",
        "CDDL-1.0",
        "Artistic-2.0",
        "GPL-2.0-only",
        "GPL-2.0-or-later",
        "GPL-3.0-only",
        "GPL-3.0-or-later",
        "LGPL-2.1-only",
        "LGPL-2.1-or-later",
        "LGPL-3.0-only",
        "LGPL-3.0-or-later",
    ];

    let candidate = expr.trim();

    // A known single identifier is valid outright.
    if COMMON_SPDX.contains(&candidate) {
        return true;
    }

    // Otherwise accept expressions containing SPDX combinators.
    [" AND ", " OR ", " WITH "]
        .iter()
        .any(|op| candidate.contains(op))
}
1457
/// Whether a license identifier is on the SPDX deprecated list.
///
/// These are license IDs that SPDX has deprecated in favor of more specific
/// identifiers (e.g., `GPL-2.0` → `GPL-2.0-only` or `GPL-2.0-or-later`).
fn is_deprecated_spdx_license(expr: &str) -> bool {
    const DEPRECATED: &[&str] = &[
        "GPL-2.0",
        "GPL-2.0+",
        "GPL-3.0",
        "GPL-3.0+",
        "LGPL-2.0",
        "LGPL-2.0+",
        "LGPL-2.1",
        "LGPL-2.1+",
        "LGPL-3.0",
        "LGPL-3.0+",
        "AGPL-1.0",
        "AGPL-3.0",
        "GFDL-1.1",
        "GFDL-1.2",
        "GFDL-1.3",
        "BSD-2-Clause-FreeBSD",
        "BSD-2-Clause-NetBSD",
        "eCos-2.0",
        "Nunit",
        "StandardML-NJ",
        "wxWindows",
    ];

    let candidate = expr.trim();
    DEPRECATED.iter().any(|&id| id == candidate)
}
1489
/// Whether a license is considered restrictive/copyleft (GPL family).
///
/// This is informational — restrictive licenses are not inherently a quality
/// issue, but organizations need to know about them for compliance.
fn is_restrictive_license(expr: &str) -> bool {
    // Prefix families matched case-insensitively (input is uppercased).
    const RESTRICTIVE_PREFIXES: &[&str] = &[
        "GPL", "LGPL", "AGPL", "EUPL", "SSPL", "OSL", "CPAL", "CC-BY-SA", "CC-BY-NC",
    ];

    let normalized = expr.trim().to_uppercase();
    RESTRICTIVE_PREFIXES
        .iter()
        .any(|prefix| normalized.starts_with(prefix))
}
1506
1507#[cfg(test)]
1508mod tests {
1509    use super::*;
1510
    // --- Identifier-format validators --------------------------------------

    #[test]
    fn test_purl_validation() {
        assert!(is_valid_purl("pkg:npm/@scope/name@1.0.0"));
        assert!(is_valid_purl("pkg:maven/group/artifact@1.0"));
        assert!(!is_valid_purl("npm:something"));
        assert!(!is_valid_purl("invalid"));
    }

    #[test]
    fn test_cpe_validation() {
        // Accepts both CPE 2.3 formatted strings and legacy 2.2 URIs.
        assert!(is_valid_cpe("cpe:2.3:a:vendor:product:1.0:*:*:*:*:*:*:*"));
        assert!(is_valid_cpe("cpe:/a:vendor:product:1.0"));
        assert!(!is_valid_cpe("something:else"));
    }

    #[test]
    fn test_spdx_license_validation() {
        // Single well-known IDs and compound expressions both validate.
        assert!(is_valid_spdx_license("MIT"));
        assert!(is_valid_spdx_license("Apache-2.0"));
        assert!(is_valid_spdx_license("MIT AND Apache-2.0"));
        assert!(is_valid_spdx_license("GPL-2.0 OR MIT"));
    }
1533
    #[test]
    fn test_strong_hash_classification() {
        // SHA-256 / SHA3-256 / BLAKE3 count as strong; MD5, SHA-1, and
        // unknown algorithms do not.
        assert!(is_strong_hash(&HashAlgorithm::Sha256));
        assert!(is_strong_hash(&HashAlgorithm::Sha3_256));
        assert!(is_strong_hash(&HashAlgorithm::Blake3));
        assert!(!is_strong_hash(&HashAlgorithm::Md5));
        assert!(!is_strong_hash(&HashAlgorithm::Sha1));
        assert!(!is_strong_hash(&HashAlgorithm::Other("custom".to_string())));
    }

    #[test]
    fn test_deprecated_license_detection() {
        // Bare GPL-family IDs are deprecated; -only/-or-later forms are not.
        assert!(is_deprecated_spdx_license("GPL-2.0"));
        assert!(is_deprecated_spdx_license("LGPL-2.1"));
        assert!(is_deprecated_spdx_license("AGPL-3.0"));
        assert!(!is_deprecated_spdx_license("GPL-2.0-only"));
        assert!(!is_deprecated_spdx_license("MIT"));
        assert!(!is_deprecated_spdx_license("Apache-2.0"));
    }

    #[test]
    fn test_restrictive_license_detection() {
        // Copyleft families are flagged; permissive licenses are not.
        assert!(is_restrictive_license("GPL-3.0-only"));
        assert!(is_restrictive_license("LGPL-2.1-or-later"));
        assert!(is_restrictive_license("AGPL-3.0-only"));
        assert!(is_restrictive_license("EUPL-1.2"));
        assert!(is_restrictive_license("CC-BY-SA-4.0"));
        assert!(!is_restrictive_license("MIT"));
        assert!(!is_restrictive_license("Apache-2.0"));
        assert!(!is_restrictive_license("BSD-3-Clause"));
    }
1565
    #[test]
    fn test_hash_quality_score_no_components() {
        // An empty SBOM scores zero rather than dividing by zero.
        let metrics = HashQualityMetrics {
            components_with_any_hash: 0,
            components_with_strong_hash: 0,
            components_with_weak_only: 0,
            algorithm_distribution: BTreeMap::new(),
            total_hashes: 0,
        };
        assert_eq!(metrics.quality_score(0), 0.0);
    }

    #[test]
    fn test_hash_quality_score_all_strong() {
        // Full coverage with strong hashes only yields a perfect score.
        let metrics = HashQualityMetrics {
            components_with_any_hash: 10,
            components_with_strong_hash: 10,
            components_with_weak_only: 0,
            algorithm_distribution: BTreeMap::new(),
            total_hashes: 10,
        };
        assert_eq!(metrics.quality_score(10), 100.0);
    }

    #[test]
    fn test_hash_quality_score_weak_only_penalty() {
        // Weak-only hashes earn the coverage points but take a penalty.
        let metrics = HashQualityMetrics {
            components_with_any_hash: 10,
            components_with_strong_hash: 0,
            components_with_weak_only: 10,
            algorithm_distribution: BTreeMap::new(),
            total_hashes: 10,
        };
        // 60 (any) + 0 (strong) - 10 (weak penalty) = 50
        assert_eq!(metrics.quality_score(10), 50.0);
    }

    #[test]
    fn test_lifecycle_no_enrichment_returns_none() {
        // Without enrichment data the lifecycle score is N/A, not zero.
        let metrics = LifecycleMetrics {
            eol_components: 0,
            stale_components: 0,
            deprecated_components: 0,
            archived_components: 0,
            outdated_components: 0,
            enriched_components: 0,
            enrichment_coverage: 0.0,
        };
        assert!(!metrics.has_data());
        assert!(metrics.quality_score().is_none());
    }

    #[test]
    fn test_lifecycle_with_eol_penalty() {
        // Each EOL component costs 15 points.
        let metrics = LifecycleMetrics {
            eol_components: 2,
            stale_components: 0,
            deprecated_components: 0,
            archived_components: 0,
            outdated_components: 0,
            enriched_components: 10,
            enrichment_coverage: 100.0,
        };
        // 100 - 30 (2 * 15) = 70
        assert_eq!(metrics.quality_score(), Some(70.0));
    }
1632
    #[test]
    fn test_cycle_detection_no_cycles() {
        // Linear chain a -> b -> c has no back edges.
        let children: HashMap<&str, Vec<&str>> =
            HashMap::from([("a", vec!["b"]), ("b", vec!["c"])]);
        let all_nodes = vec!["a", "b", "c"];
        assert_eq!(detect_cycles(&all_nodes, &children), 0);
    }

    #[test]
    fn test_cycle_detection_with_cycle() {
        // a -> b -> c -> a closes exactly one cycle (one back edge).
        let children: HashMap<&str, Vec<&str>> =
            HashMap::from([("a", vec!["b"]), ("b", vec!["c"]), ("c", vec!["a"])]);
        let all_nodes = vec!["a", "b", "c"];
        assert_eq!(detect_cycles(&all_nodes, &children), 1);
    }

    #[test]
    fn test_depth_computation() {
        let children: HashMap<&str, Vec<&str>> =
            HashMap::from([("root", vec!["a", "b"]), ("a", vec!["c"])]);
        let roots = vec!["root"];
        let (max_d, avg_d) = compute_depth(&roots, &children);
        assert_eq!(max_d, Some(2)); // root -> a -> c
        assert!(avg_d.is_some());
    }

    #[test]
    fn test_depth_empty_roots() {
        // No roots means depth is undefined, not zero.
        let children: HashMap<&str, Vec<&str>> = HashMap::new();
        let roots: Vec<&str> = vec![];
        let (max_d, avg_d) = compute_depth(&roots, &children);
        assert_eq!(max_d, None);
        assert_eq!(avg_d, None);
    }
1667
    #[test]
    fn test_provenance_quality_score() {
        // Every check satisfied (including the CDX-only lifecycle phase)
        // yields a perfect score.
        let metrics = ProvenanceMetrics {
            has_tool_creator: true,
            has_tool_version: true,
            has_org_creator: true,
            has_contact_email: true,
            has_serial_number: true,
            has_document_name: true,
            timestamp_age_days: 10,
            is_fresh: true,
            has_primary_component: true,
            lifecycle_phase: Some("build".to_string()),
            completeness_declaration: CompletenessDeclaration::Complete,
            has_signature: true,
        };
        // All checks pass for CycloneDX
        assert_eq!(metrics.quality_score(true), 100.0);
    }

    #[test]
    fn test_provenance_score_without_cyclonedx() {
        // A missing lifecycle phase must not penalize non-CycloneDX documents.
        let metrics = ProvenanceMetrics {
            has_tool_creator: true,
            has_tool_version: true,
            has_org_creator: true,
            has_contact_email: true,
            has_serial_number: true,
            has_document_name: true,
            timestamp_age_days: 10,
            is_fresh: true,
            has_primary_component: true,
            lifecycle_phase: None,
            completeness_declaration: CompletenessDeclaration::Complete,
            has_signature: true,
        };
        // Lifecycle phase excluded for non-CDX
        assert_eq!(metrics.quality_score(false), 100.0);
    }
1707
    #[test]
    fn test_complexity_empty_graph() {
        // Zero components is maximally simple with all factors at zero.
        let (simplicity, level, factors) = compute_complexity(0, 0, 0, 0, 0, 0, 0);
        assert_eq!(simplicity, 100.0);
        assert_eq!(level, ComplexityLevel::Low);
        assert_eq!(factors.dependency_volume, 0.0);
    }

    #[test]
    fn test_complexity_single_node() {
        // 1 component, no edges, no cycles, 1 orphan, 1 island
        let (simplicity, level, _) = compute_complexity(0, 1, 0, 0, 0, 1, 1);
        assert!(
            simplicity >= 80.0,
            "Single node simplicity {simplicity} should be >= 80"
        );
        assert_eq!(level, ComplexityLevel::Low);
    }

    #[test]
    fn test_complexity_monotonic_edges() {
        // More edges should never increase simplicity
        let (s1, _, _) = compute_complexity(5, 10, 2, 3, 0, 1, 1);
        let (s2, _, _) = compute_complexity(20, 10, 2, 3, 0, 1, 1);
        assert!(
            s2 <= s1,
            "More edges should not increase simplicity: {s2} vs {s1}"
        );
    }

    #[test]
    fn test_complexity_monotonic_cycles() {
        // Adding cycles (5th argument) may only lower the index.
        let (s1, _, _) = compute_complexity(10, 10, 2, 3, 0, 1, 1);
        let (s2, _, _) = compute_complexity(10, 10, 2, 3, 3, 1, 1);
        assert!(
            s2 <= s1,
            "More cycles should not increase simplicity: {s2} vs {s1}"
        );
    }

    #[test]
    fn test_complexity_monotonic_depth() {
        // Deeper graphs (3rd argument) may only lower the index.
        let (s1, _, _) = compute_complexity(10, 10, 2, 3, 0, 1, 1);
        let (s2, _, _) = compute_complexity(10, 10, 10, 3, 0, 1, 1);
        assert!(
            s2 <= s1,
            "More depth should not increase simplicity: {s2} vs {s1}"
        );
    }

    #[test]
    fn test_complexity_graph_skipped() {
        // When graph_analysis_skipped, DependencyMetrics should have None complexity fields.
        // We test compute_complexity separately; the from_sbom integration handles the None case.
        let (simplicity, _, _) = compute_complexity(100, 50, 5, 10, 2, 5, 3);
        assert!(simplicity >= 0.0 && simplicity <= 100.0);
    }

    #[test]
    fn test_complexity_level_bands() {
        // Asserted band edges: 75+ Low, 50-74 Moderate, 25-49 High, <25 VeryHigh.
        assert_eq!(ComplexityLevel::from_score(100.0), ComplexityLevel::Low);
        assert_eq!(ComplexityLevel::from_score(75.0), ComplexityLevel::Low);
        assert_eq!(ComplexityLevel::from_score(74.0), ComplexityLevel::Moderate);
        assert_eq!(ComplexityLevel::from_score(50.0), ComplexityLevel::Moderate);
        assert_eq!(ComplexityLevel::from_score(49.0), ComplexityLevel::High);
        assert_eq!(ComplexityLevel::from_score(25.0), ComplexityLevel::High);
        assert_eq!(ComplexityLevel::from_score(24.0), ComplexityLevel::VeryHigh);
        assert_eq!(ComplexityLevel::from_score(0.0), ComplexityLevel::VeryHigh);
    }
1777
1778    #[test]
1779    fn test_completeness_declaration_display() {
1780        assert_eq!(CompletenessDeclaration::Complete.to_string(), "complete");
1781        assert_eq!(
1782            CompletenessDeclaration::IncompleteFirstPartyOnly.to_string(),
1783            "incomplete (first-party only)"
1784        );
1785        assert_eq!(CompletenessDeclaration::Unknown.to_string(), "unknown");
1786    }
1787}