// sbom_tools/quality/metrics.rs

1//! Quality metrics for SBOM assessment.
2//!
3//! Provides detailed metrics for different aspects of SBOM quality.
4
5use std::collections::{BTreeMap, HashMap, HashSet};
6
7use crate::model::{
8    CompletenessDeclaration, CreatorType, EolStatus, ExternalRefType, HashAlgorithm,
9    NormalizedSbom, StalenessLevel,
10};
11use serde::{Deserialize, Serialize};
12
/// Overall completeness metrics for an SBOM
///
/// Percentage fields are on a 0–100 scale over `total_components`;
/// the boolean fields describe document-level metadata presence.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompletenessMetrics {
    /// Percentage of components with versions (0-100)
    pub components_with_version: f32,
    /// Percentage of components with PURLs (0-100)
    pub components_with_purl: f32,
    /// Percentage of components with CPEs (0-100)
    pub components_with_cpe: f32,
    /// Percentage of components with suppliers (0-100)
    pub components_with_supplier: f32,
    /// Percentage of components with hashes (0-100)
    pub components_with_hashes: f32,
    /// Percentage of components with licenses (declared or concluded) (0-100)
    pub components_with_licenses: f32,
    /// Percentage of components with descriptions (0-100)
    pub components_with_description: f32,
    /// Whether document has creator information
    pub has_creator_info: bool,
    /// Whether document has timestamp
    /// (`from_sbom` always sets this to `true`; `empty` leaves it `false`)
    pub has_timestamp: bool,
    /// Whether document has serial number/ID
    pub has_serial_number: bool,
    /// Total component count
    pub total_components: usize,
}
39
40impl CompletenessMetrics {
41    /// Calculate completeness metrics from an SBOM
42    #[must_use]
43    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
44        let total = sbom.components.len();
45        if total == 0 {
46            return Self::empty();
47        }
48
49        let mut with_version = 0;
50        let mut with_purl = 0;
51        let mut with_cpe = 0;
52        let mut with_supplier = 0;
53        let mut with_hashes = 0;
54        let mut with_licenses = 0;
55        let mut with_description = 0;
56
57        for comp in sbom.components.values() {
58            if comp.version.is_some() {
59                with_version += 1;
60            }
61            if comp.identifiers.purl.is_some() {
62                with_purl += 1;
63            }
64            if !comp.identifiers.cpe.is_empty() {
65                with_cpe += 1;
66            }
67            if comp.supplier.is_some() {
68                with_supplier += 1;
69            }
70            if !comp.hashes.is_empty() {
71                with_hashes += 1;
72            }
73            if !comp.licenses.declared.is_empty() || comp.licenses.concluded.is_some() {
74                with_licenses += 1;
75            }
76            if comp.description.is_some() {
77                with_description += 1;
78            }
79        }
80
81        let pct = |count: usize| (count as f32 / total as f32) * 100.0;
82
83        Self {
84            components_with_version: pct(with_version),
85            components_with_purl: pct(with_purl),
86            components_with_cpe: pct(with_cpe),
87            components_with_supplier: pct(with_supplier),
88            components_with_hashes: pct(with_hashes),
89            components_with_licenses: pct(with_licenses),
90            components_with_description: pct(with_description),
91            has_creator_info: !sbom.document.creators.is_empty(),
92            has_timestamp: true, // Always set in our model
93            has_serial_number: sbom.document.serial_number.is_some(),
94            total_components: total,
95        }
96    }
97
98    /// Create empty metrics
99    #[must_use]
100    pub const fn empty() -> Self {
101        Self {
102            components_with_version: 0.0,
103            components_with_purl: 0.0,
104            components_with_cpe: 0.0,
105            components_with_supplier: 0.0,
106            components_with_hashes: 0.0,
107            components_with_licenses: 0.0,
108            components_with_description: 0.0,
109            has_creator_info: false,
110            has_timestamp: false,
111            has_serial_number: false,
112            total_components: 0,
113        }
114    }
115
116    /// Calculate overall completeness score (0-100)
117    #[must_use]
118    pub fn overall_score(&self, weights: &CompletenessWeights) -> f32 {
119        let mut score = 0.0;
120        let mut total_weight = 0.0;
121
122        // Component field scores
123        score += self.components_with_version * weights.version;
124        total_weight += weights.version * 100.0;
125
126        score += self.components_with_purl * weights.purl;
127        total_weight += weights.purl * 100.0;
128
129        score += self.components_with_cpe * weights.cpe;
130        total_weight += weights.cpe * 100.0;
131
132        score += self.components_with_supplier * weights.supplier;
133        total_weight += weights.supplier * 100.0;
134
135        score += self.components_with_hashes * weights.hashes;
136        total_weight += weights.hashes * 100.0;
137
138        score += self.components_with_licenses * weights.licenses;
139        total_weight += weights.licenses * 100.0;
140
141        // Document metadata scores
142        if self.has_creator_info {
143            score += 100.0 * weights.creator_info;
144        }
145        total_weight += weights.creator_info * 100.0;
146
147        if self.has_serial_number {
148            score += 100.0 * weights.serial_number;
149        }
150        total_weight += weights.serial_number * 100.0;
151
152        if total_weight > 0.0 {
153            (score / total_weight) * 100.0
154        } else {
155            0.0
156        }
157    }
158}
159
/// Weights for completeness score calculation
///
/// Each weight multiplies the corresponding percentage (or 0/100 for the
/// boolean document fields) in `CompletenessMetrics::overall_score`.
/// A weight of 0.0 removes the field from the score entirely.
#[derive(Debug, Clone)]
pub struct CompletenessWeights {
    /// Weight for component version coverage
    pub version: f32,
    /// Weight for PURL coverage
    pub purl: f32,
    /// Weight for CPE coverage
    pub cpe: f32,
    /// Weight for supplier coverage
    pub supplier: f32,
    /// Weight for hash coverage
    pub hashes: f32,
    /// Weight for license coverage
    pub licenses: f32,
    /// Weight for document creator metadata
    pub creator_info: f32,
    /// Weight for document serial number
    pub serial_number: f32,
}
172
173impl Default for CompletenessWeights {
174    fn default() -> Self {
175        Self {
176            version: 1.0,
177            purl: 1.5, // Higher weight for PURL
178            cpe: 0.5,  // Lower weight, nice to have
179            supplier: 1.0,
180            hashes: 1.0,
181            licenses: 1.2, // Important for compliance
182            creator_info: 0.3,
183            serial_number: 0.2,
184        }
185    }
186}
187
188// ============================================================================
189// Hash quality metrics
190// ============================================================================
191
/// Hash/integrity quality metrics
///
/// All `components_*` fields are absolute counts (not percentages);
/// strong/weak classification follows `is_strong_hash`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HashQualityMetrics {
    /// Components with any hash
    pub components_with_any_hash: usize,
    /// Components with at least one strong hash (SHA-256+, SHA-3, BLAKE, Blake3)
    pub components_with_strong_hash: usize,
    /// Components with only weak hashes (MD5, SHA-1) and no strong backup
    pub components_with_weak_only: usize,
    /// Distribution of hash algorithms across all components
    /// (keyed by human-readable label, e.g. "SHA-256")
    pub algorithm_distribution: BTreeMap<String, usize>,
    /// Total hash entries across all components
    pub total_hashes: usize,
}
206
207impl HashQualityMetrics {
208    /// Calculate hash quality metrics from an SBOM
209    #[must_use]
210    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
211        let mut with_any = 0;
212        let mut with_strong = 0;
213        let mut with_weak_only = 0;
214        let mut distribution: BTreeMap<String, usize> = BTreeMap::new();
215        let mut total_hashes = 0;
216
217        for comp in sbom.components.values() {
218            if comp.hashes.is_empty() {
219                continue;
220            }
221            with_any += 1;
222            total_hashes += comp.hashes.len();
223
224            let mut has_strong = false;
225            let mut has_weak = false;
226
227            for hash in &comp.hashes {
228                let label = hash_algorithm_label(&hash.algorithm);
229                *distribution.entry(label).or_insert(0) += 1;
230
231                if is_strong_hash(&hash.algorithm) {
232                    has_strong = true;
233                } else {
234                    has_weak = true;
235                }
236            }
237
238            if has_strong {
239                with_strong += 1;
240            } else if has_weak {
241                with_weak_only += 1;
242            }
243        }
244
245        Self {
246            components_with_any_hash: with_any,
247            components_with_strong_hash: with_strong,
248            components_with_weak_only: with_weak_only,
249            algorithm_distribution: distribution,
250            total_hashes,
251        }
252    }
253
254    /// Calculate integrity quality score (0-100)
255    ///
256    /// Base 60% for any-hash coverage + 40% bonus for strong-hash coverage,
257    /// with a penalty for weak-only components.
258    #[must_use]
259    pub fn quality_score(&self, total_components: usize) -> f32 {
260        if total_components == 0 {
261            return 0.0;
262        }
263
264        let any_coverage = self.components_with_any_hash as f32 / total_components as f32;
265        let strong_coverage = self.components_with_strong_hash as f32 / total_components as f32;
266        let weak_only_ratio = self.components_with_weak_only as f32 / total_components as f32;
267
268        let base = any_coverage * 60.0;
269        let strong_bonus = strong_coverage * 40.0;
270        let weak_penalty = weak_only_ratio * 10.0;
271
272        (base + strong_bonus - weak_penalty).clamp(0.0, 100.0)
273    }
274}
275
/// Whether a hash algorithm is considered cryptographically strong
///
/// Strong = SHA-2 at 256 bits or more, the SHA-3 family, BLAKE2b/BLAKE3,
/// and GOST Streebog. MD5, SHA-1, and unrecognized (`Other`) algorithms
/// are treated as weak. Kept as an explicit allowlist so new enum
/// variants default to weak until deliberately promoted.
fn is_strong_hash(algo: &HashAlgorithm) -> bool {
    matches!(
        algo,
        HashAlgorithm::Sha256
            | HashAlgorithm::Sha384
            | HashAlgorithm::Sha512
            | HashAlgorithm::Sha3_256
            | HashAlgorithm::Sha3_384
            | HashAlgorithm::Sha3_512
            | HashAlgorithm::Blake2b256
            | HashAlgorithm::Blake2b384
            | HashAlgorithm::Blake2b512
            | HashAlgorithm::Blake3
            | HashAlgorithm::Streebog256
            | HashAlgorithm::Streebog512
    )
}
294
295/// Human-readable label for a hash algorithm
296fn hash_algorithm_label(algo: &HashAlgorithm) -> String {
297    match algo {
298        HashAlgorithm::Md5 => "MD5".to_string(),
299        HashAlgorithm::Sha1 => "SHA-1".to_string(),
300        HashAlgorithm::Sha256 => "SHA-256".to_string(),
301        HashAlgorithm::Sha384 => "SHA-384".to_string(),
302        HashAlgorithm::Sha512 => "SHA-512".to_string(),
303        HashAlgorithm::Sha3_256 => "SHA3-256".to_string(),
304        HashAlgorithm::Sha3_384 => "SHA3-384".to_string(),
305        HashAlgorithm::Sha3_512 => "SHA3-512".to_string(),
306        HashAlgorithm::Blake2b256 => "BLAKE2b-256".to_string(),
307        HashAlgorithm::Blake2b384 => "BLAKE2b-384".to_string(),
308        HashAlgorithm::Blake2b512 => "BLAKE2b-512".to_string(),
309        HashAlgorithm::Blake3 => "BLAKE3".to_string(),
310        HashAlgorithm::Streebog256 => "Streebog-256".to_string(),
311        HashAlgorithm::Streebog512 => "Streebog-512".to_string(),
312        HashAlgorithm::Other(s) => s.clone(),
313    }
314}
315
316// ============================================================================
317// Identifier quality metrics
318// ============================================================================
319
/// Identifier quality metrics
///
/// PURL counts are per component (at most one PURL each); CPE counts are
/// per CPE entry, since a component may carry several CPEs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IdentifierMetrics {
    /// Components with valid PURLs
    pub valid_purls: usize,
    /// Components with invalid/malformed PURLs
    pub invalid_purls: usize,
    /// Valid CPE entries (a single component may contribute several)
    pub valid_cpes: usize,
    /// Invalid/malformed CPE entries
    pub invalid_cpes: usize,
    /// Components with SWID tags
    pub with_swid: usize,
    /// Unique ecosystems identified (extracted from valid PURLs, sorted)
    pub ecosystems: Vec<String>,
    /// Components missing all identifiers (only name)
    pub missing_all_identifiers: usize,
}
338
339impl IdentifierMetrics {
340    /// Calculate identifier metrics from an SBOM
341    #[must_use]
342    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
343        let mut valid_purls = 0;
344        let mut invalid_purls = 0;
345        let mut valid_cpes = 0;
346        let mut invalid_cpes = 0;
347        let mut with_swid = 0;
348        let mut missing_all = 0;
349        let mut ecosystems = std::collections::HashSet::new();
350
351        for comp in sbom.components.values() {
352            let has_purl = comp.identifiers.purl.is_some();
353            let has_cpe = !comp.identifiers.cpe.is_empty();
354            let has_swid = comp.identifiers.swid.is_some();
355
356            if let Some(ref purl) = comp.identifiers.purl {
357                if is_valid_purl(purl) {
358                    valid_purls += 1;
359                    // Extract ecosystem from PURL
360                    if let Some(eco) = extract_ecosystem_from_purl(purl) {
361                        ecosystems.insert(eco);
362                    }
363                } else {
364                    invalid_purls += 1;
365                }
366            }
367
368            for cpe in &comp.identifiers.cpe {
369                if is_valid_cpe(cpe) {
370                    valid_cpes += 1;
371                } else {
372                    invalid_cpes += 1;
373                }
374            }
375
376            if has_swid {
377                with_swid += 1;
378            }
379
380            if !has_purl && !has_cpe && !has_swid {
381                missing_all += 1;
382            }
383        }
384
385        let mut ecosystem_list: Vec<String> = ecosystems.into_iter().collect();
386        ecosystem_list.sort();
387
388        Self {
389            valid_purls,
390            invalid_purls,
391            valid_cpes,
392            invalid_cpes,
393            with_swid,
394            ecosystems: ecosystem_list,
395            missing_all_identifiers: missing_all,
396        }
397    }
398
399    /// Calculate identifier quality score (0-100)
400    #[must_use]
401    pub fn quality_score(&self, total_components: usize) -> f32 {
402        if total_components == 0 {
403            return 0.0;
404        }
405
406        let with_valid_id = self.valid_purls + self.valid_cpes + self.with_swid;
407        let coverage =
408            (with_valid_id.min(total_components) as f32 / total_components as f32) * 100.0;
409
410        // Penalize invalid identifiers
411        let invalid_count = self.invalid_purls + self.invalid_cpes;
412        let penalty = (invalid_count as f32 / total_components as f32) * 20.0;
413
414        (coverage - penalty).clamp(0.0, 100.0)
415    }
416}
417
/// License quality metrics
///
/// `with_declared`/`with_concluded` count components; the classification
/// counters (`valid_spdx_expressions`, `noassertion_count`, …) count
/// individual declared-license entries, of which a component may have many.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LicenseMetrics {
    /// Components with declared licenses
    pub with_declared: usize,
    /// Components with concluded licenses
    pub with_concluded: usize,
    /// Declared license entries with valid SPDX expressions
    pub valid_spdx_expressions: usize,
    /// Declared license entries with non-standard license names
    pub non_standard_licenses: usize,
    /// Declared license entries equal to NOASSERTION
    pub noassertion_count: usize,
    /// Declared license entries using deprecated SPDX identifiers
    pub deprecated_licenses: usize,
    /// Declared license entries with restrictive/copyleft licenses (GPL family)
    pub restrictive_licenses: usize,
    /// Specific copyleft license identifiers found (sorted, deduplicated)
    pub copyleft_license_ids: Vec<String>,
    /// Unique licenses found (sorted)
    pub unique_licenses: Vec<String>,
}
440
441impl LicenseMetrics {
442    /// Calculate license metrics from an SBOM
443    #[must_use]
444    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
445        let mut with_declared = 0;
446        let mut with_concluded = 0;
447        let mut valid_spdx = 0;
448        let mut non_standard = 0;
449        let mut noassertion = 0;
450        let mut deprecated = 0;
451        let mut restrictive = 0;
452        let mut licenses = HashSet::new();
453        let mut copyleft_ids = HashSet::new();
454
455        for comp in sbom.components.values() {
456            if !comp.licenses.declared.is_empty() {
457                with_declared += 1;
458                for lic in &comp.licenses.declared {
459                    let expr = &lic.expression;
460                    licenses.insert(expr.clone());
461
462                    if expr == "NOASSERTION" {
463                        noassertion += 1;
464                    } else if is_valid_spdx_license(expr) {
465                        valid_spdx += 1;
466                    } else {
467                        non_standard += 1;
468                    }
469
470                    if is_deprecated_spdx_license(expr) {
471                        deprecated += 1;
472                    }
473                    if is_restrictive_license(expr) {
474                        restrictive += 1;
475                        copyleft_ids.insert(expr.clone());
476                    }
477                }
478            }
479
480            if comp.licenses.concluded.is_some() {
481                with_concluded += 1;
482            }
483        }
484
485        let mut license_list: Vec<String> = licenses.into_iter().collect();
486        license_list.sort();
487
488        let mut copyleft_list: Vec<String> = copyleft_ids.into_iter().collect();
489        copyleft_list.sort();
490
491        Self {
492            with_declared,
493            with_concluded,
494            valid_spdx_expressions: valid_spdx,
495            non_standard_licenses: non_standard,
496            noassertion_count: noassertion,
497            deprecated_licenses: deprecated,
498            restrictive_licenses: restrictive,
499            copyleft_license_ids: copyleft_list,
500            unique_licenses: license_list,
501        }
502    }
503
504    /// Calculate license quality score (0-100)
505    #[must_use]
506    pub fn quality_score(&self, total_components: usize) -> f32 {
507        if total_components == 0 {
508            return 0.0;
509        }
510
511        let coverage = (self.with_declared as f32 / total_components as f32) * 60.0;
512
513        // Bonus for SPDX compliance
514        let spdx_ratio = if self.with_declared > 0 {
515            self.valid_spdx_expressions as f32 / self.with_declared as f32
516        } else {
517            0.0
518        };
519        let spdx_bonus = spdx_ratio * 30.0;
520
521        // Penalty for NOASSERTION
522        let noassertion_penalty =
523            (self.noassertion_count as f32 / total_components.max(1) as f32) * 10.0;
524
525        // Penalty for deprecated licenses (2 points each, capped)
526        let deprecated_penalty = (self.deprecated_licenses as f32 * 2.0).min(10.0);
527
528        (coverage + spdx_bonus - noassertion_penalty - deprecated_penalty).clamp(0.0, 100.0)
529    }
530}
531
/// Vulnerability information quality metrics
///
/// `components_with_vulns` and `with_vex_status` count components; the
/// `with_cvss`/`with_cwe`/`with_remediation` counters are per reported
/// vulnerability.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnerabilityMetrics {
    /// Components with vulnerability information
    pub components_with_vulns: usize,
    /// Total vulnerabilities reported
    pub total_vulnerabilities: usize,
    /// Vulnerabilities with CVSS scores
    pub with_cvss: usize,
    /// Vulnerabilities with CWE information
    pub with_cwe: usize,
    /// Vulnerabilities with remediation info
    pub with_remediation: usize,
    /// Components with VEX status (on the component or any of its findings)
    pub with_vex_status: usize,
}
548
549impl VulnerabilityMetrics {
550    /// Calculate vulnerability metrics from an SBOM
551    #[must_use]
552    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
553        let mut components_with_vulns = 0;
554        let mut total_vulns = 0;
555        let mut with_cvss = 0;
556        let mut with_cwe = 0;
557        let mut with_remediation = 0;
558        let mut with_vex = 0;
559
560        for comp in sbom.components.values() {
561            if !comp.vulnerabilities.is_empty() {
562                components_with_vulns += 1;
563            }
564
565            for vuln in &comp.vulnerabilities {
566                total_vulns += 1;
567
568                if !vuln.cvss.is_empty() {
569                    with_cvss += 1;
570                }
571                if !vuln.cwes.is_empty() {
572                    with_cwe += 1;
573                }
574                if vuln.remediation.is_some() {
575                    with_remediation += 1;
576                }
577            }
578
579            if comp.vex_status.is_some()
580                || comp.vulnerabilities.iter().any(|v| v.vex_status.is_some())
581            {
582                with_vex += 1;
583            }
584        }
585
586        Self {
587            components_with_vulns,
588            total_vulnerabilities: total_vulns,
589            with_cvss,
590            with_cwe,
591            with_remediation,
592            with_vex_status: with_vex,
593        }
594    }
595
596    /// Calculate vulnerability documentation quality score (0-100)
597    ///
598    /// Returns `None` when no vulnerability data exists, signaling that this
599    /// category should be excluded from the weighted score (N/A-aware).
600    /// This prevents inflating the overall score when vulnerability assessment
601    /// was not performed.
602    #[must_use]
603    pub fn documentation_score(&self) -> Option<f32> {
604        if self.total_vulnerabilities == 0 {
605            return None; // No vulnerability data — treat as N/A
606        }
607
608        let cvss_ratio = self.with_cvss as f32 / self.total_vulnerabilities as f32;
609        let cwe_ratio = self.with_cwe as f32 / self.total_vulnerabilities as f32;
610        let remediation_ratio = self.with_remediation as f32 / self.total_vulnerabilities as f32;
611
612        Some(
613            remediation_ratio
614                .mul_add(30.0, cvss_ratio.mul_add(40.0, cwe_ratio * 30.0))
615                .min(100.0),
616        )
617    }
618}
619
620// ============================================================================
621// Dependency graph quality metrics
622// ============================================================================
623
/// Maximum edge count before skipping expensive graph analysis
/// (depth BFS, cycle detection, island counting). When exceeded,
/// `DependencyMetrics::from_sbom` still reports the cheap counters.
const MAX_EDGES_FOR_GRAPH_ANALYSIS: usize = 50_000;
626
627// ============================================================================
628// Software complexity index
629// ============================================================================
630
/// Complexity level bands for the software complexity index
///
/// Bands are expressed in terms of the simplicity score (0–100, higher
/// is simpler); see `from_score` for the mapping.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum ComplexityLevel {
    /// Simplicity 75–100 (raw complexity 0–0.25)
    Low,
    /// Simplicity 50–74 (raw complexity 0.26–0.50)
    Moderate,
    /// Simplicity 25–49 (raw complexity 0.51–0.75)
    High,
    /// Simplicity 0–24 (raw complexity 0.76–1.00)
    VeryHigh,
}
644
645impl ComplexityLevel {
646    /// Determine complexity level from a simplicity score (0–100)
647    #[must_use]
648    pub const fn from_score(simplicity: f32) -> Self {
649        match simplicity as u32 {
650            75..=100 => Self::Low,
651            50..=74 => Self::Moderate,
652            25..=49 => Self::High,
653            _ => Self::VeryHigh,
654        }
655    }
656
657    /// Human-readable label
658    #[must_use]
659    pub const fn label(&self) -> &'static str {
660        match self {
661            Self::Low => "Low",
662            Self::Moderate => "Moderate",
663            Self::High => "High",
664            Self::VeryHigh => "Very High",
665        }
666    }
667}
668
669impl std::fmt::Display for ComplexityLevel {
670    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
671        f.write_str(self.label())
672    }
673}
674
/// Breakdown of the five factors that compose the software complexity index.
/// Each factor is normalized to 0.0–1.0 where higher = more complex.
/// (Formulas below describe the intended computation; the producer is
/// `compute_complexity`.)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityFactors {
    /// Log-scaled edge density: `min(1.0, ln(1 + edges/components) / ln(20))`
    pub dependency_volume: f32,
    /// Depth ratio: `min(1.0, max_depth / 15.0)`
    pub normalized_depth: f32,
    /// Hub dominance: `min(1.0, max_out_degree / max(components * 0.25, 4))`
    pub fanout_concentration: f32,
    /// Cycle density: `min(1.0, cycle_count / max(1, components * 0.05))`
    pub cycle_ratio: f32,
    /// Extra disconnected subgraphs: `(islands - 1) / max(1, components - 1)`
    pub fragmentation: f32,
}
690
/// Dependency graph quality metrics
///
/// When `graph_analysis_skipped` is `true` (edge count above
/// `MAX_EDGES_FOR_GRAPH_ANALYSIS`), the depth and complexity fields are
/// `None`, and `cycle_count`/`island_count` are reported as 0 rather
/// than computed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DependencyMetrics {
    /// Total dependency relationships
    pub total_dependencies: usize,
    /// Components with at least one dependency
    pub components_with_deps: usize,
    /// Maximum dependency depth (computed via BFS from roots)
    pub max_depth: Option<usize>,
    /// Average dependency depth across all reachable components
    pub avg_depth: Option<f32>,
    /// Orphan components (no incoming or outgoing deps)
    pub orphan_components: usize,
    /// Root components (no incoming deps, but has outgoing)
    pub root_components: usize,
    /// Number of dependency cycles detected
    pub cycle_count: usize,
    /// Number of disconnected subgraphs (islands)
    pub island_count: usize,
    /// Whether graph analysis was skipped due to size
    pub graph_analysis_skipped: bool,
    /// Maximum out-degree (most dependencies from a single component)
    pub max_out_degree: usize,
    /// Software complexity index (0–100, higher = simpler). `None` when graph analysis skipped.
    pub software_complexity_index: Option<f32>,
    /// Complexity level band. `None` when graph analysis skipped.
    pub complexity_level: Option<ComplexityLevel>,
    /// Factor breakdown. `None` when graph analysis skipped.
    pub complexity_factors: Option<ComplexityFactors>,
}
721
722impl DependencyMetrics {
723    /// Calculate dependency metrics from an SBOM
724    #[must_use]
725    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
726        use crate::model::CanonicalId;
727
728        let total_deps = sbom.edges.len();
729
730        // Build adjacency lists using CanonicalId.value() for string keys
731        let mut children: HashMap<&str, Vec<&str>> = HashMap::new();
732        let mut has_outgoing: HashSet<&str> = HashSet::new();
733        let mut has_incoming: HashSet<&str> = HashSet::new();
734
735        for edge in &sbom.edges {
736            children
737                .entry(edge.from.value())
738                .or_default()
739                .push(edge.to.value());
740            has_outgoing.insert(edge.from.value());
741            has_incoming.insert(edge.to.value());
742        }
743
744        let all_ids: Vec<&str> = sbom.components.keys().map(CanonicalId::value).collect();
745
746        let orphans = all_ids
747            .iter()
748            .filter(|c| !has_outgoing.contains(*c) && !has_incoming.contains(*c))
749            .count();
750
751        let roots: Vec<&str> = has_outgoing
752            .iter()
753            .filter(|c| !has_incoming.contains(*c))
754            .copied()
755            .collect();
756        let root_count = roots.len();
757
758        // Compute max out-degree (single pass over adjacency, O(V))
759        let max_out_degree = children.values().map(Vec::len).max().unwrap_or(0);
760
761        // Skip expensive graph analysis for very large graphs
762        if total_deps > MAX_EDGES_FOR_GRAPH_ANALYSIS {
763            return Self {
764                total_dependencies: total_deps,
765                components_with_deps: has_outgoing.len(),
766                max_depth: None,
767                avg_depth: None,
768                orphan_components: orphans,
769                root_components: root_count,
770                cycle_count: 0,
771                island_count: 0,
772                graph_analysis_skipped: true,
773                max_out_degree,
774                software_complexity_index: None,
775                complexity_level: None,
776                complexity_factors: None,
777            };
778        }
779
780        // BFS from roots to compute depth
781        let (max_depth, avg_depth) = compute_depth(&roots, &children);
782
783        // DFS cycle detection
784        let cycle_count = detect_cycles(&all_ids, &children);
785
786        // Union-Find for island/subgraph detection
787        let island_count = count_islands(&all_ids, &sbom.edges);
788
789        // Compute software complexity index
790        let component_count = all_ids.len();
791        let (complexity_index, complexity_lvl, factors) = compute_complexity(
792            total_deps,
793            component_count,
794            max_depth.unwrap_or(0),
795            max_out_degree,
796            cycle_count,
797            orphans,
798            island_count,
799        );
800
801        Self {
802            total_dependencies: total_deps,
803            components_with_deps: has_outgoing.len(),
804            max_depth,
805            avg_depth,
806            orphan_components: orphans,
807            root_components: root_count,
808            cycle_count,
809            island_count,
810            graph_analysis_skipped: false,
811            max_out_degree,
812            software_complexity_index: Some(complexity_index),
813            complexity_level: Some(complexity_lvl),
814            complexity_factors: Some(factors),
815        }
816    }
817
818    /// Calculate dependency graph quality score (0-100)
819    #[must_use]
820    pub fn quality_score(&self, total_components: usize) -> f32 {
821        if total_components == 0 {
822            return 0.0;
823        }
824
825        // Score based on how many components have dependency info
826        let coverage = if total_components > 1 {
827            (self.components_with_deps as f32 / (total_components - 1) as f32) * 100.0
828        } else {
829            100.0 // Single component SBOM
830        };
831
832        // Slight penalty for orphan components
833        let orphan_ratio = self.orphan_components as f32 / total_components as f32;
834        let orphan_penalty = orphan_ratio * 10.0;
835
836        // Penalty for cycles (5 points each, capped at 20)
837        let cycle_penalty = (self.cycle_count as f32 * 5.0).min(20.0);
838
839        // Penalty for excessive islands (>3 in multi-component SBOMs)
840        let island_penalty = if total_components > 5 && self.island_count > 3 {
841            ((self.island_count - 3) as f32 * 3.0).min(15.0)
842        } else {
843            0.0
844        };
845
846        (coverage - orphan_penalty - cycle_penalty - island_penalty).clamp(0.0, 100.0)
847    }
848}
849
/// Breadth-first traversal from the root set, yielding the maximum and
/// mean depth over every node reachable from at least one root.
///
/// Returns `(None, None)` when there are no roots (e.g. an empty or fully
/// cyclic graph); nodes unreachable from all roots are not counted.
fn compute_depth(
    roots: &[&str],
    children: &HashMap<&str, Vec<&str>>,
) -> (Option<usize>, Option<f32>) {
    use std::collections::VecDeque;

    if roots.is_empty() {
        return (None, None);
    }

    // Seed the frontier with each distinct root at depth 0.
    let mut seen: HashSet<&str> = HashSet::new();
    let mut frontier: VecDeque<(&str, usize)> = roots
        .iter()
        .filter(|r| seen.insert(**r))
        .map(|&r| (r, 0))
        .collect();

    let mut deepest: usize = 0;
    let mut depth_sum: usize = 0;
    let mut visited_count: usize = 0;

    while let Some((node, depth)) = frontier.pop_front() {
        deepest = deepest.max(depth);
        depth_sum += depth;
        visited_count += 1;

        // Enqueue unvisited children one level deeper.
        for &child in children.get(node).into_iter().flatten() {
            if seen.insert(child) {
                frontier.push_back((child, depth + 1));
            }
        }
    }

    let mean = if visited_count > 0 {
        Some(depth_sum as f32 / visited_count as f32)
    } else {
        None
    };

    (Some(deepest), mean)
}
895
/// DFS-based cycle detection (white/gray/black coloring).
///
/// Counts back edges: every edge that reaches a GRAY (on the current DFS
/// path) node is one cycle. Uses an explicit stack instead of recursion so
/// that very deep dependency chains cannot overflow the call stack — large
/// graphs are exactly the case this module must survive.
fn detect_cycles(all_nodes: &[&str], children: &HashMap<&str, Vec<&str>>) -> usize {
    const WHITE: u8 = 0; // not yet visited
    const GRAY: u8 = 1; // on the current DFS path
    const BLACK: u8 = 2; // fully explored

    let mut color: HashMap<&str, u8> =
        all_nodes.iter().map(|&n| (n, WHITE)).collect();
    let mut cycles = 0;

    for &start in all_nodes {
        if color.get(start).copied().unwrap_or(WHITE) != WHITE {
            continue;
        }

        // Each frame is (node, index of the next child to examine).
        let mut stack: Vec<(&str, usize)> = vec![(start, 0)];
        color.insert(start, GRAY);

        while let Some(frame) = stack.last_mut() {
            let (node, next_idx) = (frame.0, frame.1);
            let kids: &[&str] = children.get(node).map_or(&[], |v| v.as_slice());

            if next_idx < kids.len() {
                frame.1 += 1;
                let kid = kids[next_idx];
                match color.get(kid).copied().unwrap_or(WHITE) {
                    GRAY => cycles += 1, // back edge = cycle
                    WHITE => {
                        color.insert(kid, GRAY);
                        stack.push((kid, 0));
                    }
                    _ => {} // BLACK: already fully explored
                }
            } else {
                // All children done: retire this node from the path.
                color.insert(node, BLACK);
                stack.pop();
            }
        }
    }

    cycles
}
938
939/// Union-Find to count disconnected subgraphs (islands)
940fn count_islands(all_nodes: &[&str], edges: &[crate::model::DependencyEdge]) -> usize {
941    if all_nodes.is_empty() {
942        return 0;
943    }
944
945    // Map node IDs to indices
946    let node_idx: HashMap<&str, usize> =
947        all_nodes.iter().enumerate().map(|(i, &n)| (n, i)).collect();
948
949    let mut parent: Vec<usize> = (0..all_nodes.len()).collect();
950    let mut rank: Vec<u8> = vec![0; all_nodes.len()];
951
952    fn find(parent: &mut Vec<usize>, x: usize) -> usize {
953        if parent[x] != x {
954            parent[x] = find(parent, parent[x]); // path compression
955        }
956        parent[x]
957    }
958
959    fn union(parent: &mut Vec<usize>, rank: &mut [u8], a: usize, b: usize) {
960        let ra = find(parent, a);
961        let rb = find(parent, b);
962        if ra != rb {
963            if rank[ra] < rank[rb] {
964                parent[ra] = rb;
965            } else if rank[ra] > rank[rb] {
966                parent[rb] = ra;
967            } else {
968                parent[rb] = ra;
969                rank[ra] += 1;
970            }
971        }
972    }
973
974    for edge in edges {
975        if let (Some(&a), Some(&b)) = (
976            node_idx.get(edge.from.value()),
977            node_idx.get(edge.to.value()),
978        ) {
979            union(&mut parent, &mut rank, a, b);
980        }
981    }
982
983    // Count unique roots
984    let mut roots = HashSet::new();
985    for i in 0..all_nodes.len() {
986        roots.insert(find(&mut parent, i));
987    }
988
989    roots.len()
990}
991
992/// Compute the software complexity index and factor breakdown.
993///
994/// Returns `(simplicity_index, complexity_level, factors)`.
995/// `simplicity_index` is 0–100 where 100 = simplest.
996fn compute_complexity(
997    edges: usize,
998    components: usize,
999    max_depth: usize,
1000    max_out_degree: usize,
1001    cycle_count: usize,
1002    _orphans: usize,
1003    islands: usize,
1004) -> (f32, ComplexityLevel, ComplexityFactors) {
1005    if components == 0 {
1006        let factors = ComplexityFactors {
1007            dependency_volume: 0.0,
1008            normalized_depth: 0.0,
1009            fanout_concentration: 0.0,
1010            cycle_ratio: 0.0,
1011            fragmentation: 0.0,
1012        };
1013        return (100.0, ComplexityLevel::Low, factors);
1014    }
1015
1016    // Factor 1: dependency volume — log-scaled edge density
1017    let edge_ratio = edges as f64 / components as f64;
1018    let dependency_volume = ((1.0 + edge_ratio).ln() / 20.0_f64.ln()).min(1.0) as f32;
1019
1020    // Factor 2: normalized depth
1021    let normalized_depth = (max_depth as f32 / 15.0).min(1.0);
1022
1023    // Factor 3: fanout concentration — hub dominance
1024    // Floor of 4.0 prevents small graphs from being penalized for max_out_degree of 1
1025    let fanout_denom = (components as f32 * 0.25).max(4.0);
1026    let fanout_concentration = (max_out_degree as f32 / fanout_denom).min(1.0);
1027
1028    // Factor 4: cycle ratio
1029    let cycle_threshold = (components as f32 * 0.05).max(1.0);
1030    let cycle_ratio = (cycle_count as f32 / cycle_threshold).min(1.0);
1031
1032    // Factor 5: fragmentation — extra disconnected subgraphs beyond the ideal of 1
1033    // Uses (islands - 1) because orphans are already counted as individual islands.
1034    let extra_islands = islands.saturating_sub(1);
1035    let fragmentation = if components > 1 {
1036        (extra_islands as f32 / (components - 1) as f32).min(1.0)
1037    } else {
1038        0.0
1039    };
1040
1041    let factors = ComplexityFactors {
1042        dependency_volume,
1043        normalized_depth,
1044        fanout_concentration,
1045        cycle_ratio,
1046        fragmentation,
1047    };
1048
1049    let raw_complexity = 0.30 * dependency_volume
1050        + 0.20 * normalized_depth
1051        + 0.20 * fanout_concentration
1052        + 0.20 * cycle_ratio
1053        + 0.10 * fragmentation;
1054
1055    let simplicity_index = (100.0 - raw_complexity * 100.0).clamp(0.0, 100.0);
1056    let level = ComplexityLevel::from_score(simplicity_index);
1057
1058    (simplicity_index, level, factors)
1059}
1060
1061// ============================================================================
1062// Provenance metrics
1063// ============================================================================
1064
/// Document provenance and authorship quality metrics
///
/// Answers "who produced this SBOM, when, and how well does the document
/// identify itself?". Populated by [`ProvenanceMetrics::from_sbom`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProvenanceMetrics {
    /// Whether the SBOM was created by an identified tool
    pub has_tool_creator: bool,
    /// Whether the tool creator includes version information
    /// (heuristic: the tool's name contains a space, `/`, or `@`)
    pub has_tool_version: bool,
    /// Whether an organization is identified as creator
    pub has_org_creator: bool,
    /// Whether any creator has a contact email
    pub has_contact_email: bool,
    /// Whether the document has a serial number / namespace
    pub has_serial_number: bool,
    /// Whether the document has a name
    pub has_document_name: bool,
    /// Age of the SBOM in days since its creation timestamp
    /// (clamped to 0 when the timestamp lies in the future)
    pub timestamp_age_days: u32,
    /// Whether the SBOM is considered fresh
    /// (younger than `FRESHNESS_THRESHOLD_DAYS` = 90 days)
    pub is_fresh: bool,
    /// Whether a primary/described component is identified
    pub has_primary_component: bool,
    /// SBOM lifecycle phase (from CycloneDX 1.5+ metadata)
    pub lifecycle_phase: Option<String>,
    /// Self-declared completeness level of the SBOM
    pub completeness_declaration: CompletenessDeclaration,
    /// Whether the SBOM has a digital signature
    pub has_signature: bool,
    /// Whether the SBOM has data provenance citations (CycloneDX 1.7+)
    pub has_citations: bool,
    /// Number of data provenance citations
    pub citations_count: usize,
}
1097
/// Freshness threshold in days: SBOMs at least this old are not `is_fresh`.
const FRESHNESS_THRESHOLD_DAYS: u32 = 90;
1100
1101impl ProvenanceMetrics {
1102    /// Calculate provenance metrics from an SBOM
1103    #[must_use]
1104    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
1105        let doc = &sbom.document;
1106
1107        let has_tool_creator = doc
1108            .creators
1109            .iter()
1110            .any(|c| c.creator_type == CreatorType::Tool);
1111        let has_tool_version = doc.creators.iter().any(|c| {
1112            c.creator_type == CreatorType::Tool
1113                && (c.name.contains(' ') || c.name.contains('/') || c.name.contains('@'))
1114        });
1115        let has_org_creator = doc
1116            .creators
1117            .iter()
1118            .any(|c| c.creator_type == CreatorType::Organization);
1119        let has_contact_email = doc.creators.iter().any(|c| c.email.is_some());
1120
1121        let age_days = (chrono::Utc::now() - doc.created).num_days().max(0) as u32;
1122
1123        Self {
1124            has_tool_creator,
1125            has_tool_version,
1126            has_org_creator,
1127            has_contact_email,
1128            has_serial_number: doc.serial_number.is_some(),
1129            has_document_name: doc.name.is_some(),
1130            timestamp_age_days: age_days,
1131            is_fresh: age_days < FRESHNESS_THRESHOLD_DAYS,
1132            has_primary_component: sbom.primary_component_id.is_some(),
1133            lifecycle_phase: doc.lifecycle_phase.clone(),
1134            completeness_declaration: doc.completeness_declaration.clone(),
1135            has_signature: doc.signature.is_some(),
1136            has_citations: doc.citations_count > 0,
1137            citations_count: doc.citations_count,
1138        }
1139    }
1140
1141    /// Calculate provenance quality score (0-100)
1142    ///
1143    /// Weighted checklist: tool creator (15%), tool version (5%), org creator (12%),
1144    /// contact email (8%), serial number (8%), document name (5%), freshness (12%),
1145    /// primary component (12%), completeness declaration (8%), signature (5%),
1146    /// lifecycle phase (10% CDX-only).
1147    #[must_use]
1148    pub fn quality_score(&self, is_cyclonedx: bool) -> f32 {
1149        let mut score = 0.0;
1150        let mut total_weight = 0.0;
1151
1152        let completeness_declared =
1153            self.completeness_declaration != CompletenessDeclaration::Unknown;
1154
1155        let checks: &[(bool, f32)] = &[
1156            (self.has_tool_creator, 15.0),
1157            (self.has_tool_version, 5.0),
1158            (self.has_org_creator, 12.0),
1159            (self.has_contact_email, 8.0),
1160            (self.has_serial_number, 8.0),
1161            (self.has_document_name, 5.0),
1162            (self.is_fresh, 12.0),
1163            (self.has_primary_component, 12.0),
1164            (completeness_declared, 8.0),
1165            (self.has_signature, 5.0),
1166        ];
1167
1168        for &(present, weight) in checks {
1169            if present {
1170                score += weight;
1171            }
1172            total_weight += weight;
1173        }
1174
1175        // Lifecycle phase: only applicable for CycloneDX 1.5+
1176        if is_cyclonedx {
1177            let weight = 10.0;
1178            if self.lifecycle_phase.is_some() {
1179                score += weight;
1180            }
1181            total_weight += weight;
1182
1183            // Data provenance citations bonus (CycloneDX 1.7+)
1184            let citations_weight = 5.0;
1185            if self.has_citations {
1186                score += citations_weight;
1187            }
1188            total_weight += citations_weight;
1189        }
1190
1191        if total_weight > 0.0 {
1192            (score / total_weight) * 100.0
1193        } else {
1194            0.0
1195        }
1196    }
1197}
1198
1199// ============================================================================
1200// Auditability metrics
1201// ============================================================================
1202
/// External reference and auditability quality metrics
///
/// Component counts are absolute (not percentages); each component is
/// counted at most once per category regardless of how many matching
/// external references it carries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditabilityMetrics {
    /// Components with VCS (version control) references
    pub components_with_vcs: usize,
    /// Components with website references
    pub components_with_website: usize,
    /// Components with security advisory references
    pub components_with_advisories: usize,
    /// Components with at least one external reference of any type
    pub components_with_any_external_ref: usize,
    /// Whether the document declares a security contact
    pub has_security_contact: bool,
    /// Whether the document declares a vulnerability disclosure URL
    pub has_vuln_disclosure_url: bool,
}
1219
1220impl AuditabilityMetrics {
1221    /// Calculate auditability metrics from an SBOM
1222    #[must_use]
1223    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
1224        let mut with_vcs = 0;
1225        let mut with_website = 0;
1226        let mut with_advisories = 0;
1227        let mut with_any = 0;
1228
1229        for comp in sbom.components.values() {
1230            if comp.external_refs.is_empty() {
1231                continue;
1232            }
1233            with_any += 1;
1234
1235            let has_vcs = comp
1236                .external_refs
1237                .iter()
1238                .any(|r| r.ref_type == ExternalRefType::Vcs);
1239            let has_website = comp
1240                .external_refs
1241                .iter()
1242                .any(|r| r.ref_type == ExternalRefType::Website);
1243            let has_advisories = comp
1244                .external_refs
1245                .iter()
1246                .any(|r| r.ref_type == ExternalRefType::Advisories);
1247
1248            if has_vcs {
1249                with_vcs += 1;
1250            }
1251            if has_website {
1252                with_website += 1;
1253            }
1254            if has_advisories {
1255                with_advisories += 1;
1256            }
1257        }
1258
1259        Self {
1260            components_with_vcs: with_vcs,
1261            components_with_website: with_website,
1262            components_with_advisories: with_advisories,
1263            components_with_any_external_ref: with_any,
1264            has_security_contact: sbom.document.security_contact.is_some(),
1265            has_vuln_disclosure_url: sbom.document.vulnerability_disclosure_url.is_some(),
1266        }
1267    }
1268
1269    /// Calculate auditability quality score (0-100)
1270    ///
1271    /// Component-level coverage (60%) + document-level security metadata (40%).
1272    #[must_use]
1273    pub fn quality_score(&self, total_components: usize) -> f32 {
1274        if total_components == 0 {
1275            return 0.0;
1276        }
1277
1278        // Component-level: external ref coverage
1279        let ref_coverage =
1280            (self.components_with_any_external_ref as f32 / total_components as f32) * 40.0;
1281        let vcs_coverage = (self.components_with_vcs as f32 / total_components as f32) * 20.0;
1282
1283        // Document-level security metadata
1284        let security_contact_score = if self.has_security_contact { 20.0 } else { 0.0 };
1285        let disclosure_score = if self.has_vuln_disclosure_url {
1286            20.0
1287        } else {
1288            0.0
1289        };
1290
1291        (ref_coverage + vcs_coverage + security_contact_score + disclosure_score).min(100.0)
1292    }
1293}
1294
1295// ============================================================================
1296// Lifecycle metrics
1297// ============================================================================
1298
/// Component lifecycle quality metrics (requires enrichment data)
///
/// All counts are derived from per-component `eol` / `staleness`
/// enrichment; without enrichment the counts stay zero and the lifecycle
/// score is treated as N/A.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LifecycleMetrics {
    /// Components that have reached end-of-life
    pub eol_components: usize,
    /// Components classified as stale (no updates for 1+ years) or abandoned
    pub stale_components: usize,
    /// Components explicitly marked as deprecated
    pub deprecated_components: usize,
    /// Components with archived repositories
    pub archived_components: usize,
    /// Components with a newer version available
    /// (approximated as: staleness data records a `latest_version`)
    pub outdated_components: usize,
    /// Components that had lifecycle enrichment data (eol or staleness)
    pub enriched_components: usize,
    /// Enrichment coverage percentage (0-100)
    pub enrichment_coverage: f32,
}
1317
1318impl LifecycleMetrics {
1319    /// Calculate lifecycle metrics from an SBOM
1320    ///
1321    /// These metrics are only meaningful after enrichment. When
1322    /// `enrichment_coverage == 0`, the lifecycle score should be
1323    /// treated as N/A and excluded from the weighted total.
1324    #[must_use]
1325    pub fn from_sbom(sbom: &NormalizedSbom) -> Self {
1326        let total = sbom.components.len();
1327        let mut eol = 0;
1328        let mut stale = 0;
1329        let mut deprecated = 0;
1330        let mut archived = 0;
1331        let mut outdated = 0;
1332        let mut enriched = 0;
1333
1334        for comp in sbom.components.values() {
1335            let has_lifecycle_data = comp.eol.is_some() || comp.staleness.is_some();
1336            if has_lifecycle_data {
1337                enriched += 1;
1338            }
1339
1340            if let Some(ref eol_info) = comp.eol
1341                && eol_info.status == EolStatus::EndOfLife
1342            {
1343                eol += 1;
1344            }
1345
1346            if let Some(ref stale_info) = comp.staleness {
1347                match stale_info.level {
1348                    StalenessLevel::Stale | StalenessLevel::Abandoned => stale += 1,
1349                    StalenessLevel::Deprecated => deprecated += 1,
1350                    StalenessLevel::Archived => archived += 1,
1351                    _ => {}
1352                }
1353                if stale_info.is_deprecated {
1354                    deprecated += 1;
1355                }
1356                if stale_info.is_archived {
1357                    archived += 1;
1358                }
1359                if stale_info.latest_version.is_some() {
1360                    outdated += 1;
1361                }
1362            }
1363        }
1364
1365        let coverage = if total > 0 {
1366            (enriched as f32 / total as f32) * 100.0
1367        } else {
1368            0.0
1369        };
1370
1371        Self {
1372            eol_components: eol,
1373            stale_components: stale,
1374            deprecated_components: deprecated,
1375            archived_components: archived,
1376            outdated_components: outdated,
1377            enriched_components: enriched,
1378            enrichment_coverage: coverage,
1379        }
1380    }
1381
1382    /// Whether enrichment data is available for scoring
1383    #[must_use]
1384    pub fn has_data(&self) -> bool {
1385        self.enriched_components > 0
1386    }
1387
1388    /// Calculate lifecycle quality score (0-100)
1389    ///
1390    /// Starts at 100, subtracts penalties for problematic components.
1391    /// Returns `None` if no enrichment data is available.
1392    #[must_use]
1393    pub fn quality_score(&self) -> Option<f32> {
1394        if !self.has_data() {
1395            return None;
1396        }
1397
1398        let mut score = 100.0_f32;
1399
1400        // EOL: severe penalty (15 points each, capped at 60)
1401        score -= (self.eol_components as f32 * 15.0).min(60.0);
1402        // Stale: moderate penalty (5 points each, capped at 30)
1403        score -= (self.stale_components as f32 * 5.0).min(30.0);
1404        // Deprecated/archived: moderate penalty (3 points each, capped at 20)
1405        score -= ((self.deprecated_components + self.archived_components) as f32 * 3.0).min(20.0);
1406        // Outdated: mild penalty (1 point each, capped at 10)
1407        score -= (self.outdated_components as f32 * 1.0).min(10.0);
1408
1409        Some(score.clamp(0.0, 100.0))
1410    }
1411}
1412
1413// ============================================================================
1414// Helper functions
1415// ============================================================================
1416
// Loose PURL shape check: must carry the `pkg:` scheme and at least one
// `/` separating the type from the remaining coordinates.
fn is_valid_purl(purl: &str) -> bool {
    purl.strip_prefix("pkg:").map_or(false, |rest| rest.contains('/'))
}
1421
// Extract the package type (ecosystem) from `pkg:type/...`, or `None` when
// the string is not shaped like a PURL.
fn extract_ecosystem_from_purl(purl: &str) -> Option<String> {
    let coords = purl.strip_prefix("pkg:")?;
    let type_end = coords.find('/')?;
    Some(coords[..type_end].to_string())
}
1431
// Basic CPE shape check: accepts CPE 2.3 formatted strings and legacy
// CPE 2.2 URI bindings.
fn is_valid_cpe(cpe: &str) -> bool {
    ["cpe:2.3:", "cpe:/"].iter().any(|prefix| cpe.starts_with(prefix))
}
1436
// Heuristic SPDX check: accept a known common identifier, or anything that
// looks like a compound SPDX expression (operands are not validated).
fn is_valid_spdx_license(expr: &str) -> bool {
    // Common SPDX license identifiers (not the full SPDX list).
    const COMMON_SPDX: &[&str] = &[
        "MIT",
        "Apache-2.0",
        "GPL-2.0",
        "GPL-3.0",
        "BSD-2-Clause",
        "BSD-3-Clause",
        "ISC",
        "MPL-2.0",
        "LGPL-2.1",
        "LGPL-3.0",
        "AGPL-3.0",
        "Unlicense",
        "CC0-1.0",
        "0BSD",
        "EPL-2.0",
        "CDDL-1.0",
        "Artistic-2.0",
        "GPL-2.0-only",
        "GPL-2.0-or-later",
        "GPL-3.0-only",
        "GPL-3.0-or-later",
        "LGPL-2.1-only",
        "LGPL-2.1-or-later",
        "LGPL-3.0-only",
        "LGPL-3.0-or-later",
    ];

    let trimmed = expr.trim();

    let looks_like_expression = [" AND ", " OR ", " WITH "]
        .iter()
        .any(|op| trimmed.contains(op));

    looks_like_expression || COMMON_SPDX.iter().any(|&id| id == trimmed)
}
1474
/// Whether a license identifier is on the SPDX deprecated list.
///
/// These are license IDs that SPDX has deprecated in favor of more specific
/// identifiers (e.g., `GPL-2.0` → `GPL-2.0-only` or `GPL-2.0-or-later`).
fn is_deprecated_spdx_license(expr: &str) -> bool {
    const DEPRECATED: &[&str] = &[
        "GPL-2.0",
        "GPL-2.0+",
        "GPL-3.0",
        "GPL-3.0+",
        "LGPL-2.0",
        "LGPL-2.0+",
        "LGPL-2.1",
        "LGPL-2.1+",
        "LGPL-3.0",
        "LGPL-3.0+",
        "AGPL-1.0",
        "AGPL-3.0",
        "GFDL-1.1",
        "GFDL-1.2",
        "GFDL-1.3",
        "BSD-2-Clause-FreeBSD",
        "BSD-2-Clause-NetBSD",
        "eCos-2.0",
        "Nunit",
        "StandardML-NJ",
        "wxWindows",
    ];

    let candidate = expr.trim();
    DEPRECATED.iter().any(|&id| id == candidate)
}
1506
/// Whether a license is considered restrictive/copyleft (GPL family).
///
/// This is informational — restrictive licenses are not inherently a quality
/// issue, but organizations need to know about them for compliance.
fn is_restrictive_license(expr: &str) -> bool {
    // Prefix match, case-insensitive via uppercasing the candidate.
    const RESTRICTIVE_PREFIXES: &[&str] = &[
        "GPL", "LGPL", "AGPL", "EUPL", "SSPL", "OSL", "CPAL", "CC-BY-SA", "CC-BY-NC",
    ];

    let normalized = expr.trim().to_uppercase();
    RESTRICTIVE_PREFIXES
        .iter()
        .any(|prefix| normalized.starts_with(prefix))
}
1523
1524#[cfg(test)]
1525mod tests {
1526    use super::*;
1527
1528    #[test]
1529    fn test_purl_validation() {
1530        assert!(is_valid_purl("pkg:npm/@scope/name@1.0.0"));
1531        assert!(is_valid_purl("pkg:maven/group/artifact@1.0"));
1532        assert!(!is_valid_purl("npm:something"));
1533        assert!(!is_valid_purl("invalid"));
1534    }
1535
1536    #[test]
1537    fn test_cpe_validation() {
1538        assert!(is_valid_cpe("cpe:2.3:a:vendor:product:1.0:*:*:*:*:*:*:*"));
1539        assert!(is_valid_cpe("cpe:/a:vendor:product:1.0"));
1540        assert!(!is_valid_cpe("something:else"));
1541    }
1542
1543    #[test]
1544    fn test_spdx_license_validation() {
1545        assert!(is_valid_spdx_license("MIT"));
1546        assert!(is_valid_spdx_license("Apache-2.0"));
1547        assert!(is_valid_spdx_license("MIT AND Apache-2.0"));
1548        assert!(is_valid_spdx_license("GPL-2.0 OR MIT"));
1549    }
1550
1551    #[test]
1552    fn test_strong_hash_classification() {
1553        assert!(is_strong_hash(&HashAlgorithm::Sha256));
1554        assert!(is_strong_hash(&HashAlgorithm::Sha3_256));
1555        assert!(is_strong_hash(&HashAlgorithm::Blake3));
1556        assert!(!is_strong_hash(&HashAlgorithm::Md5));
1557        assert!(!is_strong_hash(&HashAlgorithm::Sha1));
1558        assert!(!is_strong_hash(&HashAlgorithm::Other("custom".to_string())));
1559    }
1560
1561    #[test]
1562    fn test_deprecated_license_detection() {
1563        assert!(is_deprecated_spdx_license("GPL-2.0"));
1564        assert!(is_deprecated_spdx_license("LGPL-2.1"));
1565        assert!(is_deprecated_spdx_license("AGPL-3.0"));
1566        assert!(!is_deprecated_spdx_license("GPL-2.0-only"));
1567        assert!(!is_deprecated_spdx_license("MIT"));
1568        assert!(!is_deprecated_spdx_license("Apache-2.0"));
1569    }
1570
1571    #[test]
1572    fn test_restrictive_license_detection() {
1573        assert!(is_restrictive_license("GPL-3.0-only"));
1574        assert!(is_restrictive_license("LGPL-2.1-or-later"));
1575        assert!(is_restrictive_license("AGPL-3.0-only"));
1576        assert!(is_restrictive_license("EUPL-1.2"));
1577        assert!(is_restrictive_license("CC-BY-SA-4.0"));
1578        assert!(!is_restrictive_license("MIT"));
1579        assert!(!is_restrictive_license("Apache-2.0"));
1580        assert!(!is_restrictive_license("BSD-3-Clause"));
1581    }
1582
1583    #[test]
1584    fn test_hash_quality_score_no_components() {
1585        let metrics = HashQualityMetrics {
1586            components_with_any_hash: 0,
1587            components_with_strong_hash: 0,
1588            components_with_weak_only: 0,
1589            algorithm_distribution: BTreeMap::new(),
1590            total_hashes: 0,
1591        };
1592        assert_eq!(metrics.quality_score(0), 0.0);
1593    }
1594
1595    #[test]
1596    fn test_hash_quality_score_all_strong() {
1597        let metrics = HashQualityMetrics {
1598            components_with_any_hash: 10,
1599            components_with_strong_hash: 10,
1600            components_with_weak_only: 0,
1601            algorithm_distribution: BTreeMap::new(),
1602            total_hashes: 10,
1603        };
1604        assert_eq!(metrics.quality_score(10), 100.0);
1605    }
1606
1607    #[test]
1608    fn test_hash_quality_score_weak_only_penalty() {
1609        let metrics = HashQualityMetrics {
1610            components_with_any_hash: 10,
1611            components_with_strong_hash: 0,
1612            components_with_weak_only: 10,
1613            algorithm_distribution: BTreeMap::new(),
1614            total_hashes: 10,
1615        };
1616        // 60 (any) + 0 (strong) - 10 (weak penalty) = 50
1617        assert_eq!(metrics.quality_score(10), 50.0);
1618    }
1619
1620    #[test]
1621    fn test_lifecycle_no_enrichment_returns_none() {
1622        let metrics = LifecycleMetrics {
1623            eol_components: 0,
1624            stale_components: 0,
1625            deprecated_components: 0,
1626            archived_components: 0,
1627            outdated_components: 0,
1628            enriched_components: 0,
1629            enrichment_coverage: 0.0,
1630        };
1631        assert!(!metrics.has_data());
1632        assert!(metrics.quality_score().is_none());
1633    }
1634
1635    #[test]
1636    fn test_lifecycle_with_eol_penalty() {
1637        let metrics = LifecycleMetrics {
1638            eol_components: 2,
1639            stale_components: 0,
1640            deprecated_components: 0,
1641            archived_components: 0,
1642            outdated_components: 0,
1643            enriched_components: 10,
1644            enrichment_coverage: 100.0,
1645        };
1646        // 100 - 30 (2 * 15) = 70
1647        assert_eq!(metrics.quality_score(), Some(70.0));
1648    }
1649
1650    #[test]
1651    fn test_cycle_detection_no_cycles() {
1652        let children: HashMap<&str, Vec<&str>> =
1653            HashMap::from([("a", vec!["b"]), ("b", vec!["c"])]);
1654        let all_nodes = vec!["a", "b", "c"];
1655        assert_eq!(detect_cycles(&all_nodes, &children), 0);
1656    }
1657
1658    #[test]
1659    fn test_cycle_detection_with_cycle() {
1660        let children: HashMap<&str, Vec<&str>> =
1661            HashMap::from([("a", vec!["b"]), ("b", vec!["c"]), ("c", vec!["a"])]);
1662        let all_nodes = vec!["a", "b", "c"];
1663        assert_eq!(detect_cycles(&all_nodes, &children), 1);
1664    }
1665
1666    #[test]
1667    fn test_depth_computation() {
1668        let children: HashMap<&str, Vec<&str>> =
1669            HashMap::from([("root", vec!["a", "b"]), ("a", vec!["c"])]);
1670        let roots = vec!["root"];
1671        let (max_d, avg_d) = compute_depth(&roots, &children);
1672        assert_eq!(max_d, Some(2)); // root -> a -> c
1673        assert!(avg_d.is_some());
1674    }
1675
1676    #[test]
1677    fn test_depth_empty_roots() {
1678        let children: HashMap<&str, Vec<&str>> = HashMap::new();
1679        let roots: Vec<&str> = vec![];
1680        let (max_d, avg_d) = compute_depth(&roots, &children);
1681        assert_eq!(max_d, None);
1682        assert_eq!(avg_d, None);
1683    }
1684
1685    #[test]
1686    fn test_provenance_quality_score() {
1687        let metrics = ProvenanceMetrics {
1688            has_tool_creator: true,
1689            has_tool_version: true,
1690            has_org_creator: true,
1691            has_contact_email: true,
1692            has_serial_number: true,
1693            has_document_name: true,
1694            timestamp_age_days: 10,
1695            is_fresh: true,
1696            has_primary_component: true,
1697            lifecycle_phase: Some("build".to_string()),
1698            completeness_declaration: CompletenessDeclaration::Complete,
1699            has_signature: true,
1700            has_citations: true,
1701            citations_count: 3,
1702        };
1703        // All checks pass for CycloneDX
1704        assert_eq!(metrics.quality_score(true), 100.0);
1705    }
1706
1707    #[test]
1708    fn test_provenance_score_without_cyclonedx() {
1709        let metrics = ProvenanceMetrics {
1710            has_tool_creator: true,
1711            has_tool_version: true,
1712            has_org_creator: true,
1713            has_contact_email: true,
1714            has_serial_number: true,
1715            has_document_name: true,
1716            timestamp_age_days: 10,
1717            is_fresh: true,
1718            has_primary_component: true,
1719            lifecycle_phase: None,
1720            completeness_declaration: CompletenessDeclaration::Complete,
1721            has_signature: true,
1722            has_citations: false,
1723            citations_count: 0,
1724        };
1725        // Lifecycle phase and citations excluded for non-CDX
1726        assert_eq!(metrics.quality_score(false), 100.0);
1727    }
1728
1729    #[test]
1730    fn test_complexity_empty_graph() {
1731        let (simplicity, level, factors) = compute_complexity(0, 0, 0, 0, 0, 0, 0);
1732        assert_eq!(simplicity, 100.0);
1733        assert_eq!(level, ComplexityLevel::Low);
1734        assert_eq!(factors.dependency_volume, 0.0);
1735    }
1736
1737    #[test]
1738    fn test_complexity_single_node() {
1739        // 1 component, no edges, no cycles, 1 orphan, 1 island
1740        let (simplicity, level, _) = compute_complexity(0, 1, 0, 0, 0, 1, 1);
1741        assert!(
1742            simplicity >= 80.0,
1743            "Single node simplicity {simplicity} should be >= 80"
1744        );
1745        assert_eq!(level, ComplexityLevel::Low);
1746    }
1747
1748    #[test]
1749    fn test_complexity_monotonic_edges() {
1750        // More edges should never increase simplicity
1751        let (s1, _, _) = compute_complexity(5, 10, 2, 3, 0, 1, 1);
1752        let (s2, _, _) = compute_complexity(20, 10, 2, 3, 0, 1, 1);
1753        assert!(
1754            s2 <= s1,
1755            "More edges should not increase simplicity: {s2} vs {s1}"
1756        );
1757    }
1758
1759    #[test]
1760    fn test_complexity_monotonic_cycles() {
1761        let (s1, _, _) = compute_complexity(10, 10, 2, 3, 0, 1, 1);
1762        let (s2, _, _) = compute_complexity(10, 10, 2, 3, 3, 1, 1);
1763        assert!(
1764            s2 <= s1,
1765            "More cycles should not increase simplicity: {s2} vs {s1}"
1766        );
1767    }
1768
1769    #[test]
1770    fn test_complexity_monotonic_depth() {
1771        let (s1, _, _) = compute_complexity(10, 10, 2, 3, 0, 1, 1);
1772        let (s2, _, _) = compute_complexity(10, 10, 10, 3, 0, 1, 1);
1773        assert!(
1774            s2 <= s1,
1775            "More depth should not increase simplicity: {s2} vs {s1}"
1776        );
1777    }
1778
1779    #[test]
1780    fn test_complexity_graph_skipped() {
1781        // When graph_analysis_skipped, DependencyMetrics should have None complexity fields.
1782        // We test compute_complexity separately; the from_sbom integration handles the None case.
1783        let (simplicity, _, _) = compute_complexity(100, 50, 5, 10, 2, 5, 3);
1784        assert!(simplicity >= 0.0 && simplicity <= 100.0);
1785    }
1786
1787    #[test]
1788    fn test_complexity_level_bands() {
1789        assert_eq!(ComplexityLevel::from_score(100.0), ComplexityLevel::Low);
1790        assert_eq!(ComplexityLevel::from_score(75.0), ComplexityLevel::Low);
1791        assert_eq!(ComplexityLevel::from_score(74.0), ComplexityLevel::Moderate);
1792        assert_eq!(ComplexityLevel::from_score(50.0), ComplexityLevel::Moderate);
1793        assert_eq!(ComplexityLevel::from_score(49.0), ComplexityLevel::High);
1794        assert_eq!(ComplexityLevel::from_score(25.0), ComplexityLevel::High);
1795        assert_eq!(ComplexityLevel::from_score(24.0), ComplexityLevel::VeryHigh);
1796        assert_eq!(ComplexityLevel::from_score(0.0), ComplexityLevel::VeryHigh);
1797    }
1798
1799    #[test]
1800    fn test_completeness_declaration_display() {
1801        assert_eq!(CompletenessDeclaration::Complete.to_string(), "complete");
1802        assert_eq!(
1803            CompletenessDeclaration::IncompleteFirstPartyOnly.to_string(),
1804            "incomplete (first-party only)"
1805        );
1806        assert_eq!(CompletenessDeclaration::Unknown.to_string(), "unknown");
1807    }
1808}