Skip to main content

sbom_tools/matching/
scoring.rs

1//! Multi-field scoring and version divergence calculations.
2//!
3//! This module provides graduated version comparison and the
4//! `MultiFieldScoreResult` type used by multi-field weighted matching.
5
6use super::config;
7
8/// Compute version divergence score using semver distance.
9///
10/// Returns a graduated score based on how different the versions are:
11/// - Exact match: 1.0
12/// - Same major.minor: 0.8 - (`patch_diff` * 0.01), min 0.5
13/// - Same major: 0.5 - (`minor_diff` * `minor_penalty`), min 0.2
14/// - Different major: 0.3 - (`major_diff` * `major_penalty`), min 0.0
15#[must_use]
16pub fn compute_version_divergence_score(
17    version_a: &Option<String>,
18    version_b: &Option<String>,
19    weights: &config::MultiFieldWeights,
20) -> f64 {
21    match (version_a, version_b) {
22        (Some(va), Some(vb)) if va == vb => 1.0,
23        (None, None) => 0.5, // Both missing = neutral
24        (Some(va), Some(vb)) => {
25            // Parse semver components
26            let parts_a = parse_semver_parts(va);
27            let parts_b = parse_semver_parts(vb);
28
29            if let (Some((maj_a, min_a, patch_a)), Some((maj_b, min_b, patch_b))) =
30                (parts_a, parts_b)
31            {
32                if maj_a == maj_b && min_a == min_b {
33                    // Same major.minor - small penalty for patch difference
34                    let patch_diff =
35                        (i64::from(patch_a) - i64::from(patch_b)).unsigned_abs() as f64;
36                    patch_diff.mul_add(-0.01, 0.8).max(0.5)
37                } else if maj_a == maj_b {
38                    // Same major - moderate penalty for minor difference
39                    let minor_diff = (i64::from(min_a) - i64::from(min_b)).unsigned_abs() as f64;
40                    minor_diff
41                        .mul_add(-weights.version_minor_penalty, 0.5)
42                        .max(0.2)
43                } else {
44                    // Different major - larger penalty
45                    let major_diff = (i64::from(maj_a) - i64::from(maj_b)).unsigned_abs() as f64;
46                    major_diff
47                        .mul_add(-weights.version_major_penalty, 0.3)
48                        .max(0.0)
49                }
50            } else {
51                // Couldn't parse semver - fall back to string comparison
52                // Give partial credit if versions share a common prefix
53                let common_prefix_len = va
54                    .chars()
55                    .zip(vb.chars())
56                    .take_while(|(a, b)| a == b)
57                    .count();
58                let max_len = va.len().max(vb.len());
59                if max_len > 0 && common_prefix_len > 0 {
60                    (common_prefix_len as f64 / max_len as f64 * 0.5).min(0.4)
61                } else {
62                    0.1 // Different versions with no common prefix
63                }
64            }
65        }
66        _ => 0.0, // One missing
67    }
68}
69
/// Extract `(major, minor, patch)` from a version string.
///
/// Any run of leading `v`/`V` characters is dropped, then the remainder is
/// split on `.`, `-` and `+`. The first field must parse as a `u32`,
/// otherwise `None` is returned. The second and third fields become the
/// minor and patch numbers; each falls back to `0` when the field is absent
/// or is not a valid `u32`. Fields after the third are ignored.
#[must_use]
pub fn parse_semver_parts(version: &str) -> Option<(u32, u32, u32)> {
    let unprefixed = version.trim_start_matches(|c: char| c == 'v' || c == 'V');
    let mut fields = unprefixed.split(|c: char| matches!(c, '.' | '-' | '+'));

    // A version without a numeric leading component is not usable at all.
    let major = fields.next()?.parse::<u32>().ok()?;

    // Minor and patch are optional: a missing or non-numeric field counts as 0.
    let mut next_or_zero = || {
        fields
            .next()
            .and_then(|field| field.parse::<u32>().ok())
            .unwrap_or(0)
    };
    let minor = next_or_zero();
    let patch = next_or_zero();

    Some((major, minor, patch))
}
86
/// Outcome of multi-field scoring: the combined score plus one score per
/// compared field.
///
/// `Default` yields a result in which every score is `0.0`.
#[derive(Debug, Clone, Default)]
pub struct MultiFieldScoreResult {
    /// Overall weighted score (0.0 - 1.0).
    pub total: f64,
    /// Score for name similarity.
    pub name_score: f64,
    /// Score for the version comparison.
    pub version_score: f64,
    /// Score for the ecosystem comparison.
    pub ecosystem_score: f64,
    /// Score for license overlap (Jaccard).
    pub license_score: f64,
    /// Score for the supplier comparison.
    pub supplier_score: f64,
    /// Score for the group/namespace comparison.
    pub group_score: f64,
}

impl MultiFieldScoreResult {
    /// Render the total and every per-field score on a single line, each
    /// formatted with two decimal places.
    #[must_use]
    pub fn summary(&self) -> String {
        // Destructure exhaustively so that adding a field to the struct
        // forces this method to be revisited.
        let Self {
            total,
            name_score,
            version_score,
            ecosystem_score,
            license_score,
            supplier_score,
            group_score,
        } = self;

        format!(
            "Total: {:.2} (name: {:.2}, version: {:.2}, ecosystem: {:.2}, licenses: {:.2}, supplier: {:.2}, group: {:.2})",
            total,
            name_score,
            version_score,
            ecosystem_score,
            license_score,
            supplier_score,
            group_score
        )
    }
}
122
#[cfg(test)]
mod tests {
    use super::*;

    /// Wrap a literal in the owned `Option<String>` form the scorer accepts.
    fn ver(text: &str) -> Option<String> {
        Some(text.to_string())
    }

    // --- parse_semver_parts -------------------------------------------------

    #[test]
    fn test_parse_semver_basic() {
        let parsed = parse_semver_parts("1.2.3");
        assert_eq!(parsed, Some((1, 2, 3)));
    }

    #[test]
    fn test_parse_semver_with_prefix() {
        let parsed = parse_semver_parts("v1.2.3");
        assert_eq!(parsed, Some((1, 2, 3)));
    }

    #[test]
    fn test_parse_semver_major_only() {
        // Missing minor and patch components default to zero.
        let parsed = parse_semver_parts("3");
        assert_eq!(parsed, Some((3, 0, 0)));
    }

    #[test]
    fn test_parse_semver_invalid() {
        let parsed = parse_semver_parts("abc");
        assert_eq!(parsed, None);
    }

    // --- compute_version_divergence_score -----------------------------------

    #[test]
    fn test_version_divergence_exact() {
        let weights = config::MultiFieldWeights::default();
        let (left, right) = (ver("1.2.3"), ver("1.2.3"));
        let score = compute_version_divergence_score(&left, &right, &weights);
        assert_eq!(score, 1.0);
    }

    #[test]
    fn test_version_divergence_same_major_minor() {
        let weights = config::MultiFieldWeights::default();
        let (left, right) = (ver("1.2.3"), ver("1.2.5"));
        let score = compute_version_divergence_score(&left, &right, &weights);
        // A patch-level difference stays inside the same-major.minor band.
        assert!((0.5..=0.8).contains(&score));
    }

    #[test]
    fn test_version_divergence_none() {
        let weights = config::MultiFieldWeights::default();
        let score = compute_version_divergence_score(&None, &None, &weights);
        assert_eq!(score, 0.5);
    }

    // --- MultiFieldScoreResult ----------------------------------------------

    #[test]
    fn test_multi_field_score_result_summary() {
        let summary = MultiFieldScoreResult {
            total: 0.85,
            name_score: 0.9,
            version_score: 1.0,
            ecosystem_score: 1.0,
            license_score: 0.5,
            supplier_score: 0.0,
            group_score: 1.0,
        }
        .summary();
        assert!(summary.contains("0.85"));
    }
}