Skip to main content

sbom_tools/matching/
scoring.rs

//! Multi-field scoring and version divergence calculations.
//!
//! This module provides graduated version comparison and the
//! `MultiFieldScoreResult` type used by multi-field weighted matching.
5
6use super::config;
7
8/// Compute version divergence score using semver distance.
9///
10/// Returns a graduated score based on how different the versions are:
11/// - Exact match: 1.0
12/// - Same major.minor: 0.8 - (`patch_diff` * 0.01), min 0.5
13/// - Same major: 0.5 - (`minor_diff` * `minor_penalty`), min 0.2
14/// - Different major: 0.3 - (`major_diff` * `major_penalty`), min 0.0
15#[must_use] 
16pub fn compute_version_divergence_score(
17    version_a: &Option<String>,
18    version_b: &Option<String>,
19    weights: &config::MultiFieldWeights,
20) -> f64 {
21    match (version_a, version_b) {
22        (Some(va), Some(vb)) if va == vb => 1.0,
23        (None, None) => 0.5, // Both missing = neutral
24        (Some(va), Some(vb)) => {
25            // Parse semver components
26            let parts_a = parse_semver_parts(va);
27            let parts_b = parse_semver_parts(vb);
28
29            if let (Some((maj_a, min_a, patch_a)), Some((maj_b, min_b, patch_b))) = (parts_a, parts_b) {
30                if maj_a == maj_b && min_a == min_b {
31                    // Same major.minor - small penalty for patch difference
32                    let patch_diff = (i64::from(patch_a) - i64::from(patch_b)).unsigned_abs() as f64;
33                    patch_diff.mul_add(-0.01, 0.8).max(0.5)
34                } else if maj_a == maj_b {
35                    // Same major - moderate penalty for minor difference
36                    let minor_diff = (i64::from(min_a) - i64::from(min_b)).unsigned_abs() as f64;
37                    minor_diff.mul_add(-weights.version_minor_penalty, 0.5).max(0.2)
38                } else {
39                    // Different major - larger penalty
40                    let major_diff = (i64::from(maj_a) - i64::from(maj_b)).unsigned_abs() as f64;
41                    major_diff.mul_add(-weights.version_major_penalty, 0.3).max(0.0)
42                }
43            } else {
44                // Couldn't parse semver - fall back to string comparison
45                // Give partial credit if versions share a common prefix
46                let common_prefix_len = va
47                    .chars()
48                    .zip(vb.chars())
49                    .take_while(|(a, b)| a == b)
50                    .count();
51                let max_len = va.len().max(vb.len());
52                if max_len > 0 && common_prefix_len > 0 {
53                    (common_prefix_len as f64 / max_len as f64 * 0.5).min(0.4)
54                } else {
55                    0.1 // Different versions with no common prefix
56                }
57            }
58        }
59        _ => 0.0, // One missing
60    }
61}
62
/// Parse a version string into semver components (major, minor, patch).
///
/// Leading `v`/`V` characters are stripped, and everything from the first
/// `-` (pre-release) or `+` (build metadata) onward is ignored, so numeric
/// identifiers inside those suffixes are never mistaken for the minor or
/// patch component (`"1.0+5"` is `(1, 0, 0)`, not `(1, 0, 5)`).
///
/// A missing or non-numeric minor/patch component is treated as 0.
/// Returns `None` if the major component is missing or is not a valid `u32`.
#[must_use]
pub fn parse_semver_parts(version: &str) -> Option<(u32, u32, u32)> {
    // Strip common prefixes like 'v' or 'V'
    let version = version.trim_start_matches(['v', 'V']);

    // Keep only the numeric core that precedes any pre-release / build suffix.
    // `split` always yields at least one item, so the fallback is never taken.
    let core = version.split(['-', '+']).next().unwrap_or(version);

    // Split the core on '.' and read up to three numeric components.
    let mut parts = core.split('.');
    let major: u32 = parts.next()?.parse().ok()?;
    let mut next_or_zero = || {
        parts
            .next()
            .and_then(|piece| piece.parse::<u32>().ok())
            .unwrap_or(0)
    };
    let minor = next_or_zero();
    let patch = next_or_zero();

    Some((major, minor, patch))
}
79
/// Outcome of a multi-field comparison: the combined score plus the
/// individual per-field scores it was built from.
#[derive(Debug, Clone, Default)]
pub struct MultiFieldScoreResult {
    /// Combined weighted score (0.0 - 1.0)
    pub total: f64,
    /// Score for name similarity
    pub name_score: f64,
    /// Score for the version match
    pub version_score: f64,
    /// Score for the ecosystem match
    pub ecosystem_score: f64,
    /// Score for license overlap (Jaccard)
    pub license_score: f64,
    /// Score for the supplier match
    pub supplier_score: f64,
    /// Score for the group/namespace match
    pub group_score: f64,
}

impl MultiFieldScoreResult {
    /// Render the total and every per-field score, each with two decimals,
    /// as a single human-readable line.
    #[must_use]
    pub fn summary(&self) -> String {
        // Destructure exhaustively so a newly added field cannot be silently
        // left out of the summary.
        let Self {
            total,
            name_score,
            version_score,
            ecosystem_score,
            license_score,
            supplier_score,
            group_score,
        } = self;

        format!(
            "Total: {:.2} (name: {:.2}, version: {:.2}, ecosystem: {:.2}, licenses: {:.2}, supplier: {:.2}, group: {:.2})",
            total,
            name_score,
            version_score,
            ecosystem_score,
            license_score,
            supplier_score,
            group_score
        )
    }
}
115
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned optional version, matching the scorer's parameter type.
    fn ver(raw: &str) -> Option<String> {
        Some(raw.to_string())
    }

    // --- parse_semver_parts ---

    #[test]
    fn test_parse_semver_basic() {
        let parsed = parse_semver_parts("1.2.3");
        assert_eq!(parsed, Some((1, 2, 3)));
    }

    #[test]
    fn test_parse_semver_with_prefix() {
        let parsed = parse_semver_parts("v1.2.3");
        assert_eq!(parsed, Some((1, 2, 3)));
    }

    #[test]
    fn test_parse_semver_major_only() {
        let parsed = parse_semver_parts("3");
        assert_eq!(parsed, Some((3, 0, 0)));
    }

    #[test]
    fn test_parse_semver_invalid() {
        let parsed = parse_semver_parts("abc");
        assert_eq!(parsed, None);
    }

    // --- compute_version_divergence_score ---

    #[test]
    fn test_version_divergence_exact() {
        let weights = config::MultiFieldWeights::default();
        let score = compute_version_divergence_score(&ver("1.2.3"), &ver("1.2.3"), &weights);
        assert_eq!(score, 1.0);
    }

    #[test]
    fn test_version_divergence_same_major_minor() {
        let weights = config::MultiFieldWeights::default();
        let score = compute_version_divergence_score(&ver("1.2.3"), &ver("1.2.5"), &weights);
        // Only the patch differs, so the score must stay inside the patch band.
        assert!(score >= 0.5 && score <= 0.8);
    }

    #[test]
    fn test_version_divergence_none() {
        let weights = config::MultiFieldWeights::default();
        let score = compute_version_divergence_score(&None, &None, &weights);
        assert_eq!(score, 0.5);
    }

    // --- MultiFieldScoreResult ---

    #[test]
    fn test_multi_field_score_result_summary() {
        let summary = MultiFieldScoreResult {
            total: 0.85,
            name_score: 0.9,
            version_score: 1.0,
            ecosystem_score: 1.0,
            license_score: 0.5,
            supplier_score: 0.0,
            group_score: 1.0,
        }
        .summary();
        assert!(summary.contains("0.85"));
    }
}