Skip to main content

sbom_tools/matching/
scoring.rs

//! Multi-field scoring and version divergence calculations.
//!
//! This module provides graduated version comparison, lenient semver parsing,
//! and the `MultiFieldScoreResult` type used by multi-field weighted matching.
5
6use super::config;
7
8/// Compute version divergence score using semver distance.
9///
10/// Returns a graduated score based on how different the versions are:
11/// - Exact match: 1.0
12/// - Same major.minor: 0.8 - (`patch_diff` * 0.01), min 0.5
13/// - Same major: 0.5 - (`minor_diff` * `minor_penalty`), min 0.2
14/// - Different major: 0.3 - (`major_diff` * `major_penalty`), min 0.0
15#[must_use]
16pub fn compute_version_divergence_score(
17    version_a: &Option<String>,
18    version_b: &Option<String>,
19    weights: &config::MultiFieldWeights,
20) -> f64 {
21    match (version_a, version_b) {
22        (Some(va), Some(vb)) if va == vb => 1.0,
23        (None, None) => 0.5, // Both missing = neutral
24        (Some(va), Some(vb)) => {
25            // Parse semver components
26            let parts_a = parse_semver_parts(va);
27            let parts_b = parse_semver_parts(vb);
28
29            if let (Some(ref pa), Some(ref pb)) = (parts_a, parts_b) {
30                let (maj_a, min_a, patch_a) = pa.triple();
31                let (maj_b, min_b, patch_b) = pb.triple();
32
33                let score = if maj_a == maj_b && min_a == min_b {
34                    // Same major.minor - small penalty for patch difference
35                    let patch_diff =
36                        (i64::from(patch_a) - i64::from(patch_b)).unsigned_abs() as f64;
37                    patch_diff.mul_add(-0.01, 0.8).max(0.5)
38                } else if maj_a == maj_b {
39                    // Same major - moderate penalty for minor difference
40                    let minor_diff = (i64::from(min_a) - i64::from(min_b)).unsigned_abs() as f64;
41                    minor_diff
42                        .mul_add(-weights.version_minor_penalty, 0.5)
43                        .max(0.2)
44                } else {
45                    // Different major - larger penalty
46                    let major_diff = (i64::from(maj_a) - i64::from(maj_b)).unsigned_abs() as f64;
47                    major_diff
48                        .mul_add(-weights.version_major_penalty, 0.3)
49                        .max(0.0)
50                };
51
52                // Apply pre-release adjustment
53                let pre_release_penalty = match (&pa.pre_release, &pb.pre_release) {
54                    (None, None) => 0.0,
55                    (Some(a), Some(b)) if a == b => 0.0,
56                    (Some(_), Some(_)) => 0.05,
57                    (None, Some(_)) | (Some(_), None) => 0.15,
58                };
59                (score - pre_release_penalty).max(0.0)
60            } else {
61                // Couldn't parse semver - fall back to string comparison
62                // Give partial credit if versions share a common prefix
63                let common_prefix_len = va
64                    .chars()
65                    .zip(vb.chars())
66                    .take_while(|(a, b)| a == b)
67                    .count();
68                let max_len = va.len().max(vb.len());
69                if max_len > 0 && common_prefix_len > 0 {
70                    (common_prefix_len as f64 / max_len as f64 * 0.5).min(0.4)
71                } else {
72                    0.1 // Different versions with no common prefix
73                }
74            }
75        }
76        _ => 0.0, // One missing
77    }
78}
79
/// Parsed semver components with optional pre-release tag.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SemverParts {
    /// Major version component.
    pub major: u32,
    /// Minor version component (0 when absent or not numeric).
    pub minor: u32,
    /// Patch version component (0 when absent or not numeric).
    pub patch: u32,
    /// Pre-release tag (e.g., "alpha.1", "beta", "rc.2"). None for release versions.
    pub pre_release: Option<String>,
}

impl SemverParts {
    /// Whether this is a stable release (no pre-release tag).
    #[must_use]
    pub const fn is_release(&self) -> bool {
        self.pre_release.is_none()
    }

    /// Tuple of (major, minor, patch) for backwards compatibility.
    #[must_use]
    pub const fn triple(&self) -> (u32, u32, u32) {
        (self.major, self.minor, self.patch)
    }
}

/// Parse a version string into semver components with optional pre-release tag.
///
/// Accepted shape: `[v|V]major[.minor[.patch]][-pre_release][+build]`.
///
/// Parsing is deliberately lenient:
/// - any run of leading `v`/`V` characters is stripped;
/// - build metadata (everything from the first `+`) is discarded;
/// - a missing or non-numeric minor/patch component is treated as 0;
/// - components after the patch are ignored.
///
/// Returns `None` only when the major component is missing or is not a
/// valid `u32`.
#[must_use]
pub fn parse_semver_parts(version: &str) -> Option<SemverParts> {
    // Strip common prefixes like 'v' or 'V'
    let version = version.trim_start_matches(['v', 'V']);

    // Drop build metadata FIRST. Build identifiers may themselves contain '-'
    // (e.g. "1.2.3+build-5"), so splitting on '-' before this step would
    // misread part of the metadata as a pre-release tag and corrupt the patch.
    let without_build = version.split_once('+').map_or(version, |(core, _)| core);

    // Whatever follows the first remaining '-' is the pre-release tag.
    let (version_part, pre_release) = match without_build.split_once('-') {
        Some((core, pre)) => (core, Some(pre.to_string())),
        None => (without_build, None),
    };

    let mut parts = version_part.split('.');
    let major: u32 = parts.next()?.parse().ok()?;
    let minor: u32 = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);
    let patch: u32 = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);

    Some(SemverParts {
        major,
        minor,
        patch,
        pre_release,
    })
}
138
/// Outcome of multi-field scoring: the combined score plus one entry per field.
#[derive(Debug, Clone, Default)]
pub struct MultiFieldScoreResult {
    /// Total weighted score (0.0 - 1.0)
    pub total: f64,
    /// Score for name similarity
    pub name_score: f64,
    /// Score for the version match
    pub version_score: f64,
    /// Score for the ecosystem match
    pub ecosystem_score: f64,
    /// Score for license overlap (Jaccard)
    pub license_score: f64,
    /// Score for the supplier match
    pub supplier_score: f64,
    /// Score for the group/namespace match
    pub group_score: f64,
}

impl MultiFieldScoreResult {
    /// Render the total and every per-field score, each with two decimals,
    /// as a single human-readable line.
    #[must_use]
    pub fn summary(&self) -> String {
        // Bind each field to the short label used in the rendered text.
        let Self {
            total,
            name_score: name,
            version_score: version,
            ecosystem_score: ecosystem,
            license_score: licenses,
            supplier_score: supplier,
            group_score: group,
        } = self;

        format!(
            "Total: {total:.2} (name: {name:.2}, version: {version:.2}, ecosystem: {ecosystem:.2}, licenses: {licenses:.2}, supplier: {supplier:.2}, group: {group:.2})"
        )
    }
}
174
/// Unit tests for semver parsing, version divergence scoring and the score
/// summary. Scoring tests that assert exact values only exercise branches
/// whose result does not depend on the configured weights.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_semver_basic() {
        let p = parse_semver_parts("1.2.3").unwrap();
        assert_eq!(p.triple(), (1, 2, 3));
        assert!(p.is_release());
    }

    #[test]
    fn test_parse_semver_with_prefix() {
        let p = parse_semver_parts("v1.2.3").unwrap();
        assert_eq!(p.triple(), (1, 2, 3));
    }

    #[test]
    fn test_parse_semver_major_only() {
        let p = parse_semver_parts("3").unwrap();
        assert_eq!(p.triple(), (3, 0, 0));
    }

    #[test]
    fn test_parse_semver_major_minor_only() {
        // A missing patch component defaults to 0.
        let p = parse_semver_parts("1.2").unwrap();
        assert_eq!(p.triple(), (1, 2, 0));
        assert!(p.is_release());
    }

    #[test]
    fn test_parse_semver_invalid() {
        assert_eq!(parse_semver_parts("abc"), None);
    }

    #[test]
    fn test_parse_semver_prerelease() {
        let p = parse_semver_parts("1.2.3-alpha.1").unwrap();
        assert_eq!(p.triple(), (1, 2, 3));
        assert_eq!(p.pre_release.as_deref(), Some("alpha.1"));
        assert!(!p.is_release());
    }

    #[test]
    fn test_parse_semver_prerelease_rc() {
        let p = parse_semver_parts("2.0.0-rc.1").unwrap();
        assert_eq!(p.triple(), (2, 0, 0));
        assert_eq!(p.pre_release.as_deref(), Some("rc.1"));
    }

    #[test]
    fn test_parse_semver_build_metadata_stripped() {
        let p = parse_semver_parts("1.2.3+build.456").unwrap();
        assert_eq!(p.triple(), (1, 2, 3));
        assert!(p.pre_release.is_none());
        assert!(p.is_release());
    }

    #[test]
    fn test_parse_semver_prerelease_with_build() {
        let p = parse_semver_parts("1.0.0-beta.2+build.123").unwrap();
        assert_eq!(p.triple(), (1, 0, 0));
        assert_eq!(p.pre_release.as_deref(), Some("beta.2"));
    }

    #[test]
    fn test_version_divergence_prerelease_penalty() {
        let weights = config::MultiFieldWeights::default();
        // Release vs pre-release should score lower than release vs release
        let release_pair = compute_version_divergence_score(
            &Some("1.2.3".into()),
            &Some("1.2.4".into()),
            &weights,
        );
        let mixed_pair = compute_version_divergence_score(
            &Some("1.2.3".into()),
            &Some("1.2.3-alpha".into()),
            &weights,
        );
        assert!(
            mixed_pair < release_pair,
            "Pre-release mismatch ({mixed_pair}) should score lower than patch diff ({release_pair})"
        );
    }

    #[test]
    fn test_version_divergence_same_prerelease() {
        let weights = config::MultiFieldWeights::default();
        let score = compute_version_divergence_score(
            &Some("1.2.3-alpha.1".into()),
            &Some("1.2.3-alpha.1".into()),
            &weights,
        );
        assert_eq!(
            score, 1.0,
            "Identical pre-release versions should score 1.0"
        );
    }

    #[test]
    fn test_version_divergence_exact() {
        let weights = config::MultiFieldWeights::default();
        let v1 = Some("1.2.3".to_string());
        let v2 = Some("1.2.3".to_string());
        assert_eq!(compute_version_divergence_score(&v1, &v2, &weights), 1.0);
    }

    #[test]
    fn test_version_divergence_same_major_minor() {
        let weights = config::MultiFieldWeights::default();
        let v1 = Some("1.2.3".to_string());
        let v2 = Some("1.2.5".to_string());
        let score = compute_version_divergence_score(&v1, &v2, &weights);
        assert!((0.5..=0.8).contains(&score));
    }

    #[test]
    fn test_version_divergence_patch_penalty_value() {
        let weights = config::MultiFieldWeights::default();
        let v1 = Some("1.2.3".to_string());
        let v2 = Some("1.2.5".to_string());
        let score = compute_version_divergence_score(&v1, &v2, &weights);
        // Patch tier uses a fixed 0.01 per step: 0.8 - 2 * 0.01.
        assert!(
            (score - 0.78).abs() < 1e-9,
            "Expected 0.78 for a patch gap of 2, got {score}"
        );
    }

    #[test]
    fn test_version_divergence_none() {
        let weights = config::MultiFieldWeights::default();
        assert_eq!(
            compute_version_divergence_score(&None, &None, &weights),
            0.5
        );
    }

    #[test]
    fn test_version_divergence_one_missing() {
        let weights = config::MultiFieldWeights::default();
        let present = Some("1.2.3".to_string());
        assert_eq!(
            compute_version_divergence_score(&present, &None, &weights),
            0.0
        );
        assert_eq!(
            compute_version_divergence_score(&None, &present, &weights),
            0.0
        );
    }

    #[test]
    fn test_version_divergence_non_semver_fallback() {
        let weights = config::MultiFieldWeights::default();

        // Shared prefix "ab" out of 3 characters: 2/3 * 0.5.
        let shared = compute_version_divergence_score(
            &Some("abc".into()),
            &Some("abd".into()),
            &weights,
        );
        assert!(
            (shared - (2.0 / 3.0 * 0.5)).abs() < 1e-9,
            "Unexpected shared-prefix score {shared}"
        );

        // No shared leading character at all.
        let disjoint = compute_version_divergence_score(
            &Some("foo".into()),
            &Some("bar".into()),
            &weights,
        );
        assert!(
            (disjoint - 0.1).abs() < 1e-9,
            "Unexpected no-prefix score {disjoint}"
        );
    }

    #[test]
    fn test_multi_field_score_result_summary() {
        let result = MultiFieldScoreResult {
            total: 0.85,
            name_score: 0.9,
            version_score: 1.0,
            ecosystem_score: 1.0,
            license_score: 0.5,
            supplier_score: 0.0,
            group_score: 1.0,
        };
        let summary = result.summary();
        assert!(summary.contains("0.85"));
    }
}