1use crate::matching::{ComponentMatcher, FuzzyMatchConfig, FuzzyMatcher};
24use crate::model::{Component, NormalizedSbom};
25
26#[derive(Debug, Clone)]
28pub struct AdaptiveThresholdConfig {
29 pub min_threshold: f64,
31 pub max_threshold: f64,
33 pub max_iterations: usize,
35 pub target_match_ratio: Option<f64>,
37 pub min_samples: usize,
39 pub precision: f64,
41}
42
43impl Default for AdaptiveThresholdConfig {
44 fn default() -> Self {
45 Self {
46 min_threshold: 0.50,
47 max_threshold: 0.99,
48 max_iterations: 20,
49 target_match_ratio: None, min_samples: 10,
51 precision: 0.01,
52 }
53 }
54}
55
56impl AdaptiveThresholdConfig {
57 #[must_use]
59 pub fn for_target_ratio(ratio: f64) -> Self {
60 Self {
61 target_match_ratio: Some(ratio.clamp(0.0, 1.0)),
62 ..Default::default()
63 }
64 }
65
66 #[must_use]
68 pub fn otsu() -> Self {
69 Self {
70 target_match_ratio: None,
71 ..Default::default()
72 }
73 }
74}
75
76#[derive(Debug, Clone)]
78pub struct AdaptiveThresholdResult {
79 pub threshold: f64,
81 pub method: AdaptiveMethod,
83 pub samples: usize,
85 pub score_stats: ScoreStats,
87 pub match_ratio: f64,
89 pub confidence: f64,
91}
92
93#[derive(Debug, Clone, Copy, PartialEq, Eq)]
95pub enum AdaptiveMethod {
96 TargetRatio,
98 Otsu,
100 Default,
102}
103
104#[derive(Debug, Clone)]
106pub struct ScoreStats {
107 pub min: f64,
109 pub max: f64,
111 pub mean: f64,
113 pub std_dev: f64,
115 pub median: f64,
117 pub exact_matches: usize,
119 pub zero_scores: usize,
121}
122
123impl ScoreStats {
124 #[must_use]
126 pub fn from_scores(scores: &[f64]) -> Self {
127 if scores.is_empty() {
128 return Self {
129 min: 0.0,
130 max: 0.0,
131 mean: 0.0,
132 std_dev: 0.0,
133 median: 0.0,
134 exact_matches: 0,
135 zero_scores: 0,
136 };
137 }
138
139 let mut sorted = scores.to_vec();
140 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
141
142 let min = sorted[0];
143 let max = sorted[sorted.len() - 1];
144 let mean = scores.iter().sum::<f64>() / scores.len() as f64;
145 let median = if sorted.len().is_multiple_of(2) {
146 f64::midpoint(sorted[sorted.len() / 2 - 1], sorted[sorted.len() / 2])
147 } else {
148 sorted[sorted.len() / 2]
149 };
150
151 let variance =
152 scores.iter().map(|&s| (s - mean).powi(2)).sum::<f64>() / scores.len() as f64;
153 let std_dev = variance.sqrt();
154
155 let exact_matches = scores.iter().filter(|&&s| s >= 0.9999).count();
156 let zero_scores = scores.iter().filter(|&&s| s < 0.0001).count();
157
158 Self {
159 min,
160 max,
161 mean,
162 std_dev,
163 median,
164 exact_matches,
165 zero_scores,
166 }
167 }
168}
169
170pub struct AdaptiveThreshold {
172 config: AdaptiveThresholdConfig,
173}
174
175impl AdaptiveThreshold {
176 #[must_use]
178 pub const fn new(config: AdaptiveThresholdConfig) -> Self {
179 Self { config }
180 }
181
182 #[must_use]
184 pub fn compute_threshold(
185 &self,
186 old_sbom: &NormalizedSbom,
187 new_sbom: &NormalizedSbom,
188 matcher: &FuzzyMatcher,
189 ) -> AdaptiveThresholdResult {
190 let scores = self.sample_scores(old_sbom, new_sbom, matcher);
192
193 if scores.len() < self.config.min_samples {
194 return AdaptiveThresholdResult {
196 threshold: matcher.config().threshold,
197 method: AdaptiveMethod::Default,
198 samples: scores.len(),
199 score_stats: ScoreStats::from_scores(&scores),
200 match_ratio: 0.0,
201 confidence: 0.0,
202 };
203 }
204
205 let stats = ScoreStats::from_scores(&scores);
206
207 let (threshold, method) = self.config.target_match_ratio.map_or_else(
209 || {
210 let t = self.otsu_threshold(&scores);
211 (t, AdaptiveMethod::Otsu)
212 },
213 |target_ratio| {
214 let t = self.binary_search_threshold(&scores, target_ratio);
215 (t, AdaptiveMethod::TargetRatio)
216 },
217 );
218
219 let threshold = threshold.clamp(self.config.min_threshold, self.config.max_threshold);
221
222 let match_count = scores.iter().filter(|&&s| s >= threshold).count();
224 let match_ratio = match_count as f64 / scores.len() as f64;
225
226 let confidence = self.estimate_confidence(&scores, &stats);
228
229 AdaptiveThresholdResult {
230 threshold,
231 method,
232 samples: scores.len(),
233 score_stats: stats,
234 match_ratio,
235 confidence,
236 }
237 }
238
239 fn sample_scores(
241 &self,
242 old_sbom: &NormalizedSbom,
243 new_sbom: &NormalizedSbom,
244 matcher: &FuzzyMatcher,
245 ) -> Vec<f64> {
246 let mut scores = Vec::new();
247 let max_samples = 1000; let old_components: Vec<&Component> = old_sbom.components.values().collect();
250 let new_components: Vec<&Component> = new_sbom.components.values().collect();
251
252 for old_comp in old_components.iter().take(max_samples) {
254 let best_score = new_components
255 .iter()
256 .map(|new_comp| matcher.match_score(old_comp, new_comp))
257 .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
258 .unwrap_or(0.0);
259
260 scores.push(best_score);
261 }
262
263 scores
264 }
265
266 fn binary_search_threshold(&self, scores: &[f64], target_ratio: f64) -> f64 {
268 let mut low = self.config.min_threshold;
269 let mut high = self.config.max_threshold;
270
271 for _ in 0..self.config.max_iterations {
272 let mid = f64::midpoint(low, high);
273 let match_count = scores.iter().filter(|&&s| s >= mid).count();
274 let ratio = match_count as f64 / scores.len() as f64;
275
276 if (ratio - target_ratio).abs() < self.config.precision {
277 return mid;
278 }
279
280 if ratio > target_ratio {
281 low = mid;
283 } else {
284 high = mid;
286 }
287 }
288
289 f64::midpoint(low, high)
290 }
291
292 fn otsu_threshold(&self, scores: &[f64]) -> f64 {
294 let num_bins = 100;
296 let mut histogram = vec![0usize; num_bins];
297
298 for &score in scores {
299 let bin = ((score * (num_bins - 1) as f64) as usize).min(num_bins - 1);
300 histogram[bin] += 1;
301 }
302
303 let total = scores.len() as f64;
304 let mut sum_total = 0.0;
305 for (i, &count) in histogram.iter().enumerate() {
306 sum_total += i as f64 * count as f64;
307 }
308
309 let mut first_optimal_bin = 0;
310 let mut last_optimal_bin = 0;
311 let mut best_variance = 0.0;
312 let mut sum_background = 0.0;
313 let mut weight_background = 0.0;
314 let variance_tolerance = 1e-6; for (i, &count) in histogram.iter().enumerate() {
317 weight_background += count as f64;
318 if weight_background == 0.0 {
319 continue;
320 }
321
322 let weight_foreground = total - weight_background;
323 if weight_foreground == 0.0 {
324 break;
325 }
326
327 sum_background += i as f64 * count as f64;
328 let mean_background = sum_background / weight_background;
329 let mean_foreground = (sum_total - sum_background) / weight_foreground;
330
331 let between_variance =
332 weight_background * weight_foreground * (mean_background - mean_foreground).powi(2);
333
334 if between_variance > best_variance + variance_tolerance {
335 best_variance = between_variance;
337 first_optimal_bin = i;
338 last_optimal_bin = i;
339 } else if (between_variance - best_variance).abs() <= variance_tolerance {
340 last_optimal_bin = i;
342 }
343 }
344
345 let middle_bin = usize::midpoint(first_optimal_bin, last_optimal_bin);
348 (middle_bin as f64 + 0.5) / num_bins as f64
349 }
350
351 fn estimate_confidence(&self, scores: &[f64], stats: &ScoreStats) -> f64 {
353 let sample_factor = (scores.len() as f64 / 100.0).min(1.0);
360 let distribution_factor = (stats.std_dev * 3.0).min(1.0);
361 let exact_match_factor = if stats.exact_matches > 0 { 0.9 } else { 0.5 };
362 let zero_score_penalty = if stats.zero_scores == scores.len() {
363 0.0
364 } else {
365 1.0
366 };
367
368 (sample_factor * 0.3
369 + distribution_factor * 0.3
370 + exact_match_factor * 0.2
371 + zero_score_penalty * 0.2)
372 .clamp(0.0, 1.0)
373 }
374}
375
376impl Default for AdaptiveThreshold {
377 fn default() -> Self {
378 Self::new(AdaptiveThresholdConfig::default())
379 }
380}
381
382pub trait AdaptiveMatching {
384 fn with_adaptive_threshold(
386 old_sbom: &NormalizedSbom,
387 new_sbom: &NormalizedSbom,
388 base_config: FuzzyMatchConfig,
389 ) -> (FuzzyMatcher, AdaptiveThresholdResult);
390
391 fn adapt_threshold(
393 &self,
394 old_sbom: &NormalizedSbom,
395 new_sbom: &NormalizedSbom,
396 ) -> AdaptiveThresholdResult;
397}
398
399impl AdaptiveMatching for FuzzyMatcher {
400 fn with_adaptive_threshold(
401 old_sbom: &NormalizedSbom,
402 new_sbom: &NormalizedSbom,
403 base_config: FuzzyMatchConfig,
404 ) -> (FuzzyMatcher, AdaptiveThresholdResult) {
405 let base_matcher = Self::new(base_config.clone());
406 let adjuster = AdaptiveThreshold::default();
407 let result = adjuster.compute_threshold(old_sbom, new_sbom, &base_matcher);
408
409 let mut adapted_config = base_config;
411 adapted_config.threshold = result.threshold;
412 let adapted_matcher = Self::new(adapted_config);
413
414 (adapted_matcher, result)
415 }
416
417 fn adapt_threshold(
418 &self,
419 old_sbom: &NormalizedSbom,
420 new_sbom: &NormalizedSbom,
421 ) -> AdaptiveThresholdResult {
422 let adjuster = AdaptiveThreshold::default();
423 adjuster.compute_threshold(old_sbom, new_sbom, self)
424 }
425}
426
427#[cfg(test)]
428mod tests {
429 use super::*;
430 use crate::model::DocumentMetadata;
431
432 fn make_component(name: &str) -> Component {
433 Component::new(name.to_string(), format!("id-{}", name))
434 }
435
436 fn make_sbom_with_components(names: &[&str]) -> NormalizedSbom {
437 let mut sbom = NormalizedSbom::new(DocumentMetadata::default());
438 for name in names {
439 sbom.add_component(make_component(name));
440 }
441 sbom
442 }
443
444 #[test]
445 fn test_score_stats_computation() {
446 let scores = vec![0.0, 0.3, 0.5, 0.7, 1.0];
447 let stats = ScoreStats::from_scores(&scores);
448
449 assert_eq!(stats.min, 0.0);
450 assert_eq!(stats.max, 1.0);
451 assert!((stats.mean - 0.5).abs() < 0.01);
452 assert_eq!(stats.median, 0.5);
453 assert_eq!(stats.exact_matches, 1);
454 assert_eq!(stats.zero_scores, 1);
455 }
456
457 #[test]
458 fn test_score_stats_empty() {
459 let scores: Vec<f64> = vec![];
460 let stats = ScoreStats::from_scores(&scores);
461
462 assert_eq!(stats.min, 0.0);
463 assert_eq!(stats.max, 0.0);
464 assert_eq!(stats.mean, 0.0);
465 }
466
467 #[test]
468 fn test_adaptive_threshold_config() {
469 let config = AdaptiveThresholdConfig::for_target_ratio(0.7);
470 assert_eq!(config.target_match_ratio, Some(0.7));
471
472 let config = AdaptiveThresholdConfig::otsu();
473 assert!(config.target_match_ratio.is_none());
474 }
475
476 #[test]
477 fn test_adaptive_threshold_with_similar_sboms() {
478 let old = make_sbom_with_components(&[
479 "lodash", "react", "express", "axios", "moment", "webpack", "babel", "eslint",
480 "prettier", "jest",
481 ]);
482 let new = make_sbom_with_components(&[
483 "lodash", "react", "express", "axios", "dayjs", "webpack", "babel", "eslint", "prettier", "vitest", ]);
486
487 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced());
488 let adjuster = AdaptiveThreshold::new(AdaptiveThresholdConfig::otsu());
489 let result = adjuster.compute_threshold(&old, &new, &matcher);
490
491 assert!(result.threshold >= 0.5 && result.threshold <= 0.99);
492 assert!(result.samples >= 10);
493 assert!(result.confidence > 0.0);
494 }
495
496 #[test]
497 fn test_adaptive_threshold_target_ratio() {
498 let old = make_sbom_with_components(&[
499 "package-a",
500 "package-b",
501 "package-c",
502 "package-d",
503 "package-e",
504 "package-f",
505 "package-g",
506 "package-h",
507 "package-i",
508 "package-j",
509 ]);
510 let new = make_sbom_with_components(&[
511 "package-a",
512 "package-b",
513 "package-c",
514 "different-1",
515 "different-2",
516 "different-3",
517 "different-4",
518 "different-5",
519 "different-6",
520 "different-7",
521 ]);
522
523 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced());
524 let config = AdaptiveThresholdConfig::for_target_ratio(0.3); let adjuster = AdaptiveThreshold::new(config);
526 let result = adjuster.compute_threshold(&old, &new, &matcher);
527
528 assert_eq!(result.method, AdaptiveMethod::TargetRatio);
529 }
530
531 #[test]
532 fn test_adaptive_matching_trait() {
533 let old = make_sbom_with_components(&["lodash", "express", "react", "vue", "angular"]);
534 let new = make_sbom_with_components(&["lodash", "express", "react", "svelte", "solid"]);
535
536 let base_config = FuzzyMatchConfig::balanced();
537 let (adapted_matcher, result) =
538 FuzzyMatcher::with_adaptive_threshold(&old, &new, base_config);
539
540 assert!((adapted_matcher.config().threshold - result.threshold).abs() < 0.001);
542 }
543
544 #[test]
545 fn test_binary_search_threshold() {
546 let scores: Vec<f64> = (0..100).map(|i| i as f64 / 100.0).collect();
547 let adjuster = AdaptiveThreshold::new(AdaptiveThresholdConfig::for_target_ratio(0.5));
548
549 let threshold = adjuster.binary_search_threshold(&scores, 0.5);
550
551 assert!((threshold - 0.5).abs() < 0.1);
553 }
554
555 #[test]
556 fn test_otsu_threshold_bimodal() {
557 let mut scores = vec![0.1; 50];
560 scores.extend(std::iter::repeat_n(0.9, 50));
562
563 let adjuster = AdaptiveThreshold::default();
564 let threshold = adjuster.otsu_threshold(&scores);
565
566 assert!(
569 threshold > 0.2 && threshold < 0.8,
570 "Threshold {} should be between peaks (0.2-0.8)",
571 threshold
572 );
573 }
574}