1use crate::matching::{ComponentMatcher, FuzzyMatchConfig, FuzzyMatcher};
24use crate::model::{Component, NormalizedSbom};
25
26#[derive(Debug, Clone)]
28pub struct AdaptiveThresholdConfig {
29 pub min_threshold: f64,
31 pub max_threshold: f64,
33 pub max_iterations: usize,
35 pub target_match_ratio: Option<f64>,
37 pub min_samples: usize,
39 pub precision: f64,
41}
42
43impl Default for AdaptiveThresholdConfig {
44 fn default() -> Self {
45 Self {
46 min_threshold: 0.50,
47 max_threshold: 0.99,
48 max_iterations: 20,
49 target_match_ratio: None, min_samples: 10,
51 precision: 0.01,
52 }
53 }
54}
55
56impl AdaptiveThresholdConfig {
57 pub fn for_target_ratio(ratio: f64) -> Self {
59 Self {
60 target_match_ratio: Some(ratio.clamp(0.0, 1.0)),
61 ..Default::default()
62 }
63 }
64
65 pub fn otsu() -> Self {
67 Self {
68 target_match_ratio: None,
69 ..Default::default()
70 }
71 }
72}
73
74#[derive(Debug, Clone)]
76pub struct AdaptiveThresholdResult {
77 pub threshold: f64,
79 pub method: AdaptiveMethod,
81 pub samples: usize,
83 pub score_stats: ScoreStats,
85 pub match_ratio: f64,
87 pub confidence: f64,
89}
90
91#[derive(Debug, Clone, Copy, PartialEq, Eq)]
93pub enum AdaptiveMethod {
94 TargetRatio,
96 Otsu,
98 Default,
100}
101
102#[derive(Debug, Clone)]
104pub struct ScoreStats {
105 pub min: f64,
107 pub max: f64,
109 pub mean: f64,
111 pub std_dev: f64,
113 pub median: f64,
115 pub exact_matches: usize,
117 pub zero_scores: usize,
119}
120
121impl ScoreStats {
122 pub fn from_scores(scores: &[f64]) -> Self {
124 if scores.is_empty() {
125 return Self {
126 min: 0.0,
127 max: 0.0,
128 mean: 0.0,
129 std_dev: 0.0,
130 median: 0.0,
131 exact_matches: 0,
132 zero_scores: 0,
133 };
134 }
135
136 let mut sorted = scores.to_vec();
137 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
138
139 let min = sorted[0];
140 let max = sorted[sorted.len() - 1];
141 let mean = scores.iter().sum::<f64>() / scores.len() as f64;
142 let median = if sorted.len().is_multiple_of(2) {
143 (sorted[sorted.len() / 2 - 1] + sorted[sorted.len() / 2]) / 2.0
144 } else {
145 sorted[sorted.len() / 2]
146 };
147
148 let variance =
149 scores.iter().map(|&s| (s - mean).powi(2)).sum::<f64>() / scores.len() as f64;
150 let std_dev = variance.sqrt();
151
152 let exact_matches = scores.iter().filter(|&&s| s >= 0.9999).count();
153 let zero_scores = scores.iter().filter(|&&s| s < 0.0001).count();
154
155 Self {
156 min,
157 max,
158 mean,
159 std_dev,
160 median,
161 exact_matches,
162 zero_scores,
163 }
164 }
165}
166
167pub struct AdaptiveThreshold {
169 config: AdaptiveThresholdConfig,
170}
171
172impl AdaptiveThreshold {
173 pub fn new(config: AdaptiveThresholdConfig) -> Self {
175 Self { config }
176 }
177
178 pub fn compute_threshold(
180 &self,
181 old_sbom: &NormalizedSbom,
182 new_sbom: &NormalizedSbom,
183 matcher: &FuzzyMatcher,
184 ) -> AdaptiveThresholdResult {
185 let scores = self.sample_scores(old_sbom, new_sbom, matcher);
187
188 if scores.len() < self.config.min_samples {
189 return AdaptiveThresholdResult {
191 threshold: matcher.config().threshold,
192 method: AdaptiveMethod::Default,
193 samples: scores.len(),
194 score_stats: ScoreStats::from_scores(&scores),
195 match_ratio: 0.0,
196 confidence: 0.0,
197 };
198 }
199
200 let stats = ScoreStats::from_scores(&scores);
201
202 let (threshold, method) = if let Some(target_ratio) = self.config.target_match_ratio {
204 let t = self.binary_search_threshold(&scores, target_ratio);
205 (t, AdaptiveMethod::TargetRatio)
206 } else {
207 let t = self.otsu_threshold(&scores);
208 (t, AdaptiveMethod::Otsu)
209 };
210
211 let threshold = threshold.clamp(self.config.min_threshold, self.config.max_threshold);
213
214 let match_count = scores.iter().filter(|&&s| s >= threshold).count();
216 let match_ratio = match_count as f64 / scores.len() as f64;
217
218 let confidence = self.estimate_confidence(&scores, &stats);
220
221 AdaptiveThresholdResult {
222 threshold,
223 method,
224 samples: scores.len(),
225 score_stats: stats,
226 match_ratio,
227 confidence,
228 }
229 }
230
231 fn sample_scores(
233 &self,
234 old_sbom: &NormalizedSbom,
235 new_sbom: &NormalizedSbom,
236 matcher: &FuzzyMatcher,
237 ) -> Vec<f64> {
238 let mut scores = Vec::new();
239 let max_samples = 1000; let old_components: Vec<&Component> = old_sbom.components.values().collect();
242 let new_components: Vec<&Component> = new_sbom.components.values().collect();
243
244 for old_comp in old_components.iter().take(max_samples) {
246 let best_score = new_components
247 .iter()
248 .map(|new_comp| matcher.match_score(old_comp, new_comp))
249 .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
250 .unwrap_or(0.0);
251
252 scores.push(best_score);
253 }
254
255 scores
256 }
257
258 fn binary_search_threshold(&self, scores: &[f64], target_ratio: f64) -> f64 {
260 let mut low = self.config.min_threshold;
261 let mut high = self.config.max_threshold;
262
263 for _ in 0..self.config.max_iterations {
264 let mid = (low + high) / 2.0;
265 let match_count = scores.iter().filter(|&&s| s >= mid).count();
266 let ratio = match_count as f64 / scores.len() as f64;
267
268 if (ratio - target_ratio).abs() < self.config.precision {
269 return mid;
270 }
271
272 if ratio > target_ratio {
273 low = mid;
275 } else {
276 high = mid;
278 }
279 }
280
281 (low + high) / 2.0
282 }
283
284 fn otsu_threshold(&self, scores: &[f64]) -> f64 {
286 let num_bins = 100;
288 let mut histogram = vec![0usize; num_bins];
289
290 for &score in scores {
291 let bin = ((score * (num_bins - 1) as f64) as usize).min(num_bins - 1);
292 histogram[bin] += 1;
293 }
294
295 let total = scores.len() as f64;
296 let mut sum_total = 0.0;
297 for (i, &count) in histogram.iter().enumerate() {
298 sum_total += i as f64 * count as f64;
299 }
300
301 let mut first_optimal_bin = 0;
302 let mut last_optimal_bin = 0;
303 let mut best_variance = 0.0;
304 let mut sum_background = 0.0;
305 let mut weight_background = 0.0;
306 let variance_tolerance = 1e-6; for (i, &count) in histogram.iter().enumerate() {
309 weight_background += count as f64;
310 if weight_background == 0.0 {
311 continue;
312 }
313
314 let weight_foreground = total - weight_background;
315 if weight_foreground == 0.0 {
316 break;
317 }
318
319 sum_background += i as f64 * count as f64;
320 let mean_background = sum_background / weight_background;
321 let mean_foreground = (sum_total - sum_background) / weight_foreground;
322
323 let between_variance =
324 weight_background * weight_foreground * (mean_background - mean_foreground).powi(2);
325
326 if between_variance > best_variance + variance_tolerance {
327 best_variance = between_variance;
329 first_optimal_bin = i;
330 last_optimal_bin = i;
331 } else if (between_variance - best_variance).abs() <= variance_tolerance {
332 last_optimal_bin = i;
334 }
335 }
336
337 let middle_bin = (first_optimal_bin + last_optimal_bin) / 2;
340 (middle_bin as f64 + 0.5) / num_bins as f64
341 }
342
343 fn estimate_confidence(&self, scores: &[f64], stats: &ScoreStats) -> f64 {
345 let sample_factor = (scores.len() as f64 / 100.0).min(1.0);
352 let distribution_factor = (stats.std_dev * 3.0).min(1.0);
353 let exact_match_factor = if stats.exact_matches > 0 { 0.9 } else { 0.5 };
354 let zero_score_penalty = if stats.zero_scores == scores.len() {
355 0.0
356 } else {
357 1.0
358 };
359
360 (sample_factor * 0.3
361 + distribution_factor * 0.3
362 + exact_match_factor * 0.2
363 + zero_score_penalty * 0.2)
364 .clamp(0.0, 1.0)
365 }
366}
367
368impl Default for AdaptiveThreshold {
369 fn default() -> Self {
370 Self::new(AdaptiveThresholdConfig::default())
371 }
372}
373
374pub trait AdaptiveMatching {
376 fn with_adaptive_threshold(
378 old_sbom: &NormalizedSbom,
379 new_sbom: &NormalizedSbom,
380 base_config: FuzzyMatchConfig,
381 ) -> (FuzzyMatcher, AdaptiveThresholdResult);
382
383 fn adapt_threshold(
385 &self,
386 old_sbom: &NormalizedSbom,
387 new_sbom: &NormalizedSbom,
388 ) -> AdaptiveThresholdResult;
389}
390
391impl AdaptiveMatching for FuzzyMatcher {
392 fn with_adaptive_threshold(
393 old_sbom: &NormalizedSbom,
394 new_sbom: &NormalizedSbom,
395 base_config: FuzzyMatchConfig,
396 ) -> (FuzzyMatcher, AdaptiveThresholdResult) {
397 let base_matcher = FuzzyMatcher::new(base_config.clone());
398 let adjuster = AdaptiveThreshold::default();
399 let result = adjuster.compute_threshold(old_sbom, new_sbom, &base_matcher);
400
401 let mut adapted_config = base_config;
403 adapted_config.threshold = result.threshold;
404 let adapted_matcher = FuzzyMatcher::new(adapted_config);
405
406 (adapted_matcher, result)
407 }
408
409 fn adapt_threshold(
410 &self,
411 old_sbom: &NormalizedSbom,
412 new_sbom: &NormalizedSbom,
413 ) -> AdaptiveThresholdResult {
414 let adjuster = AdaptiveThreshold::default();
415 adjuster.compute_threshold(old_sbom, new_sbom, self)
416 }
417}
418
419#[cfg(test)]
420mod tests {
421 use super::*;
422 use crate::model::DocumentMetadata;
423
424 fn make_component(name: &str) -> Component {
425 Component::new(name.to_string(), format!("id-{}", name))
426 }
427
428 fn make_sbom_with_components(names: &[&str]) -> NormalizedSbom {
429 let mut sbom = NormalizedSbom::new(DocumentMetadata::default());
430 for name in names {
431 sbom.add_component(make_component(name));
432 }
433 sbom
434 }
435
436 #[test]
437 fn test_score_stats_computation() {
438 let scores = vec![0.0, 0.3, 0.5, 0.7, 1.0];
439 let stats = ScoreStats::from_scores(&scores);
440
441 assert_eq!(stats.min, 0.0);
442 assert_eq!(stats.max, 1.0);
443 assert!((stats.mean - 0.5).abs() < 0.01);
444 assert_eq!(stats.median, 0.5);
445 assert_eq!(stats.exact_matches, 1);
446 assert_eq!(stats.zero_scores, 1);
447 }
448
449 #[test]
450 fn test_score_stats_empty() {
451 let scores: Vec<f64> = vec![];
452 let stats = ScoreStats::from_scores(&scores);
453
454 assert_eq!(stats.min, 0.0);
455 assert_eq!(stats.max, 0.0);
456 assert_eq!(stats.mean, 0.0);
457 }
458
459 #[test]
460 fn test_adaptive_threshold_config() {
461 let config = AdaptiveThresholdConfig::for_target_ratio(0.7);
462 assert_eq!(config.target_match_ratio, Some(0.7));
463
464 let config = AdaptiveThresholdConfig::otsu();
465 assert!(config.target_match_ratio.is_none());
466 }
467
468 #[test]
469 fn test_adaptive_threshold_with_similar_sboms() {
470 let old = make_sbom_with_components(&[
471 "lodash", "react", "express", "axios", "moment", "webpack", "babel", "eslint",
472 "prettier", "jest",
473 ]);
474 let new = make_sbom_with_components(&[
475 "lodash", "react", "express", "axios", "dayjs", "webpack", "babel", "eslint", "prettier", "vitest", ]);
478
479 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced());
480 let adjuster = AdaptiveThreshold::new(AdaptiveThresholdConfig::otsu());
481 let result = adjuster.compute_threshold(&old, &new, &matcher);
482
483 assert!(result.threshold >= 0.5 && result.threshold <= 0.99);
484 assert!(result.samples >= 10);
485 assert!(result.confidence > 0.0);
486 }
487
488 #[test]
489 fn test_adaptive_threshold_target_ratio() {
490 let old = make_sbom_with_components(&[
491 "package-a",
492 "package-b",
493 "package-c",
494 "package-d",
495 "package-e",
496 "package-f",
497 "package-g",
498 "package-h",
499 "package-i",
500 "package-j",
501 ]);
502 let new = make_sbom_with_components(&[
503 "package-a",
504 "package-b",
505 "package-c",
506 "different-1",
507 "different-2",
508 "different-3",
509 "different-4",
510 "different-5",
511 "different-6",
512 "different-7",
513 ]);
514
515 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced());
516 let config = AdaptiveThresholdConfig::for_target_ratio(0.3); let adjuster = AdaptiveThreshold::new(config);
518 let result = adjuster.compute_threshold(&old, &new, &matcher);
519
520 assert_eq!(result.method, AdaptiveMethod::TargetRatio);
521 }
522
523 #[test]
524 fn test_adaptive_matching_trait() {
525 let old = make_sbom_with_components(&["lodash", "express", "react", "vue", "angular"]);
526 let new = make_sbom_with_components(&["lodash", "express", "react", "svelte", "solid"]);
527
528 let base_config = FuzzyMatchConfig::balanced();
529 let (adapted_matcher, result) =
530 FuzzyMatcher::with_adaptive_threshold(&old, &new, base_config);
531
532 assert!((adapted_matcher.config().threshold - result.threshold).abs() < 0.001);
534 }
535
536 #[test]
537 fn test_binary_search_threshold() {
538 let scores: Vec<f64> = (0..100).map(|i| i as f64 / 100.0).collect();
539 let adjuster = AdaptiveThreshold::new(AdaptiveThresholdConfig::for_target_ratio(0.5));
540
541 let threshold = adjuster.binary_search_threshold(&scores, 0.5);
542
543 assert!((threshold - 0.5).abs() < 0.1);
545 }
546
547 #[test]
548 fn test_otsu_threshold_bimodal() {
549 let mut scores = Vec::new();
552
553 for _ in 0..50 {
555 scores.push(0.1);
556 }
557 for _ in 0..50 {
559 scores.push(0.9);
560 }
561
562 let adjuster = AdaptiveThreshold::default();
563 let threshold = adjuster.otsu_threshold(&scores);
564
565 assert!(
568 threshold > 0.2 && threshold < 0.8,
569 "Threshold {} should be between peaks (0.2-0.8)",
570 threshold
571 );
572 }
573}