1use crate::matching::{ComponentMatcher, FuzzyMatchConfig, FuzzyMatcher};
24use crate::model::{Component, NormalizedSbom};
25
26#[derive(Debug, Clone)]
28pub struct AdaptiveThresholdConfig {
29 pub min_threshold: f64,
31 pub max_threshold: f64,
33 pub max_iterations: usize,
35 pub target_match_ratio: Option<f64>,
37 pub min_samples: usize,
39 pub precision: f64,
41}
42
43impl Default for AdaptiveThresholdConfig {
44 fn default() -> Self {
45 Self {
46 min_threshold: 0.50,
47 max_threshold: 0.99,
48 max_iterations: 20,
49 target_match_ratio: None, min_samples: 10,
51 precision: 0.01,
52 }
53 }
54}
55
56impl AdaptiveThresholdConfig {
57 #[must_use]
59 pub fn for_target_ratio(ratio: f64) -> Self {
60 Self {
61 target_match_ratio: Some(ratio.clamp(0.0, 1.0)),
62 ..Default::default()
63 }
64 }
65
66 #[must_use]
68 pub fn otsu() -> Self {
69 Self {
70 target_match_ratio: None,
71 ..Default::default()
72 }
73 }
74}
75
76#[derive(Debug, Clone)]
78pub struct AdaptiveThresholdResult {
79 pub threshold: f64,
81 pub method: AdaptiveMethod,
83 pub samples: usize,
85 pub score_stats: ScoreStats,
87 pub match_ratio: f64,
89 pub confidence: f64,
91}
92
93#[derive(Debug, Clone, Copy, PartialEq, Eq)]
95#[non_exhaustive]
96pub enum AdaptiveMethod {
97 TargetRatio,
99 Otsu,
101 Default,
103}
104
105#[derive(Debug, Clone)]
107pub struct ScoreStats {
108 pub min: f64,
110 pub max: f64,
112 pub mean: f64,
114 pub std_dev: f64,
116 pub median: f64,
118 pub exact_matches: usize,
120 pub zero_scores: usize,
122}
123
124impl ScoreStats {
125 #[must_use]
127 pub fn from_scores(scores: &[f64]) -> Self {
128 if scores.is_empty() {
129 return Self {
130 min: 0.0,
131 max: 0.0,
132 mean: 0.0,
133 std_dev: 0.0,
134 median: 0.0,
135 exact_matches: 0,
136 zero_scores: 0,
137 };
138 }
139
140 let mut sorted = scores.to_vec();
141 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
142
143 let min = sorted[0];
144 let max = sorted[sorted.len() - 1];
145 let mean = scores.iter().sum::<f64>() / scores.len() as f64;
146 let median = if sorted.len().is_multiple_of(2) {
147 f64::midpoint(sorted[sorted.len() / 2 - 1], sorted[sorted.len() / 2])
148 } else {
149 sorted[sorted.len() / 2]
150 };
151
152 let variance =
153 scores.iter().map(|&s| (s - mean).powi(2)).sum::<f64>() / scores.len() as f64;
154 let std_dev = variance.sqrt();
155
156 let exact_matches = scores.iter().filter(|&&s| s >= 0.9999).count();
157 let zero_scores = scores.iter().filter(|&&s| s < 0.0001).count();
158
159 Self {
160 min,
161 max,
162 mean,
163 std_dev,
164 median,
165 exact_matches,
166 zero_scores,
167 }
168 }
169}
170
171pub struct AdaptiveThreshold {
173 config: AdaptiveThresholdConfig,
174}
175
176impl AdaptiveThreshold {
177 #[must_use]
179 pub const fn new(config: AdaptiveThresholdConfig) -> Self {
180 Self { config }
181 }
182
183 #[must_use]
185 pub fn compute_threshold(
186 &self,
187 old_sbom: &NormalizedSbom,
188 new_sbom: &NormalizedSbom,
189 matcher: &FuzzyMatcher,
190 ) -> AdaptiveThresholdResult {
191 let scores = self.sample_scores(old_sbom, new_sbom, matcher);
193
194 if scores.len() < self.config.min_samples {
195 return AdaptiveThresholdResult {
197 threshold: matcher.config().threshold,
198 method: AdaptiveMethod::Default,
199 samples: scores.len(),
200 score_stats: ScoreStats::from_scores(&scores),
201 match_ratio: 0.0,
202 confidence: 0.0,
203 };
204 }
205
206 let stats = ScoreStats::from_scores(&scores);
207
208 let (threshold, method) = self.config.target_match_ratio.map_or_else(
210 || {
211 let t = self.otsu_threshold(&scores);
212 (t, AdaptiveMethod::Otsu)
213 },
214 |target_ratio| {
215 let t = self.binary_search_threshold(&scores, target_ratio);
216 (t, AdaptiveMethod::TargetRatio)
217 },
218 );
219
220 let threshold = threshold.clamp(self.config.min_threshold, self.config.max_threshold);
222
223 let match_count = scores.iter().filter(|&&s| s >= threshold).count();
225 let match_ratio = match_count as f64 / scores.len() as f64;
226
227 let confidence = self.estimate_confidence(&scores, &stats);
229
230 AdaptiveThresholdResult {
231 threshold,
232 method,
233 samples: scores.len(),
234 score_stats: stats,
235 match_ratio,
236 confidence,
237 }
238 }
239
240 fn sample_scores(
242 &self,
243 old_sbom: &NormalizedSbom,
244 new_sbom: &NormalizedSbom,
245 matcher: &FuzzyMatcher,
246 ) -> Vec<f64> {
247 let mut scores = Vec::new();
248 let max_samples = 1000; let old_components: Vec<&Component> = old_sbom.components.values().collect();
251 let new_components: Vec<&Component> = new_sbom.components.values().collect();
252
253 for old_comp in old_components.iter().take(max_samples) {
255 let best_score = new_components
256 .iter()
257 .map(|new_comp| matcher.match_score(old_comp, new_comp))
258 .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
259 .unwrap_or(0.0);
260
261 scores.push(best_score);
262 }
263
264 scores
265 }
266
267 fn binary_search_threshold(&self, scores: &[f64], target_ratio: f64) -> f64 {
269 let mut low = self.config.min_threshold;
270 let mut high = self.config.max_threshold;
271
272 for _ in 0..self.config.max_iterations {
273 let mid = f64::midpoint(low, high);
274 let match_count = scores.iter().filter(|&&s| s >= mid).count();
275 let ratio = match_count as f64 / scores.len() as f64;
276
277 if (ratio - target_ratio).abs() < self.config.precision {
278 return mid;
279 }
280
281 if ratio > target_ratio {
282 low = mid;
284 } else {
285 high = mid;
287 }
288 }
289
290 f64::midpoint(low, high)
291 }
292
293 fn otsu_threshold(&self, scores: &[f64]) -> f64 {
295 let num_bins = 100;
297 let mut histogram = vec![0usize; num_bins];
298
299 for &score in scores {
300 let bin = ((score * (num_bins - 1) as f64) as usize).min(num_bins - 1);
301 histogram[bin] += 1;
302 }
303
304 let total = scores.len() as f64;
305 let mut sum_total = 0.0;
306 for (i, &count) in histogram.iter().enumerate() {
307 sum_total += i as f64 * count as f64;
308 }
309
310 let mut first_optimal_bin = 0;
311 let mut last_optimal_bin = 0;
312 let mut best_variance = 0.0;
313 let mut sum_background = 0.0;
314 let mut weight_background = 0.0;
315 let variance_tolerance = 1e-6; for (i, &count) in histogram.iter().enumerate() {
318 weight_background += count as f64;
319 if weight_background == 0.0 {
320 continue;
321 }
322
323 let weight_foreground = total - weight_background;
324 if weight_foreground == 0.0 {
325 break;
326 }
327
328 sum_background += i as f64 * count as f64;
329 let mean_background = sum_background / weight_background;
330 let mean_foreground = (sum_total - sum_background) / weight_foreground;
331
332 let between_variance =
333 weight_background * weight_foreground * (mean_background - mean_foreground).powi(2);
334
335 if between_variance > best_variance + variance_tolerance {
336 best_variance = between_variance;
338 first_optimal_bin = i;
339 last_optimal_bin = i;
340 } else if (between_variance - best_variance).abs() <= variance_tolerance {
341 last_optimal_bin = i;
343 }
344 }
345
346 let middle_bin = usize::midpoint(first_optimal_bin, last_optimal_bin);
349 (middle_bin as f64 + 0.5) / num_bins as f64
350 }
351
352 fn estimate_confidence(&self, scores: &[f64], stats: &ScoreStats) -> f64 {
354 let sample_factor = (scores.len() as f64 / 100.0).min(1.0);
361 let distribution_factor = (stats.std_dev * 3.0).min(1.0);
362 let exact_match_factor = if stats.exact_matches > 0 { 0.9 } else { 0.5 };
363 let zero_score_penalty = if stats.zero_scores == scores.len() {
364 0.0
365 } else {
366 1.0
367 };
368
369 (sample_factor * 0.3
370 + distribution_factor * 0.3
371 + exact_match_factor * 0.2
372 + zero_score_penalty * 0.2)
373 .clamp(0.0, 1.0)
374 }
375}
376
377impl Default for AdaptiveThreshold {
378 fn default() -> Self {
379 Self::new(AdaptiveThresholdConfig::default())
380 }
381}
382
383pub trait AdaptiveMatching {
385 fn with_adaptive_threshold(
387 old_sbom: &NormalizedSbom,
388 new_sbom: &NormalizedSbom,
389 base_config: FuzzyMatchConfig,
390 ) -> (FuzzyMatcher, AdaptiveThresholdResult);
391
392 fn adapt_threshold(
394 &self,
395 old_sbom: &NormalizedSbom,
396 new_sbom: &NormalizedSbom,
397 ) -> AdaptiveThresholdResult;
398}
399
400impl AdaptiveMatching for FuzzyMatcher {
401 fn with_adaptive_threshold(
402 old_sbom: &NormalizedSbom,
403 new_sbom: &NormalizedSbom,
404 base_config: FuzzyMatchConfig,
405 ) -> (FuzzyMatcher, AdaptiveThresholdResult) {
406 let base_matcher = Self::new(base_config.clone());
407 let adjuster = AdaptiveThreshold::default();
408 let result = adjuster.compute_threshold(old_sbom, new_sbom, &base_matcher);
409
410 let mut adapted_config = base_config;
412 adapted_config.threshold = result.threshold;
413 let adapted_matcher = Self::new(adapted_config);
414
415 (adapted_matcher, result)
416 }
417
418 fn adapt_threshold(
419 &self,
420 old_sbom: &NormalizedSbom,
421 new_sbom: &NormalizedSbom,
422 ) -> AdaptiveThresholdResult {
423 let adjuster = AdaptiveThreshold::default();
424 adjuster.compute_threshold(old_sbom, new_sbom, self)
425 }
426}
427
428#[cfg(test)]
429mod tests {
430 use super::*;
431 use crate::model::DocumentMetadata;
432
433 fn make_component(name: &str) -> Component {
434 Component::new(name.to_string(), format!("id-{}", name))
435 }
436
437 fn make_sbom_with_components(names: &[&str]) -> NormalizedSbom {
438 let mut sbom = NormalizedSbom::new(DocumentMetadata::default());
439 for name in names {
440 sbom.add_component(make_component(name));
441 }
442 sbom
443 }
444
445 #[test]
446 fn test_score_stats_computation() {
447 let scores = vec![0.0, 0.3, 0.5, 0.7, 1.0];
448 let stats = ScoreStats::from_scores(&scores);
449
450 assert_eq!(stats.min, 0.0);
451 assert_eq!(stats.max, 1.0);
452 assert!((stats.mean - 0.5).abs() < 0.01);
453 assert_eq!(stats.median, 0.5);
454 assert_eq!(stats.exact_matches, 1);
455 assert_eq!(stats.zero_scores, 1);
456 }
457
458 #[test]
459 fn test_score_stats_empty() {
460 let scores: Vec<f64> = vec![];
461 let stats = ScoreStats::from_scores(&scores);
462
463 assert_eq!(stats.min, 0.0);
464 assert_eq!(stats.max, 0.0);
465 assert_eq!(stats.mean, 0.0);
466 }
467
468 #[test]
469 fn test_adaptive_threshold_config() {
470 let config = AdaptiveThresholdConfig::for_target_ratio(0.7);
471 assert_eq!(config.target_match_ratio, Some(0.7));
472
473 let config = AdaptiveThresholdConfig::otsu();
474 assert!(config.target_match_ratio.is_none());
475 }
476
477 #[test]
478 fn test_adaptive_threshold_with_similar_sboms() {
479 let old = make_sbom_with_components(&[
480 "lodash", "react", "express", "axios", "moment", "webpack", "babel", "eslint",
481 "prettier", "jest",
482 ]);
483 let new = make_sbom_with_components(&[
484 "lodash", "react", "express", "axios", "dayjs", "webpack", "babel", "eslint", "prettier", "vitest", ]);
487
488 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced());
489 let adjuster = AdaptiveThreshold::new(AdaptiveThresholdConfig::otsu());
490 let result = adjuster.compute_threshold(&old, &new, &matcher);
491
492 assert!(result.threshold >= 0.5 && result.threshold <= 0.99);
493 assert!(result.samples >= 10);
494 assert!(result.confidence > 0.0);
495 }
496
497 #[test]
498 fn test_adaptive_threshold_target_ratio() {
499 let old = make_sbom_with_components(&[
500 "package-a",
501 "package-b",
502 "package-c",
503 "package-d",
504 "package-e",
505 "package-f",
506 "package-g",
507 "package-h",
508 "package-i",
509 "package-j",
510 ]);
511 let new = make_sbom_with_components(&[
512 "package-a",
513 "package-b",
514 "package-c",
515 "different-1",
516 "different-2",
517 "different-3",
518 "different-4",
519 "different-5",
520 "different-6",
521 "different-7",
522 ]);
523
524 let matcher = FuzzyMatcher::new(FuzzyMatchConfig::balanced());
525 let config = AdaptiveThresholdConfig::for_target_ratio(0.3); let adjuster = AdaptiveThreshold::new(config);
527 let result = adjuster.compute_threshold(&old, &new, &matcher);
528
529 assert_eq!(result.method, AdaptiveMethod::TargetRatio);
530 }
531
532 #[test]
533 fn test_adaptive_matching_trait() {
534 let old = make_sbom_with_components(&["lodash", "express", "react", "vue", "angular"]);
535 let new = make_sbom_with_components(&["lodash", "express", "react", "svelte", "solid"]);
536
537 let base_config = FuzzyMatchConfig::balanced();
538 let (adapted_matcher, result) =
539 FuzzyMatcher::with_adaptive_threshold(&old, &new, base_config);
540
541 assert!((adapted_matcher.config().threshold - result.threshold).abs() < 0.001);
543 }
544
545 #[test]
546 fn test_binary_search_threshold() {
547 let scores: Vec<f64> = (0..100).map(|i| i as f64 / 100.0).collect();
548 let adjuster = AdaptiveThreshold::new(AdaptiveThresholdConfig::for_target_ratio(0.5));
549
550 let threshold = adjuster.binary_search_threshold(&scores, 0.5);
551
552 assert!((threshold - 0.5).abs() < 0.1);
554 }
555
556 #[test]
557 fn test_otsu_threshold_bimodal() {
558 let mut scores = vec![0.1; 50];
561 scores.extend(std::iter::repeat_n(0.9, 50));
563
564 let adjuster = AdaptiveThreshold::default();
565 let threshold = adjuster.otsu_threshold(&scores);
566
567 assert!(
570 threshold > 0.2 && threshold < 0.8,
571 "Threshold {} should be between peaks (0.2-0.8)",
572 threshold
573 );
574 }
575}