1use serde::{Deserialize, Serialize};
4
/// Tunable settings for fuzzy matching, (de)serializable through serde.
///
/// Ready-made values come from the `strict`, `balanced` and `permissive`
/// constructors; `Default` is the `balanced` preset.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FuzzyMatchConfig {
    /// Match score threshold; the presets use 0.95 (strict), 0.85 (balanced)
    /// and 0.70 (permissive).
    // NOTE(review): presumably the minimum combined score, on a 0.0-1.0 scale,
    // for two entries to count as a match - confirm against the scoring code.
    pub threshold: f64,
    /// Weight of the Levenshtein component. In every preset this and
    /// `jaro_winkler_weight` add up to 1.0.
    pub levenshtein_weight: f64,
    /// Weight of the Jaro-Winkler component (see `levenshtein_weight`).
    pub jaro_winkler_weight: f64,
    /// Toggle for alias handling; `true` in every preset.
    // NOTE(review): the alias lookup itself is not visible in this section.
    pub use_aliases: bool,
    /// Toggle for ecosystem-specific rules; `true` in every preset.
    // NOTE(review): the rules themselves are not visible in this section.
    pub use_ecosystem_rules: bool,
    /// Candidate limit; the presets use 100, 500 and 1000.
    // NOTE(review): presumably caps how many candidates are scored per
    // lookup - confirm with the caller.
    pub max_candidates: usize,
    /// Optional multi-field weights. The key may be omitted on input, in
    /// which case this deserializes to `None`. The plain presets leave it
    /// `None`; `with_multi_field` sets it.
    #[serde(default)]
    pub field_weights: Option<MultiFieldWeights>,
}
24
/// Per-field weights plus penalty settings for multi-field matching.
///
/// The six weight fields (`name` through `group`) are the ones summed by
/// `is_normalized` and `normalize`; the penalty settings below them are not
/// part of that sum.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MultiFieldWeights {
    /// Weight for the name field (0.50 to 0.80 in the presets).
    pub name: f64,
    /// Weight for the version field.
    pub version: f64,
    /// Weight for the ecosystem field.
    pub ecosystem: f64,
    /// Weight for the licenses field.
    pub licenses: f64,
    /// Weight for the supplier field.
    pub supplier: f64,
    /// Weight for the group field.
    pub group: f64,

    /// Ecosystem mismatch penalty. Falls back to `0.0` when the key is
    /// missing on input; the presets use -0.15, -0.25, or 0.0 (`legacy`).
    // NOTE(review): presets store this as a negative number while the version
    // penalties are positive - confirm the sign convention in the scoring code.
    #[serde(default)]
    pub ecosystem_mismatch_penalty: f64,
    /// Switch for version-divergence handling. Falls back to `true` when the
    /// key is missing on input.
    #[serde(default = "default_true")]
    pub version_divergence_enabled: bool,
    /// Major-version penalty. Falls back to `0.10` when the key is missing.
    // NOTE(review): presumably applied per major-version difference - confirm.
    #[serde(default = "default_version_major_penalty")]
    pub version_major_penalty: f64,
    /// Minor-version penalty. Falls back to `0.02` when the key is missing.
    // NOTE(review): presumably applied per minor-version difference - confirm.
    #[serde(default = "default_version_minor_penalty")]
    pub version_minor_penalty: f64,
}
62
/// Serde default helper that yields `true`.
///
/// Referenced by `#[serde(default = "default_true")]` on
/// `MultiFieldWeights::version_divergence_enabled`.
fn default_true() -> bool {
    const ENABLED_BY_DEFAULT: bool = true;
    ENABLED_BY_DEFAULT
}
66
/// Serde default helper for `MultiFieldWeights::version_major_penalty`.
///
/// Yields `0.10`, the same value the `name_focused` and `balanced` presets use.
fn default_version_major_penalty() -> f64 {
    const MAJOR_PENALTY: f64 = 0.10;
    MAJOR_PENALTY
}
70
/// Serde default helper for `MultiFieldWeights::version_minor_penalty`.
///
/// Yields `0.02`, the same value the `name_focused` and `balanced` presets use.
fn default_version_minor_penalty() -> f64 {
    const MINOR_PENALTY: f64 = 0.02;
    MINOR_PENALTY
}
74
75impl MultiFieldWeights {
76 pub fn name_focused() -> Self {
78 Self {
79 name: 0.80,
80 version: 0.05,
81 ecosystem: 0.10,
82 licenses: 0.03,
83 supplier: 0.01,
84 group: 0.01,
85 ecosystem_mismatch_penalty: -0.15,
86 version_divergence_enabled: true,
87 version_major_penalty: 0.10,
88 version_minor_penalty: 0.02,
89 }
90 }
91
92 pub fn balanced() -> Self {
94 Self {
95 name: 0.60,
96 version: 0.10,
97 ecosystem: 0.15,
98 licenses: 0.08,
99 supplier: 0.04,
100 group: 0.03,
101 ecosystem_mismatch_penalty: -0.15, version_divergence_enabled: true,
103 version_major_penalty: 0.10,
104 version_minor_penalty: 0.02,
105 }
106 }
107
108 pub fn security_focused() -> Self {
110 Self {
111 name: 0.50,
112 version: 0.20,
113 ecosystem: 0.20,
114 licenses: 0.05,
115 supplier: 0.03,
116 group: 0.02,
117 ecosystem_mismatch_penalty: -0.25, version_divergence_enabled: true,
119 version_major_penalty: 0.15, version_minor_penalty: 0.03,
121 }
122 }
123
124 pub fn legacy() -> Self {
129 Self {
130 name: 0.60,
131 version: 0.10,
132 ecosystem: 0.15,
133 licenses: 0.08,
134 supplier: 0.04,
135 group: 0.03,
136 ecosystem_mismatch_penalty: 0.0, version_divergence_enabled: false, version_major_penalty: 0.0,
139 version_minor_penalty: 0.0,
140 }
141 }
142
143 pub fn is_normalized(&self) -> bool {
146 let sum =
147 self.name + self.version + self.ecosystem + self.licenses + self.supplier + self.group;
148 (sum - 1.0).abs() < 0.001
149 }
150
151 pub fn normalize(&mut self) {
154 let sum =
155 self.name + self.version + self.ecosystem + self.licenses + self.supplier + self.group;
156 if sum > 0.0 {
157 self.name /= sum;
158 self.version /= sum;
159 self.ecosystem /= sum;
160 self.licenses /= sum;
161 self.supplier /= sum;
162 self.group /= sum;
163 }
164 }
165}
166
167impl Default for MultiFieldWeights {
168 fn default() -> Self {
169 Self::balanced()
170 }
171}
172
173impl FuzzyMatchConfig {
174 pub fn strict() -> Self {
176 Self {
177 threshold: 0.95,
178 levenshtein_weight: 0.5,
179 jaro_winkler_weight: 0.5,
180 use_aliases: true,
181 use_ecosystem_rules: true,
182 max_candidates: 100,
183 field_weights: None, }
185 }
186
187 pub fn balanced() -> Self {
189 Self {
190 threshold: 0.85,
191 levenshtein_weight: 0.4,
192 jaro_winkler_weight: 0.6,
193 use_aliases: true,
194 use_ecosystem_rules: true,
195 max_candidates: 500,
196 field_weights: None, }
198 }
199
200 pub fn permissive() -> Self {
202 Self {
203 threshold: 0.70,
204 levenshtein_weight: 0.3,
205 jaro_winkler_weight: 0.7,
206 use_aliases: true,
207 use_ecosystem_rules: true,
208 max_candidates: 1000,
209 field_weights: None, }
211 }
212
213 pub fn with_multi_field(mut self, weights: MultiFieldWeights) -> Self {
215 self.field_weights = Some(weights);
216 self
217 }
218
219 pub fn with_threshold(mut self, threshold: f64) -> Self {
221 self.threshold = threshold;
222 self
223 }
224
225 pub fn strict_multi_field() -> Self {
227 Self::strict().with_multi_field(MultiFieldWeights::security_focused())
228 }
229
230 pub fn balanced_multi_field() -> Self {
232 Self::balanced().with_multi_field(MultiFieldWeights::balanced())
233 }
234
235 pub fn from_preset(name: &str) -> Option<Self> {
241 match name.to_lowercase().as_str() {
242 "strict" => Some(Self::strict()),
243 "balanced" => Some(Self::balanced()),
244 "permissive" => Some(Self::permissive()),
245 "strict-multi" | "strict_multi" => Some(Self::strict_multi_field()),
246 "balanced-multi" | "balanced_multi" => Some(Self::balanced_multi_field()),
247 _ => None,
248 }
249 }
250}
251
252impl Default for FuzzyMatchConfig {
253 fn default() -> Self {
254 Self::balanced()
255 }
256}
257
/// Settings for cross-ecosystem matching, (de)serializable through serde.
///
/// `Default` enables the feature with `min_score` 0.80, `score_penalty` 0.10,
/// `max_candidates` 10 and `verified_only` off; `disabled`, `strict` and
/// `permissive` provide alternative presets.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossEcosystemConfig {
    /// Master switch; `false` only in the `disabled` preset.
    pub enabled: bool,
    /// Score floor; presets use 0.70 (permissive), 0.80 (default), 0.90 (strict).
    // NOTE(review): presumably the minimum score a cross-ecosystem candidate
    // needs to be accepted - confirm against the matching code.
    pub min_score: f64,
    /// Penalty value; presets use 0.05 (permissive), 0.10 (default), 0.15 (strict).
    // NOTE(review): presumably subtracted from the score of cross-ecosystem
    // matches - confirm how it is applied.
    pub score_penalty: f64,
    /// Candidate limit; presets use 20 (permissive), 10 (default), 5 (strict).
    pub max_candidates: usize,
    /// When `true` (only in the `strict` preset), restricts matching to
    /// verified entries.
    // NOTE(review): what counts as "verified" is defined outside this section.
    pub verified_only: bool,
}
276
277impl Default for CrossEcosystemConfig {
278 fn default() -> Self {
279 Self {
280 enabled: true,
281 min_score: 0.80,
282 score_penalty: 0.10,
283 max_candidates: 10,
284 verified_only: false,
285 }
286 }
287}
288
289impl CrossEcosystemConfig {
290 pub fn disabled() -> Self {
292 Self {
293 enabled: false,
294 ..Default::default()
295 }
296 }
297
298 pub fn strict() -> Self {
300 Self {
301 enabled: true,
302 min_score: 0.90,
303 score_penalty: 0.15,
304 max_candidates: 5,
305 verified_only: true,
306 }
307 }
308
309 pub fn permissive() -> Self {
311 Self {
312 enabled: true,
313 min_score: 0.70,
314 score_penalty: 0.05,
315 max_candidates: 20,
316 verified_only: false,
317 }
318 }
319}