1use serde::{Deserialize, Serialize};
4
5#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct FuzzyMatchConfig {
8 pub threshold: f64,
10 pub levenshtein_weight: f64,
12 pub jaro_winkler_weight: f64,
14 pub use_aliases: bool,
16 pub use_ecosystem_rules: bool,
18 pub max_candidates: usize,
20 #[serde(default)]
22 pub field_weights: Option<MultiFieldWeights>,
23}
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct MultiFieldWeights {
34 pub name: f64,
36 pub version: f64,
38 pub ecosystem: f64,
40 pub licenses: f64,
42 pub supplier: f64,
44 pub group: f64,
46
47 #[serde(default)]
51 pub ecosystem_mismatch_penalty: f64,
52 #[serde(default = "default_true")]
54 pub version_divergence_enabled: bool,
55 #[serde(default = "default_version_major_penalty")]
57 pub version_major_penalty: f64,
58 #[serde(default = "default_version_minor_penalty")]
60 pub version_minor_penalty: f64,
61}
62
/// Serde default helper for boolean fields that are enabled when the key is
/// missing from the serialized input.
const fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
66
/// Serde default helper: the major-version penalty used when the key is
/// missing from the serialized input.
const fn default_version_major_penalty() -> f64 {
    const MAJOR_PENALTY: f64 = 0.10;
    MAJOR_PENALTY
}
70
/// Serde default helper: the minor-version penalty used when the key is
/// missing from the serialized input.
const fn default_version_minor_penalty() -> f64 {
    const MINOR_PENALTY: f64 = 0.02;
    MINOR_PENALTY
}
74
75impl MultiFieldWeights {
76 #[must_use]
78 pub const fn name_focused() -> Self {
79 Self {
80 name: 0.80,
81 version: 0.05,
82 ecosystem: 0.10,
83 licenses: 0.03,
84 supplier: 0.01,
85 group: 0.01,
86 ecosystem_mismatch_penalty: -0.15,
87 version_divergence_enabled: true,
88 version_major_penalty: 0.10,
89 version_minor_penalty: 0.02,
90 }
91 }
92
93 #[must_use]
95 pub const fn balanced() -> Self {
96 Self {
97 name: 0.60,
98 version: 0.10,
99 ecosystem: 0.15,
100 licenses: 0.08,
101 supplier: 0.04,
102 group: 0.03,
103 ecosystem_mismatch_penalty: -0.15, version_divergence_enabled: true,
105 version_major_penalty: 0.10,
106 version_minor_penalty: 0.02,
107 }
108 }
109
110 #[must_use]
112 pub const fn security_focused() -> Self {
113 Self {
114 name: 0.50,
115 version: 0.20,
116 ecosystem: 0.20,
117 licenses: 0.05,
118 supplier: 0.03,
119 group: 0.02,
120 ecosystem_mismatch_penalty: -0.25, version_divergence_enabled: true,
122 version_major_penalty: 0.15, version_minor_penalty: 0.03,
124 }
125 }
126
127 #[must_use]
132 pub const fn legacy() -> Self {
133 Self {
134 name: 0.60,
135 version: 0.10,
136 ecosystem: 0.15,
137 licenses: 0.08,
138 supplier: 0.04,
139 group: 0.03,
140 ecosystem_mismatch_penalty: 0.0, version_divergence_enabled: false, version_major_penalty: 0.0,
143 version_minor_penalty: 0.0,
144 }
145 }
146
147 #[must_use]
150 pub fn is_normalized(&self) -> bool {
151 let sum =
152 self.name + self.version + self.ecosystem + self.licenses + self.supplier + self.group;
153 (sum - 1.0).abs() < 0.001
154 }
155
156 pub fn normalize(&mut self) {
159 let sum =
160 self.name + self.version + self.ecosystem + self.licenses + self.supplier + self.group;
161 if sum > 0.0 {
162 self.name /= sum;
163 self.version /= sum;
164 self.ecosystem /= sum;
165 self.licenses /= sum;
166 self.supplier /= sum;
167 self.group /= sum;
168 }
169 }
170}
171
172impl Default for MultiFieldWeights {
173 fn default() -> Self {
174 Self::balanced()
175 }
176}
177
178impl FuzzyMatchConfig {
179 #[must_use]
181 pub const fn strict() -> Self {
182 Self {
183 threshold: 0.95,
184 levenshtein_weight: 0.5,
185 jaro_winkler_weight: 0.5,
186 use_aliases: true,
187 use_ecosystem_rules: true,
188 max_candidates: 100,
189 field_weights: None, }
191 }
192
193 #[must_use]
195 pub const fn balanced() -> Self {
196 Self {
197 threshold: 0.85,
198 levenshtein_weight: 0.4,
199 jaro_winkler_weight: 0.6,
200 use_aliases: true,
201 use_ecosystem_rules: true,
202 max_candidates: 500,
203 field_weights: None, }
205 }
206
207 #[must_use]
209 pub const fn permissive() -> Self {
210 Self {
211 threshold: 0.70,
212 levenshtein_weight: 0.3,
213 jaro_winkler_weight: 0.7,
214 use_aliases: true,
215 use_ecosystem_rules: true,
216 max_candidates: 1000,
217 field_weights: None, }
219 }
220
221 #[must_use]
223 pub const fn with_multi_field(mut self, weights: MultiFieldWeights) -> Self {
224 self.field_weights = Some(weights);
225 self
226 }
227
228 #[must_use]
230 pub const fn with_threshold(mut self, threshold: f64) -> Self {
231 self.threshold = threshold;
232 self
233 }
234
235 #[must_use]
237 pub const fn strict_multi_field() -> Self {
238 Self::strict().with_multi_field(MultiFieldWeights::security_focused())
239 }
240
241 #[must_use]
243 pub const fn balanced_multi_field() -> Self {
244 Self::balanced().with_multi_field(MultiFieldWeights::balanced())
245 }
246
247 #[must_use]
253 pub fn from_preset(name: &str) -> Option<Self> {
254 match name.to_lowercase().as_str() {
255 "strict" => Some(Self::strict()),
256 "balanced" => Some(Self::balanced()),
257 "permissive" => Some(Self::permissive()),
258 "strict-multi" | "strict_multi" => Some(Self::strict_multi_field()),
259 "balanced-multi" | "balanced_multi" => Some(Self::balanced_multi_field()),
260 _ => None,
261 }
262 }
263}
264
265impl Default for FuzzyMatchConfig {
266 fn default() -> Self {
267 Self::balanced()
268 }
269}
270
271#[derive(Debug, Clone, Serialize, Deserialize)]
277pub struct CrossEcosystemConfig {
278 pub enabled: bool,
280 pub min_score: f64,
282 pub score_penalty: f64,
284 pub max_candidates: usize,
286 pub verified_only: bool,
288}
289
290impl Default for CrossEcosystemConfig {
291 fn default() -> Self {
292 Self {
293 enabled: true,
294 min_score: 0.80,
295 score_penalty: 0.10,
296 max_candidates: 10,
297 verified_only: false,
298 }
299 }
300}
301
302impl CrossEcosystemConfig {
303 #[must_use]
305 pub fn disabled() -> Self {
306 Self {
307 enabled: false,
308 ..Default::default()
309 }
310 }
311
312 #[must_use]
314 pub const fn strict() -> Self {
315 Self {
316 enabled: true,
317 min_score: 0.90,
318 score_penalty: 0.15,
319 max_candidates: 5,
320 verified_only: true,
321 }
322 }
323
324 #[must_use]
326 pub const fn permissive() -> Self {
327 Self {
328 enabled: true,
329 min_score: 0.70,
330 score_penalty: 0.05,
331 max_candidates: 20,
332 verified_only: false,
333 }
334 }
335}