1use serde::{Deserialize, Serialize};
4
/// Settings that control fuzzy matching.
///
/// Ready-made presets are provided by the associated constructors (`strict`,
/// `balanced`, `permissive` and the `*_multi_field` variants); `Default`
/// yields the `balanced` preset.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FuzzyMatchConfig {
    /// Score threshold; the built-in presets use 0.95, 0.85 and 0.70.
    // NOTE(review): presumably the minimum score a candidate needs to count
    // as a match — confirm against the matcher, which is not visible here.
    pub threshold: f64,
    /// Weight for the Levenshtein part of the score (per the field name).
    /// In every built-in preset it sums to 1.0 with `jaro_winkler_weight`.
    pub levenshtein_weight: f64,
    /// Weight for the Jaro-Winkler part of the score (per the field name).
    pub jaro_winkler_weight: f64,
    /// Alias switch; `true` in every built-in preset.
    pub use_aliases: bool,
    /// Ecosystem-rule switch; `true` in every built-in preset.
    pub use_ecosystem_rules: bool,
    /// Candidate limit; the built-in presets use 100, 500 and 1000.
    pub max_candidates: usize,
    /// Optional multi-field weights. Falls back to `None` when the field is
    /// missing from the serialized input.
    #[serde(default)]
    pub field_weights: Option<MultiFieldWeights>,
}
24
/// Per-field weights plus penalty settings for multi-field matching.
///
/// The six weights (`name` through `group`) are the values summed by
/// `is_normalized` and rescaled by `normalize`; the penalty settings below
/// them are not part of that sum.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MultiFieldWeights {
    /// Weight of the name field.
    pub name: f64,
    /// Weight of the version field.
    pub version: f64,
    /// Weight of the ecosystem field.
    pub ecosystem: f64,
    /// Weight of the licenses field.
    pub licenses: f64,
    /// Weight of the supplier field.
    pub supplier: f64,
    /// Weight of the group field.
    pub group: f64,

    /// Ecosystem mismatch penalty; deserializes to `0.0` when absent.
    // NOTE(review): the presets store this as a negative number (-0.15,
    // -0.25) while the version penalties are positive — confirm how the
    // scorer applies each sign.
    #[serde(default)]
    pub ecosystem_mismatch_penalty: f64,
    /// Version-divergence switch; deserializes to `true` when absent.
    #[serde(default = "default_true")]
    pub version_divergence_enabled: bool,
    /// Major-version penalty; deserializes to `0.10` when absent.
    #[serde(default = "default_version_major_penalty")]
    pub version_major_penalty: f64,
    /// Minor-version penalty; deserializes to `0.02` when absent.
    #[serde(default = "default_version_minor_penalty")]
    pub version_minor_penalty: f64,
}
61
/// Serde fallback that always yields `true`.
///
/// Named by `#[serde(default = "default_true")]` on
/// `version_divergence_enabled`, so the flag is on when the field is missing.
const fn default_true() -> bool {
    true
}
65
/// Serde fallback for `version_major_penalty`: `0.10`.
///
/// This is the same value the `name_focused` and `balanced` presets use.
const fn default_version_major_penalty() -> f64 {
    0.10
}
69
/// Serde fallback for `version_minor_penalty`: `0.02`.
///
/// This is the same value the `name_focused` and `balanced` presets use.
const fn default_version_minor_penalty() -> f64 {
    0.02
}
73
74impl MultiFieldWeights {
75 #[must_use]
77 pub const fn name_focused() -> Self {
78 Self {
79 name: 0.80,
80 version: 0.05,
81 ecosystem: 0.10,
82 licenses: 0.03,
83 supplier: 0.01,
84 group: 0.01,
85 ecosystem_mismatch_penalty: -0.15,
86 version_divergence_enabled: true,
87 version_major_penalty: 0.10,
88 version_minor_penalty: 0.02,
89 }
90 }
91
92 #[must_use]
94 pub const fn balanced() -> Self {
95 Self {
96 name: 0.60,
97 version: 0.10,
98 ecosystem: 0.15,
99 licenses: 0.08,
100 supplier: 0.04,
101 group: 0.03,
102 ecosystem_mismatch_penalty: -0.15, version_divergence_enabled: true,
104 version_major_penalty: 0.10,
105 version_minor_penalty: 0.02,
106 }
107 }
108
109 #[must_use]
111 pub const fn security_focused() -> Self {
112 Self {
113 name: 0.50,
114 version: 0.20,
115 ecosystem: 0.20,
116 licenses: 0.05,
117 supplier: 0.03,
118 group: 0.02,
119 ecosystem_mismatch_penalty: -0.25, version_divergence_enabled: true,
121 version_major_penalty: 0.15, version_minor_penalty: 0.03,
123 }
124 }
125
126 #[must_use]
131 pub const fn legacy() -> Self {
132 Self {
133 name: 0.60,
134 version: 0.10,
135 ecosystem: 0.15,
136 licenses: 0.08,
137 supplier: 0.04,
138 group: 0.03,
139 ecosystem_mismatch_penalty: 0.0, version_divergence_enabled: false, version_major_penalty: 0.0,
142 version_minor_penalty: 0.0,
143 }
144 }
145
146 #[must_use]
149 pub fn is_normalized(&self) -> bool {
150 let sum =
151 self.name + self.version + self.ecosystem + self.licenses + self.supplier + self.group;
152 (sum - 1.0).abs() < 0.001
153 }
154
155 pub fn normalize(&mut self) {
158 let sum =
159 self.name + self.version + self.ecosystem + self.licenses + self.supplier + self.group;
160 if sum > 0.0 {
161 self.name /= sum;
162 self.version /= sum;
163 self.ecosystem /= sum;
164 self.licenses /= sum;
165 self.supplier /= sum;
166 self.group /= sum;
167 }
168 }
169}
170
171impl Default for MultiFieldWeights {
172 fn default() -> Self {
173 Self::balanced()
174 }
175}
176
177impl FuzzyMatchConfig {
178 #[must_use]
180 pub const fn strict() -> Self {
181 Self {
182 threshold: 0.95,
183 levenshtein_weight: 0.5,
184 jaro_winkler_weight: 0.5,
185 use_aliases: true,
186 use_ecosystem_rules: true,
187 max_candidates: 100,
188 field_weights: None, }
190 }
191
192 #[must_use]
194 pub const fn balanced() -> Self {
195 Self {
196 threshold: 0.85,
197 levenshtein_weight: 0.4,
198 jaro_winkler_weight: 0.6,
199 use_aliases: true,
200 use_ecosystem_rules: true,
201 max_candidates: 500,
202 field_weights: None, }
204 }
205
206 #[must_use]
208 pub const fn permissive() -> Self {
209 Self {
210 threshold: 0.70,
211 levenshtein_weight: 0.3,
212 jaro_winkler_weight: 0.7,
213 use_aliases: true,
214 use_ecosystem_rules: true,
215 max_candidates: 1000,
216 field_weights: None, }
218 }
219
220 #[must_use]
222 pub const fn with_multi_field(mut self, weights: MultiFieldWeights) -> Self {
223 self.field_weights = Some(weights);
224 self
225 }
226
227 #[must_use]
229 pub const fn with_threshold(mut self, threshold: f64) -> Self {
230 self.threshold = threshold;
231 self
232 }
233
234 #[must_use]
236 pub const fn strict_multi_field() -> Self {
237 Self::strict().with_multi_field(MultiFieldWeights::security_focused())
238 }
239
240 #[must_use]
242 pub const fn balanced_multi_field() -> Self {
243 Self::balanced().with_multi_field(MultiFieldWeights::balanced())
244 }
245
246 #[must_use]
252 pub fn from_preset(name: &str) -> Option<Self> {
253 match name.to_lowercase().as_str() {
254 "strict" => Some(Self::strict()),
255 "balanced" => Some(Self::balanced()),
256 "permissive" => Some(Self::permissive()),
257 "strict-multi" | "strict_multi" => Some(Self::strict_multi_field()),
258 "balanced-multi" | "balanced_multi" => Some(Self::balanced_multi_field()),
259 _ => None,
260 }
261 }
262}
263
264impl Default for FuzzyMatchConfig {
265 fn default() -> Self {
266 Self::balanced()
267 }
268}
269
/// Settings for cross-ecosystem matching.
///
/// `Default` enables the feature; `disabled`, `strict` and `permissive`
/// provide alternative presets.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossEcosystemConfig {
    /// Master switch; `true` by default, `false` in the `disabled` preset.
    pub enabled: bool,
    /// Score floor: 0.80 by default, 0.90 when strict, 0.70 when permissive.
    // NOTE(review): presumably the minimum score for a cross-ecosystem
    // candidate to be kept — confirm against the consumer of this config.
    pub min_score: f64,
    /// Score penalty: 0.10 by default, 0.15 when strict, 0.05 when permissive.
    pub score_penalty: f64,
    /// Candidate limit: 10 by default, 5 when strict, 20 when permissive.
    pub max_candidates: usize,
    /// Verified-only switch; `true` only in the `strict` preset.
    pub verified_only: bool,
}
288
289impl Default for CrossEcosystemConfig {
290 fn default() -> Self {
291 Self {
292 enabled: true,
293 min_score: 0.80,
294 score_penalty: 0.10,
295 max_candidates: 10,
296 verified_only: false,
297 }
298 }
299}
300
301impl CrossEcosystemConfig {
302 #[must_use]
304 pub fn disabled() -> Self {
305 Self {
306 enabled: false,
307 ..Default::default()
308 }
309 }
310
311 #[must_use]
313 pub const fn strict() -> Self {
314 Self {
315 enabled: true,
316 min_score: 0.90,
317 score_penalty: 0.15,
318 max_candidates: 5,
319 verified_only: true,
320 }
321 }
322
323 #[must_use]
325 pub const fn permissive() -> Self {
326 Self {
327 enabled: true,
328 min_score: 0.70,
329 score_penalty: 0.05,
330 max_candidates: 20,
331 verified_only: false,
332 }
333 }
334}