1use schemars::JsonSchema;
2use serde::{Deserialize, Serialize};
3
/// Serde default provider for boolean options that are on unless the user
/// turns them off; referenced by name from `#[serde(default = "...")]`.
const fn default_true() -> bool {
    const ON_BY_DEFAULT: bool = true;
    ON_BY_DEFAULT
}
7
/// Serde default provider for `min_tokens`: the value used when the key is
/// absent from the configuration.
const fn default_min_tokens() -> usize {
    const MIN_TOKENS: usize = 50;
    MIN_TOKENS
}
11
/// Serde default provider for `min_lines`: the value used when the key is
/// absent from the configuration.
const fn default_min_lines() -> usize {
    const MIN_LINES: usize = 5;
    MIN_LINES
}
15
/// Configuration for duplicate-code detection.
///
/// Keys are (de)serialized in camelCase (for example `minTokens`,
/// `skipLocal`, `crossLanguage`). Every field has a serde default, so an
/// empty document deserializes successfully.
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct DuplicatesConfig {
    /// Whether duplicate detection is enabled. Defaults to `true` when omitted.
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Detection mode. Defaults to `DetectionMode::default()` when omitted.
    #[serde(default)]
    pub mode: DetectionMode,

    // NOTE(review): presumably the smallest number of tokens a duplicate must
    // span to be reported — confirm against the detector that reads this.
    /// Minimum token count. Defaults to `50` when omitted.
    #[serde(default = "default_min_tokens")]
    pub min_tokens: usize,

    // NOTE(review): presumably the smallest number of lines a duplicate must
    // span to be reported — confirm against the detector that reads this.
    /// Minimum line count. Defaults to `5` when omitted.
    #[serde(default = "default_min_lines")]
    pub min_lines: usize,

    // NOTE(review): unit and meaning are not visible in this file (the tests
    // use values such as 3.0 and 5.0) — confirm with the consumer.
    /// Threshold value. Defaults to `0.0` when omitted.
    #[serde(default)]
    pub threshold: f64,

    // NOTE(review): the tests pass glob-looking strings such as
    // "**/vendor/**"; the matching semantics live outside this file.
    /// Patterns to ignore. Defaults to an empty list when omitted.
    #[serde(default)]
    pub ignore: Vec<String>,

    // NOTE(review): exact meaning of "local" is not shown here — confirm.
    /// Skip-local flag. Defaults to `false` when omitted.
    #[serde(default)]
    pub skip_local: bool,

    // NOTE(review): presumably enables matching across languages — confirm.
    /// Cross-language flag. Defaults to `false` when omitted.
    #[serde(default)]
    pub cross_language: bool,

    /// Optional per-flag normalization overrides. Defaults to
    /// `NormalizationConfig::default()` (no overrides) when omitted.
    #[serde(default)]
    pub normalization: NormalizationConfig,
}
60
61impl Default for DuplicatesConfig {
62 fn default() -> Self {
63 Self {
64 enabled: true,
65 mode: DetectionMode::default(),
66 min_tokens: default_min_tokens(),
67 min_lines: default_min_lines(),
68 threshold: 0.0,
69 ignore: vec![],
70 skip_local: false,
71 cross_language: false,
72 normalization: NormalizationConfig::default(),
73 }
74 }
75}
76
/// Optional per-flag overrides for normalization.
///
/// Each field is tri-state: `Some(true)` / `Some(false)` force the flag,
/// while `None` (the default) leaves the decision to the detection mode —
/// see `ResolvedNormalization::resolve`. Keys are (de)serialized in
/// camelCase, and fields that are `None` are left out when serializing.
#[derive(Debug, Clone, Default, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct NormalizationConfig {
    /// Override for the "ignore identifiers" flag; `None` means no override.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ignore_identifiers: Option<bool>,

    /// Override for the "ignore string values" flag; `None` means no override.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ignore_string_values: Option<bool>,

    /// Override for the "ignore numeric values" flag; `None` means no override.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ignore_numeric_values: Option<bool>,
}
100
/// Concrete normalization flags after combining a detection mode's defaults
/// with any explicit overrides (see `ResolvedNormalization::resolve`).
///
/// Unlike `NormalizationConfig`, every flag here is a plain `bool`.
// NOTE(review): how each flag affects comparison is implemented outside this
// file; the field docs below only restate the names.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ResolvedNormalization {
    /// Final value of the "ignore identifiers" flag.
    pub ignore_identifiers: bool,
    /// Final value of the "ignore string values" flag.
    pub ignore_string_values: bool,
    /// Final value of the "ignore numeric values" flag.
    pub ignore_numeric_values: bool,
}
108
109impl ResolvedNormalization {
110 #[must_use]
112 pub fn resolve(mode: DetectionMode, overrides: &NormalizationConfig) -> Self {
113 let (default_ids, default_strings, default_numbers) = match mode {
114 DetectionMode::Strict | DetectionMode::Mild => (false, false, false),
115 DetectionMode::Weak => (false, true, false),
116 DetectionMode::Semantic => (true, true, true),
117 };
118
119 Self {
120 ignore_identifiers: overrides.ignore_identifiers.unwrap_or(default_ids),
121 ignore_string_values: overrides.ignore_string_values.unwrap_or(default_strings),
122 ignore_numeric_values: overrides.ignore_numeric_values.unwrap_or(default_numbers),
123 }
124 }
125}
126
/// Detection mode for duplicate detection.
///
/// Serialized as the lowercase variant name (`"strict"`, `"mild"`,
/// `"weak"`, `"semantic"`). The default is [`DetectionMode::Mild`].
///
/// The normalization each mode implies when no override is given is defined
/// in `ResolvedNormalization::resolve`.
// NOTE(review): `Strict` and `Mild` resolve to the same normalization flags;
// whatever else distinguishes them is implemented outside this file.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum DetectionMode {
    /// No normalization flags enabled by default.
    Strict,
    /// The default mode; no normalization flags enabled by default.
    #[default]
    Mild,
    /// Ignores string values by default.
    Weak,
    /// Ignores identifiers, string values and numeric values by default.
    Semantic,
}
147
148impl std::fmt::Display for DetectionMode {
149 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
150 match self {
151 Self::Strict => write!(f, "strict"),
152 Self::Mild => write!(f, "mild"),
153 Self::Weak => write!(f, "weak"),
154 Self::Semantic => write!(f, "semantic"),
155 }
156 }
157}
158
159impl std::str::FromStr for DetectionMode {
160 type Err = String;
161
162 fn from_str(s: &str) -> Result<Self, Self::Err> {
163 match s.to_lowercase().as_str() {
164 "strict" => Ok(Self::Strict),
165 "mild" => Ok(Self::Mild),
166 "weak" => Ok(Self::Weak),
167 "semantic" => Ok(Self::Semantic),
168 other => Err(format!("unknown detection mode: '{other}'")),
169 }
170 }
171}
172
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds resolved flags in (identifiers, string values, numeric values) order.
    fn flags(ids: bool, strings: bool, numbers: bool) -> ResolvedNormalization {
        ResolvedNormalization {
            ignore_identifiers: ids,
            ignore_string_values: strings,
            ignore_numeric_values: numbers,
        }
    }

    /// Resolves `mode` with every override left unset.
    fn resolve_plain(mode: DetectionMode) -> ResolvedNormalization {
        ResolvedNormalization::resolve(mode, &NormalizationConfig::default())
    }

    /// Float comparison used for every `threshold` assertion in this module.
    fn close_to(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < f64::EPSILON
    }

    // ── DuplicatesConfig::default ───────────────────────────────────────

    #[test]
    fn duplicates_config_defaults() {
        let defaults = DuplicatesConfig::default();
        assert!(defaults.enabled);
        assert_eq!(defaults.mode, DetectionMode::Mild);
        assert_eq!((defaults.min_tokens, defaults.min_lines), (50, 5));
        assert!(close_to(defaults.threshold, 0.0));
        assert!(defaults.ignore.is_empty());
        assert_eq!((defaults.skip_local, defaults.cross_language), (false, false));
    }

    // ── DetectionMode: FromStr ──────────────────────────────────────────

    #[test]
    fn detection_mode_from_str_all_variants() {
        let cases = [
            ("strict", DetectionMode::Strict),
            ("mild", DetectionMode::Mild),
            ("weak", DetectionMode::Weak),
            ("semantic", DetectionMode::Semantic),
        ];
        for (text, expected) in cases {
            let parsed: DetectionMode = text.parse().unwrap();
            assert_eq!(parsed, expected);
        }
    }

    #[test]
    fn detection_mode_from_str_case_insensitive() {
        let cases = [
            ("STRICT", DetectionMode::Strict),
            ("Weak", DetectionMode::Weak),
            ("SEMANTIC", DetectionMode::Semantic),
        ];
        for (text, expected) in cases {
            let parsed: DetectionMode = text.parse().unwrap();
            assert_eq!(parsed, expected);
        }
    }

    #[test]
    fn detection_mode_from_str_unknown() {
        let message = "foobar".parse::<DetectionMode>().unwrap_err();
        for needle in ["unknown detection mode", "foobar"] {
            assert!(message.contains(needle));
        }
    }

    // ── DetectionMode: Display ──────────────────────────────────────────

    #[test]
    fn detection_mode_display() {
        let cases = [
            (DetectionMode::Strict, "strict"),
            (DetectionMode::Mild, "mild"),
            (DetectionMode::Weak, "weak"),
            (DetectionMode::Semantic, "semantic"),
        ];
        for (mode, expected) in cases {
            assert_eq!(mode.to_string(), expected);
        }
    }

    // ── ResolvedNormalization::resolve ──────────────────────────────────

    #[test]
    fn resolve_strict_mode_all_false() {
        assert_eq!(
            resolve_plain(DetectionMode::Strict),
            flags(false, false, false)
        );
    }

    #[test]
    fn resolve_mild_mode_all_false() {
        assert_eq!(
            resolve_plain(DetectionMode::Mild),
            flags(false, false, false)
        );
    }

    #[test]
    fn resolve_weak_mode_only_strings_true() {
        assert_eq!(resolve_plain(DetectionMode::Weak), flags(false, true, false));
    }

    #[test]
    fn resolve_semantic_mode_all_true() {
        assert_eq!(
            resolve_plain(DetectionMode::Semantic),
            flags(true, true, true)
        );
    }

    #[test]
    fn resolve_override_forces_true() {
        // Only the identifier flag is overridden; the rest follow Strict.
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(true),
            ..NormalizationConfig::default()
        };
        assert_eq!(
            ResolvedNormalization::resolve(DetectionMode::Strict, &overrides),
            flags(true, false, false)
        );
    }

    #[test]
    fn resolve_override_forces_false() {
        // The numeric flag is not overridden, so it keeps the Semantic default.
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(false),
            ignore_string_values: Some(false),
            ..NormalizationConfig::default()
        };
        assert_eq!(
            ResolvedNormalization::resolve(DetectionMode::Semantic, &overrides),
            flags(false, false, true)
        );
    }

    #[test]
    fn resolve_all_overrides_on_weak() {
        // Every flag is overridden, inverting what Weak would choose by itself.
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(true),
            ignore_string_values: Some(false),
            ignore_numeric_values: Some(true),
        };
        assert_eq!(
            ResolvedNormalization::resolve(DetectionMode::Weak, &overrides),
            flags(true, false, true)
        );
    }

    // ── JSON deserialization ────────────────────────────────────────────

    #[test]
    fn duplicates_config_json_all_fields() {
        let raw = r#"{
 "enabled": false,
 "mode": "semantic",
 "minTokens": 100,
 "minLines": 10,
 "threshold": 5.0,
 "ignore": ["**/vendor/**"],
 "skipLocal": true,
 "crossLanguage": true
 }"#;
        let parsed: DuplicatesConfig = serde_json::from_str(raw).unwrap();
        assert!(!parsed.enabled);
        assert_eq!(parsed.mode, DetectionMode::Semantic);
        assert_eq!((parsed.min_tokens, parsed.min_lines), (100, 10));
        assert!(close_to(parsed.threshold, 5.0));
        assert_eq!(parsed.ignore, vec!["**/vendor/**"]);
        assert!(parsed.skip_local);
        assert!(parsed.cross_language);
    }

    #[test]
    fn duplicates_config_json_partial_uses_defaults() {
        let parsed: DuplicatesConfig = serde_json::from_str(r#"{"mode": "weak"}"#).unwrap();
        // Keys absent from the document fall back to their serde defaults.
        assert!(parsed.enabled);
        assert_eq!(parsed.mode, DetectionMode::Weak);
        assert_eq!(parsed.min_tokens, 50);
        assert_eq!(parsed.min_lines, 5);
    }

    #[test]
    fn normalization_config_json_overrides() {
        let raw = r#"{
 "ignoreIdentifiers": true,
 "ignoreStringValues": false
 }"#;
        let parsed: NormalizationConfig = serde_json::from_str(raw).unwrap();
        assert_eq!(
            (
                parsed.ignore_identifiers,
                parsed.ignore_string_values,
                parsed.ignore_numeric_values,
            ),
            (Some(true), Some(false), None)
        );
    }

    // ── TOML deserialization ────────────────────────────────────────────

    #[test]
    fn duplicates_config_toml_all_fields() {
        let raw = r#"
enabled = false
mode = "weak"
minTokens = 75
minLines = 8
threshold = 3.0
ignore = ["vendor/**"]
skipLocal = true
crossLanguage = true

[normalization]
ignoreIdentifiers = true
ignoreStringValues = true
ignoreNumericValues = false
"#;
        let parsed: DuplicatesConfig = toml::from_str(raw).unwrap();
        assert!(!parsed.enabled);
        assert_eq!(parsed.mode, DetectionMode::Weak);
        assert_eq!((parsed.min_tokens, parsed.min_lines), (75, 8));
        assert!(close_to(parsed.threshold, 3.0));
        assert_eq!(parsed.ignore, vec!["vendor/**"]);
        assert!(parsed.skip_local);
        assert!(parsed.cross_language);

        let norm = &parsed.normalization;
        assert_eq!(norm.ignore_identifiers, Some(true));
        assert_eq!(norm.ignore_string_values, Some(true));
        assert_eq!(norm.ignore_numeric_values, Some(false));
    }

    #[test]
    fn duplicates_config_toml_defaults() {
        let parsed: DuplicatesConfig = toml::from_str("").unwrap();
        assert!(parsed.enabled);
        assert_eq!(parsed.mode, DetectionMode::Mild);
        assert_eq!(parsed.min_tokens, 50);
        assert_eq!(parsed.min_lines, 5);
    }

    // ── NormalizationConfig defaults ────────────────────────────────────

    #[test]
    fn normalization_config_default_all_none() {
        let empty = NormalizationConfig::default();
        assert_eq!(empty.ignore_identifiers, None);
        assert_eq!(empty.ignore_string_values, None);
        assert_eq!(empty.ignore_numeric_values, None);
    }

    #[test]
    fn normalization_config_empty_json_object() {
        let parsed: NormalizationConfig = serde_json::from_str("{}").unwrap();
        assert_eq!(parsed.ignore_identifiers, None);
        assert_eq!(parsed.ignore_string_values, None);
        assert_eq!(parsed.ignore_numeric_values, None);
    }

    // ── DetectionMode default ───────────────────────────────────────────

    #[test]
    fn detection_mode_default_is_mild() {
        let mode = DetectionMode::default();
        assert_eq!(mode, DetectionMode::Mild);
    }

    // ── ResolvedNormalization equality ──────────────────────────────────

    #[test]
    fn resolved_normalization_equality() {
        let a = flags(true, false, true);
        let b = flags(true, false, true);
        assert_eq!(a, b);

        // Differs from `a` only in the identifier flag.
        let c = flags(false, false, true);
        assert_ne!(a, c);
    }

    // ── DetectionMode serde ─────────────────────────────────────────────

    #[test]
    fn detection_mode_json_deserialization() {
        let cases = [
            (r#""strict""#, DetectionMode::Strict),
            (r#""mild""#, DetectionMode::Mild),
            (r#""weak""#, DetectionMode::Weak),
            (r#""semantic""#, DetectionMode::Semantic),
        ];
        for (raw, expected) in cases {
            let parsed: DetectionMode = serde_json::from_str(raw).unwrap();
            assert_eq!(parsed, expected);
        }
    }

    #[test]
    fn detection_mode_invalid_json() {
        let outcome = serde_json::from_str::<DetectionMode>(r#""aggressive""#);
        assert!(outcome.is_err());
    }

    // ── Serialization round trip ────────────────────────────────────────

    #[test]
    fn duplicates_config_json_roundtrip() {
        let original = DuplicatesConfig {
            enabled: false,
            mode: DetectionMode::Semantic,
            min_tokens: 100,
            min_lines: 10,
            threshold: 5.5,
            ignore: vec!["test/**".to_string()],
            skip_local: true,
            cross_language: true,
            normalization: NormalizationConfig {
                ignore_identifiers: Some(true),
                ignore_string_values: None,
                ignore_numeric_values: Some(false),
            },
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let restored: DuplicatesConfig = serde_json::from_str(&encoded).unwrap();

        assert!(!restored.enabled);
        assert_eq!(restored.mode, DetectionMode::Semantic);
        assert_eq!((restored.min_tokens, restored.min_lines), (100, 10));
        assert!(close_to(restored.threshold, 5.5));
        assert!(restored.skip_local);
        assert!(restored.cross_language);

        let norm = &restored.normalization;
        assert_eq!(norm.ignore_identifiers, Some(true));
        assert_eq!(norm.ignore_string_values, None);
        assert_eq!(norm.ignore_numeric_values, Some(false));
    }

    // ── skip_serializing_if on NormalizationConfig ──────────────────────

    #[test]
    fn normalization_none_fields_not_serialized() {
        let encoded = serde_json::to_string(&NormalizationConfig::default()).unwrap();
        for key in [
            "ignoreIdentifiers",
            "ignoreStringValues",
            "ignoreNumericValues",
        ] {
            assert!(!encoded.contains(key), "None fields should be skipped");
        }
    }

    #[test]
    fn normalization_some_fields_serialized() {
        let partial = NormalizationConfig {
            ignore_identifiers: Some(true),
            ignore_string_values: None,
            ignore_numeric_values: Some(false),
        };
        let encoded = serde_json::to_string(&partial).unwrap();
        let expectations = [
            ("ignoreIdentifiers", true),
            ("ignoreStringValues", false),
            ("ignoreNumericValues", true),
        ];
        for (key, present) in expectations {
            assert_eq!(encoded.contains(key), present);
        }
    }
}