1use schemars::JsonSchema;
2use serde::{Deserialize, Serialize};
3
/// Serde default provider that always yields `true`.
///
/// Named in `#[serde(default = "default_true")]` so that a boolean field
/// missing from the input deserializes as `true` instead of `bool::default()`.
const fn default_true() -> bool {
    true
}
7
/// Serde default provider for `DuplicatesConfig::min_tokens`: `50`.
///
/// Shared by the `#[serde(default = "default_min_tokens")]` attribute and the
/// hand-written `Default` impl so both agree on the value.
const fn default_min_tokens() -> usize {
    50
}
11
/// Serde default provider for `DuplicatesConfig::min_lines`: `5`.
///
/// Shared by the `#[serde(default = "default_min_lines")]` attribute and the
/// hand-written `Default` impl so both agree on the value.
const fn default_min_lines() -> usize {
    5
}
15
16#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
18#[serde(rename_all = "camelCase")]
19pub struct DuplicatesConfig {
20 #[serde(default = "default_true")]
22 pub enabled: bool,
23
24 #[serde(default)]
26 pub mode: DetectionMode,
27
28 #[serde(default = "default_min_tokens")]
30 pub min_tokens: usize,
31
32 #[serde(default = "default_min_lines")]
34 pub min_lines: usize,
35
36 #[serde(default)]
38 pub threshold: f64,
39
40 #[serde(default)]
42 pub ignore: Vec<String>,
43
44 #[serde(default)]
46 pub skip_local: bool,
47
48 #[serde(default)]
54 pub cross_language: bool,
55
56 #[serde(default)]
64 pub ignore_imports: bool,
65
66 #[serde(default)]
68 pub normalization: NormalizationConfig,
69}
70
71impl Default for DuplicatesConfig {
72 fn default() -> Self {
73 Self {
74 enabled: true,
75 mode: DetectionMode::default(),
76 min_tokens: default_min_tokens(),
77 min_lines: default_min_lines(),
78 threshold: 0.0,
79 ignore: vec![],
80 skip_local: false,
81 cross_language: false,
82 ignore_imports: false,
83 normalization: NormalizationConfig::default(),
84 }
85 }
86}
87
88#[derive(Debug, Clone, Default, Deserialize, Serialize, JsonSchema)]
94#[serde(rename_all = "camelCase")]
95pub struct NormalizationConfig {
96 #[serde(default, skip_serializing_if = "Option::is_none")]
99 pub ignore_identifiers: Option<bool>,
100
101 #[serde(default, skip_serializing_if = "Option::is_none")]
104 pub ignore_string_values: Option<bool>,
105
106 #[serde(default, skip_serializing_if = "Option::is_none")]
109 pub ignore_numeric_values: Option<bool>,
110}
111
/// Fully resolved normalization flags: the detection mode's baseline with any
/// explicit overrides applied, so every flag is a concrete `bool`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ResolvedNormalization {
    /// Final value of the "ignore identifiers" flag.
    pub ignore_identifiers: bool,
    /// Final value of the "ignore string values" flag.
    pub ignore_string_values: bool,
    /// Final value of the "ignore numeric values" flag.
    pub ignore_numeric_values: bool,
}
119
120impl ResolvedNormalization {
121 #[must_use]
123 pub fn resolve(mode: DetectionMode, overrides: &NormalizationConfig) -> Self {
124 let (default_ids, default_strings, default_numbers) = match mode {
125 DetectionMode::Strict | DetectionMode::Mild => (false, false, false),
126 DetectionMode::Weak => (false, true, false),
127 DetectionMode::Semantic => (true, true, true),
128 };
129
130 Self {
131 ignore_identifiers: overrides.ignore_identifiers.unwrap_or(default_ids),
132 ignore_string_values: overrides.ignore_string_values.unwrap_or(default_strings),
133 ignore_numeric_values: overrides.ignore_numeric_values.unwrap_or(default_numbers),
134 }
135 }
136}
137
138#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
146#[serde(rename_all = "lowercase")]
147pub enum DetectionMode {
148 Strict,
150 #[default]
152 Mild,
153 Weak,
155 Semantic,
157}
158
159impl std::fmt::Display for DetectionMode {
160 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
161 match self {
162 Self::Strict => write!(f, "strict"),
163 Self::Mild => write!(f, "mild"),
164 Self::Weak => write!(f, "weak"),
165 Self::Semantic => write!(f, "semantic"),
166 }
167 }
168}
169
170impl std::str::FromStr for DetectionMode {
171 type Err = String;
172
173 fn from_str(s: &str) -> Result<Self, Self::Err> {
174 match s.to_lowercase().as_str() {
175 "strict" => Ok(Self::Strict),
176 "mild" => Ok(Self::Mild),
177 "weak" => Ok(Self::Weak),
178 "semantic" => Ok(Self::Semantic),
179 other => Err(format!("unknown detection mode: '{other}'")),
180 }
181 }
182}
183
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Helpers ----

    /// Asserts that `actual` is within `f64::EPSILON` of `expected`.
    #[track_caller]
    fn assert_close(actual: f64, expected: f64) {
        assert!((actual - expected).abs() < f64::EPSILON);
    }

    /// Resolves `mode` with an all-`None` override set.
    fn resolve_without_overrides(mode: DetectionMode) -> ResolvedNormalization {
        ResolvedNormalization::resolve(mode, &NormalizationConfig::default())
    }

    /// Builds an expected result from positional flags:
    /// identifiers, string values, numeric values.
    const fn expect_flags(identifiers: bool, strings: bool, numbers: bool) -> ResolvedNormalization {
        ResolvedNormalization {
            ignore_identifiers: identifiers,
            ignore_string_values: strings,
            ignore_numeric_values: numbers,
        }
    }

    // ---- DuplicatesConfig defaults ----

    #[test]
    fn duplicates_config_defaults() {
        let defaults = DuplicatesConfig::default();

        assert!(defaults.enabled);
        assert_eq!(defaults.mode, DetectionMode::Mild);
        assert_eq!((defaults.min_tokens, defaults.min_lines), (50, 5));
        assert_close(defaults.threshold, 0.0);
        assert!(defaults.ignore.is_empty());
        assert!(!defaults.skip_local);
        assert!(!defaults.cross_language);
        assert!(!defaults.ignore_imports);
    }

    // ---- DetectionMode: FromStr ----

    #[test]
    fn detection_mode_from_str_all_variants() {
        let cases = [
            ("strict", DetectionMode::Strict),
            ("mild", DetectionMode::Mild),
            ("weak", DetectionMode::Weak),
            ("semantic", DetectionMode::Semantic),
        ];
        for (text, expected) in cases {
            assert_eq!(text.parse::<DetectionMode>().unwrap(), expected);
        }
    }

    #[test]
    fn detection_mode_from_str_case_insensitive() {
        let cases = [
            ("STRICT", DetectionMode::Strict),
            ("Weak", DetectionMode::Weak),
            ("SEMANTIC", DetectionMode::Semantic),
        ];
        for (text, expected) in cases {
            assert_eq!(text.parse::<DetectionMode>().unwrap(), expected);
        }
    }

    #[test]
    fn detection_mode_from_str_unknown() {
        let message = "foobar".parse::<DetectionMode>().unwrap_err();
        assert!(message.contains("unknown detection mode"));
        assert!(message.contains("foobar"));
    }

    // ---- DetectionMode: Display ----

    #[test]
    fn detection_mode_display() {
        let cases = [
            (DetectionMode::Strict, "strict"),
            (DetectionMode::Mild, "mild"),
            (DetectionMode::Weak, "weak"),
            (DetectionMode::Semantic, "semantic"),
        ];
        for (mode, expected) in cases {
            assert_eq!(mode.to_string(), expected);
        }
    }

    // ---- ResolvedNormalization::resolve: mode baselines ----

    #[test]
    fn resolve_strict_mode_all_false() {
        assert_eq!(
            resolve_without_overrides(DetectionMode::Strict),
            expect_flags(false, false, false)
        );
    }

    #[test]
    fn resolve_mild_mode_all_false() {
        assert_eq!(
            resolve_without_overrides(DetectionMode::Mild),
            expect_flags(false, false, false)
        );
    }

    #[test]
    fn resolve_weak_mode_only_strings_true() {
        assert_eq!(
            resolve_without_overrides(DetectionMode::Weak),
            expect_flags(false, true, false)
        );
    }

    #[test]
    fn resolve_semantic_mode_all_true() {
        assert_eq!(
            resolve_without_overrides(DetectionMode::Semantic),
            expect_flags(true, true, true)
        );
    }

    // ---- ResolvedNormalization::resolve: explicit overrides ----

    #[test]
    fn resolve_override_forces_true() {
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(true),
            ..NormalizationConfig::default()
        };
        assert_eq!(
            ResolvedNormalization::resolve(DetectionMode::Strict, &overrides),
            expect_flags(true, false, false)
        );
    }

    #[test]
    fn resolve_override_forces_false() {
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(false),
            ignore_string_values: Some(false),
            ignore_numeric_values: None,
        };
        // Only the numeric flag keeps the Semantic baseline.
        assert_eq!(
            ResolvedNormalization::resolve(DetectionMode::Semantic, &overrides),
            expect_flags(false, false, true)
        );
    }

    #[test]
    fn resolve_all_overrides_on_weak() {
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(true),
            ignore_string_values: Some(false),
            ignore_numeric_values: Some(true),
        };
        assert_eq!(
            ResolvedNormalization::resolve(DetectionMode::Weak, &overrides),
            expect_flags(true, false, true)
        );
    }

    // ---- JSON deserialization ----

    #[test]
    fn duplicates_config_json_all_fields() {
        let json = r#"{
            "enabled": false,
            "mode": "semantic",
            "minTokens": 100,
            "minLines": 10,
            "threshold": 5.0,
            "ignore": ["**/vendor/**"],
            "skipLocal": true,
            "crossLanguage": true,
            "ignoreImports": true
        }"#;
        let parsed: DuplicatesConfig = serde_json::from_str(json).unwrap();

        assert!(!parsed.enabled);
        assert_eq!(parsed.mode, DetectionMode::Semantic);
        assert_eq!((parsed.min_tokens, parsed.min_lines), (100, 10));
        assert_close(parsed.threshold, 5.0);
        assert_eq!(parsed.ignore, vec!["**/vendor/**"]);
        assert!(parsed.skip_local);
        assert!(parsed.cross_language);
        assert!(parsed.ignore_imports);
    }

    #[test]
    fn duplicates_config_json_partial_uses_defaults() {
        let parsed: DuplicatesConfig = serde_json::from_str(r#"{"mode": "weak"}"#).unwrap();

        assert!(parsed.enabled);
        assert_eq!(parsed.mode, DetectionMode::Weak);
        assert_eq!(parsed.min_tokens, 50);
        assert_eq!(parsed.min_lines, 5);
    }

    #[test]
    fn normalization_config_json_overrides() {
        let json = r#"{
            "ignoreIdentifiers": true,
            "ignoreStringValues": false
        }"#;
        let parsed: NormalizationConfig = serde_json::from_str(json).unwrap();

        assert_eq!(parsed.ignore_identifiers, Some(true));
        assert_eq!(parsed.ignore_string_values, Some(false));
        assert_eq!(parsed.ignore_numeric_values, None);
    }

    // ---- TOML deserialization ----

    #[test]
    fn duplicates_config_toml_all_fields() {
        let toml_str = r#"
enabled = false
mode = "weak"
minTokens = 75
minLines = 8
threshold = 3.0
ignore = ["vendor/**"]
skipLocal = true
crossLanguage = true
ignoreImports = true

[normalization]
ignoreIdentifiers = true
ignoreStringValues = true
ignoreNumericValues = false
"#;
        let parsed: DuplicatesConfig = toml::from_str(toml_str).unwrap();

        assert!(!parsed.enabled);
        assert_eq!(parsed.mode, DetectionMode::Weak);
        assert_eq!((parsed.min_tokens, parsed.min_lines), (75, 8));
        assert_close(parsed.threshold, 3.0);
        assert_eq!(parsed.ignore, vec!["vendor/**"]);
        assert!(parsed.skip_local);
        assert!(parsed.cross_language);
        assert!(parsed.ignore_imports);

        let normalization = &parsed.normalization;
        assert_eq!(normalization.ignore_identifiers, Some(true));
        assert_eq!(normalization.ignore_string_values, Some(true));
        assert_eq!(normalization.ignore_numeric_values, Some(false));
    }

    #[test]
    fn duplicates_config_toml_defaults() {
        let parsed: DuplicatesConfig = toml::from_str("").unwrap();

        assert!(parsed.enabled);
        assert_eq!(parsed.mode, DetectionMode::Mild);
        assert_eq!(parsed.min_tokens, 50);
        assert_eq!(parsed.min_lines, 5);
    }

    // ---- NormalizationConfig defaults ----

    #[test]
    fn normalization_config_default_all_none() {
        let defaults = NormalizationConfig::default();

        assert!(defaults.ignore_identifiers.is_none());
        assert!(defaults.ignore_string_values.is_none());
        assert!(defaults.ignore_numeric_values.is_none());
    }

    #[test]
    fn normalization_config_empty_json_object() {
        let parsed: NormalizationConfig = serde_json::from_str("{}").unwrap();

        assert!(parsed.ignore_identifiers.is_none());
        assert!(parsed.ignore_string_values.is_none());
        assert!(parsed.ignore_numeric_values.is_none());
    }

    // ---- DetectionMode default ----

    #[test]
    fn detection_mode_default_is_mild() {
        assert_eq!(DetectionMode::default(), DetectionMode::Mild);
    }

    // ---- ResolvedNormalization equality ----

    #[test]
    fn resolved_normalization_equality() {
        let a = ResolvedNormalization {
            ignore_identifiers: true,
            ignore_string_values: false,
            ignore_numeric_values: true,
        };
        let b = ResolvedNormalization {
            ignore_identifiers: true,
            ignore_string_values: false,
            ignore_numeric_values: true,
        };
        assert_eq!(a, b);

        // Differs from `a` in the identifier flag only.
        let c = ResolvedNormalization {
            ignore_identifiers: false,
            ..a
        };
        assert_ne!(a, c);
    }

    // ---- DetectionMode serde ----

    #[test]
    fn detection_mode_json_deserialization() {
        let cases = [
            (r#""strict""#, DetectionMode::Strict),
            (r#""mild""#, DetectionMode::Mild),
            (r#""weak""#, DetectionMode::Weak),
            (r#""semantic""#, DetectionMode::Semantic),
        ];
        for (json, expected) in cases {
            let parsed: DetectionMode = serde_json::from_str(json).unwrap();
            assert_eq!(parsed, expected);
        }
    }

    #[test]
    fn detection_mode_invalid_json() {
        assert!(serde_json::from_str::<DetectionMode>(r#""aggressive""#).is_err());
    }

    // ---- Serialization round-trip ----

    #[test]
    fn duplicates_config_json_roundtrip() {
        let original = DuplicatesConfig {
            enabled: false,
            mode: DetectionMode::Semantic,
            min_tokens: 100,
            min_lines: 10,
            threshold: 5.5,
            ignore: vec!["test/**".to_string()],
            skip_local: true,
            cross_language: true,
            ignore_imports: true,
            normalization: NormalizationConfig {
                ignore_identifiers: Some(true),
                ignore_string_values: None,
                ignore_numeric_values: Some(false),
            },
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let restored: DuplicatesConfig = serde_json::from_str(&encoded).unwrap();

        assert!(!restored.enabled);
        assert_eq!(restored.mode, DetectionMode::Semantic);
        assert_eq!((restored.min_tokens, restored.min_lines), (100, 10));
        assert_close(restored.threshold, 5.5);
        assert!(restored.skip_local);
        assert!(restored.cross_language);
        assert!(restored.ignore_imports);

        let normalization = &restored.normalization;
        assert_eq!(normalization.ignore_identifiers, Some(true));
        assert!(normalization.ignore_string_values.is_none());
        assert_eq!(normalization.ignore_numeric_values, Some(false));
    }

    // ---- NormalizationConfig serialization: skip_serializing_if ----

    #[test]
    fn normalization_none_fields_not_serialized() {
        let encoded = serde_json::to_string(&NormalizationConfig::default()).unwrap();

        for key in [
            "ignoreIdentifiers",
            "ignoreStringValues",
            "ignoreNumericValues",
        ] {
            assert!(!encoded.contains(key), "None fields should be skipped");
        }
    }

    #[test]
    fn normalization_some_fields_serialized() {
        let partial = NormalizationConfig {
            ignore_identifiers: Some(true),
            ignore_string_values: None,
            ignore_numeric_values: Some(false),
        };
        let encoded = serde_json::to_string(&partial).unwrap();

        assert!(encoded.contains("ignoreIdentifiers"));
        assert!(!encoded.contains("ignoreStringValues"));
        assert!(encoded.contains("ignoreNumericValues"));
    }
}