Skip to main content

fallow_config/config/
duplicates_config.rs

1use schemars::JsonSchema;
2use serde::{Deserialize, Deserializer, Serialize};
3
/// Serde default for boolean options that are switched on unless the user
/// turns them off (`enabled`, `ignoreDefaults`).
const fn default_true() -> bool {
    true
}
7
/// Serde default for `minTokens`: a clone must span at least 50 tokens.
const fn default_min_tokens() -> usize {
    50
}
11
/// Serde default for `minLines`: a clone must span at least 5 lines.
const fn default_min_lines() -> usize {
    5
}
15
/// Serde default for `minOccurrences`: 2, the smallest value the
/// deserializer accepts.
const fn default_min_occurrences() -> usize {
    2
}
19
20/// Reject `< 2` at deserialize time. A single occurrence isn't a duplicate;
21/// silently clamping would poison reproducibility across config / env / CLI
22/// override sources.
23fn deserialize_min_occurrences<'de, D>(deserializer: D) -> Result<usize, D::Error>
24where
25    D: Deserializer<'de>,
26{
27    let value = usize::deserialize(deserializer)?;
28    if value < 2 {
29        return Err(serde::de::Error::custom(format!(
30            "minOccurrences must be at least 2 (got {value}); a single occurrence isn't a duplicate"
31        )));
32    }
33    Ok(value)
34}
35
/// Serde default for `minCorpusSizeForShingleFilter`: 1024 tokenized files.
const fn default_min_corpus_size_for_shingle_filter() -> usize {
    1024
}
39
/// Serde default for `minCorpusSizeForTokenCache`: 5,000 source files.
const fn default_min_corpus_size_for_token_cache() -> usize {
    5_000
}
43
// NOTE: this type derives `JsonSchema`; schemars copies `///` doc comments
// into the generated schema as descriptions, so the `///` wording below is
// user-visible output. Field order is also the order serde serializes in.
/// Configuration for code duplication detection.
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
// Keys are camelCase on the wire (e.g. `minTokens`, `skipLocal`).
#[serde(rename_all = "camelCase")]
pub struct DuplicatesConfig {
    /// Whether duplication detection is enabled.
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Detection mode: strict, mild, weak, or semantic.
    // Omitted key falls back to `DetectionMode::default()`.
    #[serde(default)]
    pub mode: DetectionMode,

    /// Minimum number of tokens for a clone.
    #[serde(default = "default_min_tokens")]
    pub min_tokens: usize,

    /// Minimum number of lines for a clone.
    #[serde(default = "default_min_lines")]
    pub min_lines: usize,

    /// Minimum number of occurrences (instances of the same clone) before a
    /// group is reported. Defaults to 2 (every duplicated pair is reported).
    /// Raise this to focus on widespread copy-paste worth refactoring and skip
    /// context-sensitive pairs.
    // `default` covers an omitted key; `deserialize_with` validates a supplied
    // one and rejects anything below 2. The schemars range advertises the same
    // lower bound in the generated schema.
    #[serde(
        default = "default_min_occurrences",
        deserialize_with = "deserialize_min_occurrences"
    )]
    #[schemars(range(min = 2))]
    pub min_occurrences: usize,

    /// Maximum allowed duplication percentage (0 = no limit).
    // Plain `default` yields `0.0`, i.e. the "no limit" value.
    #[serde(default)]
    pub threshold: f64,

    /// Additional ignore patterns for duplication analysis.
    #[serde(default)]
    pub ignore: Vec<String>,

    /// Merge built-in generated-framework ignore patterns with `ignore`.
    ///
    /// Set to `false` to use only the user-provided `ignore` list.
    #[serde(default = "default_true")]
    pub ignore_defaults: bool,

    /// Only report cross-directory duplicates.
    #[serde(default)]
    pub skip_local: bool,

    /// Enable cross-language clone detection by stripping type annotations.
    ///
    /// When enabled, TypeScript type annotations (parameter types, return types,
    /// generics, interfaces, type aliases) are stripped from the token stream,
    /// allowing detection of clones between `.ts` and `.js` files.
    #[serde(default)]
    pub cross_language: bool,

    /// Exclude ES `import` declarations from clone detection.
    ///
    /// When enabled, all `import` statements (value imports, type imports, and
    /// side-effect imports) are stripped from the token stream before clone
    /// detection. This reduces noise from sorted import blocks that naturally
    /// look similar across files. Only affects ES `import` declarations;
    /// CommonJS `require()` calls are not filtered.
    #[serde(default)]
    pub ignore_imports: bool,

    /// Fine-grained normalization overrides on top of the detection mode.
    // Omitted table means "no overrides": every option stays `None`.
    #[serde(default)]
    pub normalization: NormalizationConfig,

    /// Minimum tokenized file count before focused duplicate analysis prefilters
    /// unchanged files with k-token shingles.
    #[serde(default = "default_min_corpus_size_for_shingle_filter")]
    pub min_corpus_size_for_shingle_filter: usize,

    /// Minimum source file count before the persistent duplication token cache
    /// activates. Below this threshold the cache load/save overhead exceeds the
    /// tokenize savings, so the cache stays disabled even when not running with
    /// `--no-cache`.
    #[serde(default = "default_min_corpus_size_for_token_cache")]
    pub min_corpus_size_for_token_cache: usize,
}
127
128impl Default for DuplicatesConfig {
129    fn default() -> Self {
130        Self {
131            enabled: true,
132            mode: DetectionMode::default(),
133            min_tokens: default_min_tokens(),
134            min_lines: default_min_lines(),
135            min_occurrences: default_min_occurrences(),
136            threshold: 0.0,
137            ignore: vec![],
138            ignore_defaults: true,
139            skip_local: false,
140            cross_language: false,
141            ignore_imports: false,
142            normalization: NormalizationConfig::default(),
143            min_corpus_size_for_shingle_filter: default_min_corpus_size_for_shingle_filter(),
144            min_corpus_size_for_token_cache: default_min_corpus_size_for_token_cache(),
145        }
146    }
147}
148
// NOTE: this type derives `JsonSchema`; schemars copies `///` doc comments
// into the generated schema as descriptions, so the `///` wording below is
// user-visible output.
/// Fine-grained normalization overrides.
///
/// Each option, when set to `Some(true)`, forces that normalization regardless of
/// the detection mode. When set to `Some(false)`, it forces preservation. When
/// `None`, the detection mode's default behavior applies.
#[derive(Debug, Clone, Default, Deserialize, Serialize, JsonSchema)]
// Keys are camelCase on the wire (e.g. `ignoreIdentifiers`).
#[serde(rename_all = "camelCase")]
pub struct NormalizationConfig {
    /// Blind all identifiers (variable names, function names, etc.) to the same hash.
    /// Default in `semantic` mode.
    // An omitted key deserializes to `None`; `None` is left out when serializing.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ignore_identifiers: Option<bool>,

    /// Blind string literal values to the same hash.
    /// Default in `weak` and `semantic` modes.
    // An omitted key deserializes to `None`; `None` is left out when serializing.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ignore_string_values: Option<bool>,

    /// Blind numeric literal values to the same hash.
    /// Default in `semantic` mode.
    // An omitted key deserializes to `None`; `None` is left out when serializing.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ignore_numeric_values: Option<bool>,
}
172
/// Effective normalization switches: a detection mode's defaults with any
/// user overrides already applied, so every flag is a plain `bool`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ResolvedNormalization {
    /// Blind all identifiers to the same hash.
    pub ignore_identifiers: bool,
    /// Blind string literal values to the same hash.
    pub ignore_string_values: bool,
    /// Blind numeric literal values to the same hash.
    pub ignore_numeric_values: bool,
}
180
181impl ResolvedNormalization {
182    /// Resolve normalization from a detection mode and optional overrides.
183    #[must_use]
184    pub fn resolve(mode: DetectionMode, overrides: &NormalizationConfig) -> Self {
185        let (default_ids, default_strings, default_numbers) = match mode {
186            DetectionMode::Strict | DetectionMode::Mild => (false, false, false),
187            DetectionMode::Weak => (false, true, false),
188            DetectionMode::Semantic => (true, true, true),
189        };
190
191        Self {
192            ignore_identifiers: overrides.ignore_identifiers.unwrap_or(default_ids),
193            ignore_string_values: overrides.ignore_string_values.unwrap_or(default_strings),
194            ignore_numeric_values: overrides.ignore_numeric_values.unwrap_or(default_numbers),
195        }
196    }
197}
198
// NOTE: this type derives `JsonSchema`; schemars copies `///` doc comments
// into the generated schema as descriptions, so the `///` wording below is
// user-visible output.
/// Detection mode controlling how aggressively tokens are normalized.
///
/// Since fallow uses AST-based tokenization (not lexer-based), whitespace and
/// comments are inherently absent from the token stream. The `Strict` and `Mild`
/// modes are currently equivalent. `Weak` mode additionally blinds string
/// literals. `Semantic` mode blinds all identifiers and literal values for
/// Type-2 (renamed variable) clone detection.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
// Variants are (de)serialized by their lowercase names: "strict", "mild",
// "weak", "semantic".
#[serde(rename_all = "lowercase")]
pub enum DetectionMode {
    /// All tokens preserved including identifier names and literal values (Type-1 only).
    Strict,
    /// Default mode -- equivalent to strict for AST-based tokenization.
    // `#[default]` makes this the value returned by `DetectionMode::default()`.
    #[default]
    Mild,
    /// Blind string literal values (structure-preserving).
    Weak,
    /// Blind all identifiers and literal values for structural (Type-2) detection.
    Semantic,
}
219
220impl std::fmt::Display for DetectionMode {
221    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
222        match self {
223            Self::Strict => write!(f, "strict"),
224            Self::Mild => write!(f, "mild"),
225            Self::Weak => write!(f, "weak"),
226            Self::Semantic => write!(f, "semantic"),
227        }
228    }
229}
230
231impl std::str::FromStr for DetectionMode {
232    type Err = String;
233
234    fn from_str(s: &str) -> Result<Self, Self::Err> {
235        match s.to_lowercase().as_str() {
236            "strict" => Ok(Self::Strict),
237            "mild" => Ok(Self::Mild),
238            "weak" => Ok(Self::Weak),
239            "semantic" => Ok(Self::Semantic),
240            other => Err(format!("unknown detection mode: '{other}'")),
241        }
242    }
243}
244
#[cfg(test)]
mod tests {
    use super::*;

    // ── Shared helpers ───────────────────────────────────────────────

    /// `true` when the two floats differ by less than `f64::EPSILON`.
    fn approx_eq(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < f64::EPSILON
    }

    /// Shorthand for the expected outcome of `ResolvedNormalization::resolve`.
    fn flags(identifiers: bool, strings: bool, numbers: bool) -> ResolvedNormalization {
        ResolvedNormalization {
            ignore_identifiers: identifiers,
            ignore_string_values: strings,
            ignore_numeric_values: numbers,
        }
    }

    /// Resolve `mode` with no user overrides at all.
    fn resolve_without_overrides(mode: DetectionMode) -> ResolvedNormalization {
        ResolvedNormalization::resolve(mode, &NormalizationConfig::default())
    }

    // ── DuplicatesConfig defaults ────────────────────────────────────

    #[test]
    fn duplicates_config_defaults() {
        let defaults = DuplicatesConfig::default();

        // Switches.
        assert!(defaults.enabled);
        assert!(defaults.ignore_defaults);
        assert!(!defaults.skip_local);
        assert!(!defaults.cross_language);
        assert!(!defaults.ignore_imports);

        // Mode and thresholds.
        assert_eq!(defaults.mode, DetectionMode::Mild);
        assert_eq!(defaults.min_tokens, 50);
        assert_eq!(defaults.min_lines, 5);
        assert_eq!(defaults.min_occurrences, 2);
        assert!(approx_eq(defaults.threshold, 0.0));
        assert!(defaults.ignore.is_empty());

        // Corpus-size gates.
        assert_eq!(defaults.min_corpus_size_for_shingle_filter, 1024);
        assert_eq!(defaults.min_corpus_size_for_token_cache, 5_000);
    }

    // ── DetectionMode FromStr ────────────────────────────────────────

    #[test]
    fn detection_mode_from_str_all_variants() {
        let cases = [
            ("strict", DetectionMode::Strict),
            ("mild", DetectionMode::Mild),
            ("weak", DetectionMode::Weak),
            ("semantic", DetectionMode::Semantic),
        ];
        for (text, expected) in cases {
            assert_eq!(text.parse::<DetectionMode>().unwrap(), expected);
        }
    }

    #[test]
    fn detection_mode_from_str_case_insensitive() {
        let cases = [
            ("STRICT", DetectionMode::Strict),
            ("Weak", DetectionMode::Weak),
            ("SEMANTIC", DetectionMode::Semantic),
        ];
        for (text, expected) in cases {
            assert_eq!(text.parse::<DetectionMode>().unwrap(), expected);
        }
    }

    #[test]
    fn detection_mode_from_str_unknown() {
        let message = "foobar".parse::<DetectionMode>().unwrap_err();
        for needle in ["unknown detection mode", "foobar"] {
            assert!(message.contains(needle));
        }
    }

    // ── DetectionMode Display ────────────────────────────────────────

    #[test]
    fn detection_mode_display() {
        let cases = [
            (DetectionMode::Strict, "strict"),
            (DetectionMode::Mild, "mild"),
            (DetectionMode::Weak, "weak"),
            (DetectionMode::Semantic, "semantic"),
        ];
        for (mode, rendered) in cases {
            assert_eq!(mode.to_string(), rendered);
        }
    }

    // ── ResolvedNormalization::resolve ────────────────────────────────

    #[test]
    fn resolve_strict_mode_all_false() {
        assert_eq!(
            resolve_without_overrides(DetectionMode::Strict),
            flags(false, false, false)
        );
    }

    #[test]
    fn resolve_mild_mode_all_false() {
        assert_eq!(
            resolve_without_overrides(DetectionMode::Mild),
            flags(false, false, false)
        );
    }

    #[test]
    fn resolve_weak_mode_only_strings_true() {
        assert_eq!(
            resolve_without_overrides(DetectionMode::Weak),
            flags(false, true, false)
        );
    }

    #[test]
    fn resolve_semantic_mode_all_true() {
        assert_eq!(
            resolve_without_overrides(DetectionMode::Semantic),
            flags(true, true, true)
        );
    }

    #[test]
    fn resolve_override_forces_true() {
        // Strict keeps everything by default; only the identifier flag is forced on.
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(true),
            ..NormalizationConfig::default()
        };
        assert_eq!(
            ResolvedNormalization::resolve(DetectionMode::Strict, &overrides),
            flags(true, false, false)
        );
    }

    #[test]
    fn resolve_override_forces_false() {
        // Semantic blinds everything by default; two flags are forced off and
        // the numeric flag is left to the mode.
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(false),
            ignore_string_values: Some(false),
            ignore_numeric_values: None,
        };
        assert_eq!(
            ResolvedNormalization::resolve(DetectionMode::Semantic, &overrides),
            flags(false, false, true)
        );
    }

    #[test]
    fn resolve_all_overrides_on_weak() {
        // Every flag is overridden; the string flag flips weak's default (true -> false).
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(true),
            ignore_string_values: Some(false),
            ignore_numeric_values: Some(true),
        };
        assert_eq!(
            ResolvedNormalization::resolve(DetectionMode::Weak, &overrides),
            flags(true, false, true)
        );
    }

    // ── DuplicatesConfig deserialization ──────────────────────────────

    #[test]
    fn duplicates_config_json_all_fields() {
        let json = r#"{
            "enabled": false,
            "mode": "semantic",
            "minTokens": 100,
            "minLines": 10,
            "minOccurrences": 3,
            "threshold": 5.0,
            "ignore": ["**/vendor/**"],
            "ignoreDefaults": false,
            "skipLocal": true,
            "crossLanguage": true,
            "ignoreImports": true
        }"#;
        let parsed: DuplicatesConfig = serde_json::from_str(json).unwrap();

        assert!(!parsed.enabled);
        assert_eq!(parsed.mode, DetectionMode::Semantic);
        assert_eq!(parsed.min_tokens, 100);
        assert_eq!(parsed.min_lines, 10);
        assert_eq!(parsed.min_occurrences, 3);
        assert!(approx_eq(parsed.threshold, 5.0));
        assert_eq!(parsed.ignore, vec!["**/vendor/**"]);
        assert!(!parsed.ignore_defaults);
        assert!(parsed.skip_local);
        assert!(parsed.cross_language);
        assert!(parsed.ignore_imports);
    }

    #[test]
    fn duplicates_config_json_partial_uses_defaults() {
        let parsed: DuplicatesConfig = serde_json::from_str(r#"{"mode": "weak"}"#).unwrap();

        // Only `mode` was supplied ...
        assert_eq!(parsed.mode, DetectionMode::Weak);
        // ... everything else falls back to its default.
        assert!(parsed.enabled);
        assert_eq!(parsed.min_tokens, 50);
        assert_eq!(parsed.min_lines, 5);
        assert!(parsed.ignore_defaults);
    }

    #[test]
    fn duplicates_config_json_ignore_defaults_merges_by_default() {
        let parsed: DuplicatesConfig =
            serde_json::from_str(r#"{"ignore": ["**/foo/**"]}"#).unwrap();
        assert_eq!(parsed.ignore, vec!["**/foo/**"]);
        assert!(parsed.ignore_defaults);
    }

    #[test]
    fn normalization_config_json_overrides() {
        let json = r#"{
            "ignoreIdentifiers": true,
            "ignoreStringValues": false
        }"#;
        let parsed: NormalizationConfig = serde_json::from_str(json).unwrap();
        assert_eq!(parsed.ignore_identifiers, Some(true));
        assert_eq!(parsed.ignore_string_values, Some(false));
        assert_eq!(parsed.ignore_numeric_values, None);
    }

    // ── TOML deserialization ────────────────────────────────────────

    #[test]
    fn duplicates_config_toml_all_fields() {
        let toml_str = r#"
enabled = false
mode = "weak"
minTokens = 75
minLines = 8
minOccurrences = 3
threshold = 3.0
ignore = ["vendor/**"]
skipLocal = true
crossLanguage = true
ignoreImports = true

[normalization]
ignoreIdentifiers = true
ignoreStringValues = true
ignoreNumericValues = false
"#;
        let parsed: DuplicatesConfig = toml::from_str(toml_str).unwrap();

        assert!(!parsed.enabled);
        assert_eq!(parsed.mode, DetectionMode::Weak);
        assert_eq!(parsed.min_tokens, 75);
        assert_eq!(parsed.min_lines, 8);
        assert_eq!(parsed.min_occurrences, 3);
        assert!(approx_eq(parsed.threshold, 3.0));
        assert_eq!(parsed.ignore, vec!["vendor/**"]);
        assert!(parsed.skip_local);
        assert!(parsed.cross_language);
        assert!(parsed.ignore_imports);

        let normalization = &parsed.normalization;
        assert_eq!(normalization.ignore_identifiers, Some(true));
        assert_eq!(normalization.ignore_string_values, Some(true));
        assert_eq!(normalization.ignore_numeric_values, Some(false));
    }

    #[test]
    fn duplicates_config_toml_defaults() {
        // An empty document must deserialize to the defaults.
        let parsed: DuplicatesConfig = toml::from_str("").unwrap();
        assert!(parsed.enabled);
        assert_eq!(parsed.mode, DetectionMode::Mild);
        assert_eq!(parsed.min_tokens, 50);
        assert_eq!(parsed.min_lines, 5);
    }

    // ── NormalizationConfig edge cases ──────────────────────────────

    #[test]
    fn normalization_config_default_all_none() {
        let NormalizationConfig {
            ignore_identifiers,
            ignore_string_values,
            ignore_numeric_values,
        } = NormalizationConfig::default();
        assert!(ignore_identifiers.is_none());
        assert!(ignore_string_values.is_none());
        assert!(ignore_numeric_values.is_none());
    }

    #[test]
    fn normalization_config_empty_json_object() {
        let NormalizationConfig {
            ignore_identifiers,
            ignore_string_values,
            ignore_numeric_values,
        } = serde_json::from_str("{}").unwrap();
        assert!(ignore_identifiers.is_none());
        assert!(ignore_string_values.is_none());
        assert!(ignore_numeric_values.is_none());
    }

    // ── DetectionMode default ───────────────────────────────────────

    #[test]
    fn detection_mode_default_is_mild() {
        assert_eq!(DetectionMode::default(), DetectionMode::Mild);
    }

    // ── ResolvedNormalization equality ───────────────────────────────

    #[test]
    fn resolved_normalization_equality() {
        let first = flags(true, false, true);
        let same = flags(true, false, true);
        assert_eq!(first, same);

        let different = flags(false, false, true);
        assert_ne!(first, different);
    }

    // ── Detection mode JSON deserialization ──────────────────────────

    #[test]
    fn detection_mode_json_deserialization() {
        let cases = [
            (r#""strict""#, DetectionMode::Strict),
            (r#""mild""#, DetectionMode::Mild),
            (r#""weak""#, DetectionMode::Weak),
            (r#""semantic""#, DetectionMode::Semantic),
        ];
        for (json, expected) in cases {
            let parsed: DetectionMode = serde_json::from_str(json).unwrap();
            assert_eq!(parsed, expected);
        }
    }

    #[test]
    fn detection_mode_invalid_json() {
        let outcome = serde_json::from_str::<DetectionMode>(r#""aggressive""#);
        assert!(outcome.is_err());
    }

    // ── Serialize roundtrip ─────────────────────────────────────────

    #[test]
    fn duplicates_config_json_roundtrip() {
        let original = DuplicatesConfig {
            enabled: false,
            mode: DetectionMode::Semantic,
            min_tokens: 100,
            min_lines: 10,
            min_occurrences: 4,
            threshold: 5.5,
            ignore: vec!["test/**".to_string()],
            ignore_defaults: false,
            skip_local: true,
            cross_language: true,
            ignore_imports: true,
            normalization: NormalizationConfig {
                ignore_identifiers: Some(true),
                ignore_string_values: None,
                ignore_numeric_values: Some(false),
            },
            min_corpus_size_for_shingle_filter: 2048,
            min_corpus_size_for_token_cache: 8_000,
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: DuplicatesConfig = serde_json::from_str(&encoded).unwrap();

        assert!(!decoded.enabled);
        assert_eq!(decoded.mode, DetectionMode::Semantic);
        assert_eq!(decoded.min_tokens, 100);
        assert_eq!(decoded.min_lines, 10);
        assert_eq!(decoded.min_occurrences, 4);
        assert!(approx_eq(decoded.threshold, 5.5));
        assert!(!decoded.ignore_defaults);
        assert!(decoded.skip_local);
        assert!(decoded.cross_language);
        assert_eq!(decoded.min_corpus_size_for_shingle_filter, 2048);
        assert_eq!(decoded.min_corpus_size_for_token_cache, 8_000);
        assert!(decoded.ignore_imports);

        let normalization = &decoded.normalization;
        assert_eq!(normalization.ignore_identifiers, Some(true));
        assert!(normalization.ignore_string_values.is_none());
        assert_eq!(normalization.ignore_numeric_values, Some(false));
    }

    // ── NormalizationConfig skip_serializing_if ─────────────────────

    #[test]
    fn normalization_none_fields_not_serialized() {
        let encoded = serde_json::to_string(&NormalizationConfig::default()).unwrap();
        for key in [
            "ignoreIdentifiers",
            "ignoreStringValues",
            "ignoreNumericValues",
        ] {
            assert!(!encoded.contains(key), "None fields should be skipped");
        }
    }

    #[test]
    fn normalization_some_fields_serialized() {
        let partially_set = NormalizationConfig {
            ignore_identifiers: Some(true),
            ignore_string_values: None,
            ignore_numeric_values: Some(false),
        };
        let encoded = serde_json::to_string(&partially_set).unwrap();
        assert!(encoded.contains("ignoreIdentifiers"));
        assert!(!encoded.contains("ignoreStringValues"));
        assert!(encoded.contains("ignoreNumericValues"));
    }

    // ── minOccurrences validation ───────────────────────────────────

    #[test]
    fn min_occurrences_accepts_two_or_more() {
        let cases = [
            (r#"{"minOccurrences": 2}"#, 2),
            (r#"{"minOccurrences": 5}"#, 5),
        ];
        for (json, expected) in cases {
            let parsed: DuplicatesConfig = serde_json::from_str(json).unwrap();
            assert_eq!(parsed.min_occurrences, expected);
        }
    }

    #[test]
    fn min_occurrences_rejects_one() {
        let failure =
            serde_json::from_str::<DuplicatesConfig>(r#"{"minOccurrences": 1}"#).unwrap_err();
        assert!(failure.to_string().contains("at least 2"));
    }

    #[test]
    fn min_occurrences_rejects_zero() {
        let failure =
            serde_json::from_str::<DuplicatesConfig>(r#"{"minOccurrences": 0}"#).unwrap_err();
        assert!(failure.to_string().contains("at least 2"));
    }

    #[test]
    fn min_occurrences_rejects_one_in_toml() {
        let failure = toml::from_str::<DuplicatesConfig>("minOccurrences = 1").unwrap_err();
        assert!(failure.to_string().contains("at least 2"));
    }
}