1use schemars::JsonSchema;
2use serde::{Deserialize, Serialize};
3
/// Serde default provider that yields `true`.
///
/// Named by `#[serde(default = "default_true")]` so a boolean key that is
/// missing from the input deserializes as `true` rather than `bool::default()`.
const fn default_true() -> bool {
    true
}
7
/// Serde default provider for `min_tokens`: returns `50`.
///
/// Named by `#[serde(default = "default_min_tokens")]` and reused by the
/// `Default` impl so both paths agree on the value.
const fn default_min_tokens() -> usize {
    50
}
11
/// Serde default provider for `min_lines`: returns `5`.
///
/// Named by `#[serde(default = "default_min_lines")]` and reused by the
/// `Default` impl so both paths agree on the value.
const fn default_min_lines() -> usize {
    5
}
15
16#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
18#[serde(rename_all = "camelCase")]
19pub struct DuplicatesConfig {
20 #[serde(default = "default_true")]
22 pub enabled: bool,
23
24 #[serde(default)]
26 pub mode: DetectionMode,
27
28 #[serde(default = "default_min_tokens")]
30 pub min_tokens: usize,
31
32 #[serde(default = "default_min_lines")]
34 pub min_lines: usize,
35
36 #[serde(default)]
38 pub threshold: f64,
39
40 #[serde(default)]
42 pub ignore: Vec<String>,
43
44 #[serde(default)]
46 pub skip_local: bool,
47
48 #[serde(default)]
54 pub cross_language: bool,
55
56 #[serde(default)]
58 pub normalization: NormalizationConfig,
59}
60
61impl Default for DuplicatesConfig {
62 fn default() -> Self {
63 Self {
64 enabled: true,
65 mode: DetectionMode::default(),
66 min_tokens: default_min_tokens(),
67 min_lines: default_min_lines(),
68 threshold: 0.0,
69 ignore: vec![],
70 skip_local: false,
71 cross_language: false,
72 normalization: NormalizationConfig::default(),
73 }
74 }
75}
76
77#[derive(Debug, Clone, Default, Deserialize, Serialize, JsonSchema)]
83#[serde(rename_all = "camelCase")]
84pub struct NormalizationConfig {
85 #[serde(default, skip_serializing_if = "Option::is_none")]
88 pub ignore_identifiers: Option<bool>,
89
90 #[serde(default, skip_serializing_if = "Option::is_none")]
93 pub ignore_string_values: Option<bool>,
94
95 #[serde(default, skip_serializing_if = "Option::is_none")]
98 pub ignore_numeric_values: Option<bool>,
99}
100
/// Fully resolved normalization flags: every flag is a definite `bool`.
///
/// Produced by `ResolvedNormalization::resolve`, which fills each flag from an
/// explicit override when one is present and from the detection mode's default
/// otherwise.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ResolvedNormalization {
    /// Final value of the "ignore identifiers" flag.
    pub ignore_identifiers: bool,
    /// Final value of the "ignore string values" flag.
    pub ignore_string_values: bool,
    /// Final value of the "ignore numeric values" flag.
    pub ignore_numeric_values: bool,
}
108
109impl ResolvedNormalization {
110 #[must_use]
112 pub fn resolve(mode: DetectionMode, overrides: &NormalizationConfig) -> Self {
113 let (default_ids, default_strings, default_numbers) = match mode {
114 DetectionMode::Strict | DetectionMode::Mild => (false, false, false),
115 DetectionMode::Weak => (false, true, false),
116 DetectionMode::Semantic => (true, true, true),
117 };
118
119 Self {
120 ignore_identifiers: overrides.ignore_identifiers.unwrap_or(default_ids),
121 ignore_string_values: overrides.ignore_string_values.unwrap_or(default_strings),
122 ignore_numeric_values: overrides.ignore_numeric_values.unwrap_or(default_numbers),
123 }
124 }
125}
126
127#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
135#[serde(rename_all = "lowercase")]
136pub enum DetectionMode {
137 Strict,
139 #[default]
141 Mild,
142 Weak,
144 Semantic,
146}
147
148impl std::fmt::Display for DetectionMode {
149 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
150 match self {
151 Self::Strict => write!(f, "strict"),
152 Self::Mild => write!(f, "mild"),
153 Self::Weak => write!(f, "weak"),
154 Self::Semantic => write!(f, "semantic"),
155 }
156 }
157}
158
159impl std::str::FromStr for DetectionMode {
160 type Err = String;
161
162 fn from_str(s: &str) -> Result<Self, Self::Err> {
163 match s.to_lowercase().as_str() {
164 "strict" => Ok(Self::Strict),
165 "mild" => Ok(Self::Mild),
166 "weak" => Ok(Self::Weak),
167 "semantic" => Ok(Self::Semantic),
168 other => Err(format!("unknown detection mode: '{other}'")),
169 }
170 }
171}
172
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the expected outcome of `ResolvedNormalization::resolve`.
    fn flags(ids: bool, strings: bool, numbers: bool) -> ResolvedNormalization {
        ResolvedNormalization {
            ignore_identifiers: ids,
            ignore_string_values: strings,
            ignore_numeric_values: numbers,
        }
    }

    // ── DuplicatesConfig defaults ───────────────────────────────────────

    #[test]
    fn duplicates_config_defaults() {
        let config = DuplicatesConfig::default();
        assert!(config.enabled);
        assert_eq!(config.mode, DetectionMode::Mild);
        assert_eq!(config.min_tokens, 50);
        assert_eq!(config.min_lines, 5);
        assert!((config.threshold - 0.0).abs() < f64::EPSILON);
        assert!(config.ignore.is_empty());
        assert!(!config.skip_local);
        assert!(!config.cross_language);
        // No normalization overrides are set by default.
        assert_eq!(config.normalization.ignore_identifiers, None);
        assert_eq!(config.normalization.ignore_string_values, None);
        assert_eq!(config.normalization.ignore_numeric_values, None);
    }

    // ── DetectionMode: FromStr ──────────────────────────────────────────

    #[test]
    fn detection_mode_from_str_all_variants() {
        assert_eq!(
            "strict".parse::<DetectionMode>().unwrap(),
            DetectionMode::Strict
        );
        assert_eq!(
            "mild".parse::<DetectionMode>().unwrap(),
            DetectionMode::Mild
        );
        assert_eq!(
            "weak".parse::<DetectionMode>().unwrap(),
            DetectionMode::Weak
        );
        assert_eq!(
            "semantic".parse::<DetectionMode>().unwrap(),
            DetectionMode::Semantic
        );
    }

    #[test]
    fn detection_mode_from_str_case_insensitive() {
        assert_eq!(
            "STRICT".parse::<DetectionMode>().unwrap(),
            DetectionMode::Strict
        );
        assert_eq!(
            "Weak".parse::<DetectionMode>().unwrap(),
            DetectionMode::Weak
        );
        assert_eq!(
            "SEMANTIC".parse::<DetectionMode>().unwrap(),
            DetectionMode::Semantic
        );
    }

    #[test]
    fn detection_mode_from_str_unknown() {
        let err = "foobar".parse::<DetectionMode>().unwrap_err();
        assert!(err.contains("unknown detection mode"));
        assert!(err.contains("foobar"));
    }

    // ── DetectionMode: Display ──────────────────────────────────────────

    #[test]
    fn detection_mode_display() {
        assert_eq!(DetectionMode::Strict.to_string(), "strict");
        assert_eq!(DetectionMode::Mild.to_string(), "mild");
        assert_eq!(DetectionMode::Weak.to_string(), "weak");
        assert_eq!(DetectionMode::Semantic.to_string(), "semantic");
    }

    #[test]
    fn detection_mode_display_round_trips_through_from_str() {
        for mode in [
            DetectionMode::Strict,
            DetectionMode::Mild,
            DetectionMode::Weak,
            DetectionMode::Semantic,
        ] {
            assert_eq!(mode.to_string().parse::<DetectionMode>().unwrap(), mode);
        }
    }

    // ── ResolvedNormalization: mode defaults ────────────────────────────

    #[test]
    fn resolve_strict_mode_all_false() {
        let resolved =
            ResolvedNormalization::resolve(DetectionMode::Strict, &NormalizationConfig::default());
        assert_eq!(resolved, flags(false, false, false));
    }

    #[test]
    fn resolve_mild_mode_all_false() {
        let resolved =
            ResolvedNormalization::resolve(DetectionMode::Mild, &NormalizationConfig::default());
        assert_eq!(resolved, flags(false, false, false));
    }

    #[test]
    fn resolve_weak_mode_only_strings_true() {
        let resolved =
            ResolvedNormalization::resolve(DetectionMode::Weak, &NormalizationConfig::default());
        assert_eq!(resolved, flags(false, true, false));
    }

    #[test]
    fn resolve_semantic_mode_all_true() {
        let resolved = ResolvedNormalization::resolve(
            DetectionMode::Semantic,
            &NormalizationConfig::default(),
        );
        assert_eq!(resolved, flags(true, true, true));
    }

    // ── ResolvedNormalization: overrides ────────────────────────────────

    #[test]
    fn resolve_override_forces_true() {
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(true),
            ignore_string_values: None,
            ignore_numeric_values: None,
        };
        let resolved = ResolvedNormalization::resolve(DetectionMode::Strict, &overrides);
        assert_eq!(resolved, flags(true, false, false));
    }

    #[test]
    fn resolve_override_forces_false() {
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(false),
            ignore_string_values: Some(false),
            ignore_numeric_values: None,
        };
        let resolved = ResolvedNormalization::resolve(DetectionMode::Semantic, &overrides);
        // The flag without an override keeps the Semantic default.
        assert_eq!(resolved, flags(false, false, true));
    }

    #[test]
    fn resolve_all_overrides_on_weak() {
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(true),
            ignore_string_values: Some(false),
            ignore_numeric_values: Some(true),
        };
        let resolved = ResolvedNormalization::resolve(DetectionMode::Weak, &overrides);
        assert_eq!(resolved, flags(true, false, true));
    }

    // ── JSON deserialization ────────────────────────────────────────────

    #[test]
    fn duplicates_config_json_all_fields() {
        let json = r#"{
            "enabled": false,
            "mode": "semantic",
            "minTokens": 100,
            "minLines": 10,
            "threshold": 5.0,
            "ignore": ["**/vendor/**"],
            "skipLocal": true,
            "crossLanguage": true,
            "normalization": { "ignoreIdentifiers": true }
        }"#;
        let config: DuplicatesConfig = serde_json::from_str(json).unwrap();
        assert!(!config.enabled);
        assert_eq!(config.mode, DetectionMode::Semantic);
        assert_eq!(config.min_tokens, 100);
        assert_eq!(config.min_lines, 10);
        assert!((config.threshold - 5.0).abs() < f64::EPSILON);
        assert_eq!(config.ignore, vec!["**/vendor/**"]);
        assert!(config.skip_local);
        assert!(config.cross_language);
        assert_eq!(config.normalization.ignore_identifiers, Some(true));
        assert_eq!(config.normalization.ignore_string_values, None);
        assert_eq!(config.normalization.ignore_numeric_values, None);
    }

    #[test]
    fn duplicates_config_json_partial_uses_defaults() {
        let json = r#"{"mode": "weak"}"#;
        let config: DuplicatesConfig = serde_json::from_str(json).unwrap();
        assert!(config.enabled);
        assert_eq!(config.mode, DetectionMode::Weak);
        assert_eq!(config.min_tokens, 50);
        assert_eq!(config.min_lines, 5);
    }

    #[test]
    fn normalization_config_json_overrides() {
        let json = r#"{
            "ignoreIdentifiers": true,
            "ignoreStringValues": false
        }"#;
        let config: NormalizationConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.ignore_identifiers, Some(true));
        assert_eq!(config.ignore_string_values, Some(false));
        assert_eq!(config.ignore_numeric_values, None);
    }

    // ── JSON serialization ──────────────────────────────────────────────

    #[test]
    fn normalization_config_skips_unset_fields_when_serializing() {
        let json = serde_json::to_string(&NormalizationConfig::default()).unwrap();
        assert_eq!(json, "{}");
    }

    #[test]
    fn detection_mode_serializes_as_lowercase_name() {
        let json = serde_json::to_string(&DetectionMode::Weak).unwrap();
        assert_eq!(json, "\"weak\"");
    }
}