1use schemars::JsonSchema;
2use serde::{Deserialize, Serialize};
3
/// Serde default provider that yields `true`.
///
/// Referenced by name from `#[serde(default = "default_true")]` so that a
/// boolean option is switched on when its key is missing from the input.
const fn default_true() -> bool {
    true
}
7
/// Value used for `min_tokens` when the key is absent from the configuration.
const DEFAULT_MIN_TOKENS: usize = 50;

/// Serde default provider for `DuplicatesConfig::min_tokens`.
///
/// Referenced by name from `#[serde(default = "default_min_tokens")]` and
/// from the hand-written `Default` implementation.
const fn default_min_tokens() -> usize {
    DEFAULT_MIN_TOKENS
}
11
/// Value used for `min_lines` when the key is absent from the configuration.
const DEFAULT_MIN_LINES: usize = 5;

/// Serde default provider for `DuplicatesConfig::min_lines`.
///
/// Referenced by name from `#[serde(default = "default_min_lines")]` and
/// from the hand-written `Default` implementation.
const fn default_min_lines() -> usize {
    DEFAULT_MIN_LINES
}
15
16#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
18#[serde(rename_all = "camelCase")]
19pub struct DuplicatesConfig {
20 #[serde(default = "default_true")]
22 pub enabled: bool,
23
24 #[serde(default)]
26 pub mode: DetectionMode,
27
28 #[serde(default = "default_min_tokens")]
30 pub min_tokens: usize,
31
32 #[serde(default = "default_min_lines")]
34 pub min_lines: usize,
35
36 #[serde(default)]
38 pub threshold: f64,
39
40 #[serde(default)]
42 pub ignore: Vec<String>,
43
44 #[serde(default)]
46 pub skip_local: bool,
47
48 #[serde(default)]
54 pub cross_language: bool,
55
56 #[serde(default)]
58 pub normalization: NormalizationConfig,
59}
60
61impl Default for DuplicatesConfig {
62 fn default() -> Self {
63 Self {
64 enabled: true,
65 mode: DetectionMode::default(),
66 min_tokens: default_min_tokens(),
67 min_lines: default_min_lines(),
68 threshold: 0.0,
69 ignore: vec![],
70 skip_local: false,
71 cross_language: false,
72 normalization: NormalizationConfig::default(),
73 }
74 }
75}
76
77#[derive(Debug, Clone, Default, Deserialize, Serialize, JsonSchema)]
83#[serde(rename_all = "camelCase")]
84pub struct NormalizationConfig {
85 #[serde(default, skip_serializing_if = "Option::is_none")]
88 pub ignore_identifiers: Option<bool>,
89
90 #[serde(default, skip_serializing_if = "Option::is_none")]
93 pub ignore_string_values: Option<bool>,
94
95 #[serde(default, skip_serializing_if = "Option::is_none")]
98 pub ignore_numeric_values: Option<bool>,
99}
100
/// Fully resolved normalization flags.
///
/// Unlike `NormalizationConfig`, every flag here is a plain `bool`: the value
/// is either the explicit override or the default of the detection mode, as
/// computed by `ResolvedNormalization::resolve`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ResolvedNormalization {
    /// Effective value of the `ignore_identifiers` flag.
    pub ignore_identifiers: bool,
    /// Effective value of the `ignore_string_values` flag.
    pub ignore_string_values: bool,
    /// Effective value of the `ignore_numeric_values` flag.
    pub ignore_numeric_values: bool,
}
108
109impl ResolvedNormalization {
110 pub fn resolve(mode: DetectionMode, overrides: &NormalizationConfig) -> Self {
112 let (default_ids, default_strings, default_numbers) = match mode {
113 DetectionMode::Strict | DetectionMode::Mild => (false, false, false),
114 DetectionMode::Weak => (false, true, false),
115 DetectionMode::Semantic => (true, true, true),
116 };
117
118 Self {
119 ignore_identifiers: overrides.ignore_identifiers.unwrap_or(default_ids),
120 ignore_string_values: overrides.ignore_string_values.unwrap_or(default_strings),
121 ignore_numeric_values: overrides.ignore_numeric_values.unwrap_or(default_numbers),
122 }
123 }
124}
125
126#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
134#[serde(rename_all = "lowercase")]
135pub enum DetectionMode {
136 Strict,
138 #[default]
140 Mild,
141 Weak,
143 Semantic,
145}
146
147impl std::fmt::Display for DetectionMode {
148 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
149 match self {
150 Self::Strict => write!(f, "strict"),
151 Self::Mild => write!(f, "mild"),
152 Self::Weak => write!(f, "weak"),
153 Self::Semantic => write!(f, "semantic"),
154 }
155 }
156}
157
158impl std::str::FromStr for DetectionMode {
159 type Err = String;
160
161 fn from_str(s: &str) -> Result<Self, Self::Err> {
162 match s.to_lowercase().as_str() {
163 "strict" => Ok(Self::Strict),
164 "mild" => Ok(Self::Mild),
165 "weak" => Ok(Self::Weak),
166 "semantic" => Ok(Self::Semantic),
167 other => Err(format!("unknown detection mode: '{other}'")),
168 }
169 }
170}
171
#[cfg(test)]
mod tests {
    use super::*;

    /// Resolves `mode` against an override set in which every field is unset.
    fn resolve_without_overrides(mode: DetectionMode) -> ResolvedNormalization {
        ResolvedNormalization::resolve(mode, &NormalizationConfig::default())
    }

    /// Builds an expected resolution from the three flags, in declaration order.
    fn flags(identifiers: bool, strings: bool, numbers: bool) -> ResolvedNormalization {
        ResolvedNormalization {
            ignore_identifiers: identifiers,
            ignore_string_values: strings,
            ignore_numeric_values: numbers,
        }
    }

    // ── DuplicatesConfig::default ───────────────────────────────────────

    #[test]
    fn duplicates_config_defaults() {
        let DuplicatesConfig {
            enabled,
            mode,
            min_tokens,
            min_lines,
            threshold,
            ignore,
            skip_local,
            cross_language,
            normalization: _,
        } = DuplicatesConfig::default();

        assert!(enabled);
        assert_eq!(mode, DetectionMode::Mild);
        assert_eq!(min_tokens, 50);
        assert_eq!(min_lines, 5);
        assert_eq!(threshold, 0.0);
        assert!(ignore.is_empty());
        assert!(!skip_local);
        assert!(!cross_language);
    }

    // ── DetectionMode: FromStr ──────────────────────────────────────────

    #[test]
    fn detection_mode_from_str_all_variants() {
        let cases = [
            ("strict", DetectionMode::Strict),
            ("mild", DetectionMode::Mild),
            ("weak", DetectionMode::Weak),
            ("semantic", DetectionMode::Semantic),
        ];
        for (text, expected) in cases {
            assert_eq!(text.parse::<DetectionMode>(), Ok(expected), "input: {text}");
        }
    }

    #[test]
    fn detection_mode_from_str_case_insensitive() {
        let cases = [
            ("STRICT", DetectionMode::Strict),
            ("Weak", DetectionMode::Weak),
            ("SEMANTIC", DetectionMode::Semantic),
        ];
        for (text, expected) in cases {
            assert_eq!(text.parse::<DetectionMode>(), Ok(expected), "input: {text}");
        }
    }

    #[test]
    fn detection_mode_from_str_unknown() {
        let message = "foobar"
            .parse::<DetectionMode>()
            .expect_err("an unknown mode name must be rejected");
        for needle in ["unknown detection mode", "foobar"] {
            assert!(message.contains(needle), "missing {needle:?} in {message:?}");
        }
    }

    // ── DetectionMode: Display ──────────────────────────────────────────

    #[test]
    fn detection_mode_display() {
        let cases = [
            (DetectionMode::Strict, "strict"),
            (DetectionMode::Mild, "mild"),
            (DetectionMode::Weak, "weak"),
            (DetectionMode::Semantic, "semantic"),
        ];
        for (mode, expected) in cases {
            assert_eq!(mode.to_string(), expected);
        }
    }

    // ── ResolvedNormalization::resolve: mode defaults ───────────────────

    #[test]
    fn resolve_strict_mode_all_false() {
        assert_eq!(
            resolve_without_overrides(DetectionMode::Strict),
            flags(false, false, false)
        );
    }

    #[test]
    fn resolve_mild_mode_all_false() {
        assert_eq!(
            resolve_without_overrides(DetectionMode::Mild),
            flags(false, false, false)
        );
    }

    #[test]
    fn resolve_weak_mode_only_strings_true() {
        assert_eq!(
            resolve_without_overrides(DetectionMode::Weak),
            flags(false, true, false)
        );
    }

    #[test]
    fn resolve_semantic_mode_all_true() {
        assert_eq!(
            resolve_without_overrides(DetectionMode::Semantic),
            flags(true, true, true)
        );
    }

    // ── ResolvedNormalization::resolve: explicit overrides ──────────────

    #[test]
    fn resolve_override_forces_true() {
        // Strict defaults everything to false; only identifiers are forced on.
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(true),
            ..NormalizationConfig::default()
        };
        assert_eq!(
            ResolvedNormalization::resolve(DetectionMode::Strict, &overrides),
            flags(true, false, false)
        );
    }

    #[test]
    fn resolve_override_forces_false() {
        // Semantic defaults everything to true; the unset numeric flag keeps it.
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(false),
            ignore_string_values: Some(false),
            ..NormalizationConfig::default()
        };
        assert_eq!(
            ResolvedNormalization::resolve(DetectionMode::Semantic, &overrides),
            flags(false, false, true)
        );
    }

    #[test]
    fn resolve_all_overrides_on_weak() {
        // Every flag is overridden, inverting each of Weak's defaults.
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(true),
            ignore_string_values: Some(false),
            ignore_numeric_values: Some(true),
        };
        assert_eq!(
            ResolvedNormalization::resolve(DetectionMode::Weak, &overrides),
            flags(true, false, true)
        );
    }

    // ── JSON deserialization ────────────────────────────────────────────

    #[test]
    fn duplicates_config_json_all_fields() {
        let parsed: DuplicatesConfig = serde_json::from_str(
            r#"{
            "enabled": false,
            "mode": "semantic",
            "minTokens": 100,
            "minLines": 10,
            "threshold": 5.0,
            "ignore": ["**/vendor/**"],
            "skipLocal": true,
            "crossLanguage": true
        }"#,
        )
        .unwrap();

        assert!(!parsed.enabled);
        assert_eq!(parsed.mode, DetectionMode::Semantic);
        assert_eq!(parsed.min_tokens, 100);
        assert_eq!(parsed.min_lines, 10);
        assert_eq!(parsed.threshold, 5.0);
        assert_eq!(parsed.ignore, vec!["**/vendor/**"]);
        assert!(parsed.skip_local);
        assert!(parsed.cross_language);
    }

    #[test]
    fn duplicates_config_json_partial_uses_defaults() {
        let parsed: DuplicatesConfig = serde_json::from_str(r#"{"mode": "weak"}"#).unwrap();

        // Only `mode` was supplied; everything else falls back to its default.
        assert!(parsed.enabled);
        assert_eq!(parsed.mode, DetectionMode::Weak);
        assert_eq!(parsed.min_tokens, 50);
        assert_eq!(parsed.min_lines, 5);
    }

    #[test]
    fn normalization_config_json_overrides() {
        let parsed: NormalizationConfig = serde_json::from_str(
            r#"{
            "ignoreIdentifiers": true,
            "ignoreStringValues": false
        }"#,
        )
        .unwrap();

        assert_eq!(parsed.ignore_identifiers, Some(true));
        assert_eq!(parsed.ignore_string_values, Some(false));
        assert_eq!(parsed.ignore_numeric_values, None);
    }
}