use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
/// Serde fallback for boolean fields that must be `true` when the key is omitted.
///
/// Referenced by name from `#[serde(default = "default_true")]`.
const fn default_true() -> bool {
    true
}
/// Serde fallback for `DuplicatesConfig::min_tokens` when the key is omitted.
///
/// Also used by `DuplicatesConfig::default` so both paths agree on the value.
const fn default_min_tokens() -> usize {
    50
}
/// Serde fallback for `DuplicatesConfig::min_lines` when the key is omitted.
///
/// Also used by `DuplicatesConfig::default` so both paths agree on the value.
const fn default_min_lines() -> usize {
    5
}
/// User-facing settings for the duplicates check.
///
/// Keys are camelCase in serialized form (`minTokens`, `skipLocal`, ...). Every field
/// carries a serde default, so an empty document deserializes to the same values
/// that [`DuplicatesConfig::default`] produces.
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct DuplicatesConfig {
/// Defaults to `true` when omitted.
// NOTE(review): presumably turns the whole duplicates check on or off; the consumer is not in this file.
#[serde(default = "default_true")]
pub enabled: bool,
/// Detection mode; defaults to [`DetectionMode::Mild`] when omitted.
#[serde(default)]
pub mode: DetectionMode,
/// Defaults to 50 when omitted.
// NOTE(review): presumably the minimum token count a duplicate must span; confirm against the detector.
#[serde(default = "default_min_tokens")]
pub min_tokens: usize,
/// Defaults to 5 when omitted.
// NOTE(review): presumably the minimum line count a duplicate must span; confirm against the detector.
#[serde(default = "default_min_lines")]
pub min_lines: usize,
/// Defaults to `0.0` when omitted.
// NOTE(review): unit and meaning are not visible in this file; confirm against the consumer.
#[serde(default)]
pub threshold: f64,
/// List of strings; empty when omitted.
// NOTE(review): test fixtures use glob-like values such as `**/vendor/**`, so presumably path patterns to exclude; confirm.
#[serde(default)]
pub ignore: Vec<String>,
/// Defaults to `false` when omitted.
// NOTE(review): exact semantics are not visible in this file.
#[serde(default)]
pub skip_local: bool,
/// Defaults to `false` when omitted.
// NOTE(review): exact semantics are not visible in this file.
#[serde(default)]
pub cross_language: bool,
/// Defaults to `false` when omitted.
// NOTE(review): exact semantics are not visible in this file.
#[serde(default)]
pub ignore_imports: bool,
/// Optional per-flag overrides layered on top of the defaults implied by `mode`;
/// combined by [`ResolvedNormalization::resolve`].
#[serde(default)]
pub normalization: NormalizationConfig,
}
impl Default for DuplicatesConfig {
fn default() -> Self {
Self {
enabled: true,
mode: DetectionMode::default(),
min_tokens: default_min_tokens(),
min_lines: default_min_lines(),
threshold: 0.0,
ignore: vec![],
skip_local: false,
cross_language: false,
ignore_imports: false,
normalization: NormalizationConfig::default(),
}
}
}
/// Optional overrides for the normalization flags that a [`DetectionMode`] implies.
///
/// Each field is tri-state: `Some(true)` / `Some(false)` forces the flag, while `None`
/// (the default) leaves the mode's own default in place when
/// [`ResolvedNormalization::resolve`] combines the two. Keys are camelCase in
/// serialized form, and `None` fields are omitted from serialized output.
#[derive(Debug, Clone, Default, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct NormalizationConfig {
/// Override for the `ignore_identifiers` flag; `None` keeps the mode default.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub ignore_identifiers: Option<bool>,
/// Override for the `ignore_string_values` flag; `None` keeps the mode default.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub ignore_string_values: Option<bool>,
/// Override for the `ignore_numeric_values` flag; `None` keeps the mode default.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub ignore_numeric_values: Option<bool>,
}
/// Final normalization flags once mode defaults and user overrides have been merged.
///
/// Unlike the override struct, every flag here is a plain `bool`: there is no
/// "unset" state left after resolution.
// NOTE(review): how the detector interprets each flag is not visible in this file.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ResolvedNormalization {
    /// Effective value of the `ignore_identifiers` flag.
    pub ignore_identifiers: bool,
    /// Effective value of the `ignore_string_values` flag.
    pub ignore_string_values: bool,
    /// Effective value of the `ignore_numeric_values` flag.
    pub ignore_numeric_values: bool,
}
impl ResolvedNormalization {
    /// Merges the defaults implied by `mode` with the explicit `overrides`.
    ///
    /// Mode defaults:
    /// - `Strict` and `Mild`: all flags off.
    /// - `Weak`: only `ignore_string_values` on.
    /// - `Semantic`: all flags on.
    ///
    /// Any override that is `Some(_)` replaces the mode default for that flag;
    /// `None` keeps it.
    #[must_use]
    pub fn resolve(mode: DetectionMode, overrides: &NormalizationConfig) -> Self {
        let mode_defaults = match mode {
            DetectionMode::Strict | DetectionMode::Mild => Self {
                ignore_identifiers: false,
                ignore_string_values: false,
                ignore_numeric_values: false,
            },
            DetectionMode::Weak => Self {
                ignore_identifiers: false,
                ignore_string_values: true,
                ignore_numeric_values: false,
            },
            DetectionMode::Semantic => Self {
                ignore_identifiers: true,
                ignore_string_values: true,
                ignore_numeric_values: true,
            },
        };

        let ignore_identifiers = overrides
            .ignore_identifiers
            .unwrap_or(mode_defaults.ignore_identifiers);
        let ignore_string_values = overrides
            .ignore_string_values
            .unwrap_or(mode_defaults.ignore_string_values);
        let ignore_numeric_values = overrides
            .ignore_numeric_values
            .unwrap_or(mode_defaults.ignore_numeric_values);

        Self {
            ignore_identifiers,
            ignore_string_values,
            ignore_numeric_values,
        }
    }
}
/// Detection mode selected in the duplicates configuration.
///
/// Serialized as a lowercase string (`"strict"`, `"mild"`, `"weak"`, `"semantic"`).
/// The mode determines the default normalization flags applied by
/// [`ResolvedNormalization::resolve`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum DetectionMode {
/// All normalization flags default to off.
// NOTE(review): this file gives `Strict` and `Mild` identical normalization defaults; whatever distinguishes them lives elsewhere.
Strict,
/// The default mode. All normalization flags default to off.
#[default]
Mild,
/// Only `ignore_string_values` defaults to on.
Weak,
/// All three normalization flags default to on.
Semantic,
}
impl std::fmt::Display for DetectionMode {
    /// Writes the lowercase mode name (`strict`, `mild`, `weak` or `semantic`).
    ///
    /// The output matches the strings accepted by the `FromStr` implementation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Strict => "strict",
            Self::Mild => "mild",
            Self::Weak => "weak",
            Self::Semantic => "semantic",
        };
        f.write_str(label)
    }
}
impl std::str::FromStr for DetectionMode {
    type Err = String;

    /// Parses a mode name, ignoring letter case.
    ///
    /// Accepts `strict`, `mild`, `weak` and `semantic` in any casing.
    ///
    /// # Errors
    ///
    /// Returns `unknown detection mode: '<input>'` when the text matches no mode.
    /// The message quotes the input exactly as the caller supplied it, so a
    /// mistyped value can be found verbatim in the user's configuration or
    /// command line.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // The lowercased copy is used only for matching; it is never reported back.
        match s.to_lowercase().as_str() {
            "strict" => Ok(Self::Strict),
            "mild" => Ok(Self::Mild),
            "weak" => Ok(Self::Weak),
            "semantic" => Ok(Self::Semantic),
            _ => Err(format!("unknown detection mode: '{s}'")),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- DuplicatesConfig defaults ----

    /// `Default` yields the documented baseline, including an override-free
    /// normalization block.
    #[test]
    fn duplicates_config_defaults() {
        let config = DuplicatesConfig::default();
        assert!(config.enabled);
        assert_eq!(config.mode, DetectionMode::Mild);
        assert_eq!(config.min_tokens, 50);
        assert_eq!(config.min_lines, 5);
        assert!((config.threshold - 0.0).abs() < f64::EPSILON);
        assert!(config.ignore.is_empty());
        assert!(!config.skip_local);
        assert!(!config.cross_language);
        assert!(!config.ignore_imports);
        // No normalization overrides unless the user supplies them.
        assert!(config.normalization.ignore_identifiers.is_none());
        assert!(config.normalization.ignore_string_values.is_none());
        assert!(config.normalization.ignore_numeric_values.is_none());
    }

    // ---- DetectionMode: FromStr / Display ----

    #[test]
    fn detection_mode_from_str_all_variants() {
        assert_eq!(
            "strict".parse::<DetectionMode>().unwrap(),
            DetectionMode::Strict
        );
        assert_eq!(
            "mild".parse::<DetectionMode>().unwrap(),
            DetectionMode::Mild
        );
        assert_eq!(
            "weak".parse::<DetectionMode>().unwrap(),
            DetectionMode::Weak
        );
        assert_eq!(
            "semantic".parse::<DetectionMode>().unwrap(),
            DetectionMode::Semantic
        );
    }

    #[test]
    fn detection_mode_from_str_case_insensitive() {
        assert_eq!(
            "STRICT".parse::<DetectionMode>().unwrap(),
            DetectionMode::Strict
        );
        assert_eq!(
            "Weak".parse::<DetectionMode>().unwrap(),
            DetectionMode::Weak
        );
        assert_eq!(
            "SEMANTIC".parse::<DetectionMode>().unwrap(),
            DetectionMode::Semantic
        );
    }

    #[test]
    fn detection_mode_from_str_unknown() {
        let err = "foobar".parse::<DetectionMode>().unwrap_err();
        assert!(err.contains("unknown detection mode"));
        assert!(err.contains("foobar"));
    }

    #[test]
    fn detection_mode_display() {
        assert_eq!(DetectionMode::Strict.to_string(), "strict");
        assert_eq!(DetectionMode::Mild.to_string(), "mild");
        assert_eq!(DetectionMode::Weak.to_string(), "weak");
        assert_eq!(DetectionMode::Semantic.to_string(), "semantic");
    }

    // ---- ResolvedNormalization::resolve: mode defaults ----

    #[test]
    fn resolve_strict_mode_all_false() {
        let resolved =
            ResolvedNormalization::resolve(DetectionMode::Strict, &NormalizationConfig::default());
        assert!(!resolved.ignore_identifiers);
        assert!(!resolved.ignore_string_values);
        assert!(!resolved.ignore_numeric_values);
    }

    #[test]
    fn resolve_mild_mode_all_false() {
        let resolved =
            ResolvedNormalization::resolve(DetectionMode::Mild, &NormalizationConfig::default());
        assert!(!resolved.ignore_identifiers);
        assert!(!resolved.ignore_string_values);
        assert!(!resolved.ignore_numeric_values);
    }

    #[test]
    fn resolve_weak_mode_only_strings_true() {
        let resolved =
            ResolvedNormalization::resolve(DetectionMode::Weak, &NormalizationConfig::default());
        assert!(!resolved.ignore_identifiers);
        assert!(resolved.ignore_string_values);
        assert!(!resolved.ignore_numeric_values);
    }

    #[test]
    fn resolve_semantic_mode_all_true() {
        let resolved = ResolvedNormalization::resolve(
            DetectionMode::Semantic,
            &NormalizationConfig::default(),
        );
        assert!(resolved.ignore_identifiers);
        assert!(resolved.ignore_string_values);
        assert!(resolved.ignore_numeric_values);
    }

    // ---- ResolvedNormalization::resolve: overrides ----

    #[test]
    fn resolve_override_forces_true() {
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(true),
            ignore_string_values: None,
            ignore_numeric_values: None,
        };
        let resolved = ResolvedNormalization::resolve(DetectionMode::Strict, &overrides);
        assert!(resolved.ignore_identifiers);
        assert!(!resolved.ignore_string_values);
        assert!(!resolved.ignore_numeric_values);
    }

    #[test]
    fn resolve_override_forces_false() {
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(false),
            ignore_string_values: Some(false),
            ignore_numeric_values: None,
        };
        let resolved = ResolvedNormalization::resolve(DetectionMode::Semantic, &overrides);
        assert!(!resolved.ignore_identifiers);
        assert!(!resolved.ignore_string_values);
        // Not overridden, so the Semantic default (on) applies.
        assert!(resolved.ignore_numeric_values);
    }

    #[test]
    fn resolve_all_overrides_on_weak() {
        let overrides = NormalizationConfig {
            ignore_identifiers: Some(true),
            // Flips the Weak default (on) to off.
            ignore_string_values: Some(false),
            ignore_numeric_values: Some(true),
        };
        let resolved = ResolvedNormalization::resolve(DetectionMode::Weak, &overrides);
        assert!(resolved.ignore_identifiers);
        assert!(!resolved.ignore_string_values);
        assert!(resolved.ignore_numeric_values);
    }

    // ---- JSON deserialization ----

    #[test]
    fn duplicates_config_json_all_fields() {
        let json = r#"{
"enabled": false,
"mode": "semantic",
"minTokens": 100,
"minLines": 10,
"threshold": 5.0,
"ignore": ["**/vendor/**"],
"skipLocal": true,
"crossLanguage": true,
"ignoreImports": true
}"#;
        let config: DuplicatesConfig = serde_json::from_str(json).unwrap();
        assert!(!config.enabled);
        assert_eq!(config.mode, DetectionMode::Semantic);
        assert_eq!(config.min_tokens, 100);
        assert_eq!(config.min_lines, 10);
        assert!((config.threshold - 5.0).abs() < f64::EPSILON);
        assert_eq!(config.ignore, vec!["**/vendor/**"]);
        assert!(config.skip_local);
        assert!(config.cross_language);
        assert!(config.ignore_imports);
    }

    #[test]
    fn duplicates_config_json_partial_uses_defaults() {
        let json = r#"{"mode": "weak"}"#;
        let config: DuplicatesConfig = serde_json::from_str(json).unwrap();
        // Only `mode` was supplied; everything else must fall back to its default.
        assert!(config.enabled);
        assert_eq!(config.mode, DetectionMode::Weak);
        assert_eq!(config.min_tokens, 50);
        assert_eq!(config.min_lines, 5);
        assert!((config.threshold - 0.0).abs() < f64::EPSILON);
        assert!(config.ignore.is_empty());
        assert!(!config.skip_local);
        assert!(!config.cross_language);
        assert!(!config.ignore_imports);
        assert!(config.normalization.ignore_identifiers.is_none());
        assert!(config.normalization.ignore_string_values.is_none());
        assert!(config.normalization.ignore_numeric_values.is_none());
    }

    #[test]
    fn normalization_config_json_overrides() {
        let json = r#"{
"ignoreIdentifiers": true,
"ignoreStringValues": false
}"#;
        let config: NormalizationConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.ignore_identifiers, Some(true));
        assert_eq!(config.ignore_string_values, Some(false));
        assert_eq!(config.ignore_numeric_values, None);
    }

    // ---- TOML deserialization ----

    #[test]
    fn duplicates_config_toml_all_fields() {
        let toml_str = r#"
enabled = false
mode = "weak"
minTokens = 75
minLines = 8
threshold = 3.0
ignore = ["vendor/**"]
skipLocal = true
crossLanguage = true
ignoreImports = true
[normalization]
ignoreIdentifiers = true
ignoreStringValues = true
ignoreNumericValues = false
"#;
        let config: DuplicatesConfig = toml::from_str(toml_str).unwrap();
        assert!(!config.enabled);
        assert_eq!(config.mode, DetectionMode::Weak);
        assert_eq!(config.min_tokens, 75);
        assert_eq!(config.min_lines, 8);
        assert!((config.threshold - 3.0).abs() < f64::EPSILON);
        assert_eq!(config.ignore, vec!["vendor/**"]);
        assert!(config.skip_local);
        assert!(config.cross_language);
        assert!(config.ignore_imports);
        assert_eq!(config.normalization.ignore_identifiers, Some(true));
        assert_eq!(config.normalization.ignore_string_values, Some(true));
        assert_eq!(config.normalization.ignore_numeric_values, Some(false));
    }

    #[test]
    fn duplicates_config_toml_defaults() {
        let toml_str = "";
        let config: DuplicatesConfig = toml::from_str(toml_str).unwrap();
        assert!(config.enabled);
        assert_eq!(config.mode, DetectionMode::Mild);
        assert_eq!(config.min_tokens, 50);
        assert_eq!(config.min_lines, 5);
    }

    // ---- NormalizationConfig defaults ----

    #[test]
    fn normalization_config_default_all_none() {
        let config = NormalizationConfig::default();
        assert!(config.ignore_identifiers.is_none());
        assert!(config.ignore_string_values.is_none());
        assert!(config.ignore_numeric_values.is_none());
    }

    #[test]
    fn normalization_config_empty_json_object() {
        let config: NormalizationConfig = serde_json::from_str("{}").unwrap();
        assert!(config.ignore_identifiers.is_none());
        assert!(config.ignore_string_values.is_none());
        assert!(config.ignore_numeric_values.is_none());
    }

    // ---- DetectionMode defaults and serde ----

    #[test]
    fn detection_mode_default_is_mild() {
        assert_eq!(DetectionMode::default(), DetectionMode::Mild);
    }

    #[test]
    fn resolved_normalization_equality() {
        let a = ResolvedNormalization {
            ignore_identifiers: true,
            ignore_string_values: false,
            ignore_numeric_values: true,
        };
        let b = ResolvedNormalization {
            ignore_identifiers: true,
            ignore_string_values: false,
            ignore_numeric_values: true,
        };
        assert_eq!(a, b);

        let c = ResolvedNormalization {
            ignore_identifiers: false,
            ignore_string_values: false,
            ignore_numeric_values: true,
        };
        assert_ne!(a, c);
    }

    #[test]
    fn detection_mode_json_deserialization() {
        let strict: DetectionMode = serde_json::from_str(r#""strict""#).unwrap();
        assert_eq!(strict, DetectionMode::Strict);

        let mild: DetectionMode = serde_json::from_str(r#""mild""#).unwrap();
        assert_eq!(mild, DetectionMode::Mild);

        let weak: DetectionMode = serde_json::from_str(r#""weak""#).unwrap();
        assert_eq!(weak, DetectionMode::Weak);

        let semantic: DetectionMode = serde_json::from_str(r#""semantic""#).unwrap();
        assert_eq!(semantic, DetectionMode::Semantic);
    }

    #[test]
    fn detection_mode_invalid_json() {
        let result: Result<DetectionMode, _> = serde_json::from_str(r#""aggressive""#);
        assert!(result.is_err());
    }

    // ---- Serialization round-trips ----

    #[test]
    fn duplicates_config_json_roundtrip() {
        let config = DuplicatesConfig {
            enabled: false,
            mode: DetectionMode::Semantic,
            min_tokens: 100,
            min_lines: 10,
            threshold: 5.5,
            ignore: vec!["test/**".to_string()],
            skip_local: true,
            cross_language: true,
            ignore_imports: true,
            normalization: NormalizationConfig {
                ignore_identifiers: Some(true),
                ignore_string_values: None,
                ignore_numeric_values: Some(false),
            },
        };
        let json = serde_json::to_string(&config).unwrap();
        let restored: DuplicatesConfig = serde_json::from_str(&json).unwrap();
        assert!(!restored.enabled);
        assert_eq!(restored.mode, DetectionMode::Semantic);
        assert_eq!(restored.min_tokens, 100);
        assert_eq!(restored.min_lines, 10);
        assert!((restored.threshold - 5.5).abs() < f64::EPSILON);
        // The ignore list must survive the round-trip as well.
        assert_eq!(restored.ignore, vec!["test/**"]);
        assert!(restored.skip_local);
        assert!(restored.cross_language);
        assert!(restored.ignore_imports);
        assert_eq!(restored.normalization.ignore_identifiers, Some(true));
        assert!(restored.normalization.ignore_string_values.is_none());
        assert_eq!(restored.normalization.ignore_numeric_values, Some(false));
    }

    #[test]
    fn normalization_none_fields_not_serialized() {
        let config = NormalizationConfig::default();
        let json = serde_json::to_string(&config).unwrap();
        assert!(
            !json.contains("ignoreIdentifiers"),
            "None fields should be skipped"
        );
        assert!(
            !json.contains("ignoreStringValues"),
            "None fields should be skipped"
        );
        assert!(
            !json.contains("ignoreNumericValues"),
            "None fields should be skipped"
        );
    }

    #[test]
    fn normalization_some_fields_serialized() {
        let config = NormalizationConfig {
            ignore_identifiers: Some(true),
            ignore_string_values: None,
            ignore_numeric_values: Some(false),
        };
        let json = serde_json::to_string(&config).unwrap();
        assert!(json.contains("ignoreIdentifiers"));
        assert!(!json.contains("ignoreStringValues"));
        assert!(json.contains("ignoreNumericValues"));
    }
}