fallow_config/config/
duplicates_config.rs1use schemars::JsonSchema;
2use serde::{Deserialize, Serialize};
3
/// Serde default helper for boolean fields that must be `true` when the key is omitted.
const fn default_true() -> bool {
    true
}
7
/// Serde default helper: the `min_tokens` value used when the key is omitted.
const fn default_min_tokens() -> usize {
    const FALLBACK_MIN_TOKENS: usize = 50;
    FALLBACK_MIN_TOKENS
}
11
/// Serde default helper: the `min_lines` value used when the key is omitted.
const fn default_min_lines() -> usize {
    const FALLBACK_MIN_LINES: usize = 5;
    FALLBACK_MIN_LINES
}
15
16#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
18#[serde(rename_all = "camelCase")]
19pub struct DuplicatesConfig {
20 #[serde(default = "default_true")]
22 pub enabled: bool,
23
24 #[serde(default)]
26 pub mode: DetectionMode,
27
28 #[serde(default = "default_min_tokens")]
30 pub min_tokens: usize,
31
32 #[serde(default = "default_min_lines")]
34 pub min_lines: usize,
35
36 #[serde(default)]
38 pub threshold: f64,
39
40 #[serde(default)]
42 pub ignore: Vec<String>,
43
44 #[serde(default)]
46 pub skip_local: bool,
47
48 #[serde(default)]
54 pub cross_language: bool,
55
56 #[serde(default)]
58 pub normalization: NormalizationConfig,
59}
60
61impl Default for DuplicatesConfig {
62 fn default() -> Self {
63 Self {
64 enabled: true,
65 mode: DetectionMode::default(),
66 min_tokens: default_min_tokens(),
67 min_lines: default_min_lines(),
68 threshold: 0.0,
69 ignore: vec![],
70 skip_local: false,
71 cross_language: false,
72 normalization: NormalizationConfig::default(),
73 }
74 }
75}
76
77#[derive(Debug, Clone, Default, Deserialize, Serialize, JsonSchema)]
83#[serde(rename_all = "camelCase")]
84pub struct NormalizationConfig {
85 #[serde(default, skip_serializing_if = "Option::is_none")]
88 pub ignore_identifiers: Option<bool>,
89
90 #[serde(default, skip_serializing_if = "Option::is_none")]
93 pub ignore_string_values: Option<bool>,
94
95 #[serde(default, skip_serializing_if = "Option::is_none")]
98 pub ignore_numeric_values: Option<bool>,
99}
100
/// Concrete normalization flags after mode defaults and overrides are combined.
///
/// Produced by `ResolvedNormalization::resolve`; unlike `NormalizationConfig`,
/// every flag here is a plain `bool` with no "unset" state.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ResolvedNormalization {
    /// Final value of the `ignore_identifiers` flag.
    pub ignore_identifiers: bool,
    /// Final value of the `ignore_string_values` flag.
    pub ignore_string_values: bool,
    /// Final value of the `ignore_numeric_values` flag.
    pub ignore_numeric_values: bool,
}
108
109impl ResolvedNormalization {
110 pub fn resolve(mode: DetectionMode, overrides: &NormalizationConfig) -> Self {
112 let (default_ids, default_strings, default_numbers) = match mode {
113 DetectionMode::Strict | DetectionMode::Mild => (false, false, false),
114 DetectionMode::Weak => (false, true, false),
115 DetectionMode::Semantic => (true, true, true),
116 };
117
118 Self {
119 ignore_identifiers: overrides.ignore_identifiers.unwrap_or(default_ids),
120 ignore_string_values: overrides.ignore_string_values.unwrap_or(default_strings),
121 ignore_numeric_values: overrides.ignore_numeric_values.unwrap_or(default_numbers),
122 }
123 }
124}
125
126#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
134#[serde(rename_all = "lowercase")]
135pub enum DetectionMode {
136 Strict,
138 #[default]
140 Mild,
141 Weak,
143 Semantic,
145}
146
147impl std::fmt::Display for DetectionMode {
148 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
149 match self {
150 Self::Strict => write!(f, "strict"),
151 Self::Mild => write!(f, "mild"),
152 Self::Weak => write!(f, "weak"),
153 Self::Semantic => write!(f, "semantic"),
154 }
155 }
156}
157
158impl std::str::FromStr for DetectionMode {
159 type Err = String;
160
161 fn from_str(s: &str) -> Result<Self, Self::Err> {
162 match s.to_lowercase().as_str() {
163 "strict" => Ok(Self::Strict),
164 "mild" => Ok(Self::Mild),
165 "weak" => Ok(Self::Weak),
166 "semantic" => Ok(Self::Semantic),
167 other => Err(format!("unknown detection mode: '{other}'")),
168 }
169 }
170}