Skip to main content

dupes_core/
config.rs

1use std::path::{Path, PathBuf};
2
3use serde::Deserialize;
4
/// Size filters that language-specific parsers need from the full configuration.
///
/// This is the parsing-relevant slice of [`Config`]; it carries only the
/// minimum-size limits for a code unit.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct AnalysisConfig {
    /// A code unit must have at least this many AST nodes to be analyzed.
    pub min_nodes: usize,
    /// A code unit must span at least this many source lines to be analyzed.
    pub min_lines: usize,
}
13
/// Effective settings for a cargo-dupes analysis run.
///
/// Values start from [`Config::default`] and may then be overridden by
/// configuration files (see [`Config::load`]) and by the caller.
#[derive(Clone, Debug)]
pub struct Config {
    /// A code unit must have at least this many AST nodes to be analyzed.
    pub min_nodes: usize,
    /// Similarity score (0.0 to 1.0) used as the threshold for near-duplicates.
    pub similarity_threshold: f64,
    /// Path patterns that are left out of scanning.
    pub exclude: Vec<String>,
    /// Exit-code threshold: fail when the exact-duplicate count exceeds this value.
    pub max_exact_duplicates: Option<usize>,
    /// Exit-code threshold: fail when the near-duplicate count exceeds this value.
    pub max_near_duplicates: Option<usize>,
    /// Exit-code threshold: fail when the exact-duplicate percentage exceeds this value.
    pub max_exact_percent: Option<f64>,
    /// Exit-code threshold: fail when the near-duplicate percentage exceeds this value.
    pub max_near_percent: Option<f64>,
    /// A code unit must span at least this many source lines to be analyzed.
    pub min_lines: usize,
    /// Whether test code (`#[test]` functions and `#[cfg(test)]` modules) is excluded.
    pub exclude_tests: bool,
    /// Whether sub-function duplicate detection is enabled.
    pub sub_function: bool,
    /// A sub-function unit must have at least this many AST nodes to be analyzed.
    pub min_sub_nodes: usize,
    /// Root path of the project to analyze.
    pub root: PathBuf,
}
42
43impl Default for Config {
44    fn default() -> Self {
45        Self {
46            min_nodes: 10,
47            similarity_threshold: 0.9,
48            exclude: Vec::new(),
49            max_exact_duplicates: None,
50            max_near_duplicates: None,
51            max_exact_percent: None,
52            max_near_percent: None,
53            min_lines: 0,
54            exclude_tests: false,
55            sub_function: false,
56            min_sub_nodes: 5,
57            root: PathBuf::from("."),
58        }
59    }
60}
61
62/// Config as stored in dupes.toml or Cargo.toml metadata.
63#[derive(Debug, Deserialize, Default)]
64#[serde(default)]
65struct FileConfig {
66    min_nodes: Option<usize>,
67    similarity_threshold: Option<f64>,
68    exclude: Option<Vec<String>>,
69    max_exact_duplicates: Option<usize>,
70    max_near_duplicates: Option<usize>,
71    max_exact_percent: Option<f64>,
72    max_near_percent: Option<f64>,
73    min_lines: Option<usize>,
74    exclude_tests: Option<bool>,
75    sub_function: Option<bool>,
76    min_sub_nodes: Option<usize>,
77}
78
79/// Cargo.toml metadata section.
80#[derive(Debug, Deserialize)]
81struct CargoMetadata {
82    #[serde(default)]
83    package: Option<CargoPackage>,
84}
85
86#[derive(Debug, Deserialize)]
87struct CargoPackage {
88    #[serde(default)]
89    metadata: Option<CargoPackageMetadata>,
90}
91
92#[derive(Debug, Deserialize)]
93struct CargoPackageMetadata {
94    #[serde(default)]
95    dupes: Option<FileConfig>,
96}
97
98impl Config {
99    /// Extract the parsing-relevant subset of the configuration.
100    #[must_use]
101    pub const fn analysis_config(&self) -> AnalysisConfig {
102        AnalysisConfig {
103            min_nodes: self.min_nodes,
104            min_lines: self.min_lines,
105        }
106    }
107
108    /// Load config with the following precedence:
109    /// 1. CLI overrides (applied by the caller after this method)
110    /// 2. dupes.toml in the project root
111    /// 3. `[package.metadata.dupes]` in Cargo.toml
112    /// 4. Defaults
113    #[must_use]
114    pub fn load(root: &Path) -> Self {
115        let mut config = Self {
116            root: root.to_path_buf(),
117            ..Default::default()
118        };
119
120        // Try Cargo.toml metadata first (lowest priority file config)
121        let cargo_toml = root.join("Cargo.toml");
122        if cargo_toml.exists()
123            && let Ok(content) = std::fs::read_to_string(&cargo_toml)
124            && let Ok(cargo) = toml::from_str::<CargoMetadata>(&content)
125            && let Some(pkg) = cargo.package
126            && let Some(meta) = pkg.metadata
127            && let Some(dupes) = meta.dupes
128        {
129            config.apply_file_config(&dupes);
130        }
131
132        // Try dupes.toml (higher priority)
133        let dupes_toml = root.join("dupes.toml");
134        if dupes_toml.exists()
135            && let Ok(content) = std::fs::read_to_string(&dupes_toml)
136            && let Ok(file_config) = toml::from_str::<FileConfig>(&content)
137        {
138            config.apply_file_config(&file_config);
139        }
140
141        config
142    }
143
144    fn apply_file_config(&mut self, fc: &FileConfig) {
145        if let Some(v) = fc.min_nodes {
146            self.min_nodes = v;
147        }
148        if let Some(v) = fc.similarity_threshold {
149            self.similarity_threshold = v;
150        }
151        if let Some(ref v) = fc.exclude {
152            self.exclude.clone_from(v);
153        }
154        if let Some(v) = fc.max_exact_duplicates {
155            self.max_exact_duplicates = Some(v);
156        }
157        if let Some(v) = fc.max_near_duplicates {
158            self.max_near_duplicates = Some(v);
159        }
160        if let Some(v) = fc.max_exact_percent {
161            self.max_exact_percent = Some(v);
162        }
163        if let Some(v) = fc.max_near_percent {
164            self.max_near_percent = Some(v);
165        }
166        if let Some(v) = fc.min_lines {
167            self.min_lines = v;
168        }
169        if let Some(v) = fc.exclude_tests {
170            self.exclude_tests = v;
171        }
172        if let Some(v) = fc.sub_function {
173            self.sub_function = v;
174        }
175        if let Some(v) = fc.min_sub_nodes {
176            self.min_sub_nodes = v;
177        }
178    }
179}
180
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Create a temporary project directory holding the given
    /// `(file name, contents)` pairs.
    ///
    /// Keep the returned `TempDir` alive for as long as the directory is used.
    fn project_with(files: &[(&str, &str)]) -> TempDir {
        let tmp = TempDir::new().unwrap();
        for &(name, contents) in files {
            fs::write(tmp.path().join(name), contents).unwrap();
        }
        tmp
    }

    #[test]
    fn default_config() {
        let config = Config::default();
        assert_eq!(config.min_nodes, 10);
        assert!((config.similarity_threshold - 0.9).abs() < f64::EPSILON);
        assert!(config.exclude.is_empty());
        assert_eq!(config.max_exact_duplicates, None);
        assert_eq!(config.max_near_duplicates, None);
        assert_eq!(config.max_exact_percent, None);
        assert_eq!(config.max_near_percent, None);
        assert_eq!(config.min_lines, 0);
        assert!(!config.exclude_tests);
        assert!(!config.sub_function);
        assert_eq!(config.min_sub_nodes, 5);
        assert_eq!(config.root, PathBuf::from("."));
    }

    #[test]
    fn analysis_config_copies_size_limits() {
        let config = Config {
            min_nodes: 7,
            min_lines: 3,
            ..Config::default()
        };
        assert_eq!(
            config.analysis_config(),
            AnalysisConfig {
                min_nodes: 7,
                min_lines: 3,
            }
        );
    }

    #[test]
    fn load_from_dupes_toml() {
        // Every value differs from its default so that each assertion
        // proves the file was actually read.
        let tmp = project_with(&[(
            "dupes.toml",
            r#"
            min_nodes = 20
            similarity_threshold = 0.8
            exclude = ["tests"]
            "#,
        )]);
        let config = Config::load(tmp.path());
        assert_eq!(config.min_nodes, 20);
        assert!((config.similarity_threshold - 0.8).abs() < f64::EPSILON);
        assert_eq!(config.exclude, vec!["tests".to_string()]);
    }

    #[test]
    fn load_from_cargo_toml_metadata() {
        let tmp = project_with(&[(
            "Cargo.toml",
            r#"
            [package]
            name = "test"
            version = "0.1.0"
            edition = "2021"

            [package.metadata.dupes]
            min_nodes = 15
            similarity_threshold = 0.75
            "#,
        )]);
        let config = Config::load(tmp.path());
        assert_eq!(config.min_nodes, 15);
        assert!((config.similarity_threshold - 0.75).abs() < f64::EPSILON);
    }

    #[test]
    fn dupes_toml_overrides_cargo_toml() {
        let tmp = project_with(&[
            (
                "Cargo.toml",
                r#"
                [package]
                name = "test"
                version = "0.1.0"
                edition = "2021"

                [package.metadata.dupes]
                min_nodes = 15
                "#,
            ),
            (
                "dupes.toml",
                r#"
                min_nodes = 25
                "#,
            ),
        ]);
        let config = Config::load(tmp.path());
        assert_eq!(config.min_nodes, 25);
    }

    #[test]
    fn dupes_toml_keeps_cargo_toml_values_it_does_not_set() {
        let tmp = project_with(&[
            (
                "Cargo.toml",
                r#"
                [package]
                name = "test"
                version = "0.1.0"
                edition = "2021"

                [package.metadata.dupes]
                min_nodes = 15
                similarity_threshold = 0.75
                "#,
            ),
            (
                "dupes.toml",
                r#"
                min_nodes = 25
                "#,
            ),
        ]);
        let config = Config::load(tmp.path());
        assert_eq!(config.min_nodes, 25);
        assert!((config.similarity_threshold - 0.75).abs() < f64::EPSILON);
    }

    #[test]
    fn load_no_config_files() {
        let tmp = TempDir::new().unwrap();
        let config = Config::load(tmp.path());
        assert_eq!(config.min_nodes, 10); // default
    }

    #[test]
    fn load_records_root() {
        let tmp = TempDir::new().unwrap();
        let config = Config::load(tmp.path());
        assert_eq!(config.root, tmp.path().to_path_buf());
    }

    #[test]
    fn malformed_dupes_toml_is_ignored() {
        // One badly typed value makes the whole file fail to parse, so even
        // the well-formed `min_lines` entry must not be applied.
        let tmp = project_with(&[(
            "dupes.toml",
            r#"
            min_lines = 5
            min_nodes = "twenty"
            "#,
        )]);
        let config = Config::load(tmp.path());
        assert_eq!(config.min_nodes, 10);
        assert_eq!(config.min_lines, 0);
    }

    #[test]
    fn config_with_thresholds() {
        let tmp = project_with(&[(
            "dupes.toml",
            r#"
            max_exact_duplicates = 0
            max_near_duplicates = 5
            "#,
        )]);
        let config = Config::load(tmp.path());
        assert_eq!(config.max_exact_duplicates, Some(0));
        assert_eq!(config.max_near_duplicates, Some(5));
    }

    #[test]
    fn config_with_exclude_tests() {
        let tmp = project_with(&[(
            "dupes.toml",
            r#"
            exclude_tests = true
            "#,
        )]);
        let config = Config::load(tmp.path());
        assert!(config.exclude_tests);
    }

    #[test]
    fn config_with_sub_function() {
        let tmp = project_with(&[(
            "dupes.toml",
            r#"
            sub_function = true
            min_sub_nodes = 8
            "#,
        )]);
        let config = Config::load(tmp.path());
        assert!(config.sub_function);
        assert_eq!(config.min_sub_nodes, 8);
    }

    #[test]
    fn config_with_min_lines() {
        let tmp = project_with(&[(
            "dupes.toml",
            r#"
            min_lines = 5
            "#,
        )]);
        let config = Config::load(tmp.path());
        assert_eq!(config.min_lines, 5);
    }

    #[test]
    fn config_with_percentage_thresholds() {
        let tmp = project_with(&[(
            "dupes.toml",
            r#"
            max_exact_percent = 5.0
            max_near_percent = 10.5
            "#,
        )]);
        let config = Config::load(tmp.path());
        assert_eq!(config.max_exact_percent, Some(5.0));
        assert_eq!(config.max_near_percent, Some(10.5));
    }
}
324        let config = Config::load(tmp.path());
325        assert_eq!(config.max_exact_percent, Some(5.0));
326        assert_eq!(config.max_near_percent, Some(10.5));
327    }
328}