Skip to main content

reformat_core/
indent.rs

1//! Indentation normalization transformer
2
3use std::fs;
4use std::path::Path;
5use walkdir::WalkDir;
6
/// Whitespace character used to express indentation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IndentStyle {
    /// Indent with space characters.
    Spaces,
    /// Indent with tab characters.
    Tabs,
}

impl IndentStyle {
    /// Parses a style from its textual name.
    ///
    /// Recognizes exactly `"spaces"`, `"space"`, `"tabs"` and `"tab"`. The
    /// comparison is case-sensitive and the input is not trimmed; any other
    /// string yields `None`.
    pub fn parse(s: &str) -> Option<Self> {
        // Every accepted spelling paired with the style it selects.
        const NAMES: [(&str, IndentStyle); 4] = [
            ("spaces", IndentStyle::Spaces),
            ("space", IndentStyle::Spaces),
            ("tabs", IndentStyle::Tabs),
            ("tab", IndentStyle::Tabs),
        ];

        NAMES
            .iter()
            .find(|(name, _)| *name == s)
            .map(|&(_, style)| style)
    }
}
26
27/// Options for indentation normalization
28#[derive(Debug, Clone)]
29pub struct IndentOptions {
30    /// Target indentation style
31    pub style: IndentStyle,
32    /// Number of spaces per indent level (used when converting tabs to spaces,
33    /// or as the tab width when converting spaces to tabs)
34    pub width: usize,
35    /// File extensions to process
36    pub file_extensions: Vec<String>,
37    /// Process directories recursively
38    pub recursive: bool,
39    /// Dry run mode (don't modify files)
40    pub dry_run: bool,
41}
42
43impl Default for IndentOptions {
44    fn default() -> Self {
45        IndentOptions {
46            style: IndentStyle::Spaces,
47            width: 4,
48            file_extensions: vec![
49                ".py", ".pyx", ".pxd", ".pxi", ".c", ".h", ".cpp", ".hpp", ".rs", ".go", ".java",
50                ".js", ".ts", ".jsx", ".tsx", ".md", ".qmd", ".txt", ".toml", ".yaml", ".yml",
51                ".json", ".xml", ".html", ".css",
52            ]
53            .iter()
54            .map(|s| s.to_string())
55            .collect(),
56            recursive: true,
57            dry_run: false,
58        }
59    }
60}
61
62/// Indentation normalizer
63pub struct IndentNormalizer {
64    options: IndentOptions,
65}
66
67impl IndentNormalizer {
68    /// Creates a new normalizer with the given options
69    pub fn new(options: IndentOptions) -> Self {
70        IndentNormalizer { options }
71    }
72
73    /// Creates a normalizer with default options
74    pub fn with_defaults() -> Self {
75        IndentNormalizer {
76            options: IndentOptions::default(),
77        }
78    }
79
80    /// Checks if a file should be processed
81    fn should_process(&self, path: &Path) -> bool {
82        if !path.is_file() {
83            return false;
84        }
85
86        if path.components().any(|c| {
87            c.as_os_str()
88                .to_str()
89                .map(|s| s.starts_with('.'))
90                .unwrap_or(false)
91        }) {
92            return false;
93        }
94
95        let skip_dirs = [
96            "build",
97            "__pycache__",
98            ".git",
99            "node_modules",
100            "venv",
101            ".venv",
102            "target",
103        ];
104        if path.components().any(|c| {
105            c.as_os_str()
106                .to_str()
107                .map(|s| skip_dirs.contains(&s))
108                .unwrap_or(false)
109        }) {
110            return false;
111        }
112
113        if let Some(ext) = path.extension() {
114            let ext_str = format!(".{}", ext.to_string_lossy());
115            self.options.file_extensions.contains(&ext_str)
116        } else {
117            false
118        }
119    }
120
121    /// Convert leading whitespace on a single line.
122    /// Returns the converted line and whether it changed.
123    fn convert_line(&self, line: &str) -> (String, bool) {
124        // Find the leading whitespace
125        let trimmed = line.trim_start_matches([' ', '\t']);
126        let leading = &line[..line.len() - trimmed.len()];
127
128        if leading.is_empty() {
129            return (line.to_string(), false);
130        }
131
132        let width = self.options.width;
133
134        match self.options.style {
135            IndentStyle::Spaces => {
136                // Convert tabs to spaces
137                if !leading.contains('\t') {
138                    return (line.to_string(), false);
139                }
140                let mut spaces = 0usize;
141                for ch in leading.chars() {
142                    if ch == '\t' {
143                        // Align to next tab stop
144                        spaces = ((spaces / width) + 1) * width;
145                    } else {
146                        spaces += 1;
147                    }
148                }
149                let new_leading: String = " ".repeat(spaces);
150                (format!("{}{}", new_leading, trimmed), true)
151            }
152            IndentStyle::Tabs => {
153                // Convert spaces to tabs
154                if !leading.contains(' ') {
155                    return (line.to_string(), false);
156                }
157                // Count effective column width
158                let mut col = 0usize;
159                for ch in leading.chars() {
160                    if ch == '\t' {
161                        col = ((col / width) + 1) * width;
162                    } else {
163                        col += 1;
164                    }
165                }
166                let tabs = col / width;
167                let remaining_spaces = col % width;
168                let new_leading = format!("{}{}", "\t".repeat(tabs), " ".repeat(remaining_spaces));
169                let changed = new_leading != leading;
170                (format!("{}{}", new_leading, trimmed), changed)
171            }
172        }
173    }
174
175    /// Normalize indentation in a single file. Returns the number of lines changed.
176    pub fn normalize_file(&self, path: &Path) -> crate::Result<usize> {
177        if !self.should_process(path) {
178            return Ok(0);
179        }
180
181        let content = fs::read_to_string(path)?;
182        let ends_with_newline = content.ends_with('\n');
183        let lines: Vec<&str> = content.lines().collect();
184
185        let mut changed_count = 0;
186        let mut new_lines: Vec<String> = Vec::with_capacity(lines.len());
187
188        for line in &lines {
189            let (converted, changed) = self.convert_line(line);
190            if changed {
191                changed_count += 1;
192            }
193            new_lines.push(converted);
194        }
195
196        if changed_count > 0 {
197            if self.options.dry_run {
198                println!(
199                    "Would normalize {} line(s) of indentation in '{}'",
200                    changed_count,
201                    path.display()
202                );
203            } else {
204                let mut output = new_lines.join("\n");
205                if ends_with_newline {
206                    output.push('\n');
207                }
208                fs::write(path, output)?;
209                println!(
210                    "Normalized {} line(s) of indentation in '{}'",
211                    changed_count,
212                    path.display()
213                );
214            }
215        }
216
217        Ok(changed_count)
218    }
219
220    /// Processes a directory or file. Returns (files_changed, lines_changed).
221    pub fn process(&self, path: &Path) -> crate::Result<(usize, usize)> {
222        let mut total_files = 0;
223        let mut total_lines = 0;
224
225        if path.is_file() {
226            let lines = self.normalize_file(path)?;
227            if lines > 0 {
228                total_files = 1;
229                total_lines = lines;
230            }
231        } else if path.is_dir() {
232            if self.options.recursive {
233                for entry in WalkDir::new(path).into_iter().filter_map(|e| e.ok()) {
234                    if entry.file_type().is_file() {
235                        let lines = self.normalize_file(entry.path())?;
236                        if lines > 0 {
237                            total_files += 1;
238                            total_lines += lines;
239                        }
240                    }
241                }
242            } else {
243                for entry in fs::read_dir(path)? {
244                    let entry = entry?;
245                    let entry_path = entry.path();
246                    if entry_path.is_file() {
247                        let lines = self.normalize_file(&entry_path)?;
248                        if lines > 0 {
249                            total_files += 1;
250                            total_lines += lines;
251                        }
252                    }
253                }
254            }
255        }
256
257        Ok((total_files, total_lines))
258    }
259}
260
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::PathBuf;

    /// Creates (if needed) the named scratch directory under the system temp dir.
    fn scratch_dir(name: &str) -> PathBuf {
        let dir = std::env::temp_dir().join(name);
        fs::create_dir_all(&dir).unwrap();
        dir
    }

    /// Creates the named scratch directory and writes `contents` to `test.py`
    /// inside it. Returns `(directory, file)`.
    fn scratch_file(name: &str, contents: &str) -> (PathBuf, PathBuf) {
        let dir = scratch_dir(name);
        let file = dir.join("test.py");
        fs::write(&file, contents).unwrap();
        (dir, file)
    }

    /// Builds a normalizer with the given style and width, defaults otherwise.
    fn normalizer_for(style: IndentStyle, width: usize) -> IndentNormalizer {
        IndentNormalizer::new(IndentOptions {
            style,
            width,
            ..Default::default()
        })
    }

    /// Tabs become `width` spaces each; untouched lines are not counted.
    #[test]
    fn test_tabs_to_spaces() {
        let (dir, file) = scratch_file("reformat_indent_t2s", "\tline1\n\t\tline2\nline3\n");

        let (files, lines) = IndentNormalizer::with_defaults().process(&file).unwrap();
        assert_eq!(files, 1);
        assert_eq!(lines, 2);

        let content = fs::read_to_string(&file).unwrap();
        assert_eq!(content, "    line1\n        line2\nline3\n");

        fs::remove_dir_all(&dir).unwrap();
    }

    /// Full groups of `width` spaces become one tab each.
    #[test]
    fn test_spaces_to_tabs() {
        let (dir, file) = scratch_file("reformat_indent_s2t", "    line1\n        line2\nline3\n");

        let (files, lines) = normalizer_for(IndentStyle::Tabs, 4).process(&file).unwrap();
        assert_eq!(files, 1);
        assert_eq!(lines, 2);

        let content = fs::read_to_string(&file).unwrap();
        assert_eq!(content, "\tline1\n\t\tline2\nline3\n");

        fs::remove_dir_all(&dir).unwrap();
    }

    /// A configured width of 2 expands each tab to two spaces.
    #[test]
    fn test_width_2_spaces() {
        let (dir, file) = scratch_file("reformat_indent_w2", "\tline1\n\t\tline2\n");

        normalizer_for(IndentStyle::Spaces, 2).process(&file).unwrap();

        let content = fs::read_to_string(&file).unwrap();
        assert_eq!(content, "  line1\n    line2\n");

        fs::remove_dir_all(&dir).unwrap();
    }

    /// 6 spaces with width 4 become 1 tab + 2 spaces.
    #[test]
    fn test_partial_tab_stop_spaces_to_tabs() {
        let (dir, file) = scratch_file("reformat_indent_partial", "      line1\n");

        normalizer_for(IndentStyle::Tabs, 4).process(&file).unwrap();

        let content = fs::read_to_string(&file).unwrap();
        assert_eq!(content, "\t  line1\n");

        fs::remove_dir_all(&dir).unwrap();
    }

    /// A file already in the target style reports no changed files or lines.
    #[test]
    fn test_already_normalized() {
        let (dir, file) = scratch_file("reformat_indent_noop", "    line1\n        line2\n");

        let (files, lines) = IndentNormalizer::with_defaults().process(&file).unwrap();
        assert_eq!(files, 0);
        assert_eq!(lines, 0);

        fs::remove_dir_all(&dir).unwrap();
    }

    /// Dry-run reports the change count but leaves the file as it was.
    #[test]
    fn test_dry_run() {
        let original = "\tline1\n";
        let (dir, file) = scratch_file("reformat_indent_dry", original);

        let normalizer = IndentNormalizer::new(IndentOptions {
            dry_run: true,
            ..Default::default()
        });
        let (_, lines) = normalizer.process(&file).unwrap();
        assert_eq!(lines, 1);

        let content = fs::read_to_string(&file).unwrap();
        assert_eq!(content, original);

        fs::remove_dir_all(&dir).unwrap();
    }

    /// The final newline of the input survives the rewrite.
    #[test]
    fn test_preserves_trailing_newline() {
        let (dir, file) = scratch_file("reformat_indent_newline", "\tline1\n\tline2\n");

        IndentNormalizer::with_defaults().process(&file).unwrap();

        let content = fs::read_to_string(&file).unwrap();
        assert!(content.ends_with('\n'));
        assert_eq!(content, "    line1\n    line2\n");

        fs::remove_dir_all(&dir).unwrap();
    }

    /// Tab followed by spaces: tab (= 4 columns) + 2 spaces = 6 spaces.
    #[test]
    fn test_mixed_indent() {
        let (dir, file) = scratch_file("reformat_indent_mixed", "\t  line1\n");

        IndentNormalizer::with_defaults().process(&file).unwrap();

        let content = fs::read_to_string(&file).unwrap();
        assert_eq!(content, "      line1\n");

        fs::remove_dir_all(&dir).unwrap();
    }

    /// Every accepted spelling maps to its style; anything else is `None`.
    #[test]
    fn test_parse_indent_style() {
        let expectations = [
            ("spaces", Some(IndentStyle::Spaces)),
            ("space", Some(IndentStyle::Spaces)),
            ("tabs", Some(IndentStyle::Tabs)),
            ("tab", Some(IndentStyle::Tabs)),
            ("bogus", None),
        ];
        for (input, expected) in expectations {
            assert_eq!(IndentStyle::parse(input), expected);
        }
    }

    /// Processing a directory reaches files in nested sub-directories.
    #[test]
    fn test_recursive_processing() {
        let dir = scratch_dir("reformat_indent_recursive");
        let sub = dir.join("sub");
        fs::create_dir_all(&sub).unwrap();

        fs::write(dir.join("a.py"), "\tline1\n").unwrap();
        fs::write(sub.join("b.py"), "\tline2\n").unwrap();

        let (files, lines) = IndentNormalizer::with_defaults().process(&dir).unwrap();
        assert_eq!(files, 2);
        assert_eq!(lines, 2);

        fs::remove_dir_all(&dir).unwrap();
    }
}
463}