Skip to main content

aft/
indent.rs

//! Shared indentation detection utility (D042).
//!
//! Analyzes source file content to determine the indentation style (tabs vs
//! spaces, width) used. Falls back to language-specific defaults when the
//! file has insufficient indented lines or mixed signals.
6
7use crate::parser::LangId;
8
/// Indentation style of a source file.
///
/// The value is a small `Copy` type; comparing two styles compares both the
/// kind (tabs vs spaces) and, for spaces, the width.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IndentStyle {
    /// One tab character per indent level.
    Tabs,
    /// The given number of space characters per indent level.
    Spaces(u8),
}
15
16impl IndentStyle {
17    /// Returns the whitespace string for one level of this indent.
18    pub fn as_str(&self) -> &'static str {
19        match self {
20            IndentStyle::Tabs => "\t",
21            IndentStyle::Spaces(2) => "  ",
22            IndentStyle::Spaces(4) => "    ",
23            IndentStyle::Spaces(8) => "        ",
24            IndentStyle::Spaces(n) => {
25                // For uncommon widths, leak a static string. In practice
26                // this only fires for exotic indent widths (1, 3, 5, 6, 7).
27                let s: String = " ".repeat(*n as usize);
28                Box::leak(s.into_boxed_str())
29            }
30        }
31    }
32
33    /// Language-specific default when detection has low confidence.
34    pub fn default_for(lang: LangId) -> Self {
35        match lang {
36            LangId::Python => IndentStyle::Spaces(4),
37            LangId::TypeScript
38            | LangId::Tsx
39            | LangId::JavaScript
40            | LangId::Vue
41            | LangId::Json
42            | LangId::Scala
43            | LangId::Ruby
44            | LangId::Lua
45            | LangId::Scss
46            | LangId::Yaml => IndentStyle::Spaces(2),
47            LangId::Rust => IndentStyle::Spaces(4),
48            LangId::Go => IndentStyle::Tabs,
49            LangId::C | LangId::Cpp | LangId::Zig | LangId::CSharp | LangId::Bash => {
50                IndentStyle::Spaces(4)
51            }
52            LangId::Solidity
53            | LangId::Java
54            | LangId::Kotlin
55            | LangId::Swift
56            | LangId::Php
57            | LangId::Perl => IndentStyle::Spaces(4),
58            LangId::Html => IndentStyle::Spaces(2),
59            LangId::Markdown => IndentStyle::Spaces(4),
60            LangId::Pascal => IndentStyle::Spaces(2),
61        }
62    }
63}
64
65/// Detect the indentation style of a source file.
66///
67/// Examines indented lines (those starting with whitespace) and determines
68/// whether tabs or spaces dominate. For spaces, determines the most common
69/// indent width by looking at the smallest indent unit.
70///
71/// Returns detected style if >50% of indented lines agree, otherwise falls
72/// back to the language default.
73pub fn detect_indent(source: &str, lang: LangId) -> IndentStyle {
74    let mut tab_count: u32 = 0;
75    let mut space_count: u32 = 0;
76    let mut indent_widths: [u32; 9] = [0; 9]; // index 1..8
77
78    for line in source.lines() {
79        if line.is_empty() {
80            continue;
81        }
82        let first = line.as_bytes()[0];
83        if first == b'\t' {
84            tab_count += 1;
85        } else if first == b' ' {
86            space_count += 1;
87            // Count leading spaces
88            let leading = line.len() - line.trim_start_matches(' ').len();
89            if leading > 0 && leading <= 8 {
90                indent_widths[leading] += 1;
91            }
92        }
93    }
94
95    let total = tab_count + space_count;
96    if total == 0 {
97        return IndentStyle::default_for(lang);
98    }
99
100    // Tabs win if >50% of indented lines use tabs
101    if tab_count > total / 2 {
102        return IndentStyle::Tabs;
103    }
104
105    // Spaces win if >50% of indented lines use spaces
106    if space_count > total / 2 {
107        // Determine the most likely indent unit width.
108        // The unit is the GCD of observed indent widths, or equivalently,
109        // the smallest width that has significant usage.
110        let width = determine_space_width(&indent_widths);
111        return IndentStyle::Spaces(width);
112    }
113
114    // Mixed / no clear winner — fall back
115    IndentStyle::default_for(lang)
116}
117
/// Determine the most likely space indent width from observed leading-space counts.
///
/// `widths[w]` is the number of lines that start with exactly `w` spaces, for
/// `w` in `1..=8`; `widths[0]` is ignored.
///
/// Strategy:
///
/// 1. With no observations at all, return 4.
/// 2. If the smallest observed width is at least 2 and every other observed
///    width is a multiple of it, return that smallest width.
/// 3. Otherwise try the common widths 4, 2 and 8 (in that order) and return
///    the first one that divides every observed width of 2 or more. Lines
///    with a single leading space are treated as noise here (for example the
///    ` * ` continuation lines of a block comment), but only when the lines
///    matching the candidate outnumber them, so a file that really is
///    indented by one space is still reported as such.
/// 4. If nothing fits, return the smallest observed width.
fn determine_space_width(widths: &[u32; 9]) -> u8 {
    let smallest = match (1..=8usize).find(|&w| widths[w] > 0) {
        Some(w) => w,
        None => return 4,
    };

    // Widths below `smallest` are all zero, so starting the scan at
    // `smallest` is equivalent to scanning the whole table.
    let consistent = (smallest..=8).all(|w| widths[w] == 0 || w % smallest == 0);
    if smallest >= 2 && consistent {
        // `smallest` is in 1..=8, so the cast cannot truncate.
        return smallest as u8;
    }

    // Number of 1-space lines; zero whenever `smallest >= 2`, in which case
    // the check below reduces to "at least one line matches the candidate".
    let stray = u64::from(widths[1]);

    for &candidate in &[4u8, 2, 8] {
        let unit = usize::from(candidate);
        let mut matching: u64 = 0;
        let mut non_matching: u64 = 0;

        // Start at width 2: 1-space lines are accounted for via `stray`.
        for (w, &lines) in widths.iter().enumerate().skip(2) {
            if w % unit == 0 {
                matching += u64::from(lines);
            } else {
                non_matching += u64::from(lines);
            }
        }

        if non_matching == 0 && matching > stray {
            return candidate;
        }
    }

    // `smallest` is in 1..=8, so the cast cannot truncate.
    smallest as u8
}
159
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn detect_indent_tabs() {
        let source = "fn main() {\n\tlet x = 1;\n\tlet y = 2;\n}\n";
        assert_eq!(detect_indent(source, LangId::Rust), IndentStyle::Tabs);
    }

    #[test]
    fn detect_indent_tabs_with_crlf_line_endings() {
        let source = "fn main() {\r\n\tlet x = 1;\r\n\tlet y = 2;\r\n}\r\n";
        assert_eq!(detect_indent(source, LangId::Rust), IndentStyle::Tabs);
    }

    #[test]
    fn detect_indent_two_spaces() {
        let source = "class Foo {\n  bar() {}\n  baz() {}\n}\n";
        assert_eq!(
            detect_indent(source, LangId::TypeScript),
            IndentStyle::Spaces(2)
        );
    }

    #[test]
    fn detect_indent_four_spaces() {
        let source =
            "class Foo:\n    def bar(self):\n        pass\n    def baz(self):\n        pass\n";
        assert_eq!(
            detect_indent(source, LangId::Python),
            IndentStyle::Spaces(4)
        );
    }

    #[test]
    fn detect_indent_four_spaces_with_nested() {
        // Lines indented at 4 and 8 should detect 4-space indent
        let source = "impl Foo {\n    fn bar() {\n        let x = 1;\n    }\n}\n";
        assert_eq!(detect_indent(source, LangId::Rust), IndentStyle::Spaces(4));
    }

    #[test]
    fn detect_indent_empty_source_uses_default() {
        assert_eq!(detect_indent("", LangId::Python), IndentStyle::Spaces(4));
        assert_eq!(
            detect_indent("", LangId::TypeScript),
            IndentStyle::Spaces(2)
        );
        assert_eq!(detect_indent("", LangId::Go), IndentStyle::Tabs);
    }

    #[test]
    fn detect_indent_no_indented_lines_uses_default() {
        let source = "x = 1\ny = 2\n";
        assert_eq!(
            detect_indent(source, LangId::Python),
            IndentStyle::Spaces(4)
        );
    }

    #[test]
    fn detect_indent_majority_wins_in_mixed_file() {
        // Two tab-indented lines against one space-indented line.
        let tabs_lead = "if x {\n\ta\n\tb\n  c\n}\n";
        assert_eq!(detect_indent(tabs_lead, LangId::Python), IndentStyle::Tabs);

        // Two space-indented lines against one tab-indented line.
        let spaces_lead = "if x {\n  a\n  b\n\tc\n}\n";
        assert_eq!(
            detect_indent(spaces_lead, LangId::Go),
            IndentStyle::Spaces(2)
        );
    }

    #[test]
    fn detect_indent_exact_tie_uses_default() {
        // One tab-indented and one space-indented line: no majority.
        let source = "if x {\n\ta\n  b\n}\n";
        assert_eq!(detect_indent(source, LangId::Go), IndentStyle::Tabs);
        assert_eq!(
            detect_indent(source, LangId::Python),
            IndentStyle::Spaces(4)
        );
    }

    #[test]
    fn indent_style_as_str() {
        assert_eq!(IndentStyle::Tabs.as_str(), "\t");
        assert_eq!(IndentStyle::Spaces(2).as_str(), "  ");
        assert_eq!(IndentStyle::Spaces(4).as_str(), "    ");
    }

    #[test]
    fn indent_style_as_str_uncommon_widths() {
        assert_eq!(IndentStyle::Spaces(1).as_str(), " ");
        assert_eq!(IndentStyle::Spaces(3).as_str(), "   ");
        assert_eq!(IndentStyle::Spaces(8).as_str(), "        ");
        assert_eq!(IndentStyle::Spaces(6).as_str().len(), 6);
    }

    #[test]
    fn determine_space_width_defaults_to_four_without_samples() {
        assert_eq!(determine_space_width(&[0; 9]), 4);
    }

    #[test]
    fn determine_space_width_falls_back_to_common_divisor() {
        // Widths 4 and 6 are not multiples of each other; 2 divides both.
        let mut widths = [0u32; 9];
        widths[4] = 1;
        widths[6] = 1;
        assert_eq!(determine_space_width(&widths), 2);
    }
}