use crate::parser::LangId;
/// The unit of indentation used by a source file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IndentStyle {
    /// One tab character per indentation level.
    Tabs,
    /// The given number of space characters per indentation level.
    Spaces(u8),
}

impl IndentStyle {
    /// Returns the text of a single indentation level.
    ///
    /// `Tabs` yields `"\t"`; `Spaces(n)` yields exactly `n` space characters
    /// (the empty string for `n == 0`).
    ///
    /// The result is always a slice of static data, so calling this never
    /// allocates, regardless of the width.
    pub fn as_str(&self) -> &'static str {
        // A single static run of spaces as long as the widest width a `u8`
        // can express. Every `Spaces(n)` is served as a prefix of this run,
        // which avoids both per-width literals and leaking heap strings.
        const MAX_WIDTH: usize = u8::MAX as usize;
        static SPACES: [u8; MAX_WIDTH] = [b' '; MAX_WIDTH];

        match *self {
            IndentStyle::Tabs => "\t",
            IndentStyle::Spaces(width) => std::str::from_utf8(&SPACES[..usize::from(width)])
                .expect("a run of ASCII spaces is valid UTF-8"),
        }
    }

    /// Returns the default indentation style for `lang`.
    ///
    /// The match is intentionally exhaustive (no wildcard arm) so that adding
    /// a `LangId` variant forces a decision here at compile time.
    pub fn default_for(lang: LangId) -> Self {
        match lang {
            LangId::Go => IndentStyle::Tabs,
            LangId::TypeScript | LangId::Tsx | LangId::JavaScript | LangId::Html => {
                IndentStyle::Spaces(2)
            }
            LangId::Python
            | LangId::Rust
            | LangId::C
            | LangId::Cpp
            | LangId::Zig
            | LangId::CSharp
            | LangId::Bash
            | LangId::Markdown => IndentStyle::Spaces(4),
        }
    }
}
/// Guesses the indentation style of `source` from the first character of each line.
///
/// Every line that begins with a tab counts as one vote for tabs; every line
/// that begins with a space counts as one vote for spaces, and its run of
/// leading spaces (when 1 to 8 long) is recorded in a width histogram.
/// A style wins only with a strict majority of the indented lines:
///
/// * tabs win: returns [`IndentStyle::Tabs`];
/// * spaces win: returns [`IndentStyle::Spaces`] with the width chosen by
///   `determine_space_width` from the histogram;
/// * otherwise (no indented lines, or an exact tie): returns
///   [`IndentStyle::default_for`] `lang`.
pub fn detect_indent(source: &str, lang: LangId) -> IndentStyle {
    let mut tabbed_lines: u32 = 0;
    let mut spaced_lines: u32 = 0;
    // histogram[w] = number of lines indented by exactly `w` spaces; slot 0 stays unused.
    let mut histogram = [0u32; 9];

    for line in source.lines() {
        match line.bytes().next() {
            Some(b'\t') => tabbed_lines += 1,
            Some(b' ') => {
                spaced_lines += 1;
                // At least 1 here, since the first byte is a space.
                let run = line.bytes().take_while(|&b| b == b' ').count();
                // Runs longer than 8 fall outside the histogram and are only
                // counted as votes, not as width samples.
                if let Some(slot) = histogram.get_mut(run) {
                    *slot += 1;
                }
            }
            // Empty lines and lines starting with anything else carry no vote.
            _ => {}
        }
    }

    // With no indented lines both counts are 0 and neither comparison below
    // can succeed, so the language default is returned.
    let half = (tabbed_lines + spaced_lines) / 2;
    if tabbed_lines > half {
        IndentStyle::Tabs
    } else if spaced_lines > half {
        IndentStyle::Spaces(determine_space_width(&histogram))
    } else {
        IndentStyle::default_for(lang)
    }
}
/// Picks the indentation width that best explains a histogram of leading-space runs.
///
/// `widths[w]` is the number of lines indented by exactly `w` spaces, for
/// `w` in `1..=8`; index 0 is ignored.
///
/// Selection order:
/// 1. no width observed at all: `4`;
/// 2. the narrowest observed width, if it is at least 2 and divides every
///    other observed width;
/// 3. the first of `4`, `2`, `8` that divides every observed width;
/// 4. otherwise the narrowest observed width.
fn determine_space_width(widths: &[u32; 9]) -> u8 {
    let is_used = |width: usize| widths[width] > 0;
    let divides_every_used_width =
        |unit: usize| (1..=8).all(|width| !is_used(width) || width % unit == 0);

    let narrowest = match (1..=8).find(|&width| is_used(width)) {
        Some(width) => width,
        None => return 4,
    };

    if narrowest >= 2 && divides_every_used_width(narrowest) {
        return narrowest as u8;
    }

    // Preference order matters: 4 is tried before 2, and 8 last.
    [4usize, 2, 8]
        .iter()
        .copied()
        .find(|&unit| divides_every_used_width(unit))
        .unwrap_or(narrowest) as u8
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fixture where each `(depth, text)` row becomes one line,
    /// indented by `depth` copies of `unit` and terminated by a newline.
    ///
    /// Indentation is generated rather than written inside string literals so
    /// the intended width of every fixture is explicit.
    fn indented(unit: &str, rows: &[(usize, &str)]) -> String {
        rows.iter()
            .map(|&(depth, text)| format!("{}{}\n", unit.repeat(depth), text))
            .collect()
    }

    #[test]
    fn detect_indent_tabs() {
        let source = indented("\t", &[(0, "fn main() {"), (1, "let x = 1;"), (1, "let y = 2;"), (0, "}")]);
        assert_eq!(detect_indent(&source, LangId::Rust), IndentStyle::Tabs);
    }

    #[test]
    fn detect_indent_two_spaces() {
        let unit = " ".repeat(2);
        let source = indented(&unit, &[(0, "class Foo {"), (1, "bar() {}"), (1, "baz() {}"), (0, "}")]);
        assert_eq!(
            detect_indent(&source, LangId::TypeScript),
            IndentStyle::Spaces(2)
        );
    }

    #[test]
    fn detect_indent_four_spaces() {
        let unit = " ".repeat(4);
        let source = indented(
            &unit,
            &[
                (0, "class Foo:"),
                (1, "def bar(self):"),
                (2, "pass"),
                (1, "def baz(self):"),
                (2, "pass"),
            ],
        );
        assert_eq!(
            detect_indent(&source, LangId::Python),
            IndentStyle::Spaces(4)
        );
    }

    #[test]
    fn detect_indent_empty_source_uses_default() {
        assert_eq!(detect_indent("", LangId::Python), IndentStyle::Spaces(4));
        assert_eq!(
            detect_indent("", LangId::TypeScript),
            IndentStyle::Spaces(2)
        );
        assert_eq!(detect_indent("", LangId::Go), IndentStyle::Tabs);
    }

    #[test]
    fn detect_indent_no_indented_lines_uses_default() {
        let source = "x = 1\ny = 2\n";
        assert_eq!(
            detect_indent(source, LangId::Python),
            IndentStyle::Spaces(4)
        );
    }

    #[test]
    fn indent_style_as_str() {
        assert_eq!(IndentStyle::Tabs.as_str(), "\t");
        // Expected values are generated so their width cannot be misread.
        assert_eq!(IndentStyle::Spaces(2).as_str(), " ".repeat(2));
        assert_eq!(IndentStyle::Spaces(4).as_str(), " ".repeat(4));
    }

    #[test]
    fn detect_indent_four_spaces_with_nested() {
        let unit = " ".repeat(4);
        let source = indented(
            &unit,
            &[
                (0, "impl Foo {"),
                (1, "fn bar() {"),
                (2, "let x = 1;"),
                (1, "}"),
                (0, "}"),
            ],
        );
        assert_eq!(detect_indent(&source, LangId::Rust), IndentStyle::Spaces(4));
    }
}