use crate::model::buffer::Buffer;
use once_cell::sync::Lazy;
use regex::Regex;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
/// Syntactic family of a language.
///
/// Every language id that resolves to the same family shares one compiled
/// set of indentation rules.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Family {
    /// Bracket-delimited languages (Rust, C, Java, JSON, CSS, ...).
    CurlyBrace,
    /// Python.
    Python,
    /// Ruby.
    RubyLike,
    /// Lua.
    LuaLike,
    /// Bash and other shell dialects.
    BashLike,
    /// Pascal.
    PascalLike,
}
/// Uncompiled indentation rules: regex sources plus a behaviour flag.
///
/// Every pattern is optional; a missing pattern simply never matches.
#[derive(Debug, Clone, Default)]
pub struct IndentRulesDef {
    /// Matched against the reference line; a hit indents the next line by
    /// one unit unless `self_close` also matches the same line.
    pub increase: Option<&'static str>,
    /// Matched against the text after the cursor (or a freshly typed
    /// delimiter); a hit removes one unit of indentation.
    pub decrease: Option<&'static str>,
    /// Like `increase`, but not suppressed by `self_close`.
    pub indent_next_line: Option<&'static str>,
    /// Matched against the reference line when it did not open a block; a
    /// hit removes one unit of indentation from the next line.
    pub dedent_next_line: Option<&'static str>,
    /// When this matches the reference line, `increase` is ignored for it.
    pub self_close: Option<&'static str>,
    /// When set, a whitespace-only line keeps the column the cursor is at
    /// instead of being re-derived from the previous line.
    pub indentation_significant: bool,
}
/// Compiled counterpart of [`IndentRulesDef`].
///
/// Each field holds the compiled regex for the pattern of the same name, or
/// `None` when the pattern was absent or failed to compile.
pub struct IndentRules {
    /// Reference-line pattern that opens a block.
    increase: Option<Regex>,
    /// Pattern for text that closes a block.
    decrease: Option<Regex>,
    /// Reference-line pattern that indents the following line.
    indent_next_line: Option<Regex>,
    /// Reference-line pattern that dedents the following line.
    dedent_next_line: Option<Regex>,
    /// Pattern that cancels `increase` on the same line.
    self_close: Option<Regex>,
    /// Whether whitespace-only lines keep the cursor's current column.
    indentation_significant: bool,
}
impl IndentRules {
fn compile(def: &IndentRulesDef) -> Self {
Self::compile_parts(
def.increase,
def.decrease,
def.indent_next_line,
def.dedent_next_line,
def.self_close,
def.indentation_significant,
)
}
fn compile_parts(
increase: Option<&str>,
decrease: Option<&str>,
indent_next_line: Option<&str>,
dedent_next_line: Option<&str>,
self_close: Option<&str>,
indentation_significant: bool,
) -> Self {
let c = |p: Option<&str>| p.and_then(|s| Regex::new(s).ok());
Self {
indentation_significant,
increase: c(increase),
decrease: c(decrease),
indent_next_line: c(indent_next_line),
dedent_next_line: c(dedent_next_line),
self_close: c(self_close),
}
}
pub fn calculate_indent<F: Fn(usize) -> bool>(
&self,
buffer: &Buffer,
position: usize,
tab_size: usize,
is_code: F,
) -> usize {
let unit = tab_size.max(1);
let cur = line_bounds(buffer, position);
let cur_has_content = first_nonws(buffer, cur.start, position).is_some();
if self.indentation_significant
&& !cur_has_content
&& first_nonws(buffer, position, cur.end).is_none()
{
return visual_indent(buffer, cur.start, position, tab_size);
}
let reference = if cur_has_content {
Some(LineSpan {
start: cur.start,
end: position,
})
} else {
prev_nonblank_line(buffer, cur.start)
};
let Some(reference) = reference else {
return 0;
};
let base = visual_indent(buffer, reference.start, reference.end, tab_size);
let ref_code = code_view(buffer, reference.start, reference.end, &is_code);
let mut indent = base;
let opened = self.increases(&ref_code) || matches(&self.indent_next_line, &ref_code);
if opened {
indent += unit;
} else if matches(&self.dedent_next_line, &ref_code) {
indent = indent.saturating_sub(unit);
}
let tail = code_view(buffer, position, cur.end, &is_code);
let opener_on_current_line = opened && cur_has_content;
if matches(&self.decrease, &tail) && !opener_on_current_line {
indent = indent.saturating_sub(unit);
}
indent
}
pub fn calculate_dedent_for_delimiter<F: Fn(usize) -> bool>(
&self,
buffer: &Buffer,
position: usize,
ch: char,
tab_size: usize,
is_code: F,
) -> Option<usize> {
let probe = format!("{ch}");
if !matches(&self.decrease, &probe) {
return None;
}
let unit = tab_size.max(1);
let cur = line_bounds(buffer, position);
let reference = prev_nonblank_line(buffer, cur.start)?;
let base = visual_indent(buffer, reference.start, reference.end, tab_size);
let ref_code = code_view(buffer, reference.start, reference.end, &is_code);
let mut indent = base;
if self.increases(&ref_code) {
indent += unit;
}
Some(indent.saturating_sub(unit))
}
fn increases(&self, code: &str) -> bool {
matches(&self.increase, code) && !matches(&self.self_close, code)
}
}
/// Tests `text` against an optional pattern; a missing pattern never matches.
fn matches(re: &Option<Regex>, text: &str) -> bool {
    match re {
        Some(pattern) => pattern.is_match(text),
        None => false,
    }
}
/// Looks up the indentation rules for a language id.
///
/// A rule registered through `set_user_rule` takes precedence; otherwise the
/// built-in rules of the id's family are returned. Yields `None` for ids
/// that have neither.
///
/// # Panics
///
/// Panics if the user-rule lock is poisoned.
pub fn rules_for_id(id: &str) -> Option<Arc<IndentRules>> {
    let user_rule = USER_RULES.read().unwrap().get(id).cloned();
    user_rule.or_else(|| {
        let family = family_for_id(id)?;
        FAMILY_RULES.get(&family).cloned()
    })
}
/// Looks up indentation rules from a human-readable syntax name.
///
/// The name is lower-cased (ASCII only) and mapped onto a language id:
/// exact `c++` / `c#` become `cpp` / `csharp`; names containing
/// `typescript`, `javascript`, or `bash` / `shell` collapse to the matching
/// id (checked in that order); anything else is used as the id unchanged.
pub fn rules_for_syntax_name(name: &str) -> Option<Arc<IndentRules>> {
    let normalized = name.to_ascii_lowercase();
    let id: &str = if normalized == "c++" {
        "cpp"
    } else if normalized == "c#" {
        "csharp"
    } else if normalized.contains("typescript") {
        "typescript"
    } else if normalized.contains("javascript") {
        "javascript"
    } else if normalized.contains("bash") || normalized.contains("shell") {
        "bash"
    } else {
        normalized.as_str()
    };
    rules_for_id(id)
}
/// Maps a language id onto its rule family, or `None` for unknown ids.
///
/// The comparison is exact and case-sensitive.
fn family_for_id(id: &str) -> Option<Family> {
    match id {
        "rust" | "c" | "cpp" | "c++" | "csharp" | "c_sharp" | "java" | "go" | "javascript"
        | "typescript" | "typescriptreact" | "javascriptreact" | "php" | "swift" | "kotlin"
        | "dart" | "scala" | "json" | "jsonc" | "css" | "scss" | "less" => {
            Some(Family::CurlyBrace)
        }
        "python" => Some(Family::Python),
        "ruby" => Some(Family::RubyLike),
        "lua" => Some(Family::LuaLike),
        "bash" | "sh" | "shell" | "shellscript" => Some(Family::BashLike),
        "pascal" => Some(Family::PascalLike),
        _ => None,
    }
}
fn def_for_family(family: Family) -> &'static IndentRulesDef {
match family {
Family::CurlyBrace => &CURLY_BRACE,
Family::Python => &PYTHON,
Family::RubyLike => &RUBY_LIKE,
Family::LuaLike => &LUA_LIKE,
Family::BashLike => &BASH_LIKE,
Family::PascalLike => &PASCAL_LIKE,
}
}
/// Compiled built-in rules, one entry per [`Family`], built on first access.
static FAMILY_RULES: Lazy<HashMap<Family, Arc<IndentRules>>> = Lazy::new(|| {
    const ALL_FAMILIES: [Family; 6] = [
        Family::CurlyBrace,
        Family::Python,
        Family::RubyLike,
        Family::LuaLike,
        Family::BashLike,
        Family::PascalLike,
    ];

    ALL_FAMILIES
        .iter()
        .map(|&family| {
            let compiled = IndentRules::compile(def_for_family(family));
            (family, Arc::new(compiled))
        })
        .collect()
});
/// User-registered rules keyed by language id; starts out empty.
static USER_RULES: Lazy<RwLock<HashMap<String, Arc<IndentRules>>>> =
    Lazy::new(Default::default);
/// Removes every user-registered rule.
///
/// # Panics
///
/// Panics if the user-rule lock is poisoned.
pub fn clear_user_rules() {
    let mut registry = USER_RULES.write().unwrap();
    registry.clear();
}
/// Registers (or replaces) the user rule for language id `id`.
///
/// Each pattern passed as `None` falls back to the corresponding pattern of
/// the id's built-in family, if the id belongs to one. The
/// `indentation_significant` flag is taken from that family as well and is
/// `false` for ids without a family.
///
/// # Panics
///
/// Panics if the user-rule lock is poisoned.
pub fn set_user_rule(
    id: &str,
    increase: Option<&str>,
    decrease: Option<&str>,
    indent_next_line: Option<&str>,
    dedent_next_line: Option<&str>,
    self_close: Option<&str>,
) {
    let compiled = match family_for_id(id).map(def_for_family) {
        // Known family: explicit patterns win, the rest is inherited.
        Some(family_def) => IndentRules::compile_parts(
            increase.or(family_def.increase),
            decrease.or(family_def.decrease),
            indent_next_line.or(family_def.indent_next_line),
            dedent_next_line.or(family_def.dedent_next_line),
            self_close.or(family_def.self_close),
            family_def.indentation_significant,
        ),
        // Unknown id: only what the caller supplied.
        None => IndentRules::compile_parts(
            increase,
            decrease,
            indent_next_line,
            dedent_next_line,
            self_close,
            false,
        ),
    };

    let mut registry = USER_RULES.write().unwrap();
    registry.insert(id.to_string(), Arc::new(compiled));
}
/// Rules for bracket-delimited languages.
const CURLY_BRACE: IndentRulesDef = IndentRulesDef {
    // Line ends with an opening `{`, `[` or `(`.
    increase: Some(r"[\{\[\(]\s*$"),
    // Text starts with a closing `}`, `]` or `)`.
    decrease: Some(r"^\s*[\}\]\)]"),
    // Brace-less `if (...)` / `for (...)` / `while (...)` header, or a bare `else`.
    indent_next_line: Some(r"^\s*((if|for|while)\b.*\)|else)\s*$"),
    dedent_next_line: None,
    self_close: None,
    indentation_significant: false,
};
/// Rules for Python.
const PYTHON: IndentRulesDef = IndentRulesDef {
    // Line ends with a colon.
    increase: Some(r":\s*$"),
    // Text starts with a keyword that continues a compound statement.
    decrease: Some(r"^\s*(elif|else|except|finally|case)\b"),
    indent_next_line: None,
    // Line starts with a statement that leaves the current block.
    dedent_next_line: Some(r"^\s*(return|pass|raise|break|continue)\b"),
    self_close: None,
    // Whitespace-only lines keep the column the cursor is at.
    indentation_significant: true,
};
/// Rules for Ruby.
const RUBY_LIKE: IndentRulesDef = IndentRulesDef {
    // Line starts with a block keyword, or ends with `do` plus optional
    // `|args|`.
    increase: Some(
        r"(^\s*(if|unless|while|until|for|begin|def|class|module|case|else|elsif|when|in|rescue|ensure)\b)|(\bdo(\s*\|[^|]*\|)?\s*$)",
    ),
    // Text starts with `end` or a mid-block keyword.
    decrease: Some(r"^\s*(end|else|elsif|when|in|rescue|ensure)\b"),
    indent_next_line: None,
    dedent_next_line: None,
    // An `end` anywhere on the line cancels `increase` for that line.
    self_close: Some(r"\bend\b"),
    indentation_significant: false,
};
/// Rules for Lua.
const LUA_LIKE: IndentRulesDef = IndentRulesDef {
    // Line starts with a block keyword, or ends with `do` / `then`.
    increase: Some(
        r"(^\s*((local\s+)?function|if|elseif|else|for|while|repeat)\b)|(\b(do|then)\s*$)",
    ),
    // Text starts with `end`, `else`, `elseif` or `until`.
    decrease: Some(r"^\s*(end|else|elseif|until)\b"),
    indent_next_line: None,
    dedent_next_line: None,
    // An `end` anywhere on the line cancels `increase` for that line.
    self_close: Some(r"\bend\b"),
    indentation_significant: false,
};
/// Rules for Bash and other shells.
const BASH_LIKE: IndentRulesDef = IndentRulesDef {
    // Line ends with `then` / `do`, is a `case ... in` header, or ends
    // with `{`.
    increase: Some(r"(\b(then|do)\s*$)|(^\s*case\b.*\bin\s*$)|(\{\s*$)"),
    // Text starts with `fi`, `done`, `esac`, `else`, `elif` or `}`.
    decrease: Some(r"^\s*(fi|done|esac|else|elif|\})"),
    indent_next_line: None,
    dedent_next_line: None,
    self_close: None,
    indentation_significant: false,
};
/// Rules for Pascal.
const PASCAL_LIKE: IndentRulesDef = IndentRulesDef {
    // Line starts with a block keyword, or ends with `begin` / `of`.
    increase: Some(r"(^\s*(begin|case|record|try|repeat|asm)\b)|(\b(begin|of)\s*$)"),
    // Text starts with `end`, `until`, `except` or `finally`.
    decrease: Some(r"^\s*(end|until|except|finally)\b"),
    indent_next_line: None,
    dedent_next_line: None,
    // An `end` anywhere on the line cancels `increase` for that line.
    self_close: Some(r"\bend\b"),
    indentation_significant: false,
};
/// Half-open byte range `[start, end)` inside a buffer.
#[derive(Clone, Copy)]
struct LineSpan {
    /// Offset of the first byte of the span.
    start: usize,
    /// Offset one past the last byte of the span.
    end: usize,
}
/// Returns the byte at offset `pos`, or `None` when `pos` is at or past the
/// end of the buffer.
fn byte_at(buffer: &Buffer, pos: usize) -> Option<u8> {
    let in_range = pos < buffer.len();
    in_range
        .then(|| buffer.slice_bytes(pos..pos + 1).first().copied())
        .flatten()
}
/// Finds the line containing `position`.
///
/// The span starts right after the preceding `\n` (or at offset 0) and ends
/// at the next `\n` (or at the end of the buffer); the newline itself is not
/// part of the span.
fn line_bounds(buffer: &Buffer, position: usize) -> LineSpan {
    let is_newline = |offset: usize| byte_at(buffer, offset) == Some(b'\n');

    // Closest newline strictly before the cursor; the line begins after it.
    let start = (0..position)
        .rev()
        .find(|&offset| is_newline(offset))
        .map_or(0, |newline| newline + 1);

    // First newline at or after the cursor. Without one the line runs to
    // the end of the buffer, but never ends before `position` itself.
    let len = buffer.len();
    let end = (position..len)
        .find(|&offset| is_newline(offset))
        .unwrap_or_else(|| len.max(position));

    LineSpan { start, end }
}
/// Returns the offset of the first byte in `[start, end)` that is not a
/// space, tab or carriage return.
///
/// Returns `None` when the range holds only such whitespace, or when the
/// scan runs past the end of the buffer.
fn first_nonws(buffer: &Buffer, start: usize, end: usize) -> Option<usize> {
    for offset in start..end {
        // `?` stops the scan as soon as the buffer has no byte here.
        match byte_at(buffer, offset)? {
            b' ' | b'\t' | b'\r' => continue,
            _ => return Some(offset),
        }
    }
    None
}
/// Walks upwards from the line starting at `line_start` and returns the
/// closest earlier line that contains non-whitespace, if any.
fn prev_nonblank_line(buffer: &Buffer, line_start: usize) -> Option<LineSpan> {
    // The byte before a line start is the previous line's newline; probing
    // there selects the previous line. At offset 0 nothing lies above.
    let mut probe = line_start.checked_sub(1)?;
    loop {
        let candidate = line_bounds(buffer, probe);
        if first_nonws(buffer, candidate.start, candidate.end).is_some() {
            return Some(candidate);
        }
        probe = candidate.start.checked_sub(1)?;
    }
}
/// Measures the leading whitespace of `[start, end)`.
///
/// Each space counts as 1 and each tab as `tab_size`; counting stops at the
/// first byte that is neither.
fn visual_indent(buffer: &Buffer, start: usize, end: usize, tab_size: usize) -> usize {
    let mut width = 0;
    for offset in start..end {
        width += match byte_at(buffer, offset) {
            Some(b' ') => 1,
            Some(b'\t') => tab_size,
            _ => break,
        };
    }
    width
}
/// Renders `[start, end)` as the text the indentation patterns should see.
///
/// * Carriage returns and line feeds are dropped.
/// * Every byte for which `is_code` returns `false` is replaced by a single
///   ASCII space, so masked regions cannot match a pattern but still
///   separate the tokens around them.
///
/// The remaining bytes are decoded as UTF-8 so that multi-byte characters
/// reach the regex engine as the characters they are. Casting each byte to
/// a `char` would turn them into unrelated Latin-1 characters, which skews
/// `\b` / `\s` and prevents non-ASCII patterns from matching. Byte sequences
/// that are not valid UTF-8 (for example a partially masked character)
/// decode to U+FFFD.
fn code_view<F: Fn(usize) -> bool>(
    buffer: &Buffer,
    start: usize,
    end: usize,
    is_code: &F,
) -> String {
    let bytes = buffer.slice_bytes(start..end);
    let mut visible: Vec<u8> = Vec::with_capacity(bytes.len());
    for (i, &b) in bytes.iter().enumerate() {
        if b == b'\r' || b == b'\n' {
            continue;
        }
        visible.push(if is_code(start + i) { b } else { b' ' });
    }
    String::from_utf8_lossy(&visible).into_owned()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::filesystem::NoopFileSystem;
    use std::sync::Arc;

    // Fixture bodies are indented by exactly one unit of the tab size passed
    // to the rule under test (4 spaces for most languages, 2 for Ruby), so
    // the asserted columns follow directly from the fixture text.

    /// Builds an in-memory buffer holding `content`.
    fn buf(content: &str) -> Buffer {
        let fs = Arc::new(NoopFileSystem);
        let mut b = Buffer::empty(fs);
        b.insert(0, content);
        b
    }

    /// Indentation at the end of `content`, with every byte treated as code.
    fn indent(id: &str, content: &str, tab: usize) -> usize {
        rules_for_id(id)
            .unwrap()
            .calculate_indent(&buf(content), content.len(), tab, |_| true)
    }

    /// Like `indent`, but the half-open byte ranges in `masked` are reported
    /// as non-code (strings, comments).
    fn indent_masked(id: &str, content: &str, tab: usize, masked: &[(usize, usize)]) -> usize {
        let b = buf(content);
        let is_code = |byte: usize| !masked.iter().any(|&(s, e)| byte >= s && byte < e);
        rules_for_id(id)
            .unwrap()
            .calculate_indent(&b, content.len(), tab, is_code)
    }

    #[test]
    fn curly_indents_after_open_brace() {
        assert_eq!(indent("rust", "fn main() {\n", 4), 4);
        assert_eq!(indent("typescript", "function f() {\n", 4), 4);
    }

    #[test]
    fn curly_no_indent_after_balanced_line() {
        assert_eq!(indent("rust", "let x = 1;\n", 4), 0);
        assert_eq!(indent("rust", "fn x() { return 1; }\n", 4), 0);
    }

    #[test]
    fn curly_dedents_before_close_brace() {
        let content = "fn main() {\n    }";
        // Cursor sits immediately before the closing brace.
        let pos = content.len() - 1;
        let b = buf(content);
        let got = rules_for_id("rust")
            .unwrap()
            .calculate_indent(&b, pos, 4, |_| true);
        assert_eq!(got, 0);
    }

    #[test]
    fn curly_braceless_if_indents_next_line_only() {
        assert_eq!(indent("c", "if (x)\n", 4), 4);
    }

    #[test]
    fn curly_dedent_for_typed_brace() {
        let content = "fn main() {\n    body\n";
        let dedent = rules_for_id("rust")
            .unwrap()
            .calculate_dedent_for_delimiter(&buf(content), content.len(), '}', 4, |_| true);
        assert_eq!(dedent, Some(0));
    }

    #[test]
    fn no_indent_for_brace_in_string() {
        let content = "let x = \"{\";\n";
        let open = content.find('{').unwrap();
        let masked = [(content.find('"').unwrap(), open + 2)];
        assert_eq!(indent_masked("rust", content, 4, &masked), 0);
        // Even unmasked, the brace is followed by `";` and so is not trailing.
        assert_eq!(indent("rust", content, 4), 0);
    }

    #[test]
    fn no_indent_for_trailing_brace_in_comment() {
        let content = "foo() // {\n";
        let cstart = content.find("//").unwrap();
        let masked = [(cstart, content.len())];
        assert_eq!(indent_masked("rust", content, 4, &masked), 0);
    }

    #[test]
    fn brace_in_comment_does_not_defeat_real_open() {
        let content = "if (x) { // start {\n";
        let cstart = content.find("//").unwrap();
        let masked = [(cstart, content.len())];
        assert_eq!(indent_masked("rust", content, 4, &masked), 4);
    }

    #[test]
    fn python_indents_after_colon() {
        assert_eq!(indent("python", "def foo():", 4), 4);
        assert_eq!(indent("python", "if x:", 4), 4);
    }

    #[test]
    fn python_dedents_after_return() {
        let content = "def foo():\n    return 1";
        assert_eq!(indent("python", content, 4), 0);
    }

    #[test]
    fn python_keeps_indent_inside_body() {
        let content = "def foo():\n    x = 1";
        assert_eq!(indent("python", content, 4), 4);
    }

    #[test]
    fn python_blank_line_keeps_manual_dedent() {
        // The cursor is on an empty line at column 0; that column is kept.
        let content = "if x:\n    foo()\n";
        assert_eq!(indent("python", content, 4), 0);
    }

    #[test]
    fn python_blank_line_maintains_current_column() {
        // The cursor is on a whitespace-only line at column 4.
        let content = "if x:\n    foo()\n    ";
        assert_eq!(indent("python", content, 4), 4);
    }

    #[test]
    fn curly_blank_line_rederives_not_preserved() {
        assert_eq!(indent("rust", "fn f() {\n", 4), 4);
    }

    #[test]
    fn python_colon_in_string_does_not_indent() {
        let content = "s = \"key:\"";
        let q1 = content.find('"').unwrap();
        let q2 = content.rfind('"').unwrap();
        let masked = [(q1, q2 + 1)];
        assert_eq!(indent_masked("python", content, 4, &masked), 0);
    }

    #[test]
    fn ruby_indents_after_def_and_do() {
        assert_eq!(indent("ruby", "def foo\n", 2), 2);
        assert_eq!(indent("ruby", "[1,2].each do |n|\n", 2), 2);
    }

    #[test]
    fn ruby_one_liner_with_end_does_not_indent() {
        assert_eq!(indent("ruby", "def foo; end\n", 2), 0);
        assert_eq!(indent("ruby", "if x then y end\n", 2), 0);
    }

    #[test]
    fn ruby_end_in_string_does_not_dedent_or_break() {
        let content = "x = 1\ns = \"end\"\n";
        let q1 = content.rfind('"').unwrap();
        let qs = content[..q1].rfind('"').unwrap();
        let masked = [(qs, q1 + 1)];
        assert_eq!(indent_masked("ruby", content, 2, &masked), 0);
    }

    #[test]
    fn ruby_midblock_else_reindents_body() {
        let content = "if x\n  a\nelse\n";
        assert_eq!(indent("ruby", content, 2), 2);
    }

    #[test]
    fn lua_indents_after_block_openers() {
        assert_eq!(indent("lua", "function f()\n", 4), 4);
        assert_eq!(indent("lua", "if x then\n", 4), 4);
        assert_eq!(indent("lua", "for i = 1, n do\n", 4), 4);
    }

    #[test]
    fn lua_one_liner_with_end_does_not_indent() {
        assert_eq!(indent("lua", "function f() end\n", 4), 0);
    }

    #[test]
    fn bash_indents_after_then_do_case() {
        assert_eq!(indent("bash", "if true; then\n", 4), 4);
        assert_eq!(indent("bash", "for x in a b; do\n", 4), 4);
        assert_eq!(indent("bash", "case $x in\n", 4), 4);
    }

    #[test]
    fn bash_resolves_from_syntect_name() {
        assert!(rules_for_syntax_name("Bourne Again Shell (bash)").is_some());
    }

    #[test]
    fn pascal_indents_after_begin() {
        assert_eq!(indent("pascal", "begin\n", 4), 4);
        assert_eq!(indent("pascal", "if x then begin\n", 4), 4);
    }

    #[test]
    fn pascal_one_liner_with_end_does_not_indent() {
        assert_eq!(indent("pascal", "begin end;\n", 4), 0);
    }

    #[test]
    fn unknown_language_has_no_rules() {
        assert!(rules_for_id("brainfuck").is_none());
    }

    #[test]
    fn families_compile() {
        assert!(rules_for_id("rust").unwrap().increase.is_some());
        assert!(rules_for_id("python").unwrap().dedent_next_line.is_some());
        assert!(rules_for_id("ruby").unwrap().self_close.is_some());
    }

    #[test]
    fn user_overrides_register_and_merge() {
        clear_user_rules();

        // An id without a built-in family gets exactly the supplied patterns.
        set_user_rule(
            "zz_newlang",
            Some(r":\s*$"),
            Some(r"^\s*end\b"),
            None,
            None,
            None,
        );
        let r = rules_for_id("zz_newlang").expect("user rule registered");
        assert_eq!(r.calculate_indent(&buf("foo:"), 4, 4, |_| true), 4);

        // An id with a family inherits every pattern it does not override.
        set_user_rule("kotlin", Some(r"=>\s*$"), None, None, None, None);
        let k = rules_for_id("kotlin").expect("kotlin via override");
        assert!(
            k.decrease.is_some(),
            "decrease inherited from CurlyBrace family"
        );
        let c = "val f = x =>";
        assert_eq!(k.calculate_indent(&buf(c), c.len(), 4, |_| true), 4);

        // Clearing drops the overrides but leaves the built-in rules alone.
        clear_user_rules();
        assert!(rules_for_id("zz_newlang").is_none(), "override cleared");
        assert!(rules_for_id("kotlin").is_some());
    }
}
// Parity check between the regex rules in this file and `IndentCalculator`.
// Compiled only for test builds with the `tree-sitter` feature enabled.
#[cfg(all(test, feature = "tree-sitter"))]
mod parity {
use super::*;
use crate::model::filesystem::NoopFileSystem;
use crate::primitives::indent::IndentCalculator;
use fresh_languages::Language;
use std::sync::Arc;
/// Builds an in-memory buffer holding `content`.
fn buf(content: &str) -> Buffer {
let fs = Arc::new(NoopFileSystem);
let mut b = Buffer::empty(fs);
b.insert(0, content);
b
}
/// Runs both calculators at the end of each snippet and fails when any
/// pair of results differs.
#[test]
fn rules_match_tree_sitter_on_corpus() {
// Each case: (language for `IndentCalculator`, id for `rules_for_id`, snippet).
let cases: &[(Language, &str, &str)] = &[
(Language::TypeScript, "typescript", "function f() {"),
(Language::TypeScript, "typescript", "class A {"),
(Language::TypeScript, "typescript", "let x = 1;"),
(Language::Go, "go", "func main() {"),
(Language::JavaScript, "javascript", "function f() {"),
];
let tab = 4;
let mut mismatches = Vec::new();
let mut compared = 0;
for (lang, id, code) in cases {
let ts = {
let mut calc = IndentCalculator::new();
calc.calculate_indent(&buf(code), code.len(), lang, tab)
};
// Cases for which `IndentCalculator` yields no answer are skipped and not counted.
let Some(ts) = ts else { continue }; compared += 1;
let rules = rules_for_id(id)
.unwrap_or_else(|| panic!("no rules for {id}"))
.calculate_indent(&buf(code), code.len(), tab, |_| true);
if ts != rules {
mismatches.push(format!(
" {id}: code={code:?} tree-sitter={ts} rules={rules}"
));
}
}
assert!(
mismatches.is_empty(),
"rules tier diverged from tree-sitter on {}/{} compared cases:\n{}",
mismatches.len(),
compared,
mismatches.join("\n")
);
// Guards against the check passing only because most cases were skipped.
assert!(
compared >= 4,
"too few comparable cases ({compared}); guard is vacuous"
);
}
}