Skip to main content

editor_core_treesitter/
indenter.rs

1use crate::{TreeSitterError, TreeSitterLanguage};
2use editor_core::{IndentStyle, LineIndex, TextEditSpec};
3use std::collections::HashSet;
4use streaming_iterator::StreamingIterator;
5use tree_sitter::{Parser, Query, QueryCursor, Tree};
6
7/// Configuration for [`TreeSitterIndenter`].
8#[derive(Debug, Clone)]
9pub struct TreeSitterIndenterConfig {
10    /// Tree-sitter language source (native or WASM).
11    pub language: TreeSitterLanguage,
12    /// Indentation query (`indents.scm`).
13    pub indents_query: String,
14}
15
16impl TreeSitterIndenterConfig {
17    /// Create a config from a language source and query text.
18    pub fn new(language: TreeSitterLanguage, indents_query: impl Into<String>) -> Self {
19        Self {
20            language,
21            indents_query: indents_query.into(),
22        }
23    }
24}
25
26/// A small Tree-sitter powered indenter driven by an `indents.scm` query.
27///
28/// This is an **optional** companion to [`crate::TreeSitterProcessor`]:
29/// - The processor focuses on derived state (highlighting/folding).
30/// - The indenter focuses on edit-time indentation decisions (auto-indent on newline).
31///
32/// Query conventions (minimal subset):
33/// - Captures named `@indent` indicate syntax nodes that increase indentation for their contents.
34/// - Captures named `@outdent` indicate syntax tokens/nodes that outdent when they begin a line.
35///
36/// Notes:
37/// - This implementation is intentionally conservative; it aims to provide a solid baseline.
38pub struct TreeSitterIndenter {
39    config: TreeSitterIndenterConfig,
40    parser: Parser,
41    indent_query: Query,
42    tree: Option<Tree>,
43    text: String,
44    line_index: LineIndex,
45    last_synced_version: Option<u64>,
46    // Keep the Wasmtime engine alive for the lifetime of the parser's Wasm store.
47    #[allow(dead_code)]
48    wasm_engine: Option<tree_sitter::wasmtime::Engine>,
49}
50
51impl TreeSitterIndenter {
52    /// Create a new indenter from the given config.
53    pub fn new(config: TreeSitterIndenterConfig) -> Result<Self, TreeSitterError> {
54        let mut parser = Parser::new();
55
56        let (language, wasm_engine) = match &config.language {
57            TreeSitterLanguage::Native(language) => {
58                parser
59                    .set_language(language)
60                    .map_err(|e| TreeSitterError::Language(e.to_string()))?;
61                (language.clone(), None)
62            }
63            TreeSitterLanguage::Wasm {
64                language_id,
65                wasm_bytes,
66            } => {
67                let engine = tree_sitter::wasmtime::Engine::default();
68
69                // Parser store (must use the same engine as the language).
70                let store = tree_sitter::WasmStore::new(&engine)
71                    .map_err(|e| TreeSitterError::Wasm(e.to_string()))?;
72                parser
73                    .set_wasm_store(store)
74                    .map_err(|e| TreeSitterError::Language(e.to_string()))?;
75
76                // Load the language (can use a separate store, but must share the same engine).
77                let mut store = tree_sitter::WasmStore::new(&engine)
78                    .map_err(|e| TreeSitterError::Wasm(e.to_string()))?;
79                let language = store
80                    .load_language(language_id, wasm_bytes)
81                    .map_err(|e| TreeSitterError::Wasm(e.to_string()))?;
82
83                parser
84                    .set_language(&language)
85                    .map_err(|e| TreeSitterError::Language(e.to_string()))?;
86
87                (language, Some(engine))
88            }
89        };
90
91        let indent_query = Query::new(&language, &config.indents_query)
92            .map_err(|e| TreeSitterError::Query(e.to_string()))?;
93
94        Ok(Self {
95            config,
96            parser,
97            indent_query,
98            tree: None,
99            text: String::new(),
100            line_index: LineIndex::new(),
101            last_synced_version: None,
102            wasm_engine,
103        })
104    }
105
106    /// Synchronize the indenter with the given editor `version` and full `text`.
107    ///
108    /// Notes:
109    /// - This API is intentionally "full text" based (no `TextDelta` required), to make it easy
110    ///   to drive from UI/FFI boundaries.
111    /// - Re-parsing is skipped when `version` is unchanged.
112    pub fn sync_to_text(&mut self, version: u64, text: &str) -> Result<(), TreeSitterError> {
113        if self.last_synced_version == Some(version) {
114            return Ok(());
115        }
116
117        self.text.clear();
118        self.text.push_str(text);
119        self.line_index = LineIndex::from_text(&self.text);
120
121        self.tree = self.parser.parse(&self.text, None);
122        if self.tree.is_none() {
123            return Err(TreeSitterError::Language(
124                "tree-sitter parse returned None".to_string(),
125            ));
126        }
127
128        self.last_synced_version = Some(version);
129        Ok(())
130    }
131
132    fn is_indent_capture(name: &str) -> bool {
133        name == "indent" || name.starts_with("indent.")
134    }
135
136    fn is_outdent_capture(name: &str) -> bool {
137        name == "outdent" || name.starts_with("outdent.") || name == "dedent"
138    }
139
140    fn indent_unit(indent_style: IndentStyle) -> String {
141        match indent_style {
142            IndentStyle::Tabs => "\t".to_string(),
143            IndentStyle::Spaces(width) => " ".repeat(width.max(1) as usize),
144        }
145    }
146
147    /// Compute the desired leading whitespace for a logical line.
148    ///
149    /// Returns `None` if the indenter is not yet synchronized (call [`Self::sync_to_text`] first),
150    /// or if `line` is out of range.
151    pub fn indent_string_for_line(&self, line: usize, indent_style: IndentStyle) -> Option<String> {
152        let tree = self.tree.as_ref()?;
153        let line_text = self.line_index.get_line_text(line)?;
154
155        let line_start_char = self.line_index.position_to_char_offset(line, 0);
156        let line_start_byte = self.line_index.char_offset_to_byte_offset(line_start_char);
157
158        // Use the *next line start* as the end bound when possible so empty lines still have
159        // a non-empty search range (include the newline byte).
160        let line_end_byte = if line + 1 < self.line_index.line_count() {
161            let next_line_start_char = self.line_index.position_to_char_offset(line + 1, 0);
162            self.line_index
163                .char_offset_to_byte_offset(next_line_start_char)
164        } else {
165            self.text.len()
166        };
167
168        let leading_ws_bytes = line_text
169            .bytes()
170            .take_while(|b| *b == b' ' || *b == b'\t')
171            .count();
172
173        let mut cursor = QueryCursor::new();
174        if line_end_byte > line_start_byte {
175            cursor.set_byte_range(line_start_byte..line_end_byte);
176        }
177
178        let root = tree.root_node();
179        let capture_names = self.indent_query.capture_names();
180
181        let mut indent_nodes: HashSet<(usize, usize)> = HashSet::new();
182        let mut indent_count = 0usize;
183        let mut should_outdent = false;
184
185        let mut matches = cursor.matches(&self.indent_query, root, self.text.as_bytes());
186        while let Some(m) = matches.next() {
187            for capture in m.captures {
188                let name = capture_names
189                    .get(capture.index as usize)
190                    .copied()
191                    .unwrap_or("");
192                let node = capture.node;
193
194                if Self::is_indent_capture(name) {
195                    // De-dupe by node byte range to avoid double-counting overlapping query patterns.
196                    if !indent_nodes.insert((node.start_byte(), node.end_byte())) {
197                        continue;
198                    }
199
200                    let start_row = node.start_position().row;
201                    let end_row = node.end_position().row;
202
203                    // Indent applies to lines *after* the node's start row, up to and including
204                    // its end row (outdent capture can offset this on closing lines).
205                    if line > start_row && line <= end_row {
206                        indent_count = indent_count.saturating_add(1);
207                    }
208
209                    continue;
210                }
211
212                if Self::is_outdent_capture(name) {
213                    let start_pos = node.start_position();
214                    if start_pos.row != line {
215                        continue;
216                    }
217
218                    // Only outdent when the token begins the line (ignoring leading whitespace).
219                    if start_pos.column <= leading_ws_bytes {
220                        should_outdent = true;
221                    }
222                }
223            }
224        }
225
226        let indent_level = indent_count.saturating_sub(if should_outdent { 1 } else { 0 });
227        let unit = Self::indent_unit(indent_style);
228        Some(unit.repeat(indent_level))
229    }
230
231    /// Build a text edit that replaces the current line's leading whitespace with the computed
232    /// Tree-sitter indentation.
233    ///
234    /// Returns `None` if no change is needed, if the indenter is not synchronized, or if `line`
235    /// is out of range.
236    pub fn reindent_text_edit_for_line(
237        &self,
238        line: usize,
239        indent_style: IndentStyle,
240    ) -> Option<TextEditSpec> {
241        let line_text = self.line_index.get_line_text(line)?;
242        let desired = self.indent_string_for_line(line, indent_style)?;
243
244        let existing_prefix: String = line_text
245            .chars()
246            .take_while(|ch| *ch == ' ' || *ch == '\t')
247            .collect();
248
249        if existing_prefix == desired {
250            return None;
251        }
252
253        let prefix_len = existing_prefix.chars().count();
254        let start = self.line_index.position_to_char_offset(line, 0);
255        let end = self.line_index.position_to_char_offset(line, prefix_len);
256
257        Some(TextEditSpec {
258            start,
259            end,
260            text: desired,
261        })
262    }
263}
264
265impl std::fmt::Debug for TreeSitterIndenter {
266    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
267        f.debug_struct("TreeSitterIndenter")
268            .field("config.language", &"<language>")
269            .field("config.indents_query.len", &self.config.indents_query.len())
270            .field("has_tree", &self.tree.is_some())
271            .field("text.len", &self.text.len())
272            .field("last_synced_version", &self.last_synced_version)
273            .finish()
274    }
275}