Skip to main content

sipha/
incremental.rs

1//! Incremental reparse: reuse unchanged parts of the green tree after a text edit.
2//!
3//! Full re-parse with reuse: run the parser on the new source, then when building
4//! the green tree, reuse `Arc<GreenToken>` and `Arc<GreenNode>` from the old tree
5//! when the new parse produces the same span (in unchanged regions).
6
7use std::collections::HashMap;
8use std::sync::Arc;
9
10use crate::engine::{Engine, ParseError};
11use crate::green::{GreenElement, GreenNode, GreenToken};
12use crate::insn::ParseGraph;
13use crate::red::SyntaxNode;
14use crate::types::{FieldId, Pos, SyntaxKind, TreeEvent};
15
16/// Stack element when building green tree from events (with or without reuse).
17type IncrementalStackEntry = (
18    SyntaxKind,
19    Option<FieldId>,
20    Vec<(Option<FieldId>, GreenElement)>,
21);
22/// Root or child element: (optional field id, node or token).
23type IncrementalRootElem = (Option<FieldId>, GreenElement);
24
25/// A text edit: replace `old_source[start..end]` with `new_text`.
26#[derive(Clone, Debug)]
27pub struct TextEdit {
28    /// Start byte offset in the old source (inclusive).
29    pub start: Pos,
30    /// End byte offset in the old source (exclusive).
31    pub end: Pos,
32    /// Replacement bytes (UTF-8).
33    pub new_text: Vec<u8>,
34}
35
36impl TextEdit {
37    /// Build the new source buffer after applying this edit.
38    #[must_use]
39    pub fn apply(&self, old_source: &[u8]) -> Vec<u8> {
40        let mut out = Vec::with_capacity(
41            old_source
42                .len()
43                .saturating_sub((self.end - self.start) as usize)
44                + self.new_text.len(),
45        );
46        out.extend_from_slice(&old_source[..self.start as usize]);
47        out.extend_from_slice(&self.new_text);
48        out.extend_from_slice(&old_source[self.end as usize..]);
49        out
50    }
51}
52
53/// Map a span in the *new* source to the corresponding span in the *old* source
54/// if that range is unchanged; otherwise return `None`.
55#[allow(clippy::cast_possible_truncation)] // const fn; new_text_len is small in practice
56const fn new_span_to_old(
57    new_start: Pos,
58    new_end: Pos,
59    edit_start: Pos,
60    edit_end: Pos,
61    new_text_len: usize,
62) -> Option<(Pos, Pos)> {
63    let new_text_len_u = new_text_len as u32;
64    let delta = (edit_start + new_text_len_u).saturating_sub(edit_end);
65    if new_end <= edit_start {
66        Some((new_start, new_end))
67    } else if new_start >= edit_start + new_text_len_u {
68        Some((new_start - delta, new_end - delta))
69    } else {
70        None
71    }
72}
73
74/// Remove and return trailing trivia from `children` (from the end, while elements are trivia).
75/// Must match `green::drain_trailing_trivia` so incremental tree structure matches full parse.
76fn drain_trailing_trivia(children: &mut Vec<IncrementalRootElem>) -> Vec<IncrementalRootElem> {
77    let n = children
78        .iter()
79        .rev()
80        .take_while(|(_, el)| el.is_trivia())
81        .count();
82    if n == 0 {
83        return Vec::new();
84    }
85    children.drain(children.len() - n..).collect()
86}
87
88/// Build a map from (`old_start`, `old_end`) to the green token at that span by walking the tree.
89fn old_tree_token_map(
90    root: &GreenNode,
91    _old_source: &[u8],
92) -> HashMap<(Pos, Pos), Arc<GreenToken>> {
93    let mut map = HashMap::new();
94    let mut stack: Vec<(&GreenNode, Pos)> = vec![(root, 0)];
95    while let Some((node, offset)) = stack.pop() {
96        let mut off = offset;
97        for child in &node.children {
98            let start = off;
99            off += child.text_len();
100            match child {
101                GreenElement::Token(t) => {
102                    map.insert((start, off), Arc::clone(t));
103                }
104                GreenElement::Node(n) => {
105                    stack.push((n.as_ref(), start));
106                }
107            }
108        }
109    }
110    map
111}
112
113/// Build a green tree from events, reusing tokens (and optionally nodes) from the old tree
114/// when the new parse produces the same span in an unchanged region.
115#[must_use]
116pub fn build_green_tree_with_reuse(
117    new_source: &[u8],
118    events: &[TreeEvent],
119    old_root: &GreenNode,
120    old_source: &[u8],
121    edit: &TextEdit,
122) -> Option<Arc<GreenNode>> {
123    let token_map = old_tree_token_map(old_root, old_source);
124    let edit_start = edit.start;
125    let edit_end = edit.end;
126    let new_text_len = edit.new_text.len();
127
128    let mut stack: Vec<IncrementalStackEntry> = Vec::new();
129    let mut roots: Vec<IncrementalRootElem> = Vec::new();
130
131    for ev in events {
132        match *ev {
133            TreeEvent::NodeOpen { kind, field, .. } => {
134                // Move trailing trivia from the current top node into the new node as leading trivia.
135                // Must match build_green_tree so tree structure (and thus token positions) is identical to full parse.
136                let leading = if let Some((_, _, children)) = stack.last_mut() {
137                    drain_trailing_trivia(children)
138                } else {
139                    Vec::new()
140                };
141                stack.push((kind, field, leading));
142            }
143
144            TreeEvent::NodeClose { .. } => {
145                let (kind, my_field, children_with_fields) = stack.pop()?;
146                let node = GreenNode::new_with_fields(kind, children_with_fields);
147                push_element(&mut stack, &mut roots, (my_field, GreenElement::Node(node)));
148            }
149
150            TreeEvent::Token {
151                kind,
152                start,
153                end,
154                is_trivia,
155            } => {
156                if start == end {
157                    continue;
158                }
159                let tok_arc = if let Some((old_s, old_e)) =
160                    new_span_to_old(start, end, edit_start, edit_end, new_text_len)
161                {
162                    token_map.get(&(old_s, old_e)).cloned()
163                } else {
164                    None
165                };
166                let tok = match tok_arc {
167                    Some(t) if t.kind == kind && t.is_trivia == is_trivia => t,
168                    _ => {
169                        let text = new_source
170                            .get(start as usize..end as usize)
171                            .and_then(|b| std::str::from_utf8(b).ok())
172                            .unwrap_or("");
173                        GreenToken::new(kind, text, is_trivia)
174                    }
175                };
176                push_element(&mut stack, &mut roots, (None, GreenElement::Token(tok)));
177            }
178        }
179    }
180
181    if !stack.is_empty() {
182        return None;
183    }
184
185    match roots.len() {
186        0 => None,
187        1 => {
188            let (_, elem) = roots.remove(0);
189            match elem {
190                GreenElement::Node(n) => Some(n),
191                GreenElement::Token(t) => {
192                    Some(GreenNode::new(t.kind, vec![GreenElement::Token(t)]))
193                }
194            }
195        }
196        _ => {
197            let children_with_fields: Vec<IncrementalRootElem> = roots.into_iter().collect();
198            Some(GreenNode::new_with_fields(u16::MAX, children_with_fields))
199        }
200    }
201}
202
203#[inline]
204#[allow(clippy::ptr_arg)] // need Vec for .push
205fn push_element(
206    stack: &mut Vec<IncrementalStackEntry>,
207    roots: &mut Vec<IncrementalRootElem>,
208    elem: IncrementalRootElem,
209) {
210    match stack.last_mut() {
211        Some((_, _, children)) => children.push(elem),
212        None => roots.push(elem),
213    }
214}
215
216/// Reparse after a text edit: build new source, run the parser, then build the green tree
217/// reusing unchanged tokens from the old tree.
218///
219/// Returns the new syntax root, or `None` if the parse produced no root (e.g. empty or error).
220///
221/// # Errors
222///
223/// Propagates [`ParseError`] from the parser if the new source fails to parse.
224pub fn reparse(
225    engine: &mut Engine,
226    graph: &ParseGraph,
227    old_source: &[u8],
228    old_root: &SyntaxNode,
229    edit: &TextEdit,
230) -> Result<Option<SyntaxNode>, ParseError> {
231    let new_source = edit.apply(old_source);
232    let out = engine.parse(graph, &new_source)?;
233    let new_green = build_green_tree_with_reuse(
234        &new_source,
235        &out.tree_events,
236        old_root.green(),
237        old_source,
238        edit,
239    );
240    Ok(new_green.map(SyntaxNode::new_root))
241}
242
243#[cfg(test)]
244mod tests {
245    use super::*;
246    use crate::green::build_green_tree;
247    use crate::types::TreeEvent;
248
249    #[test]
250    fn text_edit_apply() {
251        let old = b"hello world";
252        let edit = TextEdit {
253            start: 0,
254            end: 5,
255            new_text: b"hi".to_vec(),
256        };
257        let new = edit.apply(old);
258        assert_eq!(new.as_slice(), b"hi world");
259    }
260
261    #[test]
262    fn reparse_reuses_unchanged_tokens() {
263        let old_source = b"ab";
264        let events_old = [
265            TreeEvent::NodeOpen {
266                kind: 1,
267                field: None,
268                pos: 0,
269            },
270            TreeEvent::Token {
271                kind: 10,
272                start: 0,
273                end: 1,
274                is_trivia: false,
275            },
276            TreeEvent::Token {
277                kind: 10,
278                start: 1,
279                end: 2,
280                is_trivia: false,
281            },
282            TreeEvent::NodeClose { pos: 2 },
283        ];
284        let old_root = build_green_tree(old_source, &events_old).expect("build");
285        let edit = TextEdit {
286            start: 1,
287            end: 2,
288            new_text: b"xy".to_vec(),
289        };
290        let new_source = edit.apply(old_source);
291        assert_eq!(new_source.as_slice(), b"axy");
292        let events_new = [
293            TreeEvent::NodeOpen {
294                kind: 1,
295                field: None,
296                pos: 0,
297            },
298            TreeEvent::Token {
299                kind: 10,
300                start: 0,
301                end: 1,
302                is_trivia: false,
303            },
304            TreeEvent::Token {
305                kind: 10,
306                start: 1,
307                end: 3,
308                is_trivia: false,
309            },
310            TreeEvent::NodeClose { pos: 3 },
311        ];
312        let new_root =
313            build_green_tree_with_reuse(&new_source, &events_new, &old_root, old_source, &edit)
314                .expect("build_with_reuse");
315        assert_eq!(new_root.text_len, 3);
316        let first_tok = match &new_root.children[0] {
317            GreenElement::Token(t) => t.as_ref(),
318            _ => panic!("expected token"),
319        };
320        let second_tok = match &new_root.children[1] {
321            GreenElement::Token(t) => t.as_ref(),
322            _ => panic!("expected token"),
323        };
324        assert_eq!(first_tok.text(), "a");
325        assert_eq!(second_tok.text(), "xy");
326        if let (GreenElement::Token(new_t), GreenElement::Token(old_t)) =
327            (&new_root.children[0], &old_root.children[0])
328        {
329            assert!(
330                Arc::ptr_eq(new_t, old_t),
331                "first token (unchanged span) should be reused"
332            );
333        }
334    }
335}