tree_house/
parse.rs

1use std::mem::take;
2use std::time::Duration;
3
4use ropey::RopeSlice;
5use tree_sitter::Parser;
6
7use crate::config::LanguageLoader;
8use crate::{Error, LayerData, Syntax};
9
10impl Syntax {
11    pub fn update(
12        &mut self,
13        source: RopeSlice,
14        timeout: Duration,
15        edits: &[tree_sitter::InputEdit],
16        loader: &impl LanguageLoader,
17    ) -> Result<(), Error> {
18        // size limit of 512MiB, TS just cannot handle files this big (too
19        // slow). Furthermore, TS uses 32 (signed) bit indices so this limit
20        // must never be raised above 2GiB
21        if source.len_bytes() >= 512 * 1024 * 1024 {
22            return Err(Error::ExceededMaximumSize);
23        }
24
25        let mut queue = Vec::with_capacity(32);
26        let root_flags = &mut self.layer_mut(self.root).flags;
27        // The root layer is always considered.
28        root_flags.touched = true;
29        // If there was an edit then the root layer must've been modified.
30        root_flags.modified = true;
31        queue.push(self.root);
32
33        let mut parser = Parser::new();
34        parser.set_timeout(timeout);
35
36        while let Some(layer) = queue.pop() {
37            let layer_data = self.layer_mut(layer);
38            if layer_data.ranges.is_empty() {
39                // Skip re-parsing and querying layers without any ranges.
40                continue;
41            }
42            if let Some(tree) = &mut layer_data.parse_tree {
43                if layer_data.flags.moved || layer_data.flags.modified {
44                    for edit in edits.iter().rev() {
45                        // Apply the edits in reverse.
46                        // If we applied them in order then edit 1 would disrupt the positioning
47                        // of edit 2.
48                        tree.edit(edit);
49                    }
50                }
51                if layer_data.flags.modified {
52                    // Re-parse the tree.
53                    layer_data.parse(&mut parser, source, loader)?;
54                }
55            } else {
56                // always parse if this layer has never been parsed before
57                layer_data.parse(&mut parser, source, loader)?;
58            }
59            self.run_injection_query(layer, edits, source, loader, |layer| queue.push(layer));
60            self.run_local_query(layer, source, loader);
61        }
62
63        if self.layer(self.root).parse_tree.is_none() {
64            return Err(Error::NoRootConfig);
65        }
66
67        self.prune_dead_layers();
68        Ok(())
69    }
70
71    /// Reset all `LayerUpdateFlags` and remove all untouched layers
72    fn prune_dead_layers(&mut self) {
73        self.layers
74            .retain(|_, layer| take(&mut layer.flags).touched);
75    }
76}
77
78impl LayerData {
79    fn parse(
80        &mut self,
81        parser: &mut Parser,
82        source: RopeSlice,
83        loader: &impl LanguageLoader,
84    ) -> Result<(), Error> {
85        let Some(config) = loader.get_config(self.language) else {
86            return Ok(());
87        };
88        if let Err(err) = parser.set_grammar(config.grammar) {
89            return Err(Error::IncompatibleGrammar(self.language, err));
90        }
91        parser
92            .set_included_ranges(&self.ranges)
93            .map_err(|_| Error::InvalidRanges)?;
94
95        // HACK:
96        // This is a workaround for a bug within the lexer (in the C library) or maybe within
97        // tree-sitter-markdown which needs more debugging. When adding a new range to a combined
98        // injection and passing the old tree, if the old tree doesn't already cover a wider range
99        // than the newly added range, some assumptions are violated in the lexer and it tries to
100        // access some invalid memory, resulting in a segfault. This workaround avoids that
101        // situation by avoiding passing the old tree when the old tree's range doesn't cover the
102        // total range of `self.ranges`.
103        //
104        // See <https://github.com/helix-editor/helix/pull/12972#issuecomment-2725410409>.
105        let tree = self.parse_tree.as_ref().filter(|tree| {
106            let included_ranges_range = self.ranges.first().map(|r| r.start_byte).unwrap_or(0)
107                ..self.ranges.last().map(|r| r.end_byte).unwrap_or(u32::MAX);
108            // Allow re-parsing the root layer even though the range is larger. The root always
109            // covers `0..u32::MAX`:
110            if included_ranges_range == (0..u32::MAX) {
111                return true;
112            }
113            let tree_range = tree.root_node().byte_range();
114            tree_range.start <= included_ranges_range.start
115                && tree_range.end >= included_ranges_range.end
116        });
117
118        let tree = parser.parse(source, tree).ok_or(Error::Timeout)?;
119        self.parse_tree = Some(tree);
120        Ok(())
121    }
122}
123
/// Per-layer bookkeeping used while a syntax tree is being updated.
///
/// Every flag defaults to `false`.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub(crate) struct LayerUpdateFlags {
    // NOTE(review): not read or written in this file; presumably maintained
    // by the injection query code. Confirm against its users.
    pub reused: bool,
    /// The layer's existing tree receives the edits and is re-parsed by
    /// `Syntax::update`, which always sets this flag on the root layer.
    pub modified: bool,
    /// The layer's existing tree receives the edits; this flag alone does not
    /// cause a re-parse.
    pub moved: bool,
    /// Layers without this flag are removed at the end of `Syntax::update`,
    /// which always sets it on the root layer.
    pub touched: bool,
}