tree_house/
parse.rs

1use std::mem::take;
2use std::time::Duration;
3
4use ropey::RopeSlice;
5use tree_sitter::{InactiveQueryCursor, Parser};
6
7use crate::config::LanguageLoader;
8use crate::{Error, LayerData, Syntax, TREE_SITTER_MATCH_LIMIT};
9
10impl Syntax {
11    pub fn update(
12        &mut self,
13        source: RopeSlice,
14        timeout: Duration,
15        edits: &[tree_sitter::InputEdit],
16        loader: &impl LanguageLoader,
17    ) -> Result<(), Error> {
18        // size limit of 512MiB, TS just cannot handle files this big (too
19        // slow). Furthermore, TS uses 32 (signed) bit indices so this limit
20        // must never be raised above 2GiB
21        if source.len_bytes() >= 512 * 1024 * 1024 {
22            return Err(Error::ExceededMaximumSize);
23        }
24
25        let mut queue = Vec::with_capacity(32);
26        let root_flags = &mut self.layer_mut(self.root).flags;
27        // The root layer is always considered.
28        root_flags.touched = true;
29        // If there was an edit then the root layer must've been modified.
30        root_flags.modified = true;
31        queue.push(self.root);
32
33        let mut parser = Parser::new();
34        parser.set_timeout(timeout);
35        let mut cursor = InactiveQueryCursor::new();
36        // TODO: might need to set cursor range
37        cursor.set_byte_range(0..u32::MAX);
38        cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
39
40        while let Some(layer) = queue.pop() {
41            let layer_data = self.layer_mut(layer);
42            if layer_data.ranges.is_empty() {
43                // Skip re-parsing and querying layers without any ranges.
44                continue;
45            }
46            if let Some(tree) = &mut layer_data.parse_tree {
47                if layer_data.flags.moved || layer_data.flags.modified {
48                    for edit in edits.iter().rev() {
49                        // Apply the edits in reverse.
50                        // If we applied them in order then edit 1 would disrupt the positioning
51                        // of edit 2.
52                        tree.edit(edit);
53                    }
54                }
55                if layer_data.flags.modified {
56                    // Re-parse the tree.
57                    layer_data.parse(&mut parser, source, loader)?;
58                }
59            } else {
60                // always parse if this layer has never been parsed before
61                layer_data.parse(&mut parser, source, loader)?;
62            }
63            self.run_injection_query(layer, edits, source, loader, |layer| queue.push(layer));
64            self.run_local_query(layer, source, loader);
65        }
66
67        if self.layer(self.root).parse_tree.is_none() {
68            return Err(Error::NoRootConfig);
69        }
70
71        self.prune_dead_layers();
72        Ok(())
73    }
74
75    /// Reset all `LayerUpdateFlags` and remove all untouched layers
76    fn prune_dead_layers(&mut self) {
77        self.layers
78            .retain(|_, layer| take(&mut layer.flags).touched);
79    }
80}
81
82impl LayerData {
83    fn parse(
84        &mut self,
85        parser: &mut Parser,
86        source: RopeSlice,
87        loader: &impl LanguageLoader,
88    ) -> Result<(), Error> {
89        let Some(config) = loader.get_config(self.language) else {
90            return Ok(());
91        };
92        if let Err(err) = parser.set_grammar(config.grammar) {
93            return Err(Error::IncompatibleGrammar(self.language, err));
94        }
95        parser
96            .set_included_ranges(&self.ranges)
97            .map_err(|_| Error::InvalidRanges)?;
98
99        // HACK:
100        // This is a workaround for a bug within the lexer (in the C library) or maybe within
101        // tree-sitter-markdown which needs more debugging. When adding a new range to a combined
102        // injection and passing the old tree, if the old tree doesn't already cover a wider range
103        // than the newly added range, some assumptions are violated in the lexer and it tries to
104        // access some invalid memory, resulting in a segfault. This workaround avoids that
105        // situation by avoiding passing the old tree when the old tree's range doesn't cover the
106        // total range of `self.ranges`.
107        //
108        // See <https://github.com/helix-editor/helix/pull/12972#issuecomment-2725410409>.
109        let tree = self.parse_tree.as_ref().filter(|tree| {
110            let included_ranges_range = self.ranges.first().map(|r| r.start_byte).unwrap_or(0)
111                ..self.ranges.last().map(|r| r.end_byte).unwrap_or(u32::MAX);
112            // Allow re-parsing the root layer even though the range is larger. The root always
113            // covers `0..u32::MAX`:
114            if included_ranges_range == (0..u32::MAX) {
115                return true;
116            }
117            let tree_range = tree.root_node().byte_range();
118            tree_range.start <= included_ranges_range.start
119                && tree_range.end >= included_ranges_range.end
120        });
121
122        let tree = parser.parse(source, tree).ok_or(Error::Timeout)?;
123        self.parse_tree = Some(tree);
124        Ok(())
125    }
126}
127
/// Per-layer bookkeeping flags used during `Syntax::update`.
///
/// `Syntax::prune_dead_layers` resets the flags of every layer back to their
/// defaults (all `false`) via `std::mem::take`.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct LayerUpdateFlags {
    /// Neither read nor written in this file.
    /// NOTE(review): presumably maintained by the injection query - confirm.
    pub reused: bool,
    /// When set on a layer that already has a tree, `Syntax::update` applies
    /// the pending edits to that tree and re-parses the layer.
    pub modified: bool,
    /// When set on a layer that already has a tree, `Syntax::update` applies
    /// the pending edits to that tree (a re-parse additionally requires
    /// `modified`).
    pub moved: bool,
    /// Layers without this flag are removed by `Syntax::prune_dead_layers`;
    /// `Syntax::update` always sets it on the root layer.
    pub touched: bool,
}