tree_house/
highlighter.rs

1use std::borrow::Cow;
2use std::fmt;
3use std::mem::replace;
4use std::num::NonZeroU32;
5use std::ops::RangeBounds;
6use std::slice;
7use std::sync::Arc;
8
9use crate::config::{LanguageConfig, LanguageLoader};
10use crate::locals::ScopeCursor;
11use crate::query_iter::{MatchedNode, QueryIter, QueryIterEvent, QueryLoader};
12use crate::{Injection, Language, Layer, Syntax};
13use arc_swap::ArcSwap;
14use hashbrown::{HashMap, HashSet};
15use ropey::RopeSlice;
16use tree_sitter::{
17    query::{self, InvalidPredicateError, Query, UserPredicate},
18    Capture, Grammar,
19};
20use tree_sitter::{Pattern, QueryMatch};
21
22/// Contains the data needed to highlight code written in a particular language.
23///
24/// This struct is immutable and can be shared between threads.
25#[derive(Debug)]
26pub struct HighlightQuery {
27    pub query: Query,
28    highlight_indices: ArcSwap<Vec<Option<Highlight>>>,
29    #[allow(dead_code)]
30    /// Patterns that do not match when the node is a local.
31    non_local_patterns: HashSet<Pattern>,
32    local_reference_capture: Option<Capture>,
33}
34
35impl HighlightQuery {
36    pub(crate) fn new(
37        grammar: Grammar,
38        highlight_query_text: &str,
39        local_query_text: &str,
40    ) -> Result<Self, query::ParseError> {
41        // Concatenate the highlights and locals queries.
42        let mut query_source =
43            String::with_capacity(highlight_query_text.len() + local_query_text.len());
44        query_source.push_str(highlight_query_text);
45        query_source.push_str(local_query_text);
46
47        let mut non_local_patterns = HashSet::new();
48        let mut query = Query::new(grammar, &query_source, |pattern, predicate| {
49            match predicate {
50                // Allow the `(#set! local.scope-inherits <bool>)` property to be parsed.
51                // This information is not used by this query though, it's used in the
52                // injection query instead.
53                UserPredicate::SetProperty {
54                    key: "local.scope-inherits",
55                    ..
56                } => (),
57                // TODO: `(#is(-not)? local)` applies to the entire pattern. Ideally you
58                // should be able to supply capture(s?) which are each checked.
59                UserPredicate::IsPropertySet {
60                    negate: true,
61                    key: "local",
62                    val: None,
63                } => {
64                    non_local_patterns.insert(pattern);
65                }
66                _ => return Err(InvalidPredicateError::unknown(predicate)),
67            }
68            Ok(())
69        })?;
70
71        // The highlight query only cares about local.reference captures. All scope and definition
72        // captures can be disabled.
73        query.disable_capture("local.scope");
74        let local_definition_captures: Vec<_> = query
75            .captures()
76            .filter(|&(_, name)| name.starts_with("local.definition."))
77            .map(|(_, name)| Box::<str>::from(name))
78            .collect();
79        for name in local_definition_captures {
80            query.disable_capture(&name);
81        }
82
83        Ok(Self {
84            highlight_indices: ArcSwap::from_pointee(vec![None; query.num_captures() as usize]),
85            non_local_patterns,
86            local_reference_capture: query.get_capture("local.reference"),
87            query,
88        })
89    }
90
91    /// Configures the list of recognized highlight names.
92    ///
93    /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
94    /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of
95    /// these queries can choose to recognize highlights with different levels of specificity.
96    /// For example, the string `function.builtin` will match against `function.builtin.constructor`
97    /// but will not match `function.method.builtin` and `function.method`.
98    ///
99    /// The closure provided to this function should therefore try to first lookup the full
100    /// name. If no highlight was found for that name it should [`rsplit_once('.')`](str::rsplit_once)
101    /// and retry until a highlight has been found. If none of the parent scopes are defined
102    /// then `Highlight::NONE` should be returned.
103    ///
104    /// When highlighting, results are returned as `Highlight` values, configured by this function.
105    /// The meaning of these indices is up to the user of the implementation. The highlighter
106    /// treats the indices as entirely opaque.
107    pub(crate) fn configure(&self, f: &mut impl FnMut(&str) -> Option<Highlight>) {
108        let highlight_indices = self
109            .query
110            .captures()
111            .map(|(_, capture_name)| f(capture_name))
112            .collect();
113        self.highlight_indices.store(Arc::new(highlight_indices));
114    }
115}
116
/// Indicates which highlight should be applied to a region of source code.
///
/// This type is represented as a non-max u32 - a u32 which cannot be `u32::MAX`. The value
/// is stored with all bits inverted inside a `NonZeroU32`: inverting maps `u32::MAX`, the
/// one excluded value, to zero. `Highlight::new` checks this at runtime and panics if it
/// is given `u32::MAX`.
#[derive(Copy, Clone, PartialEq, Eq)]
pub struct Highlight(NonZeroU32);

impl Highlight {
    /// The largest value a `Highlight` can hold.
    pub const MAX: u32 = u32::MAX - 1;

    /// Creates a highlight holding `inner`.
    ///
    /// # Panics
    ///
    /// Panics if `inner` is `u32::MAX`.
    pub const fn new(inner: u32) -> Self {
        // The inverted value is zero exactly when `inner` is `u32::MAX`, so the checked
        // constructor both validates the input and builds the representation.
        match NonZeroU32::new(inner ^ u32::MAX) {
            Some(inverted) => Self(inverted),
            None => panic!("a `Highlight` cannot hold `u32::MAX`"),
        }
    }

    /// Returns the value this highlight was created with.
    pub const fn get(&self) -> u32 {
        // Undo the inversion applied by `new`.
        !self.0.get()
    }

    /// Returns the value as a `usize`, for use as an index.
    pub const fn idx(&self) -> usize {
        self.get() as usize
    }
}

/// Prints the original value rather than the inverted representation.
impl fmt::Debug for Highlight {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_tuple("Highlight").field(&self.get()).finish()
    }
}
147
/// A highlight that has been started, together with the offset at which it stops.
#[derive(Debug)]
struct HighlightedNode {
    /// End of the byte range of the node that produced the highlight.
    end: u32,
    /// The highlight that applies until `end`.
    highlight: Highlight,
}
153
/// Highlight bookkeeping that the highlighter keeps for each injection layer.
#[derive(Debug, Default)]
pub struct LayerData {
    /// Length of the active highlight stack at the moment the layer was last entered.
    /// Entries from this index onwards were added while inside the layer.
    parent_highlights: usize,
    /// Highlights that were taken off the active stack when the layer was left without
    /// being finished. They are put back on the stack when the layer is entered again.
    dormant_highlights: Vec<HighlightedNode>,
}
159
/// Steps through the highlight query events of a syntax tree and keeps track of the
/// highlights that are active at the current position.
pub struct Highlighter<'a, 'tree, Loader: LanguageLoader> {
    /// Produces the match and injection events that drive the highlighter.
    query: QueryIter<'a, 'tree, HighlightQueryLoader<&'a Loader>, ()>,
    /// The event that was last taken from `query` but has not been handled yet.
    next_query_event: Option<QueryIterEvent<'tree, ()>>,
    /// Stack of the highlights that are currently open.
    active_highlights: Vec<HighlightedNode>,
    /// End offset of the highlight on top of `active_highlights`, or `u32::MAX` when the
    /// stack is empty.
    next_highlight_end: u32,
    /// Start offset of `next_query_event`, or `u32::MAX` when there is no further event.
    next_highlight_start: u32,
    /// Config of the language of the current layer, if the loader has one for it.
    active_config: Option<&'a LanguageConfig>,
    /// The layer that the events handled by the highlighter currently belong to.
    // The current layer and per-layer state could be tracked on the QueryIter itself (see
    // `QueryIter::current_layer` and `QueryIter::layer_state`) however the highlighter peeks the
    // query iter. The query iter is always one event ahead, so it will enter/exit injections
    // before we get a chance to in the highlighter. So instead we track these on the highlighter.
    // Also see `Self::advance_query_iter`.
    current_layer: Layer,
    /// Highlight state of the layers that have been entered and are not finished yet.
    layer_states: HashMap<Layer, LayerData>,
}
175
176pub struct HighlightList<'a>(slice::Iter<'a, HighlightedNode>);
177
178impl Iterator for HighlightList<'_> {
179    type Item = Highlight;
180
181    fn next(&mut self) -> Option<Highlight> {
182        self.0.next().map(|node| node.highlight)
183    }
184
185    fn size_hint(&self) -> (usize, Option<usize>) {
186        self.0.size_hint()
187    }
188}
189
190impl DoubleEndedIterator for HighlightList<'_> {
191    fn next_back(&mut self) -> Option<Self::Item> {
192        self.0.next_back().map(|node| node.highlight)
193    }
194}
195
196impl ExactSizeIterator for HighlightList<'_> {
197    fn len(&self) -> usize {
198        self.0.len()
199    }
200}
201
/// Tells the caller of `Highlighter::advance` how to interpret the highlights that are
/// returned alongside the event.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HighlightEvent {
    /// Reset the active set of highlights to the given ones.
    Refresh,
    /// Add more highlights which build on the existing highlights.
    Push,
}
209
210impl<'a, 'tree: 'a, Loader: LanguageLoader> Highlighter<'a, 'tree, Loader> {
211    pub fn new(
212        syntax: &'tree Syntax,
213        src: RopeSlice<'a>,
214        loader: &'a Loader,
215        range: impl RangeBounds<u32>,
216    ) -> Self {
217        let mut query = QueryIter::new(syntax, src, HighlightQueryLoader(loader), range);
218        let active_language = query.current_language();
219        let mut res = Highlighter {
220            active_config: query.loader().0.get_config(active_language),
221            next_query_event: None,
222            current_layer: query.current_layer(),
223            layer_states: Default::default(),
224            active_highlights: Vec::new(),
225            next_highlight_end: u32::MAX,
226            next_highlight_start: 0,
227            query,
228        };
229        res.advance_query_iter();
230        res
231    }
232
233    pub fn active_highlights(&self) -> HighlightList<'_> {
234        HighlightList(self.active_highlights.iter())
235    }
236
237    pub fn next_event_offset(&self) -> u32 {
238        self.next_highlight_start.min(self.next_highlight_end)
239    }
240
241    pub fn advance(&mut self) -> (HighlightEvent, HighlightList<'_>) {
242        let mut refresh = false;
243        let prev_stack_size = self.active_highlights.len();
244
245        let pos = self.next_event_offset();
246        if self.next_highlight_end == pos {
247            self.process_highlight_end(pos);
248            refresh = true;
249        }
250
251        let mut first_highlight = true;
252        while self.next_highlight_start == pos {
253            let Some(query_event) = self.advance_query_iter() else {
254                break;
255            };
256            match query_event {
257                QueryIterEvent::EnterInjection(injection) => self.enter_injection(injection.layer),
258                QueryIterEvent::Match(node) => self.start_highlight(node, &mut first_highlight),
259                QueryIterEvent::ExitInjection { injection, state } => {
260                    // state is returned if the layer is finished, if it isn't we have
261                    // a combined injection and need to deactivate its highlights
262                    if state.is_none() {
263                        self.deactivate_layer(injection);
264                        refresh = true;
265                    } else {
266                        self.layer_states.remove(&injection.layer);
267                    }
268                    let active_language = self.query.syntax().layer(self.current_layer).language;
269                    self.active_config = self.query.loader().0.get_config(active_language);
270                }
271            }
272        }
273        self.next_highlight_end = self
274            .active_highlights
275            .last()
276            .map_or(u32::MAX, |node| node.end);
277
278        if refresh {
279            (
280                HighlightEvent::Refresh,
281                HighlightList(self.active_highlights.iter()),
282            )
283        } else {
284            (
285                HighlightEvent::Push,
286                HighlightList(self.active_highlights[prev_stack_size..].iter()),
287            )
288        }
289    }
290
291    fn advance_query_iter(&mut self) -> Option<QueryIterEvent<'tree, ()>> {
292        // Track the current layer **before** calling `QueryIter::next`. The QueryIter moves
293        // to the next event with `QueryIter::next` but we're treating that event as peeked - it
294        // hasn't occurred yet - so the current layer is the one the query iter was on _before_
295        // `QueryIter::next`.
296        self.current_layer = self.query.current_layer();
297        let event = replace(&mut self.next_query_event, self.query.next());
298        self.next_highlight_start = self
299            .next_query_event
300            .as_ref()
301            .map_or(u32::MAX, |event| event.start_byte());
302        event
303    }
304
305    fn process_highlight_end(&mut self, pos: u32) {
306        let i = self
307            .active_highlights
308            .iter()
309            .rposition(|highlight| highlight.end != pos)
310            .map_or(0, |i| i + 1);
311        self.active_highlights.truncate(i);
312    }
313
314    fn enter_injection(&mut self, layer: Layer) {
315        debug_assert_eq!(layer, self.current_layer);
316        let active_language = self.query.syntax().layer(layer).language;
317        self.active_config = self.query.loader().0.get_config(active_language);
318
319        let state = self.layer_states.entry(layer).or_default();
320        state.parent_highlights = self.active_highlights.len();
321        self.active_highlights.append(&mut state.dormant_highlights);
322    }
323
324    fn deactivate_layer(&mut self, injection: Injection) {
325        let LayerData {
326            mut parent_highlights,
327            ref mut dormant_highlights,
328            ..
329        } = self.layer_states.get_mut(&injection.layer).unwrap();
330        parent_highlights = parent_highlights.min(self.active_highlights.len());
331        dormant_highlights.extend(self.active_highlights.drain(parent_highlights..));
332        self.process_highlight_end(injection.range.end);
333    }
334
335    fn start_highlight(&mut self, node: MatchedNode, first_highlight: &mut bool) {
336        let range = node.node.byte_range();
337        // `<QueryIter as Iterator>::next` skips matches with empty ranges.
338        debug_assert!(
339            !range.is_empty(),
340            "QueryIter should not emit matches with empty ranges"
341        );
342
343        let config = self
344            .active_config
345            .expect("must have an active config to emit matches");
346
347        let highlight = if Some(node.capture) == config.highlight_query.local_reference_capture {
348            // If this capture was a `@local.reference` from the locals queries, look up the
349            // text of the node in the current locals cursor and use that highlight.
350            let text: Cow<str> = self
351                .query
352                .source()
353                .byte_slice(range.start as usize..range.end as usize)
354                .into();
355            let Some(definition) = self
356                .query
357                .syntax()
358                .layer(self.current_layer)
359                .locals
360                .lookup_reference(node.scope, &text)
361                .filter(|def| range.start >= def.range.end)
362            else {
363                return;
364            };
365            config
366                .injection_query
367                .local_definition_captures
368                .load()
369                .get(&definition.capture)
370                .copied()
371        } else {
372            config.highlight_query.highlight_indices.load()[node.capture.idx()]
373        };
374
375        // If multiple patterns match this exact node, prefer the last one which matched.
376        // This matches the precedence of Neovim, Zed, and tree-sitter-cli.
377        if !*first_highlight
378            && self
379                .active_highlights
380                .last()
381                .is_some_and(|prev_node| prev_node.end == range.end)
382        {
383            self.active_highlights.pop();
384        }
385        if let Some(highlight) = highlight {
386            self.active_highlights.push(HighlightedNode {
387                end: range.end,
388                highlight,
389            });
390            *first_highlight = false;
391        }
392    }
393}
394
/// Wraps a language loader so that it can serve as the `QueryLoader` of the `QueryIter`
/// used for highlighting; the queries it hands out are the languages' highlight queries.
pub(crate) struct HighlightQueryLoader<T>(T);
396
397impl<'a, T: LanguageLoader> QueryLoader<'a> for HighlightQueryLoader<&'a T> {
398    fn get_query(&mut self, lang: Language) -> Option<&'a Query> {
399        self.0
400            .get_config(lang)
401            .map(|config| &config.highlight_query.query)
402    }
403
404    fn are_predicates_satisfied(
405        &self,
406        lang: Language,
407        mat: &QueryMatch<'_, '_>,
408        source: RopeSlice<'_>,
409        locals_cursor: &ScopeCursor<'_>,
410    ) -> bool {
411        let highlight_query = &self
412            .0
413            .get_config(lang)
414            .expect("must have a config to emit matches")
415            .highlight_query;
416
417        // Highlight queries should reject the match when a pattern is marked with
418        // `(#is-not? local)` and any capture in the pattern matches a definition in scope.
419        //
420        // TODO: in the future we should propose that `#is-not? local` takes one or more
421        // captures as arguments. Ideally we would check that the captured node is also captured
422        // by a `local.reference` capture from the locals query but that's really messy to pass
423        // around that information. For now we assume that all matches in the pattern are also
424        // captured as `local.reference` in the locals, which covers most cases.
425        if highlight_query.local_reference_capture.is_some()
426            && highlight_query.non_local_patterns.contains(&mat.pattern())
427        {
428            let has_local_reference = mat.matched_nodes().any(|n| {
429                let range = n.node.byte_range();
430                let text: Cow<str> = source
431                    .byte_slice(range.start as usize..range.end as usize)
432                    .into();
433                locals_cursor
434                    .locals
435                    .lookup_reference(locals_cursor.current_scope(), &text)
436                    .is_some_and(|def| range.start >= def.range.start)
437            });
438            if has_local_reference {
439                return false;
440            }
441        }
442
443        true
444    }
445}