tree_sitter_highlight/
highlight.rs

1#![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))]
2
3pub mod c_lib;
4use core::slice;
5use std::{
6    collections::HashSet,
7    iter,
8    marker::PhantomData,
9    mem::{self, MaybeUninit},
10    ops::{self, ControlFlow},
11    str,
12    sync::{
13        atomic::{AtomicUsize, Ordering},
14        LazyLock,
15    },
16};
17
18pub use c_lib as c;
19use streaming_iterator::StreamingIterator;
20use thiserror::Error;
21use tree_sitter::{
22    ffi, Language, LossyUtf8, Node, ParseOptions, Parser, Point, Query, QueryCapture,
23    QueryCaptures, QueryCursor, QueryError, QueryMatch, Range, TextProvider, Tree,
24};
25
26const CANCELLATION_CHECK_INTERVAL: usize = 100;
27const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024;
28const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000;
29
30static STANDARD_CAPTURE_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
31    vec![
32        "attribute",
33        "boolean",
34        "carriage-return",
35        "comment",
36        "comment.documentation",
37        "constant",
38        "constant.builtin",
39        "constructor",
40        "constructor.builtin",
41        "embedded",
42        "error",
43        "escape",
44        "function",
45        "function.builtin",
46        "keyword",
47        "markup",
48        "markup.bold",
49        "markup.heading",
50        "markup.italic",
51        "markup.link",
52        "markup.link.url",
53        "markup.list",
54        "markup.list.checked",
55        "markup.list.numbered",
56        "markup.list.unchecked",
57        "markup.list.unnumbered",
58        "markup.quote",
59        "markup.raw",
60        "markup.raw.block",
61        "markup.raw.inline",
62        "markup.strikethrough",
63        "module",
64        "number",
65        "operator",
66        "property",
67        "property.builtin",
68        "punctuation",
69        "punctuation.bracket",
70        "punctuation.delimiter",
71        "punctuation.special",
72        "string",
73        "string.escape",
74        "string.regexp",
75        "string.special",
76        "string.special.symbol",
77        "tag",
78        "type",
79        "type.builtin",
80        "variable",
81        "variable.builtin",
82        "variable.member",
83        "variable.parameter",
84    ]
85    .into_iter()
86    .collect()
87});
88
89/// Indicates which highlight should be applied to a region of source code.
90#[derive(Copy, Clone, Debug, PartialEq, Eq)]
91pub struct Highlight(pub usize);
92
93/// Represents the reason why syntax highlighting failed.
94#[derive(Debug, Error, PartialEq, Eq)]
95pub enum Error {
96    #[error("Cancelled")]
97    Cancelled,
98    #[error("Invalid language")]
99    InvalidLanguage,
100    #[error("Unknown error")]
101    Unknown,
102}
103
104/// Represents a single step in rendering a syntax-highlighted document.
105#[derive(Copy, Clone, Debug)]
106pub enum HighlightEvent {
107    Source { start: usize, end: usize },
108    HighlightStart(Highlight),
109    HighlightEnd,
110}
111
112/// Contains the data needed to highlight code written in a particular language.
113///
114/// This struct is immutable and can be shared between threads.
115pub struct HighlightConfiguration {
116    pub language: Language,
117    pub language_name: String,
118    pub query: Query,
119    combined_injections_query: Option<Query>,
120    locals_pattern_index: usize,
121    highlights_pattern_index: usize,
122    highlight_indices: Vec<Option<Highlight>>,
123    non_local_variable_patterns: Vec<bool>,
124    injection_content_capture_index: Option<u32>,
125    injection_language_capture_index: Option<u32>,
126    local_scope_capture_index: Option<u32>,
127    local_def_capture_index: Option<u32>,
128    local_def_value_capture_index: Option<u32>,
129    local_ref_capture_index: Option<u32>,
130}
131
132/// Performs syntax highlighting, recognizing a given list of highlight names.
133///
134/// For the best performance `Highlighter` values should be reused between
135/// syntax highlighting calls. A separate highlighter is needed for each thread that
136/// is performing highlighting.
137pub struct Highlighter {
138    pub parser: Parser,
139    cursors: Vec<QueryCursor>,
140}
141
142/// Converts a general-purpose syntax highlighting iterator into a sequence of lines of HTML.
143pub struct HtmlRenderer {
144    pub html: Vec<u8>,
145    pub line_offsets: Vec<u32>,
146    carriage_return_highlight: Option<Highlight>,
147    // The offset in `self.html` of the last carriage return.
148    last_carriage_return: Option<usize>,
149}
150
151#[derive(Debug)]
152struct LocalDef<'a> {
153    name: &'a str,
154    value_range: ops::Range<usize>,
155    highlight: Option<Highlight>,
156}
157
158#[derive(Debug)]
159struct LocalScope<'a> {
160    inherits: bool,
161    range: ops::Range<usize>,
162    local_defs: Vec<LocalDef<'a>>,
163}
164
165struct HighlightIter<'a, F>
166where
167    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
168{
169    source: &'a [u8],
170    language_name: &'a str,
171    byte_offset: usize,
172    highlighter: &'a mut Highlighter,
173    injection_callback: F,
174    cancellation_flag: Option<&'a AtomicUsize>,
175    layers: Vec<HighlightIterLayer<'a>>,
176    iter_count: usize,
177    next_event: Option<HighlightEvent>,
178    last_highlight_range: Option<(usize, usize, usize)>,
179}
180
181struct HighlightIterLayer<'a> {
182    _tree: Tree,
183    cursor: QueryCursor,
184    captures: iter::Peekable<_QueryCaptures<'a, 'a, &'a [u8], &'a [u8]>>,
185    config: &'a HighlightConfiguration,
186    highlight_end_stack: Vec<usize>,
187    scope_stack: Vec<LocalScope<'a>>,
188    ranges: Vec<Range>,
189    depth: usize,
190}
191
192pub struct _QueryCaptures<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> {
193    ptr: *mut ffi::TSQueryCursor,
194    query: &'query Query,
195    text_provider: T,
196    buffer1: Vec<u8>,
197    buffer2: Vec<u8>,
198    _current_match: Option<(QueryMatch<'query, 'tree>, usize)>,
199    _options: Option<*mut ffi::TSQueryCursorOptions>,
200    _phantom: PhantomData<(&'tree (), I)>,
201}
202
203struct _QueryMatch<'cursor, 'tree> {
204    pub _pattern_index: usize,
205    pub _captures: &'cursor [QueryCapture<'tree>],
206    _id: u32,
207    _cursor: *mut ffi::TSQueryCursor,
208}
209
210impl<'tree> _QueryMatch<'_, 'tree> {
211    fn new(m: &ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self {
212        _QueryMatch {
213            _cursor: cursor,
214            _id: m.id,
215            _pattern_index: m.pattern_index as usize,
216            _captures: (m.capture_count > 0)
217                .then(|| unsafe {
218                    slice::from_raw_parts(
219                        m.captures.cast::<QueryCapture<'tree>>(),
220                        m.capture_count as usize,
221                    )
222                })
223                .unwrap_or_default(),
224        }
225    }
226}
227
228impl<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> Iterator
229    for _QueryCaptures<'query, 'tree, T, I>
230{
231    type Item = (QueryMatch<'query, 'tree>, usize);
232
233    fn next(&mut self) -> Option<Self::Item> {
234        unsafe {
235            loop {
236                let mut capture_index = 0u32;
237                let mut m = MaybeUninit::<ffi::TSQueryMatch>::uninit();
238                if ffi::ts_query_cursor_next_capture(
239                    self.ptr,
240                    m.as_mut_ptr(),
241                    core::ptr::addr_of_mut!(capture_index),
242                ) {
243                    let result = std::mem::transmute::<_QueryMatch, QueryMatch>(_QueryMatch::new(
244                        &m.assume_init(),
245                        self.ptr,
246                    ));
247                    if result.satisfies_text_predicates(
248                        self.query,
249                        &mut self.buffer1,
250                        &mut self.buffer2,
251                        &mut self.text_provider,
252                    ) {
253                        return Some((result, capture_index as usize));
254                    }
255                    result.remove();
256                } else {
257                    return None;
258                }
259            }
260        }
261    }
262}
263
264impl Default for Highlighter {
265    fn default() -> Self {
266        Self::new()
267    }
268}
269
270impl Highlighter {
271    #[must_use]
272    pub fn new() -> Self {
273        Self {
274            parser: Parser::new(),
275            cursors: Vec::new(),
276        }
277    }
278
279    pub const fn parser(&mut self) -> &mut Parser {
280        &mut self.parser
281    }
282
283    /// Iterate over the highlighted regions for a given slice of source code.
284    pub fn highlight<'a>(
285        &'a mut self,
286        config: &'a HighlightConfiguration,
287        source: &'a [u8],
288        cancellation_flag: Option<&'a AtomicUsize>,
289        mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
290    ) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
291        let layers = HighlightIterLayer::new(
292            source,
293            None,
294            self,
295            cancellation_flag,
296            &mut injection_callback,
297            config,
298            0,
299            vec![Range {
300                start_byte: 0,
301                end_byte: usize::MAX,
302                start_point: Point::new(0, 0),
303                end_point: Point::new(usize::MAX, usize::MAX),
304            }],
305        )?;
306        assert_ne!(layers.len(), 0);
307        let mut result = HighlightIter {
308            source,
309            language_name: &config.language_name,
310            byte_offset: 0,
311            injection_callback,
312            cancellation_flag,
313            highlighter: self,
314            iter_count: 0,
315            layers,
316            next_event: None,
317            last_highlight_range: None,
318        };
319        result.sort_layers();
320        Ok(result)
321    }
322}
323
324impl HighlightConfiguration {
325    /// Creates a `HighlightConfiguration` for a given `Language` and set of highlighting
326    /// queries.
327    ///
328    /// # Parameters
329    ///
330    /// * `language`  - The Tree-sitter `Language` that should be used for parsing.
331    /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
332    ///   should be non-empty, otherwise no syntax highlights will be added.
333    /// * `injections_query` -  A string containing tree patterns for injecting other languages into
334    ///   the document. This can be empty if no injections are desired.
335    /// * `locals_query` - A string containing tree patterns for tracking local variable definitions
336    ///   and references. This can be empty if local variable tracking is not needed.
337    ///
338    /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
339    pub fn new(
340        language: Language,
341        name: impl Into<String>,
342        highlights_query: &str,
343        injection_query: &str,
344        locals_query: &str,
345    ) -> Result<Self, QueryError> {
346        // Concatenate the query strings, keeping track of the start offset of each section.
347        let mut query_source = String::with_capacity(
348            injection_query.len() + locals_query.len() + highlights_query.len(),
349        );
350        query_source.push_str(injection_query);
351        let locals_query_offset = injection_query.len();
352        query_source.push_str(locals_query);
353        let highlights_query_offset = injection_query.len() + locals_query.len();
354        query_source.push_str(highlights_query);
355
356        // Construct a single query by concatenating the three query strings, but record the
357        // range of pattern indices that belong to each individual string.
358        let mut query = Query::new(&language, &query_source)?;
359        let mut locals_pattern_index = 0;
360        let mut highlights_pattern_index = 0;
361        for i in 0..(query.pattern_count()) {
362            let pattern_offset = query.start_byte_for_pattern(i);
363            if pattern_offset < highlights_query_offset {
364                if pattern_offset < highlights_query_offset {
365                    highlights_pattern_index += 1;
366                }
367                if pattern_offset < locals_query_offset {
368                    locals_pattern_index += 1;
369                }
370            }
371        }
372
373        // Construct a separate query just for dealing with the 'combined injections'.
374        // Disable the combined injection patterns in the main query.
375        let mut combined_injections_query = Query::new(&language, injection_query)?;
376        let mut has_combined_queries = false;
377        for pattern_index in 0..locals_pattern_index {
378            let settings = query.property_settings(pattern_index);
379            if settings.iter().any(|s| &*s.key == "injection.combined") {
380                has_combined_queries = true;
381                query.disable_pattern(pattern_index);
382            } else {
383                combined_injections_query.disable_pattern(pattern_index);
384            }
385        }
386        let combined_injections_query = if has_combined_queries {
387            Some(combined_injections_query)
388        } else {
389            None
390        };
391
392        // Find all of the highlighting patterns that are disabled for nodes that
393        // have been identified as local variables.
394        let non_local_variable_patterns = (0..query.pattern_count())
395            .map(|i| {
396                query
397                    .property_predicates(i)
398                    .iter()
399                    .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")
400            })
401            .collect();
402
403        // Store the numeric ids for all of the special captures.
404        let mut injection_content_capture_index = None;
405        let mut injection_language_capture_index = None;
406        let mut local_def_capture_index = None;
407        let mut local_def_value_capture_index = None;
408        let mut local_ref_capture_index = None;
409        let mut local_scope_capture_index = None;
410        for (i, name) in query.capture_names().iter().enumerate() {
411            let i = Some(i as u32);
412            match *name {
413                "injection.content" => injection_content_capture_index = i,
414                "injection.language" => injection_language_capture_index = i,
415                "local.definition" => local_def_capture_index = i,
416                "local.definition-value" => local_def_value_capture_index = i,
417                "local.reference" => local_ref_capture_index = i,
418                "local.scope" => local_scope_capture_index = i,
419                _ => {}
420            }
421        }
422
423        let highlight_indices = vec![None; query.capture_names().len()];
424        Ok(Self {
425            language,
426            language_name: name.into(),
427            query,
428            combined_injections_query,
429            locals_pattern_index,
430            highlights_pattern_index,
431            highlight_indices,
432            non_local_variable_patterns,
433            injection_content_capture_index,
434            injection_language_capture_index,
435            local_def_capture_index,
436            local_def_value_capture_index,
437            local_ref_capture_index,
438            local_scope_capture_index,
439        })
440    }
441
442    /// Get a slice containing all of the highlight names used in the configuration.
443    #[must_use]
444    pub const fn names(&self) -> &[&str] {
445        self.query.capture_names()
446    }
447
448    /// Set the list of recognized highlight names.
449    ///
450    /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
451    /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of
452    /// these queries can choose to recognize highlights with different levels of specificity.
453    /// For example, the string `function.builtin` will match against `function.method.builtin`
454    /// and `function.builtin.constructor`, but will not match `function.method`.
455    ///
456    /// When highlighting, results are returned as `Highlight` values, which contain the index
457    /// of the matched highlight this list of highlight names.
458    pub fn configure(&mut self, recognized_names: &[impl AsRef<str>]) {
459        let mut capture_parts = Vec::new();
460        self.highlight_indices.clear();
461        self.highlight_indices
462            .extend(self.query.capture_names().iter().map(move |capture_name| {
463                capture_parts.clear();
464                capture_parts.extend(capture_name.split('.'));
465
466                let mut best_index = None;
467                let mut best_match_len = 0;
468                for (i, recognized_name) in recognized_names.iter().enumerate() {
469                    let mut len = 0;
470                    let mut matches = true;
471                    for part in recognized_name.as_ref().split('.') {
472                        len += 1;
473                        if !capture_parts.contains(&part) {
474                            matches = false;
475                            break;
476                        }
477                    }
478                    if matches && len > best_match_len {
479                        best_index = Some(i);
480                        best_match_len = len;
481                    }
482                }
483                best_index.map(Highlight)
484            }));
485    }
486
487    // Return the list of this configuration's capture names that are neither present in the
488    // list of predefined 'canonical' names nor start with an underscore (denoting 'private'
489    // captures used as part of capture internals).
490    #[must_use]
491    pub fn nonconformant_capture_names(&self, capture_names: &HashSet<&str>) -> Vec<&str> {
492        let capture_names = if capture_names.is_empty() {
493            &*STANDARD_CAPTURE_NAMES
494        } else {
495            capture_names
496        };
497        self.names()
498            .iter()
499            .filter(|&n| !(n.starts_with('_') || capture_names.contains(n)))
500            .copied()
501            .collect()
502    }
503}
504
505impl<'a> HighlightIterLayer<'a> {
506    /// Create a new 'layer' of highlighting for this document.
507    ///
508    /// In the event that the new layer contains "combined injections" (injections where multiple
509    /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and
510    /// added to the returned vector.
511    #[allow(clippy::too_many_arguments)]
512    fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>(
513        source: &'a [u8],
514        parent_name: Option<&str>,
515        highlighter: &mut Highlighter,
516        cancellation_flag: Option<&'a AtomicUsize>,
517        injection_callback: &mut F,
518        mut config: &'a HighlightConfiguration,
519        mut depth: usize,
520        mut ranges: Vec<Range>,
521    ) -> Result<Vec<Self>, Error> {
522        let mut result = Vec::with_capacity(1);
523        let mut queue = Vec::new();
524        loop {
525            if highlighter.parser.set_included_ranges(&ranges).is_ok() {
526                highlighter
527                    .parser
528                    .set_language(&config.language)
529                    .map_err(|_| Error::InvalidLanguage)?;
530
531                let tree = highlighter
532                    .parser
533                    .parse_with_options(
534                        &mut |i, _| {
535                            if i < source.len() {
536                                &source[i..]
537                            } else {
538                                &[]
539                            }
540                        },
541                        None,
542                        Some(ParseOptions::new().progress_callback(&mut |_| {
543                            if let Some(cancellation_flag) = cancellation_flag {
544                                if cancellation_flag.load(Ordering::SeqCst) != 0 {
545                                    ControlFlow::Break(())
546                                } else {
547                                    ControlFlow::Continue(())
548                                }
549                            } else {
550                                ControlFlow::Continue(())
551                            }
552                        })),
553                    )
554                    .ok_or(Error::Cancelled)?;
555                let mut cursor = highlighter.cursors.pop().unwrap_or_default();
556
557                // Process combined injections.
558                if let Some(combined_injections_query) = &config.combined_injections_query {
559                    let mut injections_by_pattern_index =
560                        vec![(None, Vec::new(), false); combined_injections_query.pattern_count()];
561                    let mut matches =
562                        cursor.matches(combined_injections_query, tree.root_node(), source);
563                    while let Some(mat) = matches.next() {
564                        let entry = &mut injections_by_pattern_index[mat.pattern_index];
565                        let (language_name, content_node, include_children) = injection_for_match(
566                            config,
567                            parent_name,
568                            combined_injections_query,
569                            mat,
570                            source,
571                        );
572                        if language_name.is_some() {
573                            entry.0 = language_name;
574                        }
575                        if let Some(content_node) = content_node {
576                            entry.1.push(content_node);
577                        }
578                        entry.2 = include_children;
579                    }
580                    for (lang_name, content_nodes, includes_children) in injections_by_pattern_index
581                    {
582                        if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
583                            if let Some(next_config) = (injection_callback)(lang_name) {
584                                let ranges = Self::intersect_ranges(
585                                    &ranges,
586                                    &content_nodes,
587                                    includes_children,
588                                );
589                                if !ranges.is_empty() {
590                                    queue.push((next_config, depth + 1, ranges));
591                                }
592                            }
593                        }
594                    }
595                }
596
597                // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
598                // prevents them from being moved. But both of these values are really just
599                // pointers, so it's actually ok to move them.
600                let tree_ref = unsafe { mem::transmute::<&Tree, &'static Tree>(&tree) };
601                let cursor_ref = unsafe {
602                    mem::transmute::<&mut QueryCursor, &'static mut QueryCursor>(&mut cursor)
603                };
604                let captures = unsafe {
605                    std::mem::transmute::<QueryCaptures<_, _>, _QueryCaptures<_, _>>(
606                        cursor_ref.captures(&config.query, tree_ref.root_node(), source),
607                    )
608                }
609                .peekable();
610
611                result.push(HighlightIterLayer {
612                    highlight_end_stack: Vec::new(),
613                    scope_stack: vec![LocalScope {
614                        inherits: false,
615                        range: 0..usize::MAX,
616                        local_defs: Vec::new(),
617                    }],
618                    cursor,
619                    depth,
620                    _tree: tree,
621                    captures,
622                    config,
623                    ranges,
624                });
625            }
626
627            if queue.is_empty() {
628                break;
629            }
630
631            let (next_config, next_depth, next_ranges) = queue.remove(0);
632            config = next_config;
633            depth = next_depth;
634            ranges = next_ranges;
635        }
636
637        Ok(result)
638    }
639
640    // Compute the ranges that should be included when parsing an injection.
641    // This takes into account three things:
642    // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
643    // * `nodes` - Every injection takes place within a set of nodes. The injection ranges are the
644    //   ranges of those nodes.
645    // * `includes_children` - For some injections, the content nodes' children should be excluded
646    //   from the nested document, so that only the content nodes' *own* content is reparsed. For
647    //   other injections, the content nodes' entire ranges should be reparsed, including the ranges
648    //   of their children.
649    fn intersect_ranges(
650        parent_ranges: &[Range],
651        nodes: &[Node],
652        includes_children: bool,
653    ) -> Vec<Range> {
654        let mut cursor = nodes[0].walk();
655        let mut result = Vec::new();
656        let mut parent_range_iter = parent_ranges.iter();
657        let mut parent_range = parent_range_iter
658            .next()
659            .expect("Layers should only be constructed with non-empty ranges vectors");
660        for node in nodes {
661            let mut preceding_range = Range {
662                start_byte: 0,
663                start_point: Point::new(0, 0),
664                end_byte: node.start_byte(),
665                end_point: node.start_position(),
666            };
667            let following_range = Range {
668                start_byte: node.end_byte(),
669                start_point: node.end_position(),
670                end_byte: usize::MAX,
671                end_point: Point::new(usize::MAX, usize::MAX),
672            };
673
674            for excluded_range in node
675                .children(&mut cursor)
676                .filter_map(|child| {
677                    if includes_children {
678                        None
679                    } else {
680                        Some(child.range())
681                    }
682                })
683                .chain(std::iter::once(following_range))
684            {
685                let mut range = Range {
686                    start_byte: preceding_range.end_byte,
687                    start_point: preceding_range.end_point,
688                    end_byte: excluded_range.start_byte,
689                    end_point: excluded_range.start_point,
690                };
691                preceding_range = excluded_range;
692
693                if range.end_byte < parent_range.start_byte {
694                    continue;
695                }
696
697                while parent_range.start_byte <= range.end_byte {
698                    if parent_range.end_byte > range.start_byte {
699                        if range.start_byte < parent_range.start_byte {
700                            range.start_byte = parent_range.start_byte;
701                            range.start_point = parent_range.start_point;
702                        }
703
704                        if parent_range.end_byte < range.end_byte {
705                            if range.start_byte < parent_range.end_byte {
706                                result.push(Range {
707                                    start_byte: range.start_byte,
708                                    start_point: range.start_point,
709                                    end_byte: parent_range.end_byte,
710                                    end_point: parent_range.end_point,
711                                });
712                            }
713                            range.start_byte = parent_range.end_byte;
714                            range.start_point = parent_range.end_point;
715                        } else {
716                            if range.start_byte < range.end_byte {
717                                result.push(range);
718                            }
719                            break;
720                        }
721                    }
722
723                    if let Some(next_range) = parent_range_iter.next() {
724                        parent_range = next_range;
725                    } else {
726                        return result;
727                    }
728                }
729            }
730        }
731        result
732    }
733
734    // First, sort scope boundaries by their byte offset in the document. At a
735    // given position, emit scope endings before scope beginnings. Finally, emit
736    // scope boundaries from deeper layers first.
737    fn sort_key(&mut self) -> Option<(usize, bool, isize)> {
738        let depth = -(self.depth as isize);
739        let next_start = self
740            .captures
741            .peek()
742            .map(|(m, i)| m.captures[*i].node.start_byte());
743        let next_end = self.highlight_end_stack.last().copied();
744        match (next_start, next_end) {
745            (Some(start), Some(end)) => {
746                if start < end {
747                    Some((start, true, depth))
748                } else {
749                    Some((end, false, depth))
750                }
751            }
752            (Some(i), None) => Some((i, true, depth)),
753            (None, Some(j)) => Some((j, false, depth)),
754            _ => None,
755        }
756    }
757}
758
759impl<'a, F> HighlightIter<'a, F>
760where
761    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
762{
763    fn emit_event(
764        &mut self,
765        offset: usize,
766        event: Option<HighlightEvent>,
767    ) -> Option<Result<HighlightEvent, Error>> {
768        let result;
769        if self.byte_offset < offset {
770            result = Some(Ok(HighlightEvent::Source {
771                start: self.byte_offset,
772                end: offset,
773            }));
774            self.byte_offset = offset;
775            self.next_event = event;
776        } else {
777            result = event.map(Ok);
778        }
779        self.sort_layers();
780        result
781    }
782
783    fn sort_layers(&mut self) {
784        while !self.layers.is_empty() {
785            if let Some(sort_key) = self.layers[0].sort_key() {
786                let mut i = 0;
787                while i + 1 < self.layers.len() {
788                    if let Some(next_offset) = self.layers[i + 1].sort_key() {
789                        if next_offset < sort_key {
790                            i += 1;
791                            continue;
792                        }
793                    }
794                    break;
795                }
796                if i > 0 {
797                    self.layers[0..=i].rotate_left(1);
798                }
799                break;
800            }
801            let layer = self.layers.remove(0);
802            self.highlighter.cursors.push(layer.cursor);
803        }
804    }
805
806    fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) {
807        if let Some(sort_key) = layer.sort_key() {
808            let mut i = 1;
809            while i < self.layers.len() {
810                if let Some(sort_key_i) = self.layers[i].sort_key() {
811                    if sort_key_i > sort_key {
812                        self.layers.insert(i, layer);
813                        return;
814                    }
815                    i += 1;
816                } else {
817                    self.layers.remove(i);
818                }
819            }
820            self.layers.push(layer);
821        }
822    }
823}
824
825impl<'a, F> Iterator for HighlightIter<'a, F>
826where
827    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
828{
829    type Item = Result<HighlightEvent, Error>;
830
831    fn next(&mut self) -> Option<Self::Item> {
832        'main: loop {
833            // If we've already determined the next highlight boundary, just return it.
834            if let Some(e) = self.next_event.take() {
835                return Some(Ok(e));
836            }
837
838            // Periodically check for cancellation, returning `Cancelled` error if the
839            // cancellation flag was flipped.
840            if let Some(cancellation_flag) = self.cancellation_flag {
841                self.iter_count += 1;
842                if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
843                    self.iter_count = 0;
844                    if cancellation_flag.load(Ordering::Relaxed) != 0 {
845                        return Some(Err(Error::Cancelled));
846                    }
847                }
848            }
849
850            // If none of the layers have any more highlight boundaries, terminate.
851            if self.layers.is_empty() {
852                return if self.byte_offset < self.source.len() {
853                    let result = Some(Ok(HighlightEvent::Source {
854                        start: self.byte_offset,
855                        end: self.source.len(),
856                    }));
857                    self.byte_offset = self.source.len();
858                    result
859                } else {
860                    None
861                };
862            }
863
864            // Get the next capture from whichever layer has the earliest highlight boundary.
865            let range;
866            let layer = &mut self.layers[0];
867            if let Some((next_match, capture_index)) = layer.captures.peek() {
868                let next_capture = next_match.captures[*capture_index];
869                range = next_capture.node.byte_range();
870
871                // If any previous highlight ends before this node starts, then before
872                // processing this capture, emit the source code up until the end of the
873                // previous highlight, and an end event for that highlight.
874                if let Some(end_byte) = layer.highlight_end_stack.last().copied() {
875                    if end_byte <= range.start {
876                        layer.highlight_end_stack.pop();
877                        return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
878                    }
879                }
880            }
881            // If there are no more captures, then emit any remaining highlight end events.
882            // And if there are none of those, then just advance to the end of the document.
883            else {
884                if let Some(end_byte) = layer.highlight_end_stack.last().copied() {
885                    layer.highlight_end_stack.pop();
886                    return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
887                }
888                return self.emit_event(self.source.len(), None);
889            }
890
891            let (mut match_, capture_index) = layer.captures.next().unwrap();
892            let mut capture = match_.captures[capture_index];
893
894            // If this capture represents an injection, then process the injection.
895            if match_.pattern_index < layer.config.locals_pattern_index {
896                let (language_name, content_node, include_children) = injection_for_match(
897                    layer.config,
898                    Some(self.language_name),
899                    &layer.config.query,
900                    &match_,
901                    self.source,
902                );
903
904                // Explicitly remove this match so that none of its other captures will remain
905                // in the stream of captures.
906                match_.remove();
907
908                // If a language is found with the given name, then add a new language layer
909                // to the highlighted document.
910                if let (Some(language_name), Some(content_node)) = (language_name, content_node) {
911                    if let Some(config) = (self.injection_callback)(language_name) {
912                        let ranges = HighlightIterLayer::intersect_ranges(
913                            &self.layers[0].ranges,
914                            &[content_node],
915                            include_children,
916                        );
917                        if !ranges.is_empty() {
918                            match HighlightIterLayer::new(
919                                self.source,
920                                Some(self.language_name),
921                                self.highlighter,
922                                self.cancellation_flag,
923                                &mut self.injection_callback,
924                                config,
925                                self.layers[0].depth + 1,
926                                ranges,
927                            ) {
928                                Ok(layers) => {
929                                    for layer in layers {
930                                        self.insert_layer(layer);
931                                    }
932                                }
933                                Err(e) => return Some(Err(e)),
934                            }
935                        }
936                    }
937                }
938
939                self.sort_layers();
940                continue 'main;
941            }
942
943            // Remove from the local scope stack any local scopes that have already ended.
944            while range.start > layer.scope_stack.last().unwrap().range.end {
945                layer.scope_stack.pop();
946            }
947
948            // If this capture is for tracking local variables, then process the
949            // local variable info.
950            let mut reference_highlight = None;
951            let mut definition_highlight = None;
952            while match_.pattern_index < layer.config.highlights_pattern_index {
953                // If the node represents a local scope, push a new local scope onto
954                // the scope stack.
955                if Some(capture.index) == layer.config.local_scope_capture_index {
956                    definition_highlight = None;
957                    let mut scope = LocalScope {
958                        inherits: true,
959                        range: range.clone(),
960                        local_defs: Vec::new(),
961                    };
962                    for prop in layer.config.query.property_settings(match_.pattern_index) {
963                        if prop.key.as_ref() == "local.scope-inherits" {
964                            scope.inherits =
965                                prop.value.as_ref().is_none_or(|r| r.as_ref() == "true");
966                        }
967                    }
968                    layer.scope_stack.push(scope);
969                }
970                // If the node represents a definition, add a new definition to the
971                // local scope at the top of the scope stack.
972                else if Some(capture.index) == layer.config.local_def_capture_index {
973                    reference_highlight = None;
974                    definition_highlight = None;
975                    let scope = layer.scope_stack.last_mut().unwrap();
976
977                    let mut value_range = 0..0;
978                    for capture in match_.captures {
979                        if Some(capture.index) == layer.config.local_def_value_capture_index {
980                            value_range = capture.node.byte_range();
981                        }
982                    }
983
984                    if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
985                        scope.local_defs.push(LocalDef {
986                            name,
987                            value_range,
988                            highlight: None,
989                        });
990                        definition_highlight =
991                            scope.local_defs.last_mut().map(|s| &mut s.highlight);
992                    }
993                }
994                // If the node represents a reference, then try to find the corresponding
995                // definition in the scope stack.
996                else if Some(capture.index) == layer.config.local_ref_capture_index
997                    && definition_highlight.is_none()
998                {
999                    definition_highlight = None;
1000                    if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
1001                        for scope in layer.scope_stack.iter().rev() {
1002                            if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
1003                                if def.name == name && range.start >= def.value_range.end {
1004                                    Some(def.highlight)
1005                                } else {
1006                                    None
1007                                }
1008                            }) {
1009                                reference_highlight = highlight;
1010                                break;
1011                            }
1012                            if !scope.inherits {
1013                                break;
1014                            }
1015                        }
1016                    }
1017                }
1018
1019                // Continue processing any additional matches for the same node.
1020                if let Some((next_match, next_capture_index)) = layer.captures.peek() {
1021                    let next_capture = next_match.captures[*next_capture_index];
1022                    if next_capture.node == capture.node {
1023                        capture = next_capture;
1024                        match_ = layer.captures.next().unwrap().0;
1025                        continue;
1026                    }
1027                }
1028
1029                self.sort_layers();
1030                continue 'main;
1031            }
1032
1033            // Otherwise, this capture must represent a highlight.
1034            // If this exact range has already been highlighted by an earlier pattern, or by
1035            // a different layer, then skip over this one.
1036            if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
1037                if range.start == last_start && range.end == last_end && layer.depth < last_depth {
1038                    self.sort_layers();
1039                    continue 'main;
1040                }
1041            }
1042
1043            // Once a highlighting pattern is found for the current node, keep iterating over
1044            // any later highlighting patterns that also match this node and set the match to it.
1045            // Captures for a given node are ordered by pattern index, so these subsequent
1046            // captures are guaranteed to be for highlighting, not injections or
1047            // local variables.
1048            while let Some((next_match, next_capture_index)) = layer.captures.peek() {
1049                let next_capture = next_match.captures[*next_capture_index];
1050                if next_capture.node == capture.node {
1051                    let following_match = layer.captures.next().unwrap().0;
1052                    // If the current node was found to be a local variable, then ignore
1053                    // the following match if it's a highlighting pattern that is disabled
1054                    // for local variables.
1055                    if (definition_highlight.is_some() || reference_highlight.is_some())
1056                        && layer.config.non_local_variable_patterns[following_match.pattern_index]
1057                    {
1058                        continue;
1059                    }
1060                    match_.remove();
1061                    capture = next_capture;
1062                    match_ = following_match;
1063                } else {
1064                    break;
1065                }
1066            }
1067
1068            let current_highlight = layer.config.highlight_indices[capture.index as usize];
1069
1070            // If this node represents a local definition, then store the current
1071            // highlight value on the local scope entry representing this node.
1072            if let Some(definition_highlight) = definition_highlight {
1073                *definition_highlight = current_highlight;
1074            }
1075
1076            // Emit a scope start event and push the node's end position to the stack.
1077            if let Some(highlight) = reference_highlight.or(current_highlight) {
1078                self.last_highlight_range = Some((range.start, range.end, layer.depth));
1079                layer.highlight_end_stack.push(range.end);
1080                return self
1081                    .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
1082            }
1083
1084            self.sort_layers();
1085        }
1086    }
1087}
1088
1089impl Default for HtmlRenderer {
1090    fn default() -> Self {
1091        Self::new()
1092    }
1093}
1094
1095impl HtmlRenderer {
1096    #[must_use]
1097    pub fn new() -> Self {
1098        let mut result = Self {
1099            html: Vec::with_capacity(BUFFER_HTML_RESERVE_CAPACITY),
1100            line_offsets: Vec::with_capacity(BUFFER_LINES_RESERVE_CAPACITY),
1101            carriage_return_highlight: None,
1102            last_carriage_return: None,
1103        };
1104        result.line_offsets.push(0);
1105        result
1106    }
1107
1108    pub const fn set_carriage_return_highlight(&mut self, highlight: Option<Highlight>) {
1109        self.carriage_return_highlight = highlight;
1110    }
1111
1112    pub fn reset(&mut self) {
1113        shrink_and_clear(&mut self.html, BUFFER_HTML_RESERVE_CAPACITY);
1114        shrink_and_clear(&mut self.line_offsets, BUFFER_LINES_RESERVE_CAPACITY);
1115        self.line_offsets.push(0);
1116    }
1117
1118    pub fn render<F>(
1119        &mut self,
1120        highlighter: impl Iterator<Item = Result<HighlightEvent, Error>>,
1121        source: &[u8],
1122        attribute_callback: &F,
1123    ) -> Result<(), Error>
1124    where
1125        F: Fn(Highlight, &mut Vec<u8>),
1126    {
1127        let mut highlights = Vec::new();
1128        for event in highlighter {
1129            match event {
1130                Ok(HighlightEvent::HighlightStart(s)) => {
1131                    highlights.push(s);
1132                    self.start_highlight(s, &attribute_callback);
1133                }
1134                Ok(HighlightEvent::HighlightEnd) => {
1135                    highlights.pop();
1136                    self.end_highlight();
1137                }
1138                Ok(HighlightEvent::Source { start, end }) => {
1139                    self.add_text(&source[start..end], &highlights, &attribute_callback);
1140                }
1141                Err(a) => return Err(a),
1142            }
1143        }
1144        if let Some(offset) = self.last_carriage_return.take() {
1145            self.add_carriage_return(offset, attribute_callback);
1146        }
1147        if self.html.last() != Some(&b'\n') {
1148            self.html.push(b'\n');
1149        }
1150        if self.line_offsets.last() == Some(&(self.html.len() as u32)) {
1151            self.line_offsets.pop();
1152        }
1153        Ok(())
1154    }
1155
1156    pub fn lines(&self) -> impl Iterator<Item = &str> {
1157        self.line_offsets
1158            .iter()
1159            .enumerate()
1160            .map(move |(i, line_start)| {
1161                let line_start = *line_start as usize;
1162                let line_end = if i + 1 == self.line_offsets.len() {
1163                    self.html.len()
1164                } else {
1165                    self.line_offsets[i + 1] as usize
1166                };
1167                str::from_utf8(&self.html[line_start..line_end]).unwrap()
1168            })
1169    }
1170
1171    fn add_carriage_return<F>(&mut self, offset: usize, attribute_callback: &F)
1172    where
1173        F: Fn(Highlight, &mut Vec<u8>),
1174    {
1175        if let Some(highlight) = self.carriage_return_highlight {
1176            // If a CR is the last character in a `HighlightEvent::Source`
1177            // region, then we don't know until the next `Source` event or EOF
1178            // whether it is part of CRLF or on its own. To avoid unbounded
1179            // lookahead, save the offset of the CR and insert there now that we
1180            // know.
1181            let rest = self.html.split_off(offset);
1182            self.html.extend(b"<span ");
1183            (attribute_callback)(highlight, &mut self.html);
1184            self.html.extend(b"></span>");
1185            self.html.extend(rest);
1186        }
1187    }
1188
1189    fn start_highlight<F>(&mut self, h: Highlight, attribute_callback: &F)
1190    where
1191        F: Fn(Highlight, &mut Vec<u8>),
1192    {
1193        self.html.extend(b"<span ");
1194        (attribute_callback)(h, &mut self.html);
1195        self.html.extend(b">");
1196    }
1197
1198    fn end_highlight(&mut self) {
1199        self.html.extend(b"</span>");
1200    }
1201
1202    fn add_text<F>(&mut self, src: &[u8], highlights: &[Highlight], attribute_callback: &F)
1203    where
1204        F: Fn(Highlight, &mut Vec<u8>),
1205    {
1206        pub const fn html_escape(c: u8) -> Option<&'static [u8]> {
1207            match c as char {
1208                '>' => Some(b"&gt;"),
1209                '<' => Some(b"&lt;"),
1210                '&' => Some(b"&amp;"),
1211                '\'' => Some(b"&#39;"),
1212                '"' => Some(b"&quot;"),
1213                _ => None,
1214            }
1215        }
1216
1217        for c in LossyUtf8::new(src).flat_map(|p| p.bytes()) {
1218            // Don't render carriage return characters, but allow lone carriage returns (not
1219            // followed by line feeds) to be styled via the attribute callback.
1220            if c == b'\r' {
1221                self.last_carriage_return = Some(self.html.len());
1222                continue;
1223            }
1224            if let Some(offset) = self.last_carriage_return.take() {
1225                if c != b'\n' {
1226                    self.add_carriage_return(offset, attribute_callback);
1227                }
1228            }
1229
1230            // At line boundaries, close and re-open all of the open tags.
1231            if c == b'\n' {
1232                for _ in highlights {
1233                    self.end_highlight();
1234                }
1235                self.html.push(c);
1236                self.line_offsets.push(self.html.len() as u32);
1237                for scope in highlights {
1238                    self.start_highlight(*scope, attribute_callback);
1239                }
1240            } else if let Some(escape) = html_escape(c) {
1241                self.html.extend_from_slice(escape);
1242            } else {
1243                self.html.push(c);
1244            }
1245        }
1246    }
1247}
1248
1249fn injection_for_match<'a>(
1250    config: &'a HighlightConfiguration,
1251    parent_name: Option<&'a str>,
1252    query: &'a Query,
1253    query_match: &QueryMatch<'a, 'a>,
1254    source: &'a [u8],
1255) -> (Option<&'a str>, Option<Node<'a>>, bool) {
1256    let content_capture_index = config.injection_content_capture_index;
1257    let language_capture_index = config.injection_language_capture_index;
1258
1259    let mut language_name = None;
1260    let mut content_node = None;
1261
1262    for capture in query_match.captures {
1263        let index = Some(capture.index);
1264        if index == language_capture_index {
1265            language_name = capture.node.utf8_text(source).ok();
1266        } else if index == content_capture_index {
1267            content_node = Some(capture.node);
1268        }
1269    }
1270
1271    let mut include_children = false;
1272    for prop in query.property_settings(query_match.pattern_index) {
1273        match prop.key.as_ref() {
1274            // In addition to specifying the language name via the text of a
1275            // captured node, it can also be hard-coded via a `#set!` predicate
1276            // that sets the injection.language key.
1277            "injection.language" => {
1278                if language_name.is_none() {
1279                    language_name = prop.value.as_ref().map(std::convert::AsRef::as_ref);
1280                }
1281            }
1282
1283            // Setting the `injection.self` key can be used to specify that the
1284            // language name should be the same as the language of the current
1285            // layer.
1286            "injection.self" => {
1287                if language_name.is_none() {
1288                    language_name = Some(config.language_name.as_str());
1289                }
1290            }
1291
1292            // Setting the `injection.parent` key can be used to specify that
1293            // the language name should be the same as the language of the
1294            // parent layer
1295            "injection.parent" => {
1296                if language_name.is_none() {
1297                    language_name = parent_name;
1298                }
1299            }
1300
1301            // By default, injections do not include the *children* of an
1302            // `injection.content` node - only the ranges that belong to the
1303            // node itself. This can be changed using a `#set!` predicate that
1304            // sets the `injection.include-children` key.
1305            "injection.include-children" => include_children = true,
1306            _ => {}
1307        }
1308    }
1309
1310    (language_name, content_node, include_children)
1311}
1312
1313fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) {
1314    if vec.len() > capacity {
1315        vec.truncate(capacity);
1316        vec.shrink_to_fit();
1317    }
1318    vec.clear();
1319}