tree_sitter_highlight/
lib.rs

1#![doc = include_str!("../README.md")]
2
3pub mod c_lib;
4use core::slice;
5use std::{
6    collections::HashSet,
7    iter,
8    marker::PhantomData,
9    mem::{self, MaybeUninit},
10    ops, str,
11    sync::{
12        atomic::{AtomicUsize, Ordering},
13        LazyLock,
14    },
15};
16
17pub use c_lib as c;
18use streaming_iterator::StreamingIterator;
19use thiserror::Error;
20use tree_sitter::{
21    ffi, Language, LossyUtf8, Node, ParseOptions, Parser, Point, Query, QueryCapture,
22    QueryCaptures, QueryCursor, QueryError, QueryMatch, Range, TextProvider, Tree,
23};
24
25const CANCELLATION_CHECK_INTERVAL: usize = 100;
26const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024;
27const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000;
28
/// The predefined set of 'canonical' capture names.
///
/// `HighlightConfiguration::nonconformant_capture_names` falls back to this set when the
/// caller passes an empty one.
static STANDARD_CAPTURE_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "attribute",
        "boolean",
        "carriage-return",
        "comment",
        "comment.documentation",
        "constant",
        "constant.builtin",
        "constructor",
        "constructor.builtin",
        "embedded",
        "error",
        "escape",
        "function",
        "function.builtin",
        "keyword",
        "markup",
        "markup.bold",
        "markup.heading",
        "markup.italic",
        "markup.link",
        "markup.link.url",
        "markup.list",
        "markup.list.checked",
        "markup.list.numbered",
        "markup.list.unchecked",
        "markup.list.unnumbered",
        "markup.quote",
        "markup.raw",
        "markup.raw.block",
        "markup.raw.inline",
        "markup.strikethrough",
        "module",
        "number",
        "operator",
        "property",
        "property.builtin",
        "punctuation",
        "punctuation.bracket",
        "punctuation.delimiter",
        "punctuation.special",
        "string",
        "string.escape",
        "string.regexp",
        "string.special",
        "string.special.symbol",
        "tag",
        "type",
        "type.builtin",
        "variable",
        "variable.builtin",
        "variable.member",
        "variable.parameter",
    ])
});
87
/// Indicates which highlight should be applied to a region of source code.
///
/// The wrapped value is the index of the matched name within the list of recognized
/// highlight names passed to `HighlightConfiguration::configure`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Highlight(pub usize);
91
/// Represents the reason why syntax highlighting failed.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum Error {
    /// Highlighting stopped early: the parser produced no tree, or the caller-supplied
    /// cancellation flag was observed to be non-zero.
    #[error("Cancelled")]
    Cancelled,
    /// The parser refused the `Language` of a highlight configuration.
    #[error("Invalid language")]
    InvalidLanguage,
    // NOTE(review): no producer of this variant is visible in the reviewed portion of the
    // file — confirm where it is raised before documenting it more precisely.
    /// An unspecified failure.
    #[error("Unknown error")]
    Unknown,
}
102
/// Represents a single step in rendering a syntax-highlighted document.
#[derive(Copy, Clone, Debug)]
pub enum HighlightEvent {
    /// A run of source text, identified by the byte range `start..end` of the input.
    Source { start: usize, end: usize },
    /// The given highlight starts applying at the current position.
    HighlightStart(Highlight),
    /// A previously started highlight stops applying at the current position.
    HighlightEnd,
}
110
/// Contains the data needed to highlight code written in a particular language.
///
/// This struct is immutable and can be shared between threads.
pub struct HighlightConfiguration {
    /// The Tree-sitter language used to parse documents for this configuration.
    pub language: Language,
    /// The name given to this configuration when it was created.
    pub language_name: String,
    /// A single query built from the injections, locals and highlights query sources,
    /// concatenated in that order.
    pub query: Query,
    /// A query built from the injections source alone, in which only the patterns carrying
    /// the `injection.combined` property stay enabled. `None` when no pattern has it.
    combined_injections_query: Option<Query>,
    /// Number of patterns in `query` that start before the locals section, i.e. the index
    /// of the first locals pattern.
    locals_pattern_index: usize,
    /// Number of patterns in `query` that start before the highlights section, i.e. the
    /// index of the first highlights pattern.
    highlights_pattern_index: usize,
    /// For each capture of `query`, the highlight chosen by `configure` (all `None` until
    /// `configure` is called).
    highlight_indices: Vec<Option<Highlight>>,
    /// For each pattern of `query`, whether it carries a negated `local` property predicate.
    non_local_variable_patterns: Vec<bool>,
    /// Capture index of `@injection.content`, if the query uses it.
    injection_content_capture_index: Option<u32>,
    /// Capture index of `@injection.language`, if the query uses it.
    injection_language_capture_index: Option<u32>,
    /// Capture index of `@local.scope`, if the query uses it.
    local_scope_capture_index: Option<u32>,
    /// Capture index of `@local.definition`, if the query uses it.
    local_def_capture_index: Option<u32>,
    /// Capture index of `@local.definition-value`, if the query uses it.
    local_def_value_capture_index: Option<u32>,
    /// Capture index of `@local.reference`, if the query uses it.
    local_ref_capture_index: Option<u32>,
}
130
/// Performs syntax highlighting, recognizing a given list of highlight names.
///
/// For the best performance `Highlighter` values should be reused between
/// syntax highlighting calls. A separate highlighter is needed for each thread that
/// is performing highlighting.
pub struct Highlighter {
    /// The parser used to build the syntax tree of every highlighting layer.
    pub parser: Parser,
    /// Spare query cursors: a new layer takes one from here (or creates a fresh one), and
    /// the cursor is pushed back once the layer has no more boundaries to emit.
    cursors: Vec<QueryCursor>,
}
140
/// Converts a general-purpose syntax highlighting iterator into a sequence of lines of HTML.
// NOTE(review): the methods that fill these fields are outside the reviewed portion of the
// file; the field notes below are inferred from names and types only — confirm against the
// implementation.
pub struct HtmlRenderer {
    /// The rendered HTML bytes.
    pub html: Vec<u8>,
    /// Presumably the offset in `html` at which each rendered line starts — TODO confirm.
    pub line_offsets: Vec<u32>,
    /// Highlight associated with carriage returns, if one has been set.
    carriage_return_highlight: Option<Highlight>,
    // The offset in `self.html` of the last carriage return.
    last_carriage_return: Option<usize>,
}
149
/// A local definition recorded inside a [`LocalScope`].
// NOTE(review): the code that creates and consults these values is outside the reviewed
// portion of the file; the field notes are inferred from names and types — confirm.
#[derive(Debug)]
struct LocalDef<'a> {
    /// The defined name, borrowed for the lifetime `'a`.
    name: &'a str,
    /// Byte range associated with the definition's value.
    value_range: ops::Range<usize>,
    /// Highlight associated with the definition, if any.
    highlight: Option<Highlight>,
}
156
/// A scope holding local definitions.
///
/// Every layer starts with one root scope covering `0..usize::MAX` with `inherits: false`.
#[derive(Debug)]
struct LocalScope<'a> {
    // NOTE(review): presumably controls whether lookups continue into enclosing scopes;
    // the lookup code is not visible here — confirm.
    inherits: bool,
    /// Byte range covered by the scope.
    range: ops::Range<usize>,
    /// Definitions recorded in this scope.
    local_defs: Vec<LocalDef<'a>>,
}
163
/// The iterator returned by `Highlighter::highlight`; yields `HighlightEvent`s for one
/// document across all of its highlighting layers.
struct HighlightIter<'a, F>
where
    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
{
    /// The bytes of the document being highlighted.
    source: &'a [u8],
    /// Name of the root configuration's language.
    language_name: &'a str,
    /// End of the source text already emitted through `HighlightEvent::Source`.
    byte_offset: usize,
    /// The highlighter that owns the parser and the pool of spare query cursors.
    highlighter: &'a mut Highlighter,
    /// Maps an injected language name to its configuration, if one is available.
    injection_callback: F,
    /// When present, a non-zero value makes iteration yield `Error::Cancelled`.
    cancellation_flag: Option<&'a AtomicUsize>,
    /// Active layers; `sort_layers` keeps the layer with the smallest sort key at index 0.
    layers: Vec<HighlightIterLayer<'a>>,
    /// Iterations since the cancellation flag was last checked.
    iter_count: usize,
    /// An event that has been computed and is returned by the following call to `next`.
    next_event: Option<HighlightEvent>,
    // NOTE(review): no use of this field is visible in the reviewed portion of the file —
    // confirm the meaning of the three tuple elements.
    last_highlight_range: Option<(usize, usize, usize)>,
}
179
/// One parsed 'layer' of a document: the root language, or an injected language covering
/// some of the document's ranges.
struct HighlightIterLayer<'a> {
    /// The syntax tree, kept alive here because `captures` refers to it.
    _tree: Tree,
    /// The query cursor driving `captures`; handed back to the highlighter's pool by
    /// `HighlightIter::sort_layers` once the layer is exhausted.
    cursor: QueryCursor,
    /// Peekable stream of captures for `config.query` over `_tree`.
    captures: iter::Peekable<_QueryCaptures<'a, 'a, &'a [u8], &'a [u8]>>,
    /// The configuration whose query and language produced this layer.
    config: &'a HighlightConfiguration,
    /// Pending highlight end offsets; the last entry is the next end boundary considered by
    /// `sort_key`.
    highlight_end_stack: Vec<usize>,
    /// Stack of local scopes; starts with a single root scope.
    scope_stack: Vec<LocalScope<'a>>,
    /// The document ranges that were parsed for this layer.
    ranges: Vec<Range>,
    /// Injection nesting depth; the root layer has depth 0.
    depth: usize,
}
190
/// Local stand-in for `tree_sitter::QueryCaptures`.
///
/// `HighlightIterLayer::new` transmutes a `QueryCaptures` into this type so that this crate
/// can provide its own `Iterator` implementation for it.
// NOTE(review): the transmute is only sound while this definition has the same layout as
// `tree_sitter::QueryCaptures` — keep the field list in sync with the `tree_sitter` crate.
pub struct _QueryCaptures<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> {
    /// Raw cursor handle passed to `ts_query_cursor_next_capture`.
    ptr: *mut ffi::TSQueryCursor,
    /// The query whose text predicates each match is checked against.
    query: &'query Query,
    /// Supplies node text when evaluating text predicates.
    text_provider: T,
    /// Scratch buffer handed to `satisfies_text_predicates`.
    buffer1: Vec<u8>,
    /// Second scratch buffer handed to `satisfies_text_predicates`.
    buffer2: Vec<u8>,
    // Unused by this crate's iterator implementation.
    _current_match: Option<(QueryMatch<'query, 'tree>, usize)>,
    // Unused by this crate's iterator implementation.
    _options: Option<*mut ffi::TSQueryCursorOptions>,
    _phantom: PhantomData<(&'tree (), I)>,
}
201
/// Local stand-in for `tree_sitter::QueryMatch`.
///
/// Values of this type are built from a raw `ffi::TSQueryMatch` and then transmuted into
/// `QueryMatch` by the `_QueryCaptures` iterator.
// NOTE(review): the transmute is only sound while this definition has the same layout as
// `tree_sitter::QueryMatch` — keep the field list in sync with the `tree_sitter` crate.
struct _QueryMatch<'cursor, 'tree> {
    /// Index of the query pattern that matched.
    pub _pattern_index: usize,
    /// The captures of the match, viewed in place from the raw match's capture array.
    pub _captures: &'cursor [QueryCapture<'tree>],
    /// The match id copied from the raw match.
    _id: u32,
    /// The cursor that produced the match.
    _cursor: *mut ffi::TSQueryCursor,
}
208
209impl<'tree> _QueryMatch<'_, 'tree> {
210    fn new(m: &ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self {
211        _QueryMatch {
212            _cursor: cursor,
213            _id: m.id,
214            _pattern_index: m.pattern_index as usize,
215            _captures: (m.capture_count > 0)
216                .then(|| unsafe {
217                    slice::from_raw_parts(
218                        m.captures.cast::<QueryCapture<'tree>>(),
219                        m.capture_count as usize,
220                    )
221                })
222                .unwrap_or_default(),
223        }
224    }
225}
226
227impl<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> Iterator
228    for _QueryCaptures<'query, 'tree, T, I>
229{
230    type Item = (QueryMatch<'query, 'tree>, usize);
231
232    fn next(&mut self) -> Option<Self::Item> {
233        unsafe {
234            loop {
235                let mut capture_index = 0u32;
236                let mut m = MaybeUninit::<ffi::TSQueryMatch>::uninit();
237                if ffi::ts_query_cursor_next_capture(
238                    self.ptr,
239                    m.as_mut_ptr(),
240                    core::ptr::addr_of_mut!(capture_index),
241                ) {
242                    let result = std::mem::transmute::<_QueryMatch, QueryMatch>(_QueryMatch::new(
243                        &m.assume_init(),
244                        self.ptr,
245                    ));
246                    if result.satisfies_text_predicates(
247                        self.query,
248                        &mut self.buffer1,
249                        &mut self.buffer2,
250                        &mut self.text_provider,
251                    ) {
252                        return Some((result, capture_index as usize));
253                    }
254                    result.remove();
255                } else {
256                    return None;
257                }
258            }
259        }
260    }
261}
262
impl Default for Highlighter {
    /// Equivalent to [`Highlighter::new`].
    fn default() -> Self {
        Self::new()
    }
}
268
269impl Highlighter {
270    #[must_use]
271    pub fn new() -> Self {
272        Self {
273            parser: Parser::new(),
274            cursors: Vec::new(),
275        }
276    }
277
278    pub fn parser(&mut self) -> &mut Parser {
279        &mut self.parser
280    }
281
282    /// Iterate over the highlighted regions for a given slice of source code.
283    pub fn highlight<'a>(
284        &'a mut self,
285        config: &'a HighlightConfiguration,
286        source: &'a [u8],
287        cancellation_flag: Option<&'a AtomicUsize>,
288        mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
289    ) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
290        let layers = HighlightIterLayer::new(
291            source,
292            None,
293            self,
294            cancellation_flag,
295            &mut injection_callback,
296            config,
297            0,
298            vec![Range {
299                start_byte: 0,
300                end_byte: usize::MAX,
301                start_point: Point::new(0, 0),
302                end_point: Point::new(usize::MAX, usize::MAX),
303            }],
304        )?;
305        assert_ne!(layers.len(), 0);
306        let mut result = HighlightIter {
307            source,
308            language_name: &config.language_name,
309            byte_offset: 0,
310            injection_callback,
311            cancellation_flag,
312            highlighter: self,
313            iter_count: 0,
314            layers,
315            next_event: None,
316            last_highlight_range: None,
317        };
318        result.sort_layers();
319        Ok(result)
320    }
321}
322
323impl HighlightConfiguration {
324    /// Creates a `HighlightConfiguration` for a given `Language` and set of highlighting
325    /// queries.
326    ///
327    /// # Parameters
328    ///
329    /// * `language`  - The Tree-sitter `Language` that should be used for parsing.
330    /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
331    ///   should be non-empty, otherwise no syntax highlights will be added.
332    /// * `injections_query` -  A string containing tree patterns for injecting other languages into
333    ///   the document. This can be empty if no injections are desired.
334    /// * `locals_query` - A string containing tree patterns for tracking local variable definitions
335    ///   and references. This can be empty if local variable tracking is not needed.
336    ///
337    /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
338    pub fn new(
339        language: Language,
340        name: impl Into<String>,
341        highlights_query: &str,
342        injection_query: &str,
343        locals_query: &str,
344    ) -> Result<Self, QueryError> {
345        // Concatenate the query strings, keeping track of the start offset of each section.
346        let mut query_source = String::new();
347        query_source.push_str(injection_query);
348        let locals_query_offset = query_source.len();
349        query_source.push_str(locals_query);
350        let highlights_query_offset = query_source.len();
351        query_source.push_str(highlights_query);
352
353        // Construct a single query by concatenating the three query strings, but record the
354        // range of pattern indices that belong to each individual string.
355        let mut query = Query::new(&language, &query_source)?;
356        let mut locals_pattern_index = 0;
357        let mut highlights_pattern_index = 0;
358        for i in 0..(query.pattern_count()) {
359            let pattern_offset = query.start_byte_for_pattern(i);
360            if pattern_offset < highlights_query_offset {
361                if pattern_offset < highlights_query_offset {
362                    highlights_pattern_index += 1;
363                }
364                if pattern_offset < locals_query_offset {
365                    locals_pattern_index += 1;
366                }
367            }
368        }
369
370        // Construct a separate query just for dealing with the 'combined injections'.
371        // Disable the combined injection patterns in the main query.
372        let mut combined_injections_query = Query::new(&language, injection_query)?;
373        let mut has_combined_queries = false;
374        for pattern_index in 0..locals_pattern_index {
375            let settings = query.property_settings(pattern_index);
376            if settings.iter().any(|s| &*s.key == "injection.combined") {
377                has_combined_queries = true;
378                query.disable_pattern(pattern_index);
379            } else {
380                combined_injections_query.disable_pattern(pattern_index);
381            }
382        }
383        let combined_injections_query = if has_combined_queries {
384            Some(combined_injections_query)
385        } else {
386            None
387        };
388
389        // Find all of the highlighting patterns that are disabled for nodes that
390        // have been identified as local variables.
391        let non_local_variable_patterns = (0..query.pattern_count())
392            .map(|i| {
393                query
394                    .property_predicates(i)
395                    .iter()
396                    .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")
397            })
398            .collect();
399
400        // Store the numeric ids for all of the special captures.
401        let mut injection_content_capture_index = None;
402        let mut injection_language_capture_index = None;
403        let mut local_def_capture_index = None;
404        let mut local_def_value_capture_index = None;
405        let mut local_ref_capture_index = None;
406        let mut local_scope_capture_index = None;
407        for (i, name) in query.capture_names().iter().enumerate() {
408            let i = Some(i as u32);
409            match *name {
410                "injection.content" => injection_content_capture_index = i,
411                "injection.language" => injection_language_capture_index = i,
412                "local.definition" => local_def_capture_index = i,
413                "local.definition-value" => local_def_value_capture_index = i,
414                "local.reference" => local_ref_capture_index = i,
415                "local.scope" => local_scope_capture_index = i,
416                _ => {}
417            }
418        }
419
420        let highlight_indices = vec![None; query.capture_names().len()];
421        Ok(Self {
422            language,
423            language_name: name.into(),
424            query,
425            combined_injections_query,
426            locals_pattern_index,
427            highlights_pattern_index,
428            highlight_indices,
429            non_local_variable_patterns,
430            injection_content_capture_index,
431            injection_language_capture_index,
432            local_def_capture_index,
433            local_def_value_capture_index,
434            local_ref_capture_index,
435            local_scope_capture_index,
436        })
437    }
438
    /// Get a slice containing all of the highlight names used in the configuration.
    ///
    /// These are the capture names of the combined query, so captures coming from the
    /// injections and locals sections are included as well.
    #[must_use]
    pub const fn names(&self) -> &[&str] {
        self.query.capture_names()
    }
444
445    /// Set the list of recognized highlight names.
446    ///
447    /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
448    /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of
449    /// these queries can choose to recognize highlights with different levels of specificity.
450    /// For example, the string `function.builtin` will match against `function.method.builtin`
451    /// and `function.builtin.constructor`, but will not match `function.method`.
452    ///
453    /// When highlighting, results are returned as `Highlight` values, which contain the index
454    /// of the matched highlight this list of highlight names.
455    pub fn configure(&mut self, recognized_names: &[impl AsRef<str>]) {
456        let mut capture_parts = Vec::new();
457        self.highlight_indices.clear();
458        self.highlight_indices
459            .extend(self.query.capture_names().iter().map(move |capture_name| {
460                capture_parts.clear();
461                capture_parts.extend(capture_name.split('.'));
462
463                let mut best_index = None;
464                let mut best_match_len = 0;
465                for (i, recognized_name) in recognized_names.iter().enumerate() {
466                    let mut len = 0;
467                    let mut matches = true;
468                    for part in recognized_name.as_ref().split('.') {
469                        len += 1;
470                        if !capture_parts.contains(&part) {
471                            matches = false;
472                            break;
473                        }
474                    }
475                    if matches && len > best_match_len {
476                        best_index = Some(i);
477                        best_match_len = len;
478                    }
479                }
480                best_index.map(Highlight)
481            }));
482    }
483
484    // Return the list of this configuration's capture names that are neither present in the
485    // list of predefined 'canonical' names nor start with an underscore (denoting 'private'
486    // captures used as part of capture internals).
487    #[must_use]
488    pub fn nonconformant_capture_names(&self, capture_names: &HashSet<&str>) -> Vec<&str> {
489        let capture_names = if capture_names.is_empty() {
490            &*STANDARD_CAPTURE_NAMES
491        } else {
492            capture_names
493        };
494        self.names()
495            .iter()
496            .filter(|&n| !(n.starts_with('_') || capture_names.contains(n)))
497            .copied()
498            .collect()
499    }
500}
501
502impl<'a> HighlightIterLayer<'a> {
    /// Create a new 'layer' of highlighting for this document.
    ///
    /// In the event that the new layer contains "combined injections" (injections where multiple
    /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and
    /// added to the returned vector.
    ///
    /// # Parameters
    ///
    /// * `source` - The bytes of the whole document.
    /// * `parent_name` - Forwarded to `injection_for_match` when resolving combined injections.
    /// * `highlighter` - Supplies the parser and the pool of spare query cursors.
    /// * `cancellation_flag` - When given, a non-zero value is reported to the parser through
    ///   its progress callback.
    /// * `injection_callback` - Maps an injected language name to its configuration.
    /// * `config`, `depth`, `ranges` - Configuration, nesting depth and included ranges of the
    ///   first layer to build.
    ///
    /// # Errors
    ///
    /// * `Error::InvalidLanguage` when the parser rejects a configuration's language.
    /// * `Error::Cancelled` when the parser returns no tree.
    ///
    /// A layer whose ranges are rejected by the parser is skipped without an error, so the
    /// returned vector may be empty.
    #[allow(clippy::too_many_arguments)]
    fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>(
        source: &'a [u8],
        parent_name: Option<&str>,
        highlighter: &mut Highlighter,
        cancellation_flag: Option<&'a AtomicUsize>,
        injection_callback: &mut F,
        mut config: &'a HighlightConfiguration,
        mut depth: usize,
        mut ranges: Vec<Range>,
    ) -> Result<Vec<Self>, Error> {
        let mut result = Vec::with_capacity(1);
        // Combined-injection layers discovered so far, processed in first-in-first-out order.
        let mut queue = Vec::new();
        loop {
            // If the parser does not accept these ranges, no layer is produced for them.
            if highlighter.parser.set_included_ranges(&ranges).is_ok() {
                highlighter
                    .parser
                    .set_language(&config.language)
                    .map_err(|_| Error::InvalidLanguage)?;

                let tree = highlighter
                    .parser
                    .parse_with_options(
                        // Hand the parser the rest of the document from byte `i`, or an
                        // empty slice once `i` is past the end.
                        &mut |i, _| {
                            if i < source.len() {
                                &source[i..]
                            } else {
                                &[]
                            }
                        },
                        None,
                        // The progress callback reports `true` once the cancellation flag
                        // is non-zero.
                        Some(ParseOptions::new().progress_callback(&mut |_| {
                            if let Some(cancellation_flag) = cancellation_flag {
                                cancellation_flag.load(Ordering::SeqCst) != 0
                            } else {
                                false
                            }
                        })),
                    )
                    // A missing tree is reported to the caller as a cancellation.
                    .ok_or(Error::Cancelled)?;
                // Reuse a spare cursor when one is available.
                let mut cursor = highlighter.cursors.pop().unwrap_or_default();

                // Process combined injections.
                if let Some(combined_injections_query) = &config.combined_injections_query {
                    // One accumulator per pattern: (language name, content nodes,
                    // include-children flag).
                    let mut injections_by_pattern_index =
                        vec![(None, Vec::new(), false); combined_injections_query.pattern_count()];
                    let mut matches =
                        cursor.matches(combined_injections_query, tree.root_node(), source);
                    while let Some(mat) = matches.next() {
                        let entry = &mut injections_by_pattern_index[mat.pattern_index];
                        let (language_name, content_node, include_children) = injection_for_match(
                            config,
                            parent_name,
                            combined_injections_query,
                            mat,
                            source,
                        );
                        // A match without a language name keeps the name found earlier.
                        if language_name.is_some() {
                            entry.0 = language_name;
                        }
                        if let Some(content_node) = content_node {
                            entry.1.push(content_node);
                        }
                        // The flag of the last match for the pattern wins.
                        entry.2 = include_children;
                    }
                    for (lang_name, content_nodes, includes_children) in injections_by_pattern_index
                    {
                        // Only patterns with both a language name and at least one content
                        // node can start a layer.
                        if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
                            if let Some(next_config) = (injection_callback)(lang_name) {
                                let ranges = Self::intersect_ranges(
                                    &ranges,
                                    &content_nodes,
                                    includes_children,
                                );
                                if !ranges.is_empty() {
                                    queue.push((next_config, depth + 1, ranges));
                                }
                            }
                        }
                    }
                }

                // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
                // prevents them from being moved. But both of these values are really just
                // pointers, so it's actually ok to move them.
                //
                // NOTE(review): the lifetime extension below is only sound while `_tree` and
                // `cursor` are stored in the same layer as `captures` and outlive its use.
                let tree_ref = unsafe { mem::transmute::<&Tree, &'static Tree>(&tree) };
                let cursor_ref = unsafe {
                    mem::transmute::<&mut QueryCursor, &'static mut QueryCursor>(&mut cursor)
                };
                // NOTE(review): relies on `_QueryCaptures` having the same layout as
                // `tree_sitter::QueryCaptures`.
                let captures = unsafe {
                    std::mem::transmute::<QueryCaptures<_, _>, _QueryCaptures<_, _>>(
                        cursor_ref.captures(&config.query, tree_ref.root_node(), source),
                    )
                }
                .peekable();

                result.push(HighlightIterLayer {
                    highlight_end_stack: Vec::new(),
                    // Every layer starts with one root scope spanning all offsets.
                    scope_stack: vec![LocalScope {
                        inherits: false,
                        range: 0..usize::MAX,
                        local_defs: Vec::new(),
                    }],
                    cursor,
                    depth,
                    _tree: tree,
                    captures,
                    config,
                    ranges,
                });
            }

            if queue.is_empty() {
                break;
            }

            // Continue with the oldest pending combined injection.
            let (next_config, next_depth, next_ranges) = queue.remove(0);
            config = next_config;
            depth = next_depth;
            ranges = next_ranges;
        }

        Ok(result)
    }
632
    // Compute the ranges that should be included when parsing an injection.
    // This takes into account three things:
    // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
    // * `nodes` - Every injection takes place within a set of nodes. The injection ranges are the
    //   ranges of those nodes.
    // * `includes_children` - For some injections, the content nodes' children should be excluded
    //   from the nested document, so that only the content nodes' *own* content is reparsed. For
    //   other injections, the content nodes' entire ranges should be reparsed, including the ranges
    //   of their children.
    //
    // Panics if `nodes` or `parent_ranges` is empty. Empty ranges are never added to the
    // result.
    fn intersect_ranges(
        parent_ranges: &[Range],
        nodes: &[Node],
        includes_children: bool,
    ) -> Vec<Range> {
        // One tree cursor, taken from the first node, is reused to walk every node's children.
        let mut cursor = nodes[0].walk();
        let mut result = Vec::new();
        // The parent ranges are consumed front to back and never revisited.
        let mut parent_range_iter = parent_ranges.iter();
        let mut parent_range = parent_range_iter
            .next()
            .expect("Layers should only be constructed with non-empty ranges vectors");
        for node in nodes {
            // Sentinel "excluded" range ending where the node starts.
            let mut preceding_range = Range {
                start_byte: 0,
                start_point: Point::new(0, 0),
                end_byte: node.start_byte(),
                end_point: node.start_position(),
            };
            // Sentinel "excluded" range starting where the node ends.
            let following_range = Range {
                start_byte: node.end_byte(),
                start_point: node.end_position(),
                end_byte: usize::MAX,
                end_point: Point::new(usize::MAX, usize::MAX),
            };

            // Walk the excluded ranges (the children, unless they are included, followed by
            // the trailing sentinel); the gap before each one is a candidate range.
            for excluded_range in node
                .children(&mut cursor)
                .filter_map(|child| {
                    if includes_children {
                        None
                    } else {
                        Some(child.range())
                    }
                })
                .chain(std::iter::once(following_range))
            {
                // The gap between the previous excluded range and this one.
                let mut range = Range {
                    start_byte: preceding_range.end_byte,
                    start_point: preceding_range.end_point,
                    end_byte: excluded_range.start_byte,
                    end_point: excluded_range.start_point,
                };
                preceding_range = excluded_range;

                // The gap ends before the current parent range starts: nothing to keep.
                if range.end_byte < parent_range.start_byte {
                    continue;
                }

                // Clip the gap against each parent range it reaches.
                while parent_range.start_byte <= range.end_byte {
                    if parent_range.end_byte > range.start_byte {
                        // Trim the part of the gap that lies before the parent range.
                        if range.start_byte < parent_range.start_byte {
                            range.start_byte = parent_range.start_byte;
                            range.start_point = parent_range.start_point;
                        }

                        if parent_range.end_byte < range.end_byte {
                            // The gap extends past this parent range: keep the overlapping
                            // piece and carry the rest over to the next parent range.
                            if range.start_byte < parent_range.end_byte {
                                result.push(Range {
                                    start_byte: range.start_byte,
                                    start_point: range.start_point,
                                    end_byte: parent_range.end_byte,
                                    end_point: parent_range.end_point,
                                });
                            }
                            range.start_byte = parent_range.end_byte;
                            range.start_point = parent_range.end_point;
                        } else {
                            // The rest of the gap fits inside this parent range.
                            if range.start_byte < range.end_byte {
                                result.push(range);
                            }
                            break;
                        }
                    }

                    // Move on to the next parent range; once they run out, nothing further
                    // can be included.
                    if let Some(next_range) = parent_range_iter.next() {
                        parent_range = next_range;
                    } else {
                        return result;
                    }
                }
            }
        }
        result
    }
726
727    // First, sort scope boundaries by their byte offset in the document. At a
728    // given position, emit scope endings before scope beginnings. Finally, emit
729    // scope boundaries from deeper layers first.
730    fn sort_key(&mut self) -> Option<(usize, bool, isize)> {
731        let depth = -(self.depth as isize);
732        let next_start = self
733            .captures
734            .peek()
735            .map(|(m, i)| m.captures[*i].node.start_byte());
736        let next_end = self.highlight_end_stack.last().copied();
737        match (next_start, next_end) {
738            (Some(start), Some(end)) => {
739                if start < end {
740                    Some((start, true, depth))
741                } else {
742                    Some((end, false, depth))
743                }
744            }
745            (Some(i), None) => Some((i, true, depth)),
746            (None, Some(j)) => Some((j, false, depth)),
747            _ => None,
748        }
749    }
750}
751
752impl<'a, F> HighlightIter<'a, F>
753where
754    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
755{
756    fn emit_event(
757        &mut self,
758        offset: usize,
759        event: Option<HighlightEvent>,
760    ) -> Option<Result<HighlightEvent, Error>> {
761        let result;
762        if self.byte_offset < offset {
763            result = Some(Ok(HighlightEvent::Source {
764                start: self.byte_offset,
765                end: offset,
766            }));
767            self.byte_offset = offset;
768            self.next_event = event;
769        } else {
770            result = event.map(Ok);
771        }
772        self.sort_layers();
773        result
774    }
775
776    fn sort_layers(&mut self) {
777        while !self.layers.is_empty() {
778            if let Some(sort_key) = self.layers[0].sort_key() {
779                let mut i = 0;
780                while i + 1 < self.layers.len() {
781                    if let Some(next_offset) = self.layers[i + 1].sort_key() {
782                        if next_offset < sort_key {
783                            i += 1;
784                            continue;
785                        }
786                    }
787                    break;
788                }
789                if i > 0 {
790                    self.layers[0..=i].rotate_left(1);
791                }
792                break;
793            }
794            let layer = self.layers.remove(0);
795            self.highlighter.cursors.push(layer.cursor);
796        }
797    }
798
799    fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) {
800        if let Some(sort_key) = layer.sort_key() {
801            let mut i = 1;
802            while i < self.layers.len() {
803                if let Some(sort_key_i) = self.layers[i].sort_key() {
804                    if sort_key_i > sort_key {
805                        self.layers.insert(i, layer);
806                        return;
807                    }
808                    i += 1;
809                } else {
810                    self.layers.remove(i);
811                }
812            }
813            self.layers.push(layer);
814        }
815    }
816}
817
818impl<'a, F> Iterator for HighlightIter<'a, F>
819where
820    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
821{
822    type Item = Result<HighlightEvent, Error>;
823
    /// Produces the next highlight event.
    ///
    /// A call returns, in order of precedence: the event parked in `next_event`,
    /// `Err(Error::Cancelled)` when the cancellation flag is observed to be
    /// non-zero, or the next `Source` / `HighlightStart` / `HighlightEnd` event
    /// derived from the front layer's captures. Errors from creating injection
    /// layers are forwarded. When no layers remain, the rest of the source is
    /// emitted as one final `Source` event, after which `None` is returned.
824    fn next(&mut self) -> Option<Self::Item> {
825        'main: loop {
826            // If we've already determined the next highlight boundary, just return it.
827            if let Some(e) = self.next_event.take() {
828                return Some(Ok(e));
829            }
830
831            // Periodically check for cancellation, returning `Cancelled` error if the
832            // cancellation flag was flipped.
833            if let Some(cancellation_flag) = self.cancellation_flag {
                // The atomic flag is only loaded once every
                // `CANCELLATION_CHECK_INTERVAL` iterations of this loop.
834                self.iter_count += 1;
835                if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
836                    self.iter_count = 0;
837                    if cancellation_flag.load(Ordering::Relaxed) != 0 {
838                        return Some(Err(Error::Cancelled));
839                    }
840                }
841            }
842
843            // If none of the layers have any more highlight boundaries, terminate.
844            if self.layers.is_empty() {
845                return if self.byte_offset < self.source.len() {
846                    let result = Some(Ok(HighlightEvent::Source {
847                        start: self.byte_offset,
848                        end: self.source.len(),
849                    }));
850                    self.byte_offset = self.source.len();
851                    result
852                } else {
853                    None
854                };
855            }
856
857            // Get the next capture from whichever layer has the earliest highlight boundary.
858            let range;
859            let layer = &mut self.layers[0];
860            if let Some((next_match, capture_index)) = layer.captures.peek() {
861                let next_capture = next_match.captures[*capture_index];
862                range = next_capture.node.byte_range();
863
864                // If any previous highlight ends before this node starts, then before
865                // processing this capture, emit the source code up until the end of the
866                // previous highlight, and an end event for that highlight.
867                if let Some(end_byte) = layer.highlight_end_stack.last().copied() {
868                    if end_byte <= range.start {
869                        layer.highlight_end_stack.pop();
870                        return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
871                    }
872                }
873            }
874            // If there are no more captures, then emit any remaining highlight end events.
875            // And if there are none of those, then just advance to the end of the document.
876            else {
877                if let Some(end_byte) = layer.highlight_end_stack.last().copied() {
878                    layer.highlight_end_stack.pop();
879                    return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
880                }
881                return self.emit_event(self.source.len(), None);
882            }
883
            // `peek` yielded a capture above (the other branch always returns), so
            // consuming it here cannot fail.
884            let (mut match_, capture_index) = layer.captures.next().unwrap();
885            let mut capture = match_.captures[capture_index];
886
887            // If this capture represents an injection, then process the injection.
888            if match_.pattern_index < layer.config.locals_pattern_index {
889                let (language_name, content_node, include_children) = injection_for_match(
890                    layer.config,
891                    Some(self.language_name),
892                    &layer.config.query,
893                    &match_,
894                    self.source,
895                );
896
897                // Explicitly remove this match so that none of its other captures will remain
898                // in the stream of captures.
899                match_.remove();
900
901                // If a language is found with the given name, then add a new language layer
902                // to the highlighted document.
903                if let (Some(language_name), Some(content_node)) = (language_name, content_node) {
904                    if let Some(config) = (self.injection_callback)(language_name) {
905                        let ranges = HighlightIterLayer::intersect_ranges(
906                            &self.layers[0].ranges,
907                            &[content_node],
908                            include_children,
909                        );
910                        if !ranges.is_empty() {
911                            match HighlightIterLayer::new(
912                                self.source,
913                                Some(self.language_name),
914                                self.highlighter,
915                                self.cancellation_flag,
916                                &mut self.injection_callback,
917                                config,
918                                self.layers[0].depth + 1,
919                                ranges,
920                            ) {
921                                Ok(layers) => {
922                                    for layer in layers {
923                                        self.insert_layer(layer);
924                                    }
925                                }
926                                Err(e) => return Some(Err(e)),
927                            }
928                        }
929                    }
930                }
931
932                self.sort_layers();
933                continue 'main;
934            }
935
936            // Remove from the local scope stack any local scopes that have already ended.
            // NOTE(review): the `unwrap` assumes the scope stack always keeps at least
            // one entry whose range end is not before any capture start (presumably a
            // root scope created with the layer) — confirm where the layer is built.
937            while range.start > layer.scope_stack.last().unwrap().range.end {
938                layer.scope_stack.pop();
939            }
940
941            // If this capture is for tracking local variables, then process the
942            // local variable info.
943            let mut reference_highlight = None;
944            let mut definition_highlight = None;
945            while match_.pattern_index < layer.config.highlights_pattern_index {
946                // If the node represents a local scope, push a new local scope onto
947                // the scope stack.
948                if Some(capture.index) == layer.config.local_scope_capture_index {
949                    definition_highlight = None;
950                    let mut scope = LocalScope {
951                        inherits: true,
952                        range: range.clone(),
953                        local_defs: Vec::new(),
954                    };
955                    for prop in layer.config.query.property_settings(match_.pattern_index) {
956                        if prop.key.as_ref() == "local.scope-inherits" {
957                            scope.inherits =
958                                prop.value.as_ref().is_none_or(|r| r.as_ref() == "true");
959                        }
960                    }
961                    layer.scope_stack.push(scope);
962                }
963                // If the node represents a definition, add a new definition to the
964                // local scope at the top of the scope stack.
965                else if Some(capture.index) == layer.config.local_def_capture_index {
966                    reference_highlight = None;
967                    definition_highlight = None;
968                    let scope = layer.scope_stack.last_mut().unwrap();
969
                    // Stays `0..0` when the match has no capture with
                    // `local_def_value_capture_index`.
970                    let mut value_range = 0..0;
971                    for capture in match_.captures {
972                        if Some(capture.index) == layer.config.local_def_value_capture_index {
973                            value_range = capture.node.byte_range();
974                        }
975                    }
976
977                    if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
978                        scope.local_defs.push(LocalDef {
979                            name,
980                            value_range,
981                            highlight: None,
982                        });
983                        definition_highlight =
984                            scope.local_defs.last_mut().map(|s| &mut s.highlight);
985                    }
986                }
987                // If the node represents a reference, then try to find the corresponding
988                // definition in the scope stack.
989                else if Some(capture.index) == layer.config.local_ref_capture_index
990                    && definition_highlight.is_none()
991                {
                    // No-op: this branch is only entered when `definition_highlight`
                    // is already `None`.
992                    definition_highlight = None;
993                    if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
994                        for scope in layer.scope_stack.iter().rev() {
995                            if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
996                                if def.name == name && range.start >= def.value_range.end {
997                                    Some(def.highlight)
998                                } else {
999                                    None
1000                                }
1001                            }) {
1002                                reference_highlight = highlight;
1003                                break;
1004                            }
1005                            if !scope.inherits {
1006                                break;
1007                            }
1008                        }
1009                    }
1010                }
1011
1012                // Continue processing any additional matches for the same node.
1013                if let Some((next_match, next_capture_index)) = layer.captures.peek() {
1014                    let next_capture = next_match.captures[*next_capture_index];
1015                    if next_capture.node == capture.node {
1016                        capture = next_capture;
1017                        match_ = layer.captures.next().unwrap().0;
1018                        continue;
1019                    }
1020                }
1021
1022                self.sort_layers();
1023                continue 'main;
1024            }
1025
1026            // Otherwise, this capture must represent a highlight.
1027            // If this exact range has already been highlighted by an earlier pattern, or by
1028            // a different layer, then skip over this one.
            // NOTE(review): the test below only skips when `layer.depth` is strictly
            // less than the depth recorded with the last highlight; equal depths are
            // not skipped here.
1029            if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
1030                if range.start == last_start && range.end == last_end && layer.depth < last_depth {
1031                    self.sort_layers();
1032                    continue 'main;
1033                }
1034            }
1035
1036            // Once a highlighting pattern is found for the current node, keep iterating over
1037            // any later highlighting patterns that also match this node and set the match to it.
1038            // Captures for a given node are ordered by pattern index, so these subsequent
1039            // captures are guaranteed to be for highlighting, not injections or
1040            // local variables.
1041            while let Some((next_match, next_capture_index)) = layer.captures.peek() {
1042                let next_capture = next_match.captures[*next_capture_index];
1043                if next_capture.node == capture.node {
1044                    let following_match = layer.captures.next().unwrap().0;
1045                    // If the current node was found to be a local variable, then ignore
1046                    // the following match if it's a highlighting pattern that is disabled
1047                    // for local variables.
1048                    if (definition_highlight.is_some() || reference_highlight.is_some())
1049                        && layer.config.non_local_variable_patterns[following_match.pattern_index]
1050                    {
1051                        continue;
1052                    }
                    // The later match replaces the current one, so the current match
                    // is removed before `match_` is reassigned.
1053                    match_.remove();
1054                    capture = next_capture;
1055                    match_ = following_match;
1056                } else {
1057                    break;
1058                }
1059            }
1060
1061            let current_highlight = layer.config.highlight_indices[capture.index as usize];
1062
1063            // If this node represents a local definition, then store the current
1064            // highlight value on the local scope entry representing this node.
1065            if let Some(definition_highlight) = definition_highlight {
1066                *definition_highlight = current_highlight;
1067            }
1068
1069            // Emit a scope start event and push the node's end position to the stack.
1070            if let Some(highlight) = reference_highlight.or(current_highlight) {
1071                self.last_highlight_range = Some((range.start, range.end, layer.depth));
1072                layer.highlight_end_stack.push(range.end);
1073                return self
1074                    .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
1075            }
1076
1077            self.sort_layers();
1078        }
1079    }
1080}
1081
1082impl Default for HtmlRenderer {
1083    fn default() -> Self {
1084        Self::new()
1085    }
1086}
1087
1088impl HtmlRenderer {
1089    #[must_use]
1090    pub fn new() -> Self {
1091        let mut result = Self {
1092            html: Vec::with_capacity(BUFFER_HTML_RESERVE_CAPACITY),
1093            line_offsets: Vec::with_capacity(BUFFER_LINES_RESERVE_CAPACITY),
1094            carriage_return_highlight: None,
1095            last_carriage_return: None,
1096        };
1097        result.line_offsets.push(0);
1098        result
1099    }
1100
1101    pub fn set_carriage_return_highlight(&mut self, highlight: Option<Highlight>) {
1102        self.carriage_return_highlight = highlight;
1103    }
1104
1105    pub fn reset(&mut self) {
1106        shrink_and_clear(&mut self.html, BUFFER_HTML_RESERVE_CAPACITY);
1107        shrink_and_clear(&mut self.line_offsets, BUFFER_LINES_RESERVE_CAPACITY);
1108        self.line_offsets.push(0);
1109    }
1110
1111    pub fn render<F>(
1112        &mut self,
1113        highlighter: impl Iterator<Item = Result<HighlightEvent, Error>>,
1114        source: &[u8],
1115        attribute_callback: &F,
1116    ) -> Result<(), Error>
1117    where
1118        F: Fn(Highlight, &mut Vec<u8>),
1119    {
1120        let mut highlights = Vec::new();
1121        for event in highlighter {
1122            match event {
1123                Ok(HighlightEvent::HighlightStart(s)) => {
1124                    highlights.push(s);
1125                    self.start_highlight(s, &attribute_callback);
1126                }
1127                Ok(HighlightEvent::HighlightEnd) => {
1128                    highlights.pop();
1129                    self.end_highlight();
1130                }
1131                Ok(HighlightEvent::Source { start, end }) => {
1132                    self.add_text(&source[start..end], &highlights, &attribute_callback);
1133                }
1134                Err(a) => return Err(a),
1135            }
1136        }
1137        if let Some(offset) = self.last_carriage_return.take() {
1138            self.add_carriage_return(offset, attribute_callback);
1139        }
1140        if self.html.last() != Some(&b'\n') {
1141            self.html.push(b'\n');
1142        }
1143        if self.line_offsets.last() == Some(&(self.html.len() as u32)) {
1144            self.line_offsets.pop();
1145        }
1146        Ok(())
1147    }
1148
1149    pub fn lines(&self) -> impl Iterator<Item = &str> {
1150        self.line_offsets
1151            .iter()
1152            .enumerate()
1153            .map(move |(i, line_start)| {
1154                let line_start = *line_start as usize;
1155                let line_end = if i + 1 == self.line_offsets.len() {
1156                    self.html.len()
1157                } else {
1158                    self.line_offsets[i + 1] as usize
1159                };
1160                str::from_utf8(&self.html[line_start..line_end]).unwrap()
1161            })
1162    }
1163
1164    fn add_carriage_return<F>(&mut self, offset: usize, attribute_callback: &F)
1165    where
1166        F: Fn(Highlight, &mut Vec<u8>),
1167    {
1168        if let Some(highlight) = self.carriage_return_highlight {
1169            // If a CR is the last character in a `HighlightEvent::Source`
1170            // region, then we don't know until the next `Source` event or EOF
1171            // whether it is part of CRLF or on its own. To avoid unbounded
1172            // lookahead, save the offset of the CR and insert there now that we
1173            // know.
1174            let rest = self.html.split_off(offset);
1175            self.html.extend(b"<span ");
1176            (attribute_callback)(highlight, &mut self.html);
1177            self.html.extend(b"></span>");
1178            self.html.extend(rest);
1179        }
1180    }
1181
1182    fn start_highlight<F>(&mut self, h: Highlight, attribute_callback: &F)
1183    where
1184        F: Fn(Highlight, &mut Vec<u8>),
1185    {
1186        self.html.extend(b"<span ");
1187        (attribute_callback)(h, &mut self.html);
1188        self.html.extend(b">");
1189    }
1190
1191    fn end_highlight(&mut self) {
1192        self.html.extend(b"</span>");
1193    }
1194
1195    fn add_text<F>(&mut self, src: &[u8], highlights: &[Highlight], attribute_callback: &F)
1196    where
1197        F: Fn(Highlight, &mut Vec<u8>),
1198    {
1199        pub const fn html_escape(c: u8) -> Option<&'static [u8]> {
1200            match c as char {
1201                '>' => Some(b"&gt;"),
1202                '<' => Some(b"&lt;"),
1203                '&' => Some(b"&amp;"),
1204                '\'' => Some(b"&#39;"),
1205                '"' => Some(b"&quot;"),
1206                _ => None,
1207            }
1208        }
1209
1210        for c in LossyUtf8::new(src).flat_map(|p| p.bytes()) {
1211            // Don't render carriage return characters, but allow lone carriage returns (not
1212            // followed by line feeds) to be styled via the attribute callback.
1213            if c == b'\r' {
1214                self.last_carriage_return = Some(self.html.len());
1215                continue;
1216            }
1217            if let Some(offset) = self.last_carriage_return.take() {
1218                if c != b'\n' {
1219                    self.add_carriage_return(offset, attribute_callback);
1220                }
1221            }
1222
1223            // At line boundaries, close and re-open all of the open tags.
1224            if c == b'\n' {
1225                highlights.iter().for_each(|_| self.end_highlight());
1226                self.html.push(c);
1227                self.line_offsets.push(self.html.len() as u32);
1228                highlights
1229                    .iter()
1230                    .for_each(|scope| self.start_highlight(*scope, attribute_callback));
1231            } else if let Some(escape) = html_escape(c) {
1232                self.html.extend_from_slice(escape);
1233            } else {
1234                self.html.push(c);
1235            }
1236        }
1237    }
1238}
1239
1240fn injection_for_match<'a>(
1241    config: &'a HighlightConfiguration,
1242    parent_name: Option<&'a str>,
1243    query: &'a Query,
1244    query_match: &QueryMatch<'a, 'a>,
1245    source: &'a [u8],
1246) -> (Option<&'a str>, Option<Node<'a>>, bool) {
1247    let content_capture_index = config.injection_content_capture_index;
1248    let language_capture_index = config.injection_language_capture_index;
1249
1250    let mut language_name = None;
1251    let mut content_node = None;
1252
1253    for capture in query_match.captures {
1254        let index = Some(capture.index);
1255        if index == language_capture_index {
1256            language_name = capture.node.utf8_text(source).ok();
1257        } else if index == content_capture_index {
1258            content_node = Some(capture.node);
1259        }
1260    }
1261
1262    let mut include_children = false;
1263    for prop in query.property_settings(query_match.pattern_index) {
1264        match prop.key.as_ref() {
1265            // In addition to specifying the language name via the text of a
1266            // captured node, it can also be hard-coded via a `#set!` predicate
1267            // that sets the injection.language key.
1268            "injection.language" => {
1269                if language_name.is_none() {
1270                    language_name = prop.value.as_ref().map(std::convert::AsRef::as_ref);
1271                }
1272            }
1273
1274            // Setting the `injection.self` key can be used to specify that the
1275            // language name should be the same as the language of the current
1276            // layer.
1277            "injection.self" => {
1278                if language_name.is_none() {
1279                    language_name = Some(config.language_name.as_str());
1280                }
1281            }
1282
1283            // Setting the `injection.parent` key can be used to specify that
1284            // the language name should be the same as the language of the
1285            // parent layer
1286            "injection.parent" => {
1287                if language_name.is_none() {
1288                    language_name = parent_name;
1289                }
1290            }
1291
1292            // By default, injections do not include the *children* of an
1293            // `injection.content` node - only the ranges that belong to the
1294            // node itself. This can be changed using a `#set!` predicate that
1295            // sets the `injection.include-children` key.
1296            "injection.include-children" => include_children = true,
1297            _ => {}
1298        }
1299    }
1300
1301    (language_name, content_node, include_children)
1302}
1303
/// Empties `vec` and releases any allocation beyond `capacity` elements.
///
/// The vector is shrunk whenever its *capacity* exceeds the bound, not only
/// when its length did, so an over-allocated but short vector gives its
/// memory back too. A vector whose capacity is already at or below
/// `capacity` keeps its allocation untouched.
fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) {
    // Clearing first means no live elements have to survive the reallocation.
    vec.clear();
    vec.shrink_to(capacity);
}