tree_sitter_highlight_patched_arborium/
lib.rs

1#![doc = include_str!("../README.md")]
2// Allow warnings in vendored tree-sitter-highlight code
3#![allow(clippy::all)]
4
5use tree_sitter_patched_arborium as tree_sitter;
6
7pub mod c_lib;
8use core::slice;
9use std::{
10    collections::HashSet,
11    iter,
12    marker::PhantomData,
13    mem::{self, MaybeUninit},
14    ops, str,
15    sync::{
16        atomic::{AtomicUsize, Ordering},
17        LazyLock,
18    },
19};
20
21pub use c_lib as c;
22use streaming_iterator::StreamingIterator;
23use thiserror::Error;
24use tree_sitter::{
25    ffi, Language, LossyUtf8, Node, ParseOptions, Parser, Point, Query, QueryCapture,
26    QueryCaptures, QueryCursor, QueryError, QueryMatch, Range, TextProvider, Tree,
27};
28
/// Number of `HighlightIter::next` loop iterations between two reads of the cancellation flag.
const CANCELLATION_CHECK_INTERVAL: usize = 100;
// NOTE(review): the two capacities below are not used in the portion of the file reviewed;
// presumably they pre-size the `HtmlRenderer` buffers — confirm against the renderer impl.
const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024;
const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000;
32
/// The built-in set of "standard" capture names.
///
/// `HighlightConfiguration::nonconformant_capture_names` falls back to this set when the
/// caller supplies an empty one. The set is built lazily on first access.
static STANDARD_CAPTURE_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "attribute",
        "boolean",
        "carriage-return",
        "comment",
        "comment.documentation",
        "constant",
        "constant.builtin",
        "constructor",
        "constructor.builtin",
        "embedded",
        "error",
        "escape",
        "function",
        "function.builtin",
        "keyword",
        "markup",
        "markup.bold",
        "markup.heading",
        "markup.italic",
        "markup.link",
        "markup.link.url",
        "markup.list",
        "markup.list.checked",
        "markup.list.numbered",
        "markup.list.unchecked",
        "markup.list.unnumbered",
        "markup.quote",
        "markup.raw",
        "markup.raw.block",
        "markup.raw.inline",
        "markup.strikethrough",
        "module",
        "number",
        "operator",
        "property",
        "property.builtin",
        "punctuation",
        "punctuation.bracket",
        "punctuation.delimiter",
        "punctuation.special",
        "string",
        "string.escape",
        "string.regexp",
        "string.special",
        "string.special.symbol",
        "tag",
        "type",
        "type.builtin",
        "variable",
        "variable.builtin",
        "variable.member",
        "variable.parameter",
    ])
});
91
/// Indicates which highlight should be applied to a region of source code.
///
/// The wrapped value is the index of the matched entry in the list of recognized names
/// that was passed to `HighlightConfiguration::configure`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Highlight(pub usize);
95
/// Represents the reason why syntax highlighting failed.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum Error {
    /// Highlighting stopped early: the cancellation flag was observed to be non-zero, or
    /// the parser did not return a tree.
    #[error("Cancelled")]
    Cancelled,
    /// The parser refused the language of a `HighlightConfiguration`.
    #[error("Invalid language")]
    InvalidLanguage,
    /// Catch-all failure.
    // NOTE(review): not produced in the portion of the file reviewed — confirm where it is used.
    #[error("Unknown error")]
    Unknown,
}
106
/// Represents a single step in rendering a syntax-highlighted document.
#[derive(Copy, Clone, Debug)]
pub enum HighlightEvent {
    /// A run of source text, given as byte offsets `start..end` into the highlighted source.
    Source { start: usize, end: usize },
    /// Presumably marks the point where the given highlight begins to apply — TODO confirm
    /// against the iterator body, which is not part of the reviewed section.
    HighlightStart(Highlight),
    /// Presumably closes the most recently started highlight — TODO confirm.
    HighlightEnd,
}
114
/// Contains the data needed to highlight code written in a particular language.
///
/// This struct is immutable and can be shared between threads.
pub struct HighlightConfiguration {
    /// Grammar handed to the parser when a layer for this configuration is created.
    pub language: Language,
    /// Name supplied by the caller of `new`.
    pub language_name: String,
    /// Single query compiled from the injection, locals and highlights sources, in that order.
    pub query: Query,
    /// Separate query compiled from the injection source alone; only present when at least
    /// one injection pattern sets the `injection.combined` property.
    combined_injections_query: Option<Query>,
    /// Number of patterns of `query` that start inside the injection source, i.e. the index
    /// of the first locals pattern.
    locals_pattern_index: usize,
    /// Number of patterns of `query` that start before the highlights source, i.e. the index
    /// of the first highlights pattern.
    highlights_pattern_index: usize,
    /// One entry per capture name of `query`; all `None` until `configure` is called.
    highlight_indices: Vec<Option<Highlight>>,
    /// One entry per pattern of `query`: `true` when the pattern carries a negated `local`
    /// property predicate.
    non_local_variable_patterns: Vec<bool>,
    /// Capture index of `injection.content`, if the query declares it.
    injection_content_capture_index: Option<u32>,
    /// Capture index of `injection.language`, if the query declares it.
    injection_language_capture_index: Option<u32>,
    /// Capture index of `local.scope`, if the query declares it.
    local_scope_capture_index: Option<u32>,
    /// Capture index of `local.definition`, if the query declares it.
    local_def_capture_index: Option<u32>,
    /// Capture index of `local.definition-value`, if the query declares it.
    local_def_value_capture_index: Option<u32>,
    /// Capture index of `local.reference`, if the query declares it.
    local_ref_capture_index: Option<u32>,
}
134
/// Performs syntax highlighting, recognizing a given list of highlight names.
///
/// For the best performance `Highlighter` values should be reused between
/// syntax highlighting calls. A separate highlighter is needed for each thread that
/// is performing highlighting.
pub struct Highlighter {
    /// Parser reused for every layer; its language and included ranges are reset per layer.
    pub parser: Parser,
    /// Spare query cursors: one is popped when a layer is built and pushed back when
    /// `sort_layers` discards an exhausted layer.
    cursors: Vec<QueryCursor>,
}
144
/// Converts a general-purpose syntax highlighting iterator into a sequence of lines of HTML.
// NOTE(review): the implementation of this type is outside the reviewed section; the field
// notes below are read off names and existing comments only.
pub struct HtmlRenderer {
    /// Rendered HTML bytes.
    pub html: Vec<u8>,
    /// Presumably the offset in `html` at which each rendered line starts — TODO confirm.
    pub line_offsets: Vec<u32>,
    /// Presumably the highlight to apply to carriage returns, if one is configured — TODO confirm.
    carriage_return_highlight: Option<Highlight>,
    // The offset in `self.html` of the last carriage return.
    last_carriage_return: Option<usize>,
}
153
/// A local definition recorded inside a `LocalScope`.
// NOTE(review): the code that fills and reads these fields is outside the reviewed section;
// presumably `name` is the defined identifier's text, `value_range` the byte range of its
// value, and `highlight` the highlight assigned to the definition — confirm.
#[derive(Debug)]
struct LocalDef<'a> {
    name: &'a str,
    value_range: ops::Range<usize>,
    highlight: Option<Highlight>,
}
160
/// One entry of a layer's scope stack.
///
/// Every layer starts with a single scope that has `inherits: false`, covers
/// `0..usize::MAX` and holds no definitions.
#[derive(Debug)]
struct LocalScope<'a> {
    // Presumably controls whether lookups continue into the enclosing scope — TODO confirm.
    inherits: bool,
    // Byte range covered by the scope.
    range: ops::Range<usize>,
    // Definitions recorded in this scope.
    local_defs: Vec<LocalDef<'a>>,
}
167
/// Iterator state behind `Highlighter::highlight`: yields `HighlightEvent`s for one source
/// buffer across all of its layers.
struct HighlightIter<'a, F>
where
    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
{
    // The bytes being highlighted.
    source: &'a [u8],
    // `language_name` of the root configuration.
    language_name: &'a str,
    // End offset of the last `Source` event produced so far.
    byte_offset: usize,
    // Owner of the parser and of the pool that exhausted layers return their cursors to.
    highlighter: &'a mut Highlighter,
    // Caller-supplied callback taking a `&str` and returning an optional configuration;
    // presumably the argument is an injected language's name — TODO confirm.
    injection_callback: F,
    // When present, a non-zero value requests cancellation.
    cancellation_flag: Option<&'a AtomicUsize>,
    // Active layers; `sort_layers` keeps the layer with the smallest sort key at index 0.
    layers: Vec<HighlightIterLayer<'a>>,
    // Iterations since the cancellation flag was last read.
    iter_count: usize,
    // Event held back by `emit_event` to be returned on the following `next` call.
    next_event: Option<HighlightEvent>,
    // NOTE(review): only initialised (to `None`) in the reviewed section — meaning of the
    // tuple must be confirmed against the iterator body.
    last_highlight_range: Option<(usize, usize, usize)>,
}
183
/// One parsed document (the root or an injection) together with its capture stream.
struct HighlightIterLayer<'a> {
    // Owns the syntax tree that `captures` refers to; never read directly.
    _tree: Tree,
    // Cursor backing `captures`; handed back to the highlighter when the layer is exhausted.
    cursor: QueryCursor,
    // Peekable capture stream over `config.query`. It was created from references to
    // `_tree` and `cursor` whose lifetimes were erased with `transmute`.
    captures: iter::Peekable<_QueryCaptures<'a, 'a, &'a [u8], &'a [u8]>>,
    // Configuration this layer was parsed with.
    config: &'a HighlightConfiguration,
    // Byte offsets used by `sort_key` as pending highlight ends; the last element is the
    // next one considered.
    highlight_end_stack: Vec<usize>,
    // Stack of local scopes; starts with one scope covering `0..usize::MAX`.
    scope_stack: Vec<LocalScope<'a>>,
    // Included ranges the layer was parsed with.
    ranges: Vec<Range>,
    // Nesting depth: 0 for the root layer, parent depth + 1 for combined injections.
    depth: usize,
}
194
/// Local stand-in for `tree_sitter::QueryCaptures`.
///
/// `HighlightIterLayer::new` transmutes a `QueryCaptures` into this type so that this crate
/// can drive the capture iteration itself; the field layout therefore presumably has to
/// mirror the upstream type exactly — verify whenever the `tree_sitter` dependency changes.
pub struct _QueryCaptures<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> {
    // Raw cursor handed to `ts_query_cursor_next_capture`.
    ptr: *mut ffi::TSQueryCursor,
    // Query whose text predicates are evaluated for every match.
    query: &'query Query,
    // Source of node text for text-predicate evaluation.
    text_provider: T,
    // Scratch buffers passed to `satisfies_text_predicates`.
    buffer1: Vec<u8>,
    buffer2: Vec<u8>,
    // Not read in the reviewed section; presumably kept only to match the upstream layout.
    _current_match: Option<(QueryMatch<'query, 'tree>, usize)>,
    // Not read in the reviewed section; presumably kept only to match the upstream layout.
    _options: Option<*mut ffi::TSQueryCursorOptions>,
    _phantom: PhantomData<(&'tree (), I)>,
}
205
/// Local stand-in for `tree_sitter::QueryMatch`.
///
/// Values of this type are transmuted into `QueryMatch` by the `_QueryCaptures` iterator,
/// so the field layout presumably has to mirror the upstream type — verify on dependency
/// updates.
struct _QueryMatch<'cursor, 'tree> {
    // Index of the matched pattern within the query.
    pub _pattern_index: usize,
    // Captures of the match, viewed in place from the raw `TSQueryMatch`.
    pub _captures: &'cursor [QueryCapture<'tree>],
    // Match id copied from the raw `TSQueryMatch`.
    _id: u32,
    // Cursor that produced the match.
    _cursor: *mut ffi::TSQueryCursor,
}
212
213impl<'tree> _QueryMatch<'_, 'tree> {
214    fn new(m: &ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self {
215        _QueryMatch {
216            _cursor: cursor,
217            _id: m.id,
218            _pattern_index: m.pattern_index as usize,
219            _captures: (m.capture_count > 0)
220                .then(|| unsafe {
221                    slice::from_raw_parts(
222                        m.captures.cast::<QueryCapture<'tree>>(),
223                        m.capture_count as usize,
224                    )
225                })
226                .unwrap_or_default(),
227        }
228    }
229}
230
231impl<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> Iterator
232    for _QueryCaptures<'query, 'tree, T, I>
233{
234    type Item = (QueryMatch<'query, 'tree>, usize);
235
236    fn next(&mut self) -> Option<Self::Item> {
237        unsafe {
238            loop {
239                let mut capture_index = 0u32;
240                let mut m = MaybeUninit::<ffi::TSQueryMatch>::uninit();
241                if ffi::ts_query_cursor_next_capture(
242                    self.ptr,
243                    m.as_mut_ptr(),
244                    core::ptr::addr_of_mut!(capture_index),
245                ) {
246                    let result = std::mem::transmute::<_QueryMatch, QueryMatch>(_QueryMatch::new(
247                        &m.assume_init(),
248                        self.ptr,
249                    ));
250                    if result.satisfies_text_predicates(
251                        self.query,
252                        &mut self.buffer1,
253                        &mut self.buffer2,
254                        &mut self.text_provider,
255                    ) {
256                        return Some((result, capture_index as usize));
257                    }
258                    result.remove();
259                } else {
260                    return None;
261                }
262            }
263        }
264    }
265}
266
/// `Highlighter::default()` is the same as `Highlighter::new()`.
impl Default for Highlighter {
    fn default() -> Self {
        Self::new()
    }
}
272
273impl Highlighter {
274    #[must_use]
275    pub fn new() -> Self {
276        Self {
277            parser: Parser::new(),
278            cursors: Vec::new(),
279        }
280    }
281
282    pub fn parser(&mut self) -> &mut Parser {
283        &mut self.parser
284    }
285
286    /// Iterate over the highlighted regions for a given slice of source code.
287    pub fn highlight<'a>(
288        &'a mut self,
289        config: &'a HighlightConfiguration,
290        source: &'a [u8],
291        cancellation_flag: Option<&'a AtomicUsize>,
292        mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
293    ) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
294        let layers = HighlightIterLayer::new(
295            source,
296            None,
297            self,
298            cancellation_flag,
299            &mut injection_callback,
300            config,
301            0,
302            vec![Range {
303                start_byte: 0,
304                end_byte: usize::MAX,
305                start_point: Point::new(0, 0),
306                end_point: Point::new(usize::MAX, usize::MAX),
307            }],
308        )?;
309        assert_ne!(layers.len(), 0);
310        let mut result = HighlightIter {
311            source,
312            language_name: &config.language_name,
313            byte_offset: 0,
314            injection_callback,
315            cancellation_flag,
316            highlighter: self,
317            iter_count: 0,
318            layers,
319            next_event: None,
320            last_highlight_range: None,
321        };
322        result.sort_layers();
323        Ok(result)
324    }
325}
326
327impl HighlightConfiguration {
328    /// Creates a `HighlightConfiguration` for a given `Language` and set of highlighting
329    /// queries.
330    ///
331    /// # Parameters
332    ///
333    /// * `language`  - The Tree-sitter `Language` that should be used for parsing.
334    /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
335    ///   should be non-empty, otherwise no syntax highlights will be added.
336    /// * `injections_query` -  A string containing tree patterns for injecting other languages into
337    ///   the document. This can be empty if no injections are desired.
338    /// * `locals_query` - A string containing tree patterns for tracking local variable definitions
339    ///   and references. This can be empty if local variable tracking is not needed.
340    ///
341    /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
342    pub fn new(
343        language: Language,
344        name: impl Into<String>,
345        highlights_query: &str,
346        injection_query: &str,
347        locals_query: &str,
348    ) -> Result<Self, QueryError> {
349        // Concatenate the query strings, keeping track of the start offset of each section.
350        let mut query_source = String::new();
351        query_source.push_str(injection_query);
352        let locals_query_offset = query_source.len();
353        query_source.push_str(locals_query);
354        let highlights_query_offset = query_source.len();
355        query_source.push_str(highlights_query);
356
357        // Construct a single query by concatenating the three query strings, but record the
358        // range of pattern indices that belong to each individual string.
359        let mut query = Query::new(&language, &query_source)?;
360        let mut locals_pattern_index = 0;
361        let mut highlights_pattern_index = 0;
362        for i in 0..(query.pattern_count()) {
363            let pattern_offset = query.start_byte_for_pattern(i);
364            if pattern_offset < highlights_query_offset {
365                if pattern_offset < highlights_query_offset {
366                    highlights_pattern_index += 1;
367                }
368                if pattern_offset < locals_query_offset {
369                    locals_pattern_index += 1;
370                }
371            }
372        }
373
374        // Construct a separate query just for dealing with the 'combined injections'.
375        // Disable the combined injection patterns in the main query.
376        let mut combined_injections_query = Query::new(&language, injection_query)?;
377        let mut has_combined_queries = false;
378        for pattern_index in 0..locals_pattern_index {
379            let settings = query.property_settings(pattern_index);
380            if settings.iter().any(|s| &*s.key == "injection.combined") {
381                has_combined_queries = true;
382                query.disable_pattern(pattern_index);
383            } else {
384                combined_injections_query.disable_pattern(pattern_index);
385            }
386        }
387        let combined_injections_query = if has_combined_queries {
388            Some(combined_injections_query)
389        } else {
390            None
391        };
392
393        // Find all of the highlighting patterns that are disabled for nodes that
394        // have been identified as local variables.
395        let non_local_variable_patterns = (0..query.pattern_count())
396            .map(|i| {
397                query
398                    .property_predicates(i)
399                    .iter()
400                    .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")
401            })
402            .collect();
403
404        // Store the numeric ids for all of the special captures.
405        let mut injection_content_capture_index = None;
406        let mut injection_language_capture_index = None;
407        let mut local_def_capture_index = None;
408        let mut local_def_value_capture_index = None;
409        let mut local_ref_capture_index = None;
410        let mut local_scope_capture_index = None;
411        for (i, name) in query.capture_names().iter().enumerate() {
412            let i = Some(i as u32);
413            match *name {
414                "injection.content" => injection_content_capture_index = i,
415                "injection.language" => injection_language_capture_index = i,
416                "local.definition" => local_def_capture_index = i,
417                "local.definition-value" => local_def_value_capture_index = i,
418                "local.reference" => local_ref_capture_index = i,
419                "local.scope" => local_scope_capture_index = i,
420                _ => {}
421            }
422        }
423
424        let highlight_indices = vec![None; query.capture_names().len()];
425        Ok(Self {
426            language,
427            language_name: name.into(),
428            query,
429            combined_injections_query,
430            locals_pattern_index,
431            highlights_pattern_index,
432            highlight_indices,
433            non_local_variable_patterns,
434            injection_content_capture_index,
435            injection_language_capture_index,
436            local_def_capture_index,
437            local_def_value_capture_index,
438            local_ref_capture_index,
439            local_scope_capture_index,
440        })
441    }
442
    /// Get a slice containing all of the highlight names used in the configuration.
    ///
    /// These are the capture names of the combined query, so the slice also includes the
    /// injection and locals captures, not only those from the highlights source.
    #[must_use]
    pub const fn names(&self) -> &[&str] {
        self.query.capture_names()
    }
448
449    /// Set the list of recognized highlight names.
450    ///
451    /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
452    /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of
453    /// these queries can choose to recognize highlights with different levels of specificity.
454    /// For example, the string `function.builtin` will match against `function.method.builtin`
455    /// and `function.builtin.constructor`, but will not match `function.method`.
456    ///
457    /// When highlighting, results are returned as `Highlight` values, which contain the index
458    /// of the matched highlight this list of highlight names.
459    pub fn configure(&mut self, recognized_names: &[impl AsRef<str>]) {
460        let mut capture_parts = Vec::new();
461        self.highlight_indices.clear();
462        self.highlight_indices
463            .extend(self.query.capture_names().iter().map(move |capture_name| {
464                capture_parts.clear();
465                capture_parts.extend(capture_name.split('.'));
466
467                let mut best_index = None;
468                let mut best_match_len = 0;
469                for (i, recognized_name) in recognized_names.iter().enumerate() {
470                    let mut len = 0;
471                    let mut matches = true;
472                    for part in recognized_name.as_ref().split('.') {
473                        len += 1;
474                        if !capture_parts.contains(&part) {
475                            matches = false;
476                            break;
477                        }
478                    }
479                    if matches && len > best_match_len {
480                        best_index = Some(i);
481                        best_match_len = len;
482                    }
483                }
484                best_index.map(Highlight)
485            }));
486    }
487
488    // Return the list of this configuration's capture names that are neither present in the
489    // list of predefined 'canonical' names nor start with an underscore (denoting 'private'
490    // captures used as part of capture internals).
491    #[must_use]
492    pub fn nonconformant_capture_names(&self, capture_names: &HashSet<&str>) -> Vec<&str> {
493        let capture_names = if capture_names.is_empty() {
494            &*STANDARD_CAPTURE_NAMES
495        } else {
496            capture_names
497        };
498        self.names()
499            .iter()
500            .filter(|&n| !(n.starts_with('_') || capture_names.contains(n)))
501            .copied()
502            .collect()
503    }
504}
505
506impl<'a> HighlightIterLayer<'a> {
    /// Create a new 'layer' of highlighting for this document.
    ///
    /// In the event that the new layer contains "combined injections" (injections where multiple
    /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and
    /// added to the returned vector.
    ///
    /// The requested layer is parsed first; every combined injection it (or a later queued
    /// layer) yields is queued with `depth + 1` and parsed in FIFO order. A layer whose
    /// ranges are rejected by the parser is skipped without an error.
    ///
    /// # Errors
    ///
    /// * `Error::InvalidLanguage` - the parser refused a configuration's language.
    /// * `Error::Cancelled` - the parser returned no tree.
    #[allow(clippy::too_many_arguments)]
    fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>(
        source: &'a [u8],
        parent_name: Option<&str>,
        highlighter: &mut Highlighter,
        cancellation_flag: Option<&'a AtomicUsize>,
        injection_callback: &mut F,
        mut config: &'a HighlightConfiguration,
        mut depth: usize,
        mut ranges: Vec<Range>,
    ) -> Result<Vec<Self>, Error> {
        let mut result = Vec::with_capacity(1);
        // Pending (config, depth, ranges) triples for combined injections still to parse.
        let mut queue = Vec::new();
        loop {
            // No `else`: a layer whose ranges the parser rejects is silently dropped.
            if highlighter.parser.set_included_ranges(&ranges).is_ok() {
                highlighter
                    .parser
                    .set_language(&config.language)
                    .map_err(|_| Error::InvalidLanguage)?;

                let tree = highlighter
                    .parser
                    .parse_with_options(
                        // Hand the parser everything from byte `i` to the end of the source,
                        // or an empty chunk once `i` is past the end.
                        &mut |i, _| {
                            if i < source.len() {
                                &source[i..]
                            } else {
                                &[]
                            }
                        },
                        None,
                        // The progress callback returns `true` once the cancellation flag
                        // is non-zero.
                        Some(ParseOptions::new().progress_callback(&mut |_| {
                            if let Some(cancellation_flag) = cancellation_flag {
                                cancellation_flag.load(Ordering::SeqCst) != 0
                            } else {
                                false
                            }
                        })),
                    )
                    .ok_or(Error::Cancelled)?;
                // Reuse a pooled cursor when one is available.
                let mut cursor = highlighter.cursors.pop().unwrap_or_default();

                // Process combined injections.
                if let Some(combined_injections_query) = &config.combined_injections_query {
                    // Per pattern: (language name, content nodes, include-children flag).
                    let mut injections_by_pattern_index =
                        vec![(None, Vec::new(), false); combined_injections_query.pattern_count()];
                    let mut matches =
                        cursor.matches(combined_injections_query, tree.root_node(), source);
                    while let Some(mat) = matches.next() {
                        let entry = &mut injections_by_pattern_index[mat.pattern_index];
                        let (language_name, content_node, include_children) = injection_for_match(
                            config,
                            parent_name,
                            combined_injections_query,
                            mat,
                            source,
                        );
                        // A later match only replaces the language name when it has one.
                        if language_name.is_some() {
                            entry.0 = language_name;
                        }
                        if let Some(content_node) = content_node {
                            entry.1.push(content_node);
                        }
                        // The flag of the last match for this pattern wins.
                        entry.2 = include_children;
                    }
                    for (lang_name, content_nodes, includes_children) in injections_by_pattern_index
                    {
                        // Only patterns with both a language name and content nodes inject.
                        if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
                            if let Some(next_config) = (injection_callback)(lang_name) {
                                let ranges = Self::intersect_ranges(
                                    &ranges,
                                    &content_nodes,
                                    includes_children,
                                );
                                if !ranges.is_empty() {
                                    queue.push((next_config, depth + 1, ranges));
                                }
                            }
                        }
                    }
                }

                // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
                // prevents them from being moved. But both of these values are really just
                // pointers, so it's actually ok to move them.
                let tree_ref = unsafe { mem::transmute::<&Tree, &'static Tree>(&tree) };
                let cursor_ref = unsafe {
                    mem::transmute::<&mut QueryCursor, &'static mut QueryCursor>(&mut cursor)
                };
                // Reinterpret the upstream iterator as the local `_QueryCaptures` so this
                // crate's own `Iterator` impl drives it.
                let captures = unsafe {
                    std::mem::transmute::<QueryCaptures<_, _>, _QueryCaptures<_, _>>(
                        cursor_ref.captures(&config.query, tree_ref.root_node(), source),
                    )
                }
                .peekable();

                result.push(HighlightIterLayer {
                    highlight_end_stack: Vec::new(),
                    // Every layer starts with one non-inheriting scope covering everything.
                    scope_stack: vec![LocalScope {
                        inherits: false,
                        range: 0..usize::MAX,
                        local_defs: Vec::new(),
                    }],
                    cursor,
                    depth,
                    _tree: tree,
                    captures,
                    config,
                    ranges,
                });
            }

            if queue.is_empty() {
                break;
            }

            // Take the oldest queued injection as the next layer to build.
            let (next_config, next_depth, next_ranges) = queue.remove(0);
            config = next_config;
            depth = next_depth;
            ranges = next_ranges;
        }

        Ok(result)
    }
636
    // Compute the ranges that should be included when parsing an injection.
    // This takes into account three things:
    // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
    // * `nodes` - Every injection takes place within a set of nodes. The injection ranges are the
    //   ranges of those nodes.
    // * `includes_children` - For some injections, the content nodes' children should be excluded
    //   from the nested document, so that only the content nodes' *own* content is reparsed. For
    //   other injections, the content nodes' entire ranges should be reparsed, including the ranges
    //   of their children.
    //
    // The parent ranges are consumed front to back as the nodes are visited, so the walk
    // presumably relies on both `parent_ranges` and `nodes` being in document order — confirm.
    //
    // Panics if `nodes` or `parent_ranges` is empty.
    fn intersect_ranges(
        parent_ranges: &[Range],
        nodes: &[Node],
        includes_children: bool,
    ) -> Vec<Range> {
        let mut cursor = nodes[0].walk();
        let mut result = Vec::new();
        let mut parent_range_iter = parent_ranges.iter();
        let mut parent_range = parent_range_iter
            .next()
            .expect("Layers should only be constructed with non-empty ranges vectors");
        for node in nodes {
            // Sentinel ranges: `preceding_range` ends where the node starts and
            // `following_range` starts where the node ends, so the gaps computed below
            // never reach outside the node.
            let mut preceding_range = Range {
                start_byte: 0,
                start_point: Point::new(0, 0),
                end_byte: node.start_byte(),
                end_point: node.start_position(),
            };
            let following_range = Range {
                start_byte: node.end_byte(),
                start_point: node.end_position(),
                end_byte: usize::MAX,
                end_point: Point::new(usize::MAX, usize::MAX),
            };

            // Excluded ranges are the children (only when children are not included),
            // followed by the trailing sentinel.
            for excluded_range in node
                .children(&mut cursor)
                .filter_map(|child| {
                    if includes_children {
                        None
                    } else {
                        Some(child.range())
                    }
                })
                .chain(std::iter::once(following_range))
            {
                // Candidate range: the gap between the previous excluded range and this one.
                let mut range = Range {
                    start_byte: preceding_range.end_byte,
                    start_point: preceding_range.end_point,
                    end_byte: excluded_range.start_byte,
                    end_point: excluded_range.start_point,
                };
                preceding_range = excluded_range;

                // The gap lies entirely before the current parent range.
                if range.end_byte < parent_range.start_byte {
                    continue;
                }

                // Clip the gap against each parent range that starts at or before its end.
                while parent_range.start_byte <= range.end_byte {
                    if parent_range.end_byte > range.start_byte {
                        // Trim the part of the gap that precedes the parent range.
                        if range.start_byte < parent_range.start_byte {
                            range.start_byte = parent_range.start_byte;
                            range.start_point = parent_range.start_point;
                        }

                        if parent_range.end_byte < range.end_byte {
                            // The gap extends past this parent range: emit the overlapping
                            // part and keep the remainder for the next parent range.
                            if range.start_byte < parent_range.end_byte {
                                result.push(Range {
                                    start_byte: range.start_byte,
                                    start_point: range.start_point,
                                    end_byte: parent_range.end_byte,
                                    end_point: parent_range.end_point,
                                });
                            }
                            range.start_byte = parent_range.end_byte;
                            range.start_point = parent_range.end_point;
                        } else {
                            // The rest of the gap fits in this parent range; skip it if empty.
                            if range.start_byte < range.end_byte {
                                result.push(range);
                            }
                            break;
                        }
                    }

                    // Move to the next parent range; once they run out nothing further
                    // can be included.
                    if let Some(next_range) = parent_range_iter.next() {
                        parent_range = next_range;
                    } else {
                        return result;
                    }
                }
            }
        }
        result
    }
730
731    // First, sort scope boundaries by their byte offset in the document. At a
732    // given position, emit scope endings before scope beginnings. Finally, emit
733    // scope boundaries from deeper layers first.
734    fn sort_key(&mut self) -> Option<(usize, bool, isize)> {
735        let depth = -(self.depth as isize);
736        let next_start = self
737            .captures
738            .peek()
739            .map(|(m, i)| m.captures[*i].node.start_byte());
740        let next_end = self.highlight_end_stack.last().copied();
741        match (next_start, next_end) {
742            (Some(start), Some(end)) => {
743                if start < end {
744                    Some((start, true, depth))
745                } else {
746                    Some((end, false, depth))
747                }
748            }
749            (Some(i), None) => Some((i, true, depth)),
750            (None, Some(j)) => Some((j, false, depth)),
751            _ => None,
752        }
753    }
754}
755
756impl<'a, F> HighlightIter<'a, F>
757where
758    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
759{
760    fn emit_event(
761        &mut self,
762        offset: usize,
763        event: Option<HighlightEvent>,
764    ) -> Option<Result<HighlightEvent, Error>> {
765        let result;
766        if self.byte_offset < offset {
767            result = Some(Ok(HighlightEvent::Source {
768                start: self.byte_offset,
769                end: offset,
770            }));
771            self.byte_offset = offset;
772            self.next_event = event;
773        } else {
774            result = event.map(Ok);
775        }
776        self.sort_layers();
777        result
778    }
779
780    fn sort_layers(&mut self) {
781        while !self.layers.is_empty() {
782            if let Some(sort_key) = self.layers[0].sort_key() {
783                let mut i = 0;
784                while i + 1 < self.layers.len() {
785                    if let Some(next_offset) = self.layers[i + 1].sort_key() {
786                        if next_offset < sort_key {
787                            i += 1;
788                            continue;
789                        }
790                    }
791                    break;
792                }
793                if i > 0 {
794                    self.layers[0..=i].rotate_left(1);
795                }
796                break;
797            }
798            let layer = self.layers.remove(0);
799            self.highlighter.cursors.push(layer.cursor);
800        }
801    }
802
803    fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) {
804        if let Some(sort_key) = layer.sort_key() {
805            let mut i = 1;
806            while i < self.layers.len() {
807                if let Some(sort_key_i) = self.layers[i].sort_key() {
808                    if sort_key_i > sort_key {
809                        self.layers.insert(i, layer);
810                        return;
811                    }
812                    i += 1;
813                } else {
814                    self.layers.remove(i);
815                }
816            }
817            self.layers.push(layer);
818        }
819    }
820}
821
impl<'a, F> Iterator for HighlightIter<'a, F>
where
    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
{
    type Item = Result<HighlightEvent, Error>;

    /// Advances the highlighter and returns the next event.
    ///
    /// Each pass of the main loop looks at the layer at the front of
    /// `self.layers` and either returns an event (`Source`, `HighlightStart`,
    /// `HighlightEnd`, or an error) or consumes a capture that yields no event
    /// and loops again. Returns `None` once no layers remain and the source
    /// has been emitted up to its end.
    fn next(&mut self) -> Option<Self::Item> {
        'main: loop {
            // If we've already determined the next highlight boundary, just return it.
            if let Some(e) = self.next_event.take() {
                return Some(Ok(e));
            }

            // Periodically check for cancellation, returning `Cancelled` error if the
            // cancellation flag was flipped. The counter only advances when a flag
            // was supplied, and the flag is read once every
            // `CANCELLATION_CHECK_INTERVAL` passes.
            if let Some(cancellation_flag) = self.cancellation_flag {
                self.iter_count += 1;
                if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
                    self.iter_count = 0;
                    if cancellation_flag.load(Ordering::Relaxed) != 0 {
                        return Some(Err(Error::Cancelled));
                    }
                }
            }

            // If none of the layers have any more highlight boundaries, terminate.
            // Any source text that has not been emitted yet is returned first as a
            // final `Source` event.
            if self.layers.is_empty() {
                return if self.byte_offset < self.source.len() {
                    let result = Some(Ok(HighlightEvent::Source {
                        start: self.byte_offset,
                        end: self.source.len(),
                    }));
                    self.byte_offset = self.source.len();
                    result
                } else {
                    None
                };
            }

            // Get the next capture from whichever layer has the earliest highlight boundary.
            // `range` is only assigned on the path where a capture is available; the
            // other path always returns.
            let range;
            let layer = &mut self.layers[0];
            if let Some((next_match, capture_index)) = layer.captures.peek() {
                let next_capture = next_match.captures[*capture_index];
                range = next_capture.node.byte_range();

                // If any previous highlight ends before this node starts, then before
                // processing this capture, emit the source code up until the end of the
                // previous highlight, and an end event for that highlight.
                if let Some(end_byte) = layer.highlight_end_stack.last().copied() {
                    if end_byte <= range.start {
                        layer.highlight_end_stack.pop();
                        return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
                    }
                }
            }
            // If there are no more captures, then emit any remaining highlight end events.
            // And if there are none of those, then just advance to the end of the document.
            else {
                if let Some(end_byte) = layer.highlight_end_stack.last().copied() {
                    layer.highlight_end_stack.pop();
                    return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
                }
                return self.emit_event(self.source.len(), None);
            }

            // The `peek` above succeeded, so a capture is available to consume here.
            let (mut match_, capture_index) = layer.captures.next().unwrap();
            let mut capture = match_.captures[capture_index];

            // If this capture represents an injection, then process the injection.
            // Patterns with an index below `locals_pattern_index` are treated as
            // injection patterns.
            if match_.pattern_index < layer.config.locals_pattern_index {
                let (language_name, content_node, include_children) = injection_for_match(
                    layer.config,
                    Some(self.language_name),
                    &layer.config.query,
                    &match_,
                    self.source,
                );

                // Explicitly remove this match so that none of its other captures will remain
                // in the stream of captures.
                match_.remove();

                // If a language is found with the given name, then add a new language layer
                // to the highlighted document.
                if let (Some(language_name), Some(content_node)) = (language_name, content_node) {
                    if let Some(config) = (self.injection_callback)(language_name) {
                        let ranges = HighlightIterLayer::intersect_ranges(
                            &self.layers[0].ranges,
                            &[content_node],
                            include_children,
                        );
                        if !ranges.is_empty() {
                            // The new layers are created one level deeper than the
                            // layer that contained the injection.
                            match HighlightIterLayer::new(
                                self.source,
                                Some(self.language_name),
                                self.highlighter,
                                self.cancellation_flag,
                                &mut self.injection_callback,
                                config,
                                self.layers[0].depth + 1,
                                ranges,
                            ) {
                                Ok(layers) => {
                                    for layer in layers {
                                        self.insert_layer(layer);
                                    }
                                }
                                Err(e) => return Some(Err(e)),
                            }
                        }
                    }
                }

                self.sort_layers();
                continue 'main;
            }

            // Remove from the local scope stack any local scopes that have already ended.
            // NOTE(review): the `unwrap` assumes the scope stack never becomes empty,
            // presumably because each layer starts with a root scope that spans every
            // capture — confirm where the layer is constructed.
            while range.start > layer.scope_stack.last().unwrap().range.end {
                layer.scope_stack.pop();
            }

            // If this capture is for tracking local variables, then process the
            // local variable info. Patterns with an index below
            // `highlights_pattern_index` are handled here rather than as highlights.
            let mut reference_highlight = None;
            let mut definition_highlight = None;
            while match_.pattern_index < layer.config.highlights_pattern_index {
                // If the node represents a local scope, push a new local scope onto
                // the scope stack.
                if Some(capture.index) == layer.config.local_scope_capture_index {
                    definition_highlight = None;
                    let mut scope = LocalScope {
                        inherits: true,
                        range: range.clone(),
                        local_defs: Vec::new(),
                    };
                    // `local.scope-inherits` with a value other than "true" stops
                    // reference lookups from continuing into enclosing scopes.
                    for prop in layer.config.query.property_settings(match_.pattern_index) {
                        if prop.key.as_ref() == "local.scope-inherits" {
                            scope.inherits =
                                prop.value.as_ref().is_none_or(|r| r.as_ref() == "true");
                        }
                    }
                    layer.scope_stack.push(scope);
                }
                // If the node represents a definition, add a new definition to the
                // local scope at the top of the scope stack.
                else if Some(capture.index) == layer.config.local_def_capture_index {
                    reference_highlight = None;
                    definition_highlight = None;
                    let scope = layer.scope_stack.last_mut().unwrap();

                    // Use the byte range of the definition's value capture if the
                    // match has one; otherwise the range stays empty at 0.
                    let mut value_range = 0..0;
                    for capture in match_.captures {
                        if Some(capture.index) == layer.config.local_def_value_capture_index {
                            value_range = capture.node.byte_range();
                        }
                    }

                    // The definition is recorded without a highlight; a reference to
                    // its highlight slot is kept so it can be filled in further down
                    // once the node's highlight is known.
                    if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
                        scope.local_defs.push(LocalDef {
                            name,
                            value_range,
                            highlight: None,
                        });
                        definition_highlight =
                            scope.local_defs.last_mut().map(|s| &mut s.highlight);
                    }
                }
                // If the node represents a reference, then try to find the corresponding
                // definition in the scope stack.
                else if Some(capture.index) == layer.config.local_ref_capture_index
                    && definition_highlight.is_none()
                {
                    definition_highlight = None;
                    if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
                        // Search from the innermost scope outwards, preferring the most
                        // recently added definition whose value ends at or before this
                        // reference starts.
                        for scope in layer.scope_stack.iter().rev() {
                            if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
                                if def.name == name && range.start >= def.value_range.end {
                                    Some(def.highlight)
                                } else {
                                    None
                                }
                            }) {
                                reference_highlight = highlight;
                                break;
                            }
                            if !scope.inherits {
                                break;
                            }
                        }
                    }
                }

                // Continue processing any additional matches for the same node.
                if let Some((next_match, next_capture_index)) = layer.captures.peek() {
                    let next_capture = next_match.captures[*next_capture_index];
                    if next_capture.node == capture.node {
                        capture = next_capture;
                        match_ = layer.captures.next().unwrap().0;
                        continue;
                    }
                }

                // No further capture for this node: nothing to emit for it.
                self.sort_layers();
                continue 'main;
            }

            // Otherwise, this capture must represent a highlight.
            // If this exact range has already been highlighted by an earlier pattern, or by
            // a different layer, then skip over this one.
            if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
                if range.start == last_start && range.end == last_end && layer.depth < last_depth {
                    self.sort_layers();
                    continue 'main;
                }
            }

            // Once a highlighting pattern is found for the current node, keep iterating over
            // any later highlighting patterns that also match this node and set the match to it.
            // Captures for a given node are ordered by pattern index, so these subsequent
            // captures are guaranteed to be for highlighting, not injections or
            // local variables.
            while let Some((next_match, next_capture_index)) = layer.captures.peek() {
                let next_capture = next_match.captures[*next_capture_index];
                if next_capture.node == capture.node {
                    let following_match = layer.captures.next().unwrap().0;
                    // If the current node was found to be a local variable, then ignore
                    // the following match if it's a highlighting pattern that is disabled
                    // for local variables.
                    if (definition_highlight.is_some() || reference_highlight.is_some())
                        && layer.config.non_local_variable_patterns[following_match.pattern_index]
                    {
                        continue;
                    }
                    match_.remove();
                    capture = next_capture;
                    match_ = following_match;
                } else {
                    break;
                }
            }

            // Look up the highlight configured for the capture that was selected.
            let current_highlight = layer.config.highlight_indices[capture.index as usize];

            // If this node represents a local definition, then store the current
            // highlight value on the local scope entry representing this node.
            if let Some(definition_highlight) = definition_highlight {
                *definition_highlight = current_highlight;
            }

            // Emit a scope start event and push the node's end position to the stack.
            // A highlight found via a local reference takes precedence over the
            // capture's own highlight.
            if let Some(highlight) = reference_highlight.or(current_highlight) {
                self.last_highlight_range = Some((range.start, range.end, layer.depth));
                layer.highlight_end_stack.push(range.end);
                return self
                    .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
            }

            // The capture has no highlight to emit; re-sort and keep looking.
            self.sort_layers();
        }
    }
}
1085
1086impl Default for HtmlRenderer {
1087    fn default() -> Self {
1088        Self::new()
1089    }
1090}
1091
1092impl HtmlRenderer {
1093    #[must_use]
1094    pub fn new() -> Self {
1095        let mut result = Self {
1096            html: Vec::with_capacity(BUFFER_HTML_RESERVE_CAPACITY),
1097            line_offsets: Vec::with_capacity(BUFFER_LINES_RESERVE_CAPACITY),
1098            carriage_return_highlight: None,
1099            last_carriage_return: None,
1100        };
1101        result.line_offsets.push(0);
1102        result
1103    }
1104
1105    pub fn set_carriage_return_highlight(&mut self, highlight: Option<Highlight>) {
1106        self.carriage_return_highlight = highlight;
1107    }
1108
1109    pub fn reset(&mut self) {
1110        shrink_and_clear(&mut self.html, BUFFER_HTML_RESERVE_CAPACITY);
1111        shrink_and_clear(&mut self.line_offsets, BUFFER_LINES_RESERVE_CAPACITY);
1112        self.line_offsets.push(0);
1113    }
1114
1115    pub fn render<F>(
1116        &mut self,
1117        highlighter: impl Iterator<Item = Result<HighlightEvent, Error>>,
1118        source: &[u8],
1119        attribute_callback: &F,
1120    ) -> Result<(), Error>
1121    where
1122        F: Fn(Highlight, &mut Vec<u8>),
1123    {
1124        let mut highlights = Vec::new();
1125        for event in highlighter {
1126            match event {
1127                Ok(HighlightEvent::HighlightStart(s)) => {
1128                    highlights.push(s);
1129                    self.start_highlight(s, &attribute_callback);
1130                }
1131                Ok(HighlightEvent::HighlightEnd) => {
1132                    highlights.pop();
1133                    self.end_highlight();
1134                }
1135                Ok(HighlightEvent::Source { start, end }) => {
1136                    self.add_text(&source[start..end], &highlights, &attribute_callback);
1137                }
1138                Err(a) => return Err(a),
1139            }
1140        }
1141        if let Some(offset) = self.last_carriage_return.take() {
1142            self.add_carriage_return(offset, attribute_callback);
1143        }
1144        if self.html.last() != Some(&b'\n') {
1145            self.html.push(b'\n');
1146        }
1147        if self.line_offsets.last() == Some(&(self.html.len() as u32)) {
1148            self.line_offsets.pop();
1149        }
1150        Ok(())
1151    }
1152
1153    pub fn lines(&self) -> impl Iterator<Item = &str> {
1154        self.line_offsets
1155            .iter()
1156            .enumerate()
1157            .map(move |(i, line_start)| {
1158                let line_start = *line_start as usize;
1159                let line_end = if i + 1 == self.line_offsets.len() {
1160                    self.html.len()
1161                } else {
1162                    self.line_offsets[i + 1] as usize
1163                };
1164                str::from_utf8(&self.html[line_start..line_end]).unwrap()
1165            })
1166    }
1167
1168    fn add_carriage_return<F>(&mut self, offset: usize, attribute_callback: &F)
1169    where
1170        F: Fn(Highlight, &mut Vec<u8>),
1171    {
1172        if let Some(highlight) = self.carriage_return_highlight {
1173            // If a CR is the last character in a `HighlightEvent::Source`
1174            // region, then we don't know until the next `Source` event or EOF
1175            // whether it is part of CRLF or on its own. To avoid unbounded
1176            // lookahead, save the offset of the CR and insert there now that we
1177            // know.
1178            let rest = self.html.split_off(offset);
1179            self.html.extend(b"<span ");
1180            (attribute_callback)(highlight, &mut self.html);
1181            self.html.extend(b"></span>");
1182            self.html.extend(rest);
1183        }
1184    }
1185
1186    fn start_highlight<F>(&mut self, h: Highlight, attribute_callback: &F)
1187    where
1188        F: Fn(Highlight, &mut Vec<u8>),
1189    {
1190        self.html.extend(b"<span ");
1191        (attribute_callback)(h, &mut self.html);
1192        self.html.extend(b">");
1193    }
1194
1195    fn end_highlight(&mut self) {
1196        self.html.extend(b"</span>");
1197    }
1198
1199    fn add_text<F>(&mut self, src: &[u8], highlights: &[Highlight], attribute_callback: &F)
1200    where
1201        F: Fn(Highlight, &mut Vec<u8>),
1202    {
1203        pub const fn html_escape(c: u8) -> Option<&'static [u8]> {
1204            match c as char {
1205                '>' => Some(b"&gt;"),
1206                '<' => Some(b"&lt;"),
1207                '&' => Some(b"&amp;"),
1208                '\'' => Some(b"&#39;"),
1209                '"' => Some(b"&quot;"),
1210                _ => None,
1211            }
1212        }
1213
1214        for c in LossyUtf8::new(src).flat_map(|p| p.bytes()) {
1215            // Don't render carriage return characters, but allow lone carriage returns (not
1216            // followed by line feeds) to be styled via the attribute callback.
1217            if c == b'\r' {
1218                self.last_carriage_return = Some(self.html.len());
1219                continue;
1220            }
1221            if let Some(offset) = self.last_carriage_return.take() {
1222                if c != b'\n' {
1223                    self.add_carriage_return(offset, attribute_callback);
1224                }
1225            }
1226
1227            // At line boundaries, close and re-open all of the open tags.
1228            if c == b'\n' {
1229                highlights.iter().for_each(|_| self.end_highlight());
1230                self.html.push(c);
1231                self.line_offsets.push(self.html.len() as u32);
1232                highlights
1233                    .iter()
1234                    .for_each(|scope| self.start_highlight(*scope, attribute_callback));
1235            } else if let Some(escape) = html_escape(c) {
1236                self.html.extend_from_slice(escape);
1237            } else {
1238                self.html.push(c);
1239            }
1240        }
1241    }
1242}
1243
1244fn injection_for_match<'a>(
1245    config: &'a HighlightConfiguration,
1246    parent_name: Option<&'a str>,
1247    query: &'a Query,
1248    query_match: &QueryMatch<'a, 'a>,
1249    source: &'a [u8],
1250) -> (Option<&'a str>, Option<Node<'a>>, bool) {
1251    let content_capture_index = config.injection_content_capture_index;
1252    let language_capture_index = config.injection_language_capture_index;
1253
1254    let mut language_name = None;
1255    let mut content_node = None;
1256
1257    for capture in query_match.captures {
1258        let index = Some(capture.index);
1259        if index == language_capture_index {
1260            language_name = capture.node.utf8_text(source).ok();
1261        } else if index == content_capture_index {
1262            content_node = Some(capture.node);
1263        }
1264    }
1265
1266    let mut include_children = false;
1267    for prop in query.property_settings(query_match.pattern_index) {
1268        match prop.key.as_ref() {
1269            // In addition to specifying the language name via the text of a
1270            // captured node, it can also be hard-coded via a `#set!` predicate
1271            // that sets the injection.language key.
1272            "injection.language" => {
1273                if language_name.is_none() {
1274                    language_name = prop.value.as_ref().map(std::convert::AsRef::as_ref);
1275                }
1276            }
1277
1278            // Setting the `injection.self` key can be used to specify that the
1279            // language name should be the same as the language of the current
1280            // layer.
1281            "injection.self" => {
1282                if language_name.is_none() {
1283                    language_name = Some(config.language_name.as_str());
1284                }
1285            }
1286
1287            // Setting the `injection.parent` key can be used to specify that
1288            // the language name should be the same as the language of the
1289            // parent layer
1290            "injection.parent" => {
1291                if language_name.is_none() {
1292                    language_name = parent_name;
1293                }
1294            }
1295
1296            // By default, injections do not include the *children* of an
1297            // `injection.content` node - only the ranges that belong to the
1298            // node itself. This can be changed using a `#set!` predicate that
1299            // sets the `injection.include-children` key.
1300            "injection.include-children" => include_children = true,
1301            _ => {}
1302        }
1303    }
1304
1305    (language_name, content_node, include_children)
1306}
1307
/// Empties `vec`, first releasing excess memory if it has grown too large.
///
/// When `vec` holds more than `capacity` elements it is cut down to `capacity`
/// elements and its allocation is shrunk to fit before being emptied. A `vec`
/// with at most `capacity` elements is only emptied and keeps its allocation.
fn shrink_and_clear<T>(vec: &mut Vec<T>, capacity: usize) {
    if vec.len() <= capacity {
        vec.clear();
        return;
    }
    vec.truncate(capacity);
    vec.shrink_to_fit();
    vec.clear();
}