duat_treesitter/
lib.rs

1//! A [tree-sitter] implementation for Duat
2//!
3//! `duat-treesitter` currently does two things:
4//!
5//! * Syntax highlighting
6//! * Indentation calculation
7//!
8//! # Installation
9//!
10//! Just like other Duat plugins, this one can be installed by calling
11//! `cargo add` in the config directory:
12//!
13//! ```bash
14//! cargo add duat-treesitter@"*"
15//! ```
16//!
17//! Or, if you are using a `--git-deps` version of duat, do this:
18//!
19//! ```bash
20//! cargo add --git https://github.com/AhoyISki/duat-treesitter
21//! ```
22//!
23//! But this is a default plugin, so you most likely won't have to do
24//! that.
25//!
26//! [tree-sitter]: https://tree-sitter.github.io/tree-sitter
27#![feature(closure_lifetime_binder)]
28use std::{
29    collections::HashMap,
30    fs,
31    ops::Range,
32    path::PathBuf,
33    sync::{LazyLock, Mutex},
34};
35
36use duat_core::{
37    lender::Lender,
38    Plugins, Ranges,
39    buffer::{self, Buffer, BufferTracker, PathKind},
40    context::{self, Handle},
41    data::Pass,
42    form::{self, Form, FormId},
43    hook,
44    mode::Cursor,
45    opts::PrintOpts,
46    text::{Builder, Bytes, Change, Matcheable, Point, Tagger, Tags, Text, txt},
47    ui::Widget,
48};
49use duat_filetype::FileType;
50use streaming_iterator::StreamingIterator;
51use tree_sitter::{
52    InputEdit, Language, Node, Parser, Point as TsPoint, Query, QueryCapture as QueryCap,
53    QueryCursor, QueryMatch, TextProvider, Tree,
54};
55
56use self::{injections::InjectedTree, languages::parser_is_compiled};
57
58mod cursor;
59mod injections;
60mod languages;
61
/// The [tree-sitter] plugin for Duat
///
/// For now, it adds syntax highlighting and indentation, but more
/// features will be coming in the future.
///
/// These things are done through the [`TsParser`] [`Parser`], which
/// updates the inner syntax tree whenever the [`Text`] reports any
/// changes.
///
/// # NOTE
///
/// If you are looking to create a [`Parser`] which can do similar
/// things, you should look at the code for the implementation of
/// [`Parser`] for [`TsParser`], it's relatively short and with good
/// explanations for what is happening.
///
/// [tree-sitter]: https://tree-sitter.github.io/tree-sitter
/// [`Parser`]: buffer::Parser
#[derive(Default)]
pub struct TreeSitter;
81
82impl duat_core::Plugin for TreeSitter {
83    fn plug(self, _: &Plugins) {
84        const MAX_LEN_FOR_LOCAL: usize = 100_000;
85
86        form::set_many_weak!(
87            ("variable", Form::white()),
88            ("variable.builtin", Form::dark_yellow()),
89            ("constant", Form::grey()),
90            ("constant.builtin", Form::dark_yellow()),
91            ("module", Form::blue().italic()),
92            ("label", Form::green()),
93            ("string", Form::green()),
94            ("character", Form::dark_yellow()),
95            ("boolean", Form::dark_yellow()),
96            ("number", Form::dark_yellow()),
97            ("type", Form::yellow().italic()),
98            ("type.builtin", Form::yellow().reset()),
99            ("attribute", Form::green()),
100            ("property", Form::green()),
101            ("function", Form::blue().reset()),
102            ("constructor", Form::dark_yellow().reset()),
103            ("operator", Form::cyan()),
104            ("keyword", Form::magenta()),
105            ("punctuation.bracket", Form::grey()),
106            ("punctuation.delimiter", Form::grey()),
107            ("comment", Form::grey()),
108            ("comment.documentation", Form::grey().bold()),
109            ("markup.strong", Form::bold()),
110            ("markup.italic", Form::italic()),
111            ("markup.strikethrough", Form::crossed_out()),
112            ("markup.underline", Form::underlined()),
113            ("markup.heading", Form::blue().bold()),
114            ("markup.math", Form::yellow()),
115            ("markup.quote", Form::grey().italic()),
116            ("markup.link", Form::blue().underlined()),
117            ("markup.raw", Form::cyan()),
118            ("markup.list", Form::yellow()),
119            ("markup.list.checked", Form::green()),
120            ("markup.list.unchecked", Form::grey()),
121            ("diff.plus", Form::red()),
122            ("diff.delta", Form::blue()),
123            ("diff.minus", Form::green()),
124            ("node.field", "variable.member"),
125        );
126
127        hook::add::<Buffer>(|pa, handle| {
128            let file = handle.write(pa);
129
130            let path = file.path_kind();
131            let filetype = if let PathKind::SetExists(path) | PathKind::SetAbsent(path) = &path
132                && let Some(filetype) = path.filetype()
133                && crate::languages::filetype_is_in_list(filetype)
134            {
135                filetype
136            } else {
137                context::debug!(
138                    "No filetype set for [a]{}[], will try again once one is set",
139                    path.name_txt()
140                );
141                return file.add_parser(|tracker| TsParser(Some(ParserState::NotSet(tracker))));
142            };
143
144            if parser_is_compiled(filetype)? && file.bytes().len().byte() <= MAX_LEN_FOR_LOCAL {
145                let lang_parts = lang_parts_of(filetype)?;
146                handle.add_parser(pa, |tracker| {
147                    TsParser(Some(ParserState::Present(InnerTsParser::new(
148                        lang_parts, tracker,
149                    ))))
150                })
151            } else {
152                handle.add_parser(pa, |tracker| {
153                    TsParser(Some(ParserState::Remote(std::thread::spawn(move || {
154                        let lang_parts = match lang_parts_of(filetype) {
155                            Ok(lang_parts) => lang_parts,
156                            Err(err) => {
157                                context::error!("{err}");
158                                return Err(tracker);
159                            }
160                        };
161
162                        let mut parser = InnerTsParser::new(lang_parts, tracker);
163
164                        while parser.parse() {}
165
166                        parser.tracker.request_parse();
167                        Ok(parser)
168                    }))))
169                })
170            }
171        })
172        .grouped("TreeSitter");
173    }
174}
175
/// [`Parser`] that parses [`Buffer`]'s as [tree-sitter] syntax trees
///
/// The inner [`Option`] is only emptied momentarily, while the
/// [`ParserState`] is taken out to be replaced by its successor.
///
/// [tree-sitter]: https://tree-sitter.github.io/tree-sitter
/// [`Parser`]: buffer::Parser
pub struct TsParser(Option<ParserState>);
180
181impl TsParser {
182    /// The root [`Node`] of the syntax tree
183    pub fn root(&self) -> Option<Node<'_>> {
184        let Some(ParserState::Present(parser)) = &self.0 else {
185            context::warn!("Called function that shouldn't be possible without present parser");
186            return None;
187        };
188
189        Some(parser.tree.root_node())
190    }
191
192    /// Logs the root node with the [`context::debug`] macro
193    pub fn debug_root(&self) {
194        let Some(ParserState::Present(parser)) = &self.0 else {
195            context::warn!("Called function that shouldn't be possible without present parser");
196            return;
197        };
198
199        context::debug!("{}", format_root(parser.tree.root_node()));
200    }
201
202    /// Gets the requested indentation level on a given [`Point`]
203    ///
204    /// Will be [`None`] if the [`filetype`] hasn't been set yet or if
205    /// there is no indentation query for this language.
206    ///
207    /// [`filetype`]: FileType::filetype
208    pub fn indent_on(&self, p: Point, bytes: &Bytes, cfg: PrintOpts) -> Option<usize> {
209        let Some(ParserState::Present(parser)) = &self.0 else {
210            context::warn!("Called function that shouldn't be possible without present parser");
211            return None;
212        };
213
214        parser.indent_on(p, bytes, cfg)
215    }
216}
217
218impl buffer::Parser for TsParser {
219    fn parse(&mut self) -> bool {
220        // In this function, the changes will be applied and the Ranges will
221        // be updated to include the following regions to be updated:
222        //
223        // - The ranges returned by Parser::changed_ranges,
224        // - All ranges where an injection was added or removed, which will be
225        //   acquired through the injections query, applied on the two
226        //   previous range lists,
227        let parser_state = self.0.take().unwrap();
228        let (parser_state, do_update) = parser_state.parse();
229        self.0 = Some(parser_state);
230
231        do_update
232    }
233
234    fn update(&mut self, pa: &mut Pass, file: &Handle, on: Vec<Range<Point>>) {
235        match self.0.as_mut().unwrap() {
236            ParserState::Present(parser) => {
237                let mut parts = file.write(pa).text_mut().parts();
238
239                for range in on {
240                    let range = range.start.byte()..range.end.byte();
241                    parser.highlight_and_inject(parts.bytes, &mut parts.tags, range);
242                }
243            }
244            ParserState::Remote(..) => {
245                context::warn!("Tried updating parser while it is still remote");
246            }
247            _ => (),
248        }
249    }
250
251    fn before_get(&mut self) {
252        self.parse();
253    }
254
255    fn before_try_get(&mut self) -> bool {
256        let parser_state = self.0.take().unwrap();
257
258        if let ParserState::Remote(join_handle) = parser_state {
259            if join_handle.is_finished() {
260                match join_handle.join().unwrap() {
261                    Ok(parser) => {
262                        self.0 = Some(ParserState::Present(parser));
263                        self.parse()
264                    }
265                    Err(tracker) => {
266                        self.0 = Some(ParserState::NotSet(tracker));
267                        false
268                    }
269                }
270            } else {
271                self.0 = Some(ParserState::Remote(join_handle));
272                false
273            }
274        } else {
275            self.0 = Some(parser_state);
276            true
277        }
278    }
279}
280
/// The actual tree-sitter state of a [`TsParser`]
struct InnerTsParser {
    /// The tree-sitter [`Parser`], with the language already set
    parser: Parser,
    /// Name, [`Language`] and [`Queries`] of the buffer's language
    lang_parts: LangParts<'static>,
    /// One `(FormId, priority)` pair per capture name of the
    /// highlights query, as built by `forms_from_lang_parts`
    forms: &'static [(FormId, u8)],
    /// The current syntax tree
    tree: Tree,
    /// The tree from before the latest reparse, [`None`] until the
    /// first reparse happens
    old_tree: Option<Tree>,
    /// Trees of injected languages
    injections: Vec<InjectedTree>,
    /// Source of the bytes and of the changes to parse
    tracker: BufferTracker,
}
290
291impl InnerTsParser {
292    /// Returns a new [`InnerTsParser`]
293    fn new(lang_parts: LangParts<'static>, tracker: BufferTracker) -> InnerTsParser {
294        let (.., lang, _) = &lang_parts;
295        let forms = forms_from_lang_parts(lang_parts);
296
297        let mut parser = Parser::new();
298        parser.set_language(lang).unwrap();
299
300        let tree = parser
301            .parse_with_options(&mut parser_fn(tracker.bytes()), None, None)
302            .unwrap();
303
304        InnerTsParser {
305            parser,
306            lang_parts,
307            forms,
308            tree,
309            old_tree: None,
310            injections: Vec::new(),
311            tracker,
312        }
313    }
314
    /// Parse the newest changes, returns `false` if there were none
    ///
    /// When there are changes, the tree (and the injected trees) are
    /// edited and reparsed, and every byte range that may need to be
    /// highlighted again is sent to the tracker.
    fn parse(&mut self) -> bool {
        self.tracker.update();
        let bytes = self.tracker.bytes();
        let moment = self.tracker.moment();

        if moment.is_empty() {
            return false;
        }

        // These new ranges will be used for calculating things like
        // new injections, for example.
        let mut new_ranges = Ranges::empty();

        // First, tell every tree about the edits, so the reparse can be
        // incremental.
        for change in moment.changes() {
            let input_edit = input_edit(change, bytes);
            self.tree.edit(&input_edit);

            for inj in self.injections.iter_mut() {
                inj.edit(&input_edit);
            }
            // The lines touched by the change always need updating.
            let range = change.line_range(bytes);
            new_ranges.add(range.start.byte()..range.end.byte());
        }

        // Reparse, handing the edited tree to tree-sitter.
        let tree = self
            .parser
            .parse_with_options(&mut parser_fn(bytes), Some(&self.tree), None)
            .unwrap();

        // Keep the previous tree around, it is compared with the new one
        // below.
        self.old_tree = Some(std::mem::replace(&mut self.tree, tree));

        for inj in self.injections.iter_mut() {
            inj.update_tree(bytes);
        }

        // `changed_ranges` should mostly be able to catch any big additions
        // to the tree structure.
        for range in self
            .old_tree
            .as_ref()
            .unwrap()
            .changed_ranges(&self.tree)
            .chain(
                self.injections
                    .iter()
                    .flat_map(InjectedTree::changed_ranges),
            )
        {
            // The rows seem kind of unpredictable, which is why I have to do this
            // nonsense
            // The range is widened to whole lines: from the start of the
            // first line to the end of the last one.
            let start = bytes.point_at_line(bytes.point_at_byte(range.start_byte).line());
            let range = bytes.line_range(
                bytes
                    .point_at_byte(range.end_byte.min(bytes.len().byte()))
                    .line(),
            );

            new_ranges.add(start.byte()..range.end.byte())
        }

        // Finally, in order to properly catch injection changes, a final
        // comparison is done between the old tree and the new tree, in
        // regards to injection captures. This is done on every range in
        // new_ranges.
        refactor_injections(
            &mut new_ranges,
            (self.lang_parts, &mut self.injections),
            (self.old_tree.as_ref(), &self.tree),
            bytes,
        );

        self.tracker.add_ranges(new_ranges);

        true
    }
391
392    /// Highlights and injects based on the [`LangParts`] queries
393    fn highlight_and_inject(&mut self, bytes: &Bytes, tags: &mut Tags, range: Range<usize>) {
394        tags.remove(ts_tagger(), range.clone());
395
396        highlight_and_inject(
397            self.tree.root_node(),
398            &mut self.injections,
399            (self.lang_parts, self.forms),
400            (bytes, tags),
401            range.start.saturating_sub(1)..(range.end + 1).min(bytes.len().byte()),
402        );
403    }
404
405    ////////// Querying functions
406
407    /// The expected level of indentation on a given [`Point`]
408    fn indent_on(&self, p: Point, bytes: &Bytes, cfg: PrintOpts) -> Option<usize> {
409        let start = bytes.point_at_line(p.line());
410
411        let (root, indents, range) = self
412            .injections
413            .iter()
414            .find_map(|inj| inj.get_injection_indent_parts(start.byte()))
415            .unwrap_or((
416                self.tree.root_node(),
417                self.lang_parts.2.indents,
418                0..bytes.len().byte(),
419            ));
420
421        let first_line = bytes.point_at_byte(range.start).line();
422
423        // The query could be empty.
424        if indents.pattern_count() == 0 {
425            return None;
426        }
427
428        // TODO: Don't reparse python, apparently.
429
430        type Captures<'a> = HashMap<&'a str, HashMap<usize, HashMap<&'a str, Option<&'a str>>>>;
431        let mut caps = HashMap::new();
432        let q = {
433            let mut cursor = QueryCursor::new();
434            let buf = TsBuf(bytes);
435            cursor
436                .matches(indents, root, buf)
437                .for_each(|qm: &QueryMatch| {
438                    for cap in qm.captures.iter() {
439                        let Some(cap_end) =
440                            indents.capture_names()[cap.index as usize].strip_prefix("indent.")
441                        else {
442                            continue;
443                        };
444
445                        let nodes = if let Some(nodes) = caps.get_mut(cap_end) {
446                            nodes
447                        } else {
448                            caps.insert(cap_end, HashMap::new());
449                            caps.get_mut(cap_end).unwrap()
450                        };
451                        let props = indents.property_settings(qm.pattern_index).iter();
452                        nodes.insert(
453                            cap.node.id(),
454                            props
455                                .map(|p| {
456                                    let key = p.key.strip_prefix("indent.").unwrap();
457                                    (key, p.value.as_deref())
458                                })
459                                .collect(),
460                        );
461                    }
462                });
463            |caps: &Captures, node: Node, queries: &[&str]| {
464                caps.get(queries[0])
465                    .and_then(|nodes| nodes.get(&node.id()))
466                    .is_some_and(|props| {
467                        let key = queries.get(1);
468                        key.is_none_or(|key| props.iter().any(|(k, _)| k == key))
469                    })
470            }
471        };
472
473        // The first non indent character of this line.
474        let indented_start = bytes
475            .chars_fwd(start..)
476            .unwrap()
477            .take_while(|(p, _)| p.line() == start.line())
478            .find_map(|(p, c)| (!c.is_whitespace()).then_some(p));
479
480        let mut opt_node = if let Some(indented_start) = indented_start {
481            Some(descendant_in(root, indented_start.byte()))
482        // If the line is empty, look behind for another.
483        } else {
484            // Find last previous empty line.
485            let mut lines = bytes.lines(..start).rev();
486            let Some((prev_l, line)) = lines
487                .find(|(_, line)| !(line.reg_matches(r"^\s*$", ..).unwrap()))
488                .filter(|(l, _)| *l >= first_line)
489            else {
490                // If there is no previous non empty line, align to 0.
491                return Some(0);
492            };
493            let trail = line.chars().rev().take_while(|c| c.is_whitespace()).count();
494
495            let prev_range = bytes.line_range(prev_l);
496            let mut node = descendant_in(root, prev_range.end.byte() - (trail + 1));
497            if node.kind().contains("comment") {
498                // Unless the whole line is a comment, try to find the last node
499                // before the comment.
500                // This technically fails if there are multiple block comments.
501                let first_node = descendant_in(root, prev_range.start.byte());
502                if first_node.id() != node.id() {
503                    node = descendant_in(root, node.start_byte() - 1)
504                }
505            }
506
507            Some(if q(&caps, node, &["end"]) {
508                descendant_in(root, start.byte())
509            } else {
510                node
511            })
512        };
513
514        if q(&caps, opt_node.unwrap(), &["zero"]) {
515            return Some(0);
516        }
517
518        let tab = cfg.tabstop as i32;
519        let mut indent = if root.start_byte() != 0 {
520            bytes.indent(bytes.point_at_byte(root.start_byte()), cfg) as i32
521        } else {
522            0
523        };
524
525        let mut processed_lines = Vec::new();
526        while let Some(node) = opt_node {
527            let s_line = node.start_position().row;
528            let e_line = node.end_position().row;
529
530            // If a node is not an indent and is marked as auto or ignore, act
531            // accordingly.
532            if !q(&caps, node, &["begin"]) && s_line < p.line() && p.line() <= e_line {
533                if !q(&caps, node, &["align"]) && q(&caps, node, &["auto"]) {
534                    return None;
535                } else if q(&caps, node, &["ignore"]) {
536                    return Some(0);
537                }
538            }
539
540            let should_process = !processed_lines.contains(&s_line);
541
542            let mut is_processed = false;
543
544            if should_process
545                && ((s_line == p.line() && q(&caps, node, &["branch"]))
546                    || (s_line != p.line() && q(&caps, node, &["dedent"])))
547            {
548                indent -= tab;
549                is_processed = true;
550            }
551
552            let is_in_err = should_process && node.parent().is_some_and(|p| p.is_error());
553            // Indent only if the node spans more than one line, or under other
554            // special circumstances.
555            if should_process
556                && q(&caps, node, &["begin"])
557                && (s_line != e_line || is_in_err || q(&caps, node, &["begin", "immediate"]))
558                && (s_line != p.line() || q(&caps, node, &["begin", "start_at_same_line"]))
559            {
560                is_processed = true;
561                indent += tab;
562            }
563
564            if is_in_err && !q(&caps, node, &["align"]) {
565                let mut cursor = node.walk();
566                for child in node.children(&mut cursor) {
567                    if q(&caps, child, &["align"]) {
568                        let props = caps["align"][&child.id()].clone();
569                        caps.get_mut("align").unwrap().insert(node.id(), props);
570                    }
571                }
572            }
573
574            let fd = for<'a, 'b> |node: Node<'a>, delim: &'b str| -> (Option<Node<'a>>, bool) {
575                let mut c = node.walk();
576                let child = node.children(&mut c).find(|child| child.kind() == delim);
577                let ret = child.map(|child| {
578                    let range = bytes.line_range(child.start_position().row);
579                    let range = child.range().start_byte..range.end.byte();
580
581                    let is_last_in_line = if let Some(line) = bytes.get_contiguous(range.clone()) {
582                        line.split_whitespace().any(|w| w != delim)
583                    } else {
584                        let line = bytes.slices(range).try_to_string().unwrap();
585                        line.split_whitespace().any(|w| w != delim)
586                    };
587
588                    (child, is_last_in_line)
589                });
590                let (child, is_last_in_line) = ret.unzip();
591                (child, is_last_in_line.unwrap_or(false))
592            };
593
594            if should_process
595                && q(&caps, node, &["align"])
596                && (s_line != e_line || is_in_err)
597                && s_line != p.line()
598            {
599                let props = &caps["align"][&node.id()];
600                let (o_delim_node, o_is_last_in_line) = props
601                    .get(&"open_delimiter")
602                    .and_then(|delim| delim.map(|d| fd(node, d)))
603                    .unwrap_or((Some(node), false));
604                let (c_delim_node, c_is_last_in_line) = props
605                    .get(&"close_delimiter")
606                    .and_then(|delim| delim.map(|d| fd(node, d)))
607                    .unwrap_or((Some(node), false));
608
609                if let Some(o_delim_node) = o_delim_node {
610                    let o_s_line = o_delim_node.start_position().row;
611                    let o_s_col = o_delim_node.start_position().column;
612                    let c_s_line = c_delim_node.map(|n| n.start_position().row);
613
614                    // If the previous line was marked with an open_delimiter, treat it
615                    // like an indent.
616                    let indent_is_absolute = if o_is_last_in_line && should_process {
617                        indent += tab;
618                        // If the aligned node ended before the current line, its @align
619                        // shouldn't affect it.
620                        if c_is_last_in_line && c_s_line.is_some_and(|l| l < p.line()) {
621                            indent = (indent - tab).max(0);
622                        }
623                        false
624                    // Aligned indent
625                    } else if c_is_last_in_line
626                        && let Some(c_s_line) = c_s_line
627                        // If the aligned node ended before the current line, its @align
628                        // shouldn't affect it.
629                        && (o_s_line != c_s_line && c_s_line < p.line())
630                    {
631                        indent = (indent - tab).max(0);
632                        false
633                    } else {
634                        let inc = props.get("increment").cloned().flatten();
635                        indent = o_s_col as i32 + inc.map(str::parse::<i32>).unwrap().unwrap();
636                        true
637                    };
638
639                    // If this is the last line of the @align, then some additional
640                    // indentation may be needed to avoid clashes. This is the case in
641                    // some function parameters, for example.
642                    let avoid_last_matching_next = c_s_line
643                        .is_some_and(|c_s_line| c_s_line != o_s_line && c_s_line == p.line())
644                        && props.contains_key("avoid_last_matching_next");
645                    if avoid_last_matching_next {
646                        indent += tab;
647                    }
648                    is_processed = true;
649                    if indent_is_absolute {
650                        return Some(indent as usize);
651                    }
652                }
653            }
654
655            if should_process && is_processed {
656                processed_lines.push(s_line);
657            }
658            opt_node = node.parent();
659        }
660
661        // indent < 0 means "keep level of indentation"
662        (indent >= 0).then_some(indent as usize)
663    }
664}
665
impl std::fmt::Debug for InnerTsParser {
    /// Shows only the trees and the injections, under the name
    /// "TsParser", every other field is omitted
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("TsParser")
            .field("tree", &self.tree)
            .field("old_tree", &self.old_tree)
            .field("injections", &self.injections)
            .finish_non_exhaustive()
    }
}
675
676#[derive(Clone, Copy)]
677struct TsBuf<'a>(&'a Bytes);
678
679impl<'a> TextProvider<&'a [u8]> for TsBuf<'a> {
680    type I = std::array::IntoIter<&'a [u8], 2>;
681
682    fn text(&mut self, node: tree_sitter::Node) -> Self::I {
683        let range = node.range();
684        let buffers = self.0.slices(range.start_byte..range.end_byte);
685        buffers.to_array().into_iter()
686    }
687}
688
/// The name of a language, its [`Language`] and its [`Queries`]
type LangParts<'a> = (&'a str, &'a Language, Queries<'a>);

/// The [`Query`]s of a language, one for each kind of query file
#[derive(Clone, Copy)]
struct Queries<'a> {
    /// The "highlights" query, its capture names become the forms
    highlights: &'a Query,
    /// The "indents" query, used for calculating indentation
    indents: &'a Query,
    /// The "injections" query
    injections: &'a Query,
}
697
/// The state that a [`TsParser`] can be in
enum ParserState {
    /// The parser exists and can be used
    Present(InnerTsParser),
    /// The parser is being created on another thread
    Remote(std::thread::JoinHandle<RemoteResult>),
    /// There is no parser, only the [`BufferTracker`] is kept
    ///
    /// This is the case if no filetype was found, or if the remote
    /// thread failed to get the parts of the language.
    NotSet(BufferTracker),
}
703
704impl ParserState {
705    fn parse(self) -> (Self, bool) {
706        match self {
707            ParserState::Present(mut parser) => {
708                parser.parse();
709                (ParserState::Present(parser), true)
710            }
711            ParserState::Remote(join_handle) => {
712                if join_handle.is_finished() {
713                    match join_handle.join().unwrap() {
714                        Ok(mut parser) => {
715                            parser.parse();
716                            (ParserState::Present(parser), true)
717                        }
718                        Err(tracker) => (ParserState::NotSet(tracker), false),
719                    }
720                } else {
721                    (ParserState::Remote(join_handle), false)
722                }
723            }
724            ParserState::NotSet(tracker) => (ParserState::NotSet(tracker), false),
725        }
726    }
727}
728
/// The descendant of `node` for the range `byte..byte + 1`
///
/// # Panics
///
/// Panics if tree-sitter finds no descendant for that range, the
/// panic is attributed to the caller.
#[track_caller]
fn descendant_in(node: Node, byte: usize) -> Node {
    node.descendant_for_byte_range(byte, byte + 1).unwrap()
}
733
734fn parser_fn<'a>(bytes: &'a Bytes) -> impl FnMut(usize, TsPoint) -> &'a [u8] {
735    let [s0, s1] = bytes.slices(..).to_array();
736    |byte, _point| {
737        if byte < s0.len() {
738            &s0[byte..]
739        } else {
740            &s1[byte - s0.len()..]
741        }
742    }
743}
744
745fn ts_point(point: Point, buffer: &Bytes) -> TsPoint {
746    let strs = buffer.slices(..point.byte());
747    let iter = strs.into_iter().rev();
748    let col = iter.take_while(|&b| b != b'\n').count();
749
750    TsPoint::new(point.line(), col)
751}
752
753fn ts_point_from(to: Point, (col, from): (usize, Point), str: &str) -> TsPoint {
754    let col = if to.line() == from.line() {
755        col + str.len()
756    } else {
757        str.bytes().rev().take_while(|&b| b != b'\n').count()
758    };
759
760    TsPoint::new(to.line(), col)
761}
762
763fn forms_from_lang_parts(
764    (lang, _, Queries { highlights, .. }): LangParts<'static>,
765) -> &'static [(FormId, u8)] {
766    #[rustfmt::skip]
767    const PRIORITIES: &[&str] = &[
768        "markup", "operator", "comment", "string", "diff", "variable", "module", "label",
769        "character", "boolean", "number", "type", "attribute", "property", "function", "constant",
770        "constructor", "keyword", "punctuation",
771    ];
772    type MemoizedForms<'a> = HashMap<&'a str, &'a [(FormId, u8)]>;
773
774    static LISTS: LazyLock<Mutex<MemoizedForms<'static>>> = LazyLock::new(Mutex::default);
775    let mut lists = LISTS.lock().unwrap();
776
777    if let Some(forms) = lists.get(lang) {
778        forms
779    } else {
780        let capture_names = highlights.capture_names();
781        let priorities = capture_names.iter().map(|name| {
782            PRIORITIES
783                .iter()
784                .take_while(|p| !name.starts_with(*p))
785                .count() as u8
786        });
787
788        let ids = form::ids_of_non_static(
789            capture_names
790                .iter()
791                .map(|name| name.to_string() + "." + lang),
792        );
793        let forms: Vec<(FormId, u8)> = ids.into_iter().zip(priorities).collect();
794
795        lists.insert(lang, forms.leak());
796        lists.get(lang).unwrap()
797    }
798}
799
800fn lang_parts_of(lang: &str) -> Result<LangParts<'static>, Text> {
801    static MAPS: LazyLock<Mutex<HashMap<&str, LangParts<'static>>>> = LazyLock::new(Mutex::default);
802
803    let mut maps = MAPS.lock().unwrap();
804
805    Ok(if let Some(lang_parts) = maps.get(lang).copied() {
806        lang_parts
807    } else {
808        let language: &'static Language = Box::leak(Box::new(languages::get_language(lang)?));
809
810        let highlights = query_from_path(lang, "highlights", language)?;
811        let indents = query_from_path(lang, "indents", language)?;
812        let injections = query_from_path(lang, "injections", language)?;
813
814        let queries = Queries { highlights, indents, injections };
815
816        let lang = lang.to_string().leak();
817
818        maps.insert(lang, (lang, language, queries));
819
820        (lang, language, queries)
821    })
822}
823
/// The [`Tagger`] for tree-sitter
///
/// Every call returns a copy of the same static [`Tagger`], which is
/// what lets the tree-sitter tags be removed before rehighlighting.
fn ts_tagger() -> Tagger {
    static TAGGER: LazyLock<Tagger> = Tagger::new_static();
    *TAGGER
}
829
830fn input_edit(change: Change<&str>, bytes: &Bytes) -> InputEdit {
831    let start = change.start();
832    let added = change.added_end();
833    let taken = change.taken_end();
834
835    let ts_start = ts_point(start, bytes);
836    let ts_taken_end = ts_point_from(taken, (ts_start.column, start), change.taken_str());
837    let ts_added_end = ts_point_from(added, (ts_start.column, start), change.added_str());
838
839    InputEdit {
840        start_byte: start.byte(),
841        old_end_byte: taken.byte(),
842        new_end_byte: added.byte(),
843        start_position: ts_start,
844        old_end_position: ts_taken_end,
845        new_end_position: ts_added_end,
846    }
847}
848
849/// Returns a new [`Query`] for a given language and kind
850///
851/// If the [`Query`] in question does not exist, returns an emtpy
852/// [`Query`] instead.
853fn query_from_path(name: &str, kind: &str, language: &Language) -> Result<&'static Query, Text> {
854    static QUERIES: LazyLock<Mutex<HashMap<PathBuf, &'static Query>>> =
855        LazyLock::new(Mutex::default);
856
857    let queries_dir = duat_core::utils::plugin_dir("duat-treesitter")?.join("queries");
858
859    let path = queries_dir.join(name).join(kind).with_extension("scm");
860
861    let mut queries = QUERIES.lock().unwrap();
862
863    Ok(if let Some(query) = queries.get(&path) {
864        query
865    } else {
866        let Ok(mut query) = fs::read_to_string(&path) else {
867            let query = Box::leak(Box::new(Query::new(language, "").unwrap()));
868            queries.insert(path, query);
869            return Ok(query);
870        };
871
872        let Some(first_line) = query.lines().map(String::from).next() else {
873            context::warn!("Query is empty");
874            let query = Box::leak(Box::new(Query::new(language, "").unwrap()));
875            queries.insert(path, query);
876            return Ok(query);
877        };
878
879        if let Some(langs) = first_line.strip_prefix("; inherits: ") {
880            for name in langs.split(',') {
881                let path = queries_dir.join(name).join(kind).with_extension("scm");
882                match fs::read_to_string(&path) {
883                    Ok(inherited_query) => {
884                        if inherited_query.is_empty() {
885                            context::warn!("Inherited query is empty");
886                        }
887
888                        query = format!("{inherited_query}\n{query}");
889                    }
890                    Err(err) => context::error!("{err}"),
891                }
892            }
893        }
894
895        let query = Box::leak(Box::new(match Query::new(language, &query) {
896            Ok(query) => query,
897            Err(err) => return Err(txt!("{err}")),
898        }));
899
900        queries.insert(path, query);
901
902        query
903    })
904}
905
/// Convenience methods for use of tree-sitter in [`Buffer`]s
pub trait TsBuffer {
    /// The level of indentation required at a certain [`Point`]
    ///
    /// This is determined by a query, currently, it is the query
    /// located in
    /// `"{plugin_dir}/duat-treesitter/queries/{lang}/indents.scm"`
    ///
    /// Returns [`None`] if no indentation could be determined.
    fn ts_indent_on(&self, p: Point) -> Option<usize>;
}
915
916impl TsBuffer for Buffer {
917    fn ts_indent_on(&self, p: Point) -> Option<usize> {
918        self.read_parser(|ts: &TsParser| {
919            ts.indent_on(p, self.text().bytes(), self.get_print_opts())
920        })
921        .flatten()
922    }
923}
924
/// Convenience methods for use of tree-sitter in [`Cursor`]s
pub trait TsCursor {
    /// The level of indentation required at the [`Cursor`]'s `caret`
    ///
    /// This is determined by a query, currently, it is the query
    /// located in
    /// `"{plugin_dir}/duat-treesitter/queries/{lang}/indents.scm"`
    fn ts_indent(&self) -> Option<usize>;

    /// The level of indentation required at a certain [`Point`]
    ///
    /// This is determined by a query, currently, it is the query
    /// located in
    /// `"{plugin_dir}/duat-treesitter/queries/{lang}/indents.scm"`
    fn ts_indent_on(&self, p: Point) -> Option<usize>;

    /// Reindents the [`Cursor`]'s line
    ///
    /// Returns `true` if the line was reindented.
    ///
    /// This is determined by a query, currently, it is the query
    /// located in
    /// `"{plugin_dir}/duat-treesitter/queries/{lang}/indents.scm"`
    fn ts_reindent(&mut self) -> bool;
}
950
951impl<S> TsCursor for Cursor<'_, Buffer, S> {
952    fn ts_indent(&self) -> Option<usize> {
953        self.ts_indent_on(self.caret())
954    }
955
956    fn ts_indent_on(&self, p: Point) -> Option<usize> {
957        let opts = self.opts();
958
959        self.read_parser(|ts: &TsParser| ts.indent_on(p, self.text().bytes(), opts))
960            .flatten()
961    }
962
963    fn ts_reindent(&mut self) -> bool {
964        fn prev_non_empty_line_points<S>(c: &mut Cursor<Buffer, S>) -> Option<Range<Point>> {
965            let byte_col = c
966                .text()
967                .slices(..c.caret().byte())
968                .take_while(|b| *b != b'\n')
969                .count();
970            let mut lines = c.lines_on(..c.caret().byte() - byte_col);
971            let prev = lines.find_map(|(n, l): (usize, &str)| {
972                l.chars().any(|c| !c.is_whitespace()).then_some(n)
973            });
974            prev.map(|n| c.text().line_range(n))
975        }
976
977        let old_col = self.v_caret().char_col();
978        let anchor_existed = self.anchor().is_some();
979
980        let old_indent = self.indent();
981        let new_indent = if let Some(indent) = self.ts_indent() {
982            indent
983        } else {
984            let prev_non_empty = prev_non_empty_line_points(self);
985            prev_non_empty
986                .map(|range| self.indent_on(range.start))
987                .unwrap_or(0)
988        };
989        let indent_diff = new_indent as i32 - old_indent as i32;
990
991        self.move_hor(-(old_col as i32));
992        self.set_anchor();
993        self.move_hor(old_indent as i32);
994
995        if self.caret() == self.anchor().unwrap() {
996            self.insert(" ".repeat(new_indent));
997        } else {
998            self.move_hor(-1);
999            self.replace(" ".repeat(new_indent));
1000        }
1001        self.set_caret_on_start();
1002        self.unset_anchor();
1003
1004        if anchor_existed {
1005            self.set_anchor();
1006            if old_col < old_indent {
1007                self.move_hor(old_col as i32);
1008            } else {
1009                self.move_hor(old_col as i32 + indent_diff);
1010            }
1011            self.swap_ends();
1012        }
1013
1014        if old_col < old_indent {
1015            self.move_hor(old_col as i32);
1016        } else {
1017            self.move_hor(old_col as i32 + indent_diff);
1018        }
1019
1020        indent_diff != 0
1021    }
1022}
1023
1024#[allow(unused)]
1025fn format_root(node: Node) -> Text {
1026    fn format_range(node: Node, builder: &mut Builder) {
1027        let mut first = true;
1028        for point in [node.start_position(), node.end_position()] {
1029            builder.push(txt!(
1030                "[punctuation.bracket.TreeView][[[coords.TreeView]{}\
1031             	 [punctuation.delimiter.TreeView],[] [coords.TreeView]{}\
1032             	 [punctuation.bracket.TreeView]]]",
1033                point.row,
1034                point.column
1035            ));
1036
1037            if first {
1038                first = false;
1039                builder.push(txt!("[punctuation.delimiter],[] "));
1040            }
1041        }
1042        builder.push("\n");
1043    }
1044
1045    fn format_node(
1046        node: Node,
1047        depth: usize,
1048        pars: usize,
1049        builder: &mut Builder,
1050        name: Option<&str>,
1051    ) {
1052        builder.push("  ".repeat(depth));
1053
1054        if let Some(name) = name {
1055            builder.push(txt!("[node.field]{name}[punctuation.delimiter.TreeView]: "));
1056        }
1057
1058        builder.push(txt!("[punctuation.bracket.TreeView]("));
1059        builder.push(txt!("[node.name]{}", node.grammar_name()));
1060
1061        let mut cursor = node.walk();
1062        let named_children = node.named_children(&mut cursor);
1063        let len = named_children.len();
1064
1065        if len == 0 {
1066            builder.push(txt!(
1067                "[punctuation.bracket.TreeView]{}[] ",
1068                ")".repeat(pars)
1069            ));
1070            format_range(node, builder);
1071        } else {
1072            builder.push(" ");
1073            format_range(node, builder);
1074
1075            let mut i = 0;
1076
1077            for (i, child) in named_children.enumerate() {
1078                let name = node.field_name_for_named_child(i as u32);
1079                let pars = if i == len - 1 { pars + 1 } else { 1 };
1080                format_node(child, depth + 1, pars, builder, name);
1081            }
1082        }
1083    }
1084
1085    let mut cursor = node.walk();
1086    let mut builder = Text::builder();
1087
1088    format_node(node, 0, 1, &mut builder, None);
1089
1090    builder.build()
1091}
1092
1093fn highlight_and_inject(
1094    root: Node,
1095    injected_trees: &mut Vec<InjectedTree>,
1096    (lang_parts, forms): (LangParts<'static>, &'static [(FormId, u8)]),
1097    (bytes, tags): (&Bytes, &mut Tags),
1098    range: Range<usize>,
1099) {
1100    let tagger = ts_tagger();
1101    let (.., Queries { highlights, injections, .. }) = &lang_parts;
1102
1103    let mut cursor = QueryCursor::new();
1104    cursor.set_byte_range(range.clone());
1105    let buf = TsBuf(bytes);
1106
1107    let cn = injections.capture_names();
1108    let is_content = |cap: &&QueryCap| cn[cap.index as usize] == "injection.content";
1109    let is_language = |cap: &&QueryCap| cn[cap.index as usize] == "injection.language";
1110
1111    let mut new_langs: Vec<(LangParts<'static>, Ranges)> = Vec::new();
1112
1113    let mut inj_captures = cursor.captures(injections, root, buf);
1114    while let Some((qm, _)) = inj_captures.next() {
1115        let Some(cap) = qm.captures.iter().find(is_content) else {
1116            continue;
1117        };
1118        let cap_range = cap.node.byte_range();
1119
1120        let props = injections.property_settings(qm.pattern_index);
1121        let Some(lang) = props
1122            .iter()
1123            .find_map(|p| {
1124                (p.key.as_ref() == "injection.language")
1125                    .then_some(p.value.as_ref().unwrap().to_string())
1126            })
1127            .or_else(|| {
1128                let cap = qm.captures.iter().find(is_language)?;
1129                Some(bytes.slices(cap.node.byte_range()).try_to_string().unwrap())
1130            })
1131        else {
1132            continue;
1133        };
1134
1135        let Ok(mut lang_parts) = lang_parts_of(&lang) else {
1136            continue;
1137        };
1138
1139        // You may want to set a new injections query, only for this capture.
1140        if let Some(prop) = props.iter().find(|p| p.key.as_ref() == "injection.query")
1141            && let Some(value) = prop.value.as_ref()
1142        {
1143            match query_from_path(&lang, value, lang_parts.1) {
1144                Ok(injections) => {
1145                    lang_parts.2.injections = injections;
1146                }
1147                Err(err) => context::error!("{err}"),
1148            }
1149        };
1150
1151        if let Some(inj) = injected_trees
1152            .iter_mut()
1153            .find(|inj| inj.lang_parts().0 == lang_parts.0)
1154        {
1155            inj.add_range(cap_range.clone());
1156        } else if let Some(new) = new_langs.iter_mut().find(|(lp, _)| lp.0 == lang_parts.0) {
1157            new.1.add(cap_range);
1158        } else {
1159            new_langs.push((lang_parts, Ranges::new(cap_range)));
1160        }
1161    }
1162
1163    for (lang_parts, ranges) in new_langs {
1164        injected_trees.push(InjectedTree::new(bytes, lang_parts, ranges));
1165    }
1166
1167    injected_trees.retain_mut(|inj| {
1168        if inj.is_empty() {
1169            false
1170        } else {
1171            inj.update_tree(bytes);
1172            inj.highlight_and_inject(bytes, tags, range.clone());
1173            true
1174        }
1175    });
1176
1177    let mut hi_captures = cursor.captures(highlights, root, buf);
1178    while let Some((qm, _)) = hi_captures.next() {
1179        let qm: &QueryMatch = qm;
1180        for cap in qm.captures.iter() {
1181            let ts_range = cap.node.range();
1182
1183            // Assume that an empty range must take up the whole line
1184            // Cuz sometimes it be like that
1185            let (form, priority) = forms[cap.index as usize];
1186            let range = ts_range.start_byte..ts_range.end_byte;
1187            tags.insert(tagger, range, form.to_tag(priority));
1188        }
1189    }
1190}
1191
1192/// Figures out injection changes
1193///
1194/// This function does not actually modify the injections (beyond
1195/// removing deinjected areas), as that will be done by the
1196/// highlight_and_inject function.
1197///
1198/// Its main purpose is to find the regions where changes have taken
1199/// place and add them to the ranges to update.
1200fn refactor_injections(
1201    ranges: &mut Ranges,
1202    (lang_parts, injected_trees): (LangParts, &mut Vec<InjectedTree>),
1203    (old, new): (Option<&Tree>, &Tree),
1204    bytes: &Bytes,
1205) {
1206    let buf = TsBuf(bytes);
1207    let (.., Queries { injections, .. }) = lang_parts;
1208    let mut cursor = QueryCursor::new();
1209
1210    let cn = injections.capture_names();
1211    let is_content = |cap: &&QueryCap| cn[cap.index as usize] == "injection.content";
1212
1213    let mut inj_ranges = Ranges::empty();
1214
1215    for range in ranges.iter() {
1216        cursor.set_byte_range(range.clone());
1217
1218        if let Some(old) = old {
1219            let mut inj_captures = cursor.captures(injections, old.root_node(), buf);
1220            while let Some((qm, _)) = inj_captures.next() {
1221                if let Some(cap) = qm.captures.iter().find(is_content) {
1222                    inj_ranges.add(cap.node.byte_range());
1223                    for inj in injected_trees.iter_mut() {
1224                        inj.remove_range(cap.node.byte_range());
1225                    }
1226                }
1227            }
1228        }
1229
1230        let mut inj_captures = cursor.captures(injections, new.root_node(), buf);
1231        while let Some((qm, _)) = inj_captures.next() {
1232            if let Some(cap) = qm.captures.iter().find(is_content) {
1233                inj_ranges.add(cap.node.byte_range());
1234            }
1235        }
1236    }
1237
1238    for inj in injected_trees.iter_mut() {
1239        inj.refactor_injections(ranges, bytes);
1240    }
1241
1242    ranges.merge(inj_ranges);
1243}
1244
/// Either an [`InnerTsParser`] or a [`BufferTracker`]
///
/// NOTE(review): presumably the [`BufferTracker`] is handed back
/// when the [`InnerTsParser`] could not be created; confirm against
/// the users of this alias.
type RemoteResult = Result<InnerTsParser, BufferTracker>;