duat_match_pairs/
lib.rs

1//! A simple [`Plugin`] to match pairs of parentheses
2//!
3//! # Installation
4//!
5//! This is a default plugin, but you can add it manually in order to
6//! configure it:
7//!
8//! ```bash
9//! cargo add duat-match-pairs@"*" --rename match-pairs
10//! ```
11//!
12//! Or, if you are using a `--git-deps` version of duat, do this:
13//!
14//! ```bash
15//! cargo add --git https://github.com/AhoyISki/duat-match-pairs --rename match-pairs
16//! ```
17//!
18//! # Usage
19//!
20//! In order to make use of it, just add the following to your `setup`
21//! function:
22//!
23//! ```rust
24//! # #[macro_use]
25//! # macro_rules! setup_duat { ($duat:ident) => {} }
26//! # mod duat { pub mod prelude { pub fn plug<P>(plugin: P) {} }}
27//! setup_duat!(setup);
28//! use duat::prelude::*;
29//!
30//! fn setup() {
31//!     plug(duat_match_pairs::MatchPairs::new());
32//! }
33//! ```
34//!
35//! In this plugin, there are two types of "pairs", these are the
36//! normal pairs and the treesitter pairs. The normal pairs match
37//! based on the content of the text itself, so for example, in this
38//! situation:
39//!
40//! ```rust
41//! let my_string = "(this is my string)";
42//! ```
43//!
44//! There is a normal pair within the string of `(`,`)`. However,
45//! there is no treesitter pair in there, because a treesitter pair
46//! only matches if the pairs are on the language's syntax tree.
47//!
48//! This distinction allows for some combination of pairings that can
49//! also be used as non pairs. For example, in Rust, `<`,`>` is a pair
50//! only on type arguments and things of the sort, in other cases, it
51//! is just a comparison operator. That's where the treesitter pairs
52//! come in, as they can identify when it is an actual pair, or just
53//! the operator.
54//!
55//! In order to change what counts as a normal pair and what counts as
56//! a treesitter pair, you can add the following to the setup
57//! function:
58//!
59//! ```rust
60//! # #[macro_use]
61//! # macro_rules! setup_duat { ($duat:ident) => {} }
62//! # mod duat { pub mod prelude { pub fn plug<P>(plugin: P) {} }}
63//! setup_duat!(setup);
64//! use duat::prelude::*;
65//!
66//! fn setup() {
67//!     plug(
68//!         duat_match_pairs::MatchPairs::new()
//!             .match_pairs([["(", ")"], ["{", "}"], ["[", "]"]])
//!             .match_ts_pairs([["<", ">"], ["|", "|"]]),
//!     );
//! }
//! ```
//!
//! Two things to note here:
//!
//! - For now, normal pairs only support single character delimiters.
//! - Both kinds of pairs are given as plain strings. Normal pairs are
//!   escaped automatically before being searched for as regexes.
80//!
81//! [`Plugin`]: duat_core::Plugin
82use std::{
83    collections::HashMap,
84    ops::Range,
85    sync::{LazyLock, Mutex},
86};
87
88use duat_core::{
89    Plugin, Plugins,
90    context::Handle,
91    data::Pass,
92    form,
93    hook::{self, BufferUpdated},
94    text::{Point, RegexHaystack, Tagger},
95    ui::Widget,
96};
97use duat_filetype::FileType;
98use duat_treesitter::TsHandle;
99
/// Highlights the match of delimiters under [`Selection`]s
///
/// Pairs are kept as raw bytes so they can be compared directly with
/// the bytes under a selection and with tree-sitter grammar names.
///
/// [`Selection`]: duat_core::mode::Selection
#[derive(Debug, Clone)]
pub struct MatchPairs {
    /// Pairs that are always matched: through tree-sitter when
    /// possible, through a regex search otherwise.
    ts_and_reg: Vec<[&'static [u8]; 2]>,
    /// Pairs that are matched only when tree-sitter recognizes them.
    ts_only: Vec<[&'static [u8]; 2]>,
    /// Regex-escaped version of `ts_and_reg`, index for index.
    escaped: Vec<[&'static str; 2]>,
}
109
110impl MatchPairs {
111    /// Returns a new [`MatchPairs`]
112    pub fn new() -> Self {
113        Self {
114            ts_and_reg: vec![[b"(", b")"], [b"{", b"}"], [b"[", b"]"]],
115            // TODO: Add more filetypes
116            ts_only: vec![[b"<", b">"]],
117            escaped: vec![["\\(", "\\)"], ["\\{", "\\}"], ["\\[", "\\]"]],
118        }
119    }
120
121    /// Match these pairs _always_
122    ///
123    /// The counterpart to this is [`match_ts_pairs`], which will
124    /// match the pairs only if they are tree-sitter pairings.
125    ///
126    /// Matching always, as opposed to only tree-sitter pairs can be
127    /// useful if you don't have tree-sitter available, or if you are
128    /// matching inside comments or strings, where pairs wouldn't show
129    /// up as nodes in tree-sitter.
130    ///
131    /// [`match_ts_pairs`]: Self::match_ts_pairs
132    pub fn match_pairs(self, pairs: impl IntoIterator<Item = [&'static str; 2]>) -> Self {
133        let ts_and_bytes: Vec<[&'static [u8]; 2]> = pairs
134            .into_iter()
135            .map(|arr| arr.map(str::as_bytes))
136            .collect();
137        let escaped = ts_and_bytes
138            .iter()
139            .map(|[l, r]| [escape(l), escape(r)])
140            .collect();
141
142        Self {
143            ts_and_reg: ts_and_bytes,
144            escaped,
145            ..self
146        }
147    }
148
149    /// Match these pairs _only_ when they are tree-sitter pairs
150    ///
151    /// This can be useful for situations where pairings can also be
152    /// interpreted as other things, like `|`, which can be a
153    /// delimiter for parameters in a rust closure, but is an "or"
154    /// operator most of the time, or `<`/`>`, which are comparison
155    /// operators most of the time, but can also delimit things like
156    /// types in some languages.
157    ///
158    /// The counterpart to this is [`match_pairs`], which will always
159    /// match, even when the pair is not a tree-sitter pair.
160    ///
161    /// [`match_pairs`]: Self::match_pairs
162    pub fn match_ts_pairs(self, pairs: impl IntoIterator<Item = [&'static str; 2]>) -> Self {
163        Self {
164            ts_only: pairs
165                .into_iter()
166                .map(|arr| arr.map(str::as_bytes))
167                .collect(),
168            ..self
169        }
170    }
171}
172
173impl Plugin for MatchPairs {
174    fn plug(self, plugins: &Plugins) {
175        plugins.require::<duat_treesitter::TreeSitter>();
176
177        hook::add::<BufferUpdated>(move |pa, handle| {
178            let file = handle.write(pa);
179
180            let match_pairs_ref = MatchPairsRef {
181                ts_and_reg: &self.ts_and_reg,
182                ts_only: if let Some(path) = file.path_set()
183                    && let Some(filetype) = path.filetype()
184                {
185                    match filetype {
186                        "rust" => &[[b"<".as_slice(), b">"], [b"|", b"|"]],
187                        _ => self.ts_only.as_slice(),
188                    }
189                } else {
190                    &self.ts_only
191                },
192                escaped: &self.escaped,
193            };
194
195            let range = handle.full_printed_range(pa);
196            match_pairs_ref.update(pa, handle, range);
197        });
198    }
199}
200
201impl Default for MatchPairs {
202    fn default() -> Self {
203        Self::new()
204    }
205}
206
/// A borrowed set of pairs, used for a single update of a buffer
struct MatchPairsRef<'mp> {
    /// Pairs matched through tree-sitter or, failing that, through a
    /// regex search
    ts_and_reg: &'mp [[&'static [u8]; 2]],
    /// Pairs matched only through tree-sitter
    ts_only: &'mp [[&'static [u8]; 2]],
    /// The regexes for the pairs in `ts_and_reg`, indexed in the same
    /// way
    escaped: &'mp [[&'static str; 2]],
}
212
213impl MatchPairsRef<'_> {
214    fn update(self, pa: &mut Pass, handle: &Handle, range: Range<Point>) {
215        fn ends(str: &[u8]) -> impl Fn(&[&[u8]; 2]) -> bool {
216            move |delims| delims.contains(&str)
217        }
218
219        let buffer = handle.write(pa);
220
221        buffer.text_mut().remove_tags(*PAREN_TAGGER, ..);
222
223        let selections: Vec<_> = buffer
224            .selections()
225            .iter_within(range)
226            .map(|(_, sel, is_main)| (sel.byte_range(buffer.bytes()), is_main))
227            .collect();
228
229        'selections: for (c_range, is_main) in selections {
230            let str: Vec<u8> = handle.text(pa).bytes().slices(c_range.clone()).collect();
231
232            // TODO: Support multi-character pairs
233            let (delims, escaped) = if let Some(i) = self.ts_and_reg.iter().position(ends(&str)) {
234                (self.ts_and_reg[i], Some(self.escaped[i]))
235            } else if let Some(i) = self.ts_only.iter().position(ends(&str)) {
236                (self.ts_only[i], None)
237            } else {
238                continue;
239            };
240
241            let get_ts_ranges = |parser: &duat_treesitter::Parser| {
242                let node = parser
243                    .root_node()
244                    .descendant_for_byte_range(c_range.start, c_range.end)
245                    .and_then(|node| {
246                        delims
247                            .iter()
248                            .position(|d| *d == node.grammar_name().as_bytes())
249                            .zip(Some(node))
250                    });
251                let ((delim_side, node), parent) =
252                    node.and_then(|(ds, n)| Some((ds, n)).zip(n.parent()))?;
253
254                let mut c = parent.walk();
255
256                if delim_side == 0
257                    && (c.goto_first_child() && c.node() == node && c.goto_parent())
258                    && (c.goto_last_child() && c.node().grammar_name().as_bytes() == delims[1])
259                {
260                    Some((node.byte_range(), c.node().byte_range()))
261                } else if (c.goto_last_child() && c.node() == node && c.goto_parent())
262                    && (c.goto_first_child() && c.node().grammar_name().as_bytes() == delims[0])
263                {
264                    Some((c.node().byte_range(), node.byte_range()))
265                } else {
266                    None
267                }
268            };
269
270            let (start_range, end_range) = if let Some((parser, _)) = handle.get_ts_parser(pa)
271                && let Some(ranges) = get_ts_ranges(parser)
272            {
273                ranges
274            } else if let Some(escaped) = escaped {
275                if str == delims[0] {
276                    let mut iter = handle
277                        .text(pa)
278                        .bytes()
279                        .search(escaped)
280                        .range(c_range.start..);
281                    let mut bounds = 0;
282
283                    loop {
284                        let Some((i, m_range)) = iter.next() else {
285                            continue 'selections;
286                        };
287                        bounds = (bounds + (i == 0) as usize) - (i == 1) as usize;
288                        if bounds == 0 {
289                            break (c_range, m_range);
290                        }
291                    }
292                } else {
293                    let mut iter = handle.text(pa).bytes().search(escaped).range(..c_range.end);
294                    let mut bounds = 0;
295
296                    loop {
297                        let Some((i, m_range)) = iter.next_back() else {
298                            continue 'selections;
299                        };
300                        bounds = (bounds + (i == 1) as usize) - (i == 0) as usize;
301                        if bounds == 0 {
302                            break (m_range, c_range);
303                        }
304                    }
305                }
306            } else {
307                continue;
308            };
309
310            let buffer = handle.write(pa);
311
312            let id = if is_main {
313                form::id_of!("matched_pair.main.start")
314            } else {
315                form::id_of!("matched_pair.extra.start")
316            };
317            buffer
318                .text_mut()
319                .insert_tag(*PAREN_TAGGER, start_range, id.to_tag(99));
320
321            let id = if is_main {
322                form::id_of!("matched_pair.main.end")
323            } else {
324                form::id_of!("matched_pair.extra.end")
325            };
326            buffer
327                .text_mut()
328                .insert_tag(*PAREN_TAGGER, end_range, id.to_tag(99));
329        }
330    }
331}
332
/// The [`Tagger`] of every tag added by this plugin, which lets all of
/// them be removed at once before the pairs are matched again
static PAREN_TAGGER: LazyLock<Tagger> = Tagger::new_static();
334
/// Escapes regex pattern characters.
///
/// Returns a regex that matches `str` literally, by placing a `\`
/// before every character that has a special meaning in a regex,
/// the backslash included.
///
/// The result is leaked in order to be `'static`. Results are cached
/// per input, so each distinct delimiter is leaked only once, no
/// matter how many times it is escaped.
///
/// # Panics
///
/// Panics if `str` is not valid UTF-8, which can't happen for bytes
/// taken from a `&str`.
fn escape(str: &'static [u8]) -> &'static str {
    // Characters with a special meaning in a regex, outside of classes.
    const TOKENS: &str = r"\(){}[]^$.+*?|";
    static ESCAPED_STRS: LazyLock<Mutex<HashMap<&'static [u8], &'static str>>> =
        LazyLock::new(Mutex::default);

    // Entries are only ever added to the cache, so it is still
    // consistent even if another thread panicked while holding it.
    let mut escaped_strs = ESCAPED_STRS
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());

    *escaped_strs.entry(str).or_insert_with(|| {
        let literal =
            std::str::from_utf8(str).expect("pair delimiters always come from a `&str`");

        // The escaped string is built from scratch: inserting the
        // backslashes in place would shift every position after them.
        let mut escaped = String::with_capacity(literal.len() * 2);
        for char in literal.chars() {
            if TOKENS.contains(char) {
                escaped.push('\\');
            }
            escaped.push(char);
        }

        // Boxing first drops the spare capacity, so only the string
        // itself is leaked.
        let leaked: &'static str = Box::leak(escaped.into_boxed_str());
        leaked
    })
}