duat_match_pairs/lib.rs
1//! A simple [`Plugin`] to match pairs of parentheses
2//!
3//! # Installation
4//!
5//! This is a default plugin, but you can add it manually in order to
6//! configure it:
7//!
8//! ```bash
9//! cargo add duat-match-pairs@"*" --rename match-pairs
10//! ```
11//!
12//! Or, if you are using a `--git-deps` version of duat, do this:
13//!
14//! ```bash
15//! cargo add --git https://github.com/AhoyISki/duat-match-pairs --rename match-pairs
16//! ```
17//!
18//! # Usage
19//!
20//! In order to make use of it, just add the following to your `setup`
21//! function:
22//!
23//! ```rust
24//! # #[macro_use]
25//! # macro_rules! setup_duat { ($duat:ident) => {} }
26//! # mod duat { pub mod prelude { pub fn plug<P>(plugin: P) {} }}
27//! setup_duat!(setup);
28//! use duat::prelude::*;
29//!
30//! fn setup() {
31//! plug(duat_match_pairs::MatchPairs::new());
32//! }
33//! ```
34//!
//! In this plugin, there are two types of "pairs": normal pairs and
//! treesitter pairs. Normal pairs match based on the content of the
//! text itself, so for example, in this situation:
39//!
40//! ```rust
41//! let my_string = "(this is my string)";
42//! ```
43//!
44//! There is a normal pair within the string of `(`,`)`. However,
45//! there is no treesitter pair in there, because a treesitter pair
46//! only matches if the pairs are on the language's syntax tree.
47//!
//! This distinction matters for characters that only sometimes act
//! as a pair. For example, in Rust, `<`,`>` is a pair only around
//! type arguments and things of the sort; in other cases, each is
//! just a comparison operator. That's where the treesitter pairs
//! come in, as they can identify when it is an actual pair, or just
//! the operator.
54//!
55//! In order to change what counts as a normal pair and what counts as
56//! a treesitter pair, you can add the following to the setup
57//! function:
58//!
59//! ```rust
60//! # #[macro_use]
61//! # macro_rules! setup_duat { ($duat:ident) => {} }
62//! # mod duat { pub mod prelude { pub fn plug<P>(plugin: P) {} }}
63//! setup_duat!(setup);
64//! use duat::prelude::*;
65//!
66//! fn setup() {
67//! plug(
68//! duat_match_pairs::MatchPairs::new()
//!             .match_pairs([["(", ")"], ["{", "}"], ["[", "]"]])
//!             .match_ts_pairs([["<", ">"], ["|", "|"]]),
//!     );
//! }
//! ```
//!
//! Two things to note here:
//!
//! - For now, normal pairs only support single character delimiters.
//! - Both normal pairs and treesitter pairs are given as plain
//!   strings. Normal pairs are escaped by the plugin before being
//!   searched for, so you should not escape them yourself.
80//!
81//! [`Plugin`]: duat_core::Plugin
82use std::{
83 collections::HashMap,
84 ops::Range,
85 sync::{LazyLock, Mutex},
86};
87
88use duat_core::{
89 Plugin, Plugins,
90 context::Handle,
91 data::Pass,
92 form,
93 hook::{self, BufferUpdated},
94 text::{Point, RegexHaystack, Tagger},
95 ui::Widget,
96};
97use duat_filetype::FileType;
98use duat_treesitter::TsHandle;
99
/// Highlights the matching delimiter of the pair under each
/// [`Selection`]
///
/// Pairs come in two flavors: those that are always matched (through
/// tree-sitter when possible, through a text search otherwise), and
/// those that are matched only when they show up as tree-sitter
/// nodes.
///
/// [`Selection`]: duat_core::mode::Selection
#[derive(Clone, Debug)]
pub struct MatchPairs {
    /// Pairs that are always matched, as `[opener, closer]` bytes.
    ts_and_reg: Vec<[&'static [u8]; 2]>,
    /// Pairs that are matched only when they are tree-sitter nodes.
    ts_only: Vec<[&'static [u8]; 2]>,
    /// Regex-escaped versions of `ts_and_reg`, in the same order.
    escaped: Vec<[&'static str; 2]>,
}
109
110impl MatchPairs {
111 /// Returns a new [`MatchPairs`]
112 pub fn new() -> Self {
113 Self {
114 ts_and_reg: vec![[b"(", b")"], [b"{", b"}"], [b"[", b"]"]],
115 // TODO: Add more filetypes
116 ts_only: vec![[b"<", b">"]],
117 escaped: vec![["\\(", "\\)"], ["\\{", "\\}"], ["\\[", "\\]"]],
118 }
119 }
120
121 /// Match these pairs _always_
122 ///
123 /// The counterpart to this is [`match_ts_pairs`], which will
124 /// match the pairs only if they are tree-sitter pairings.
125 ///
126 /// Matching always, as opposed to only tree-sitter pairs can be
127 /// useful if you don't have tree-sitter available, or if you are
128 /// matching inside comments or strings, where pairs wouldn't show
129 /// up as nodes in tree-sitter.
130 ///
131 /// [`match_ts_pairs`]: Self::match_ts_pairs
132 pub fn match_pairs(self, pairs: impl IntoIterator<Item = [&'static str; 2]>) -> Self {
133 let ts_and_bytes: Vec<[&'static [u8]; 2]> = pairs
134 .into_iter()
135 .map(|arr| arr.map(str::as_bytes))
136 .collect();
137 let escaped = ts_and_bytes
138 .iter()
139 .map(|[l, r]| [escape(l), escape(r)])
140 .collect();
141
142 Self {
143 ts_and_reg: ts_and_bytes,
144 escaped,
145 ..self
146 }
147 }
148
149 /// Match these pairs _only_ when they are tree-sitter pairs
150 ///
151 /// This can be useful for situations where pairings can also be
152 /// interpreted as other things, like `|`, which can be a
153 /// delimiter for parameters in a rust closure, but is an "or"
154 /// operator most of the time, or `<`/`>`, which are comparison
155 /// operators most of the time, but can also delimit things like
156 /// types in some languages.
157 ///
158 /// The counterpart to this is [`match_pairs`], which will always
159 /// match, even when the pair is not a tree-sitter pair.
160 ///
161 /// [`match_pairs`]: Self::match_pairs
162 pub fn match_ts_pairs(self, pairs: impl IntoIterator<Item = [&'static str; 2]>) -> Self {
163 Self {
164 ts_only: pairs
165 .into_iter()
166 .map(|arr| arr.map(str::as_bytes))
167 .collect(),
168 ..self
169 }
170 }
171}
172
173impl Plugin for MatchPairs {
174 fn plug(self, plugins: &Plugins) {
175 plugins.require::<duat_treesitter::TreeSitter>();
176
177 hook::add::<BufferUpdated>(move |pa, handle| {
178 let file = handle.write(pa);
179
180 let match_pairs_ref = MatchPairsRef {
181 ts_and_reg: &self.ts_and_reg,
182 ts_only: if let Some(path) = file.path_set()
183 && let Some(filetype) = path.filetype()
184 {
185 match filetype {
186 "rust" => &[[b"<".as_slice(), b">"], [b"|", b"|"]],
187 _ => self.ts_only.as_slice(),
188 }
189 } else {
190 &self.ts_only
191 },
192 escaped: &self.escaped,
193 };
194
195 let range = handle.full_printed_range(pa);
196 match_pairs_ref.update(pa, handle, range);
197 });
198 }
199}
200
201impl Default for MatchPairs {
202 fn default() -> Self {
203 Self::new()
204 }
205}
206
/// A borrowed view of the pairs to match on a single update
struct MatchPairsRef<'mp> {
    /// Pairs looked up first; each one has a regex counterpart at the
    /// same index of `escaped`.
    ts_and_reg: &'mp [[&'static [u8]; 2]],
    /// Pairs looked up second, which have no regex counterpart.
    ts_only: &'mp [[&'static [u8]; 2]],
    /// Search patterns for the pairs in `ts_and_reg`, index-aligned.
    escaped: &'mp [[&'static str; 2]],
}
212
213impl MatchPairsRef<'_> {
214 fn update(self, pa: &mut Pass, handle: &Handle, range: Range<Point>) {
215 fn ends(str: &[u8]) -> impl Fn(&[&[u8]; 2]) -> bool {
216 move |delims| delims.contains(&str)
217 }
218
219 let buffer = handle.write(pa);
220
221 buffer.text_mut().remove_tags(*PAREN_TAGGER, ..);
222
223 let selections: Vec<_> = buffer
224 .selections()
225 .iter_within(range)
226 .map(|(_, sel, is_main)| (sel.byte_range(buffer.bytes()), is_main))
227 .collect();
228
229 'selections: for (c_range, is_main) in selections {
230 let str: Vec<u8> = handle.text(pa).bytes().slices(c_range.clone()).collect();
231
232 // TODO: Support multi-character pairs
233 let (delims, escaped) = if let Some(i) = self.ts_and_reg.iter().position(ends(&str)) {
234 (self.ts_and_reg[i], Some(self.escaped[i]))
235 } else if let Some(i) = self.ts_only.iter().position(ends(&str)) {
236 (self.ts_only[i], None)
237 } else {
238 continue;
239 };
240
241 let get_ts_ranges = |parser: &duat_treesitter::Parser| {
242 let node = parser
243 .root_node()
244 .descendant_for_byte_range(c_range.start, c_range.end)
245 .and_then(|node| {
246 delims
247 .iter()
248 .position(|d| *d == node.grammar_name().as_bytes())
249 .zip(Some(node))
250 });
251 let ((delim_side, node), parent) =
252 node.and_then(|(ds, n)| Some((ds, n)).zip(n.parent()))?;
253
254 let mut c = parent.walk();
255
256 if delim_side == 0
257 && (c.goto_first_child() && c.node() == node && c.goto_parent())
258 && (c.goto_last_child() && c.node().grammar_name().as_bytes() == delims[1])
259 {
260 Some((node.byte_range(), c.node().byte_range()))
261 } else if (c.goto_last_child() && c.node() == node && c.goto_parent())
262 && (c.goto_first_child() && c.node().grammar_name().as_bytes() == delims[0])
263 {
264 Some((c.node().byte_range(), node.byte_range()))
265 } else {
266 None
267 }
268 };
269
270 let (start_range, end_range) = if let Some((parser, _)) = handle.get_ts_parser(pa)
271 && let Some(ranges) = get_ts_ranges(parser)
272 {
273 ranges
274 } else if let Some(escaped) = escaped {
275 if str == delims[0] {
276 let mut iter = handle
277 .text(pa)
278 .bytes()
279 .search(escaped)
280 .range(c_range.start..);
281 let mut bounds = 0;
282
283 loop {
284 let Some((i, m_range)) = iter.next() else {
285 continue 'selections;
286 };
287 bounds = (bounds + (i == 0) as usize) - (i == 1) as usize;
288 if bounds == 0 {
289 break (c_range, m_range);
290 }
291 }
292 } else {
293 let mut iter = handle.text(pa).bytes().search(escaped).range(..c_range.end);
294 let mut bounds = 0;
295
296 loop {
297 let Some((i, m_range)) = iter.next_back() else {
298 continue 'selections;
299 };
300 bounds = (bounds + (i == 1) as usize) - (i == 0) as usize;
301 if bounds == 0 {
302 break (m_range, c_range);
303 }
304 }
305 }
306 } else {
307 continue;
308 };
309
310 let buffer = handle.write(pa);
311
312 let id = if is_main {
313 form::id_of!("matched_pair.main.start")
314 } else {
315 form::id_of!("matched_pair.extra.start")
316 };
317 buffer
318 .text_mut()
319 .insert_tag(*PAREN_TAGGER, start_range, id.to_tag(99));
320
321 let id = if is_main {
322 form::id_of!("matched_pair.main.end")
323 } else {
324 form::id_of!("matched_pair.extra.end")
325 };
326 buffer
327 .text_mut()
328 .insert_tag(*PAREN_TAGGER, end_range, id.to_tag(99));
329 }
330 }
331}
332
/// The [`Tagger`] under which the tags of matched pairs are inserted,
/// and through which they are removed on every update.
static PAREN_TAGGER: LazyLock<Tagger> = Tagger::new_static();
334
/// Escapes regex pattern characters.
///
/// Every byte of `str` that is a regex metacharacter (one of
/// `\(){}[]^$.+*?|`) is prefixed with a backslash, so that the result
/// matches `str` literally when used as a pattern.
///
/// The results are leaked in order to be `'static`, but they are also
/// cached, so each distinct input is only ever leaked once.
///
/// # Panics
///
/// Panics if `str` is not valid UTF-8, which can't happen as long as
/// it was obtained from a `&str`.
fn escape(str: &'static [u8]) -> &'static str {
    static TOKENS: &[u8] = b"\\(){}[]^$.+*?|";
    static ESCAPED_STRS: LazyLock<Mutex<HashMap<Vec<u8>, &'static str>>> =
        LazyLock::new(Mutex::default);

    let mut escaped_strs = ESCAPED_STRS.lock().unwrap();

    if let Some(&escaped) = escaped_strs.get(str) {
        return escaped;
    }

    // Build the escaped version from scratch, instead of inserting
    // into a copy: insertions shift every following byte, so indices
    // taken from the unescaped input would no longer line up.
    let mut escaped = Vec::with_capacity(str.len() * 2);
    for &byte in str {
        if TOKENS.contains(&byte) {
            escaped.push(b'\\');
        }
        escaped.push(byte);
    }

    // Every token is ASCII, so a backslash is only ever added before
    // an ASCII byte, which keeps valid UTF-8 valid.
    let escaped: &'static str = String::from_utf8(escaped)
        .expect("delimiters should come from a &str")
        .leak();
    escaped_strs.insert(str.to_vec(), escaped);

    escaped
}
356}