zrx_id/id/matcher.rs
1// Copyright (c) 2025-2026 Zensical and contributors
2
3// SPDX-License-Identifier: MIT
4// All contributions are certified under the DCO
5
6// Permission is hereby granted, free of charge, to any person obtaining a copy
7// of this software and associated documentation files (the "Software"), to
8// deal in the Software without restriction, including without limitation the
9// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10// sell copies of the Software, and to permit persons to whom the Software is
11// furnished to do so, subject to the following conditions:
12
13// The above copyright notice and this permission notice shall be included in
14// all copies or substantial portions of the Software.
15
16// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18// FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
19// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22// IN THE SOFTWARE.
23
24// ----------------------------------------------------------------------------
25
26//! Matcher.
27
28use std::str::FromStr;
29
30use super::convert::TryToId;
31
32mod builder;
33mod component;
34mod error;
35pub mod matches;
36
37pub use builder::Builder;
38use component::Component;
39pub use error::{Error, Result};
40pub use matches::Matches;
41
42// ----------------------------------------------------------------------------
43// Structs
44// ----------------------------------------------------------------------------
45
/// Matcher.
///
/// Matchers provide efficient matching of identifiers against an arbitrary set
/// of selectors in linear time, implemented through the use of the [`globset`]
/// crate, which compiles globs into deterministic finite automata (DFA). Each
/// [`Component`] of the matcher receives its own distinct [`GlobSet`][].
///
/// While components are matched one after another, all selectors registered
/// in a [`Component`] are matched in linear time, i.e., O(n), where n is the
/// length of the component value. The [`Matches`] returned by each component
/// are intersected, leaving only selectors that match all components. There
/// are theoretical limits on the number of selectors that can be added to a
/// [`Component`], so it can be necessary to split across multiple matchers if
/// the number of selectors is high, i.e., 10,000 or more.
///
/// [`GlobSet`]: globset::GlobSet
///
/// # Examples
///
/// ```
/// # use std::error::Error;
/// # fn main() -> Result<(), Box<dyn Error>> {
/// use zrx_id::{Id, Matcher};
///
/// // Create matcher builder and add selector
/// let mut builder = Matcher::builder();
/// builder.add(&"zrs:::::**/*.md:")?;
///
/// // Create matcher from builder
/// let matcher = builder.build()?;
///
/// // Create identifier and match selector
/// let id: Id = "zri:file:::docs:index.md:".parse()?;
/// assert!(matcher.is_match(&id)?);
/// # Ok(())
/// # }
/// ```
#[derive(Clone, Debug, Default)]
pub struct Matcher {
    /// Component for provider.
    provider: Component,
    /// Component for resource.
    resource: Component,
    /// Component for variant.
    variant: Component,
    /// Component for context.
    context: Component,
    /// Component for location.
    location: Component,
    /// Component for fragment.
    fragment: Component,
}
98
99// ----------------------------------------------------------------------------
100// Implementations
101// ----------------------------------------------------------------------------
102
103impl Matcher {
104 /// Returns whether the given identifier matches any selector.
105 ///
106 /// Components are compared in descending variability and their likelihood
107 /// for mismatch, starting with the `location`. This approach effectively
108 /// tries to short-circuits the comparison. Note that empty components are
109 /// considered wildcards, so they will always match.
110 ///
111 /// # Errors
112 ///
113 /// Returns [`Error::Id`] if the identifier is invalid.
114 ///
115 /// # Examples
116 ///
117 /// ```
118 /// # use std::error::Error;
119 /// # fn main() -> Result<(), Box<dyn Error>> {
120 /// use zrx_id::{Id, Matcher};
121 ///
122 /// // Create matcher builder and add selector
123 /// let mut builder = Matcher::builder();
124 /// builder.add(&"zrs:::::**/*.md:")?;
125 ///
126 /// // Create matcher from builder
127 /// let matcher = builder.build()?;
128 ///
129 /// // Create identifier and match selector
130 /// let id: Id = "zri:file:::docs:index.md:".parse()?;
131 /// assert!(matcher.is_match(&id)?);
132 /// # Ok(())
133 /// # }
134 /// ```
135 #[inline]
136 pub fn is_match<T>(&self, id: &T) -> Result<bool>
137 where
138 T: TryToId,
139 {
140 self.matches(id).map(|matches| !matches.is_empty())
141 }
142
143 /// Returns the indices of selectors that match the identifier.
144 ///
145 /// This method compares each component of the identifier against the
146 /// corresponding component of a selector using the compiled globs, and
147 /// returns the indices of the matching selectors in the order they were
148 /// added to the [`Matcher`].
149 ///
150 /// Components are compared in descending variability and their likelihood
151 /// for mismatch, starting with the `location`. This approach effectively
152 /// tries to short-circuit the comparison. Note that empty components are
153 /// considered wildcards, so they will always match.
154 ///
155 /// # Errors
156 ///
157 /// Returns [`Error::Id`] if the identifier is invalid.
158 ///
159 /// # Examples
160 ///
161 /// ```
162 /// # use std::error::Error;
163 /// # fn main() -> Result<(), Box<dyn Error>> {
164 /// use zrx_id::{Id, Matcher, Matches};
165 ///
166 /// // Create matcher builder and add selector
167 /// let mut builder = Matcher::builder();
168 /// builder.add(&"zrs:::::**/*.md:")?;
169 ///
170 /// // Create matcher from builder
171 /// let matcher = builder.build()?;
172 ///
173 /// // Create identifier and obtain matched selectors
174 /// let id: Id = "zri:file:::docs:index.md:".parse()?;
175 /// assert_eq!(matcher.matches(&id)?, Matches::from_iter([0]));
176 /// # Ok(())
177 /// # }
178 /// ```
179 #[allow(clippy::missing_panics_doc)]
180 pub fn matches<T>(&self, id: &T) -> Result<Matches>
181 where
182 T: TryToId,
183 {
184 let id = id.try_to_id()?;
185
186 // Query all components from highest to lowest variability, and
187 // intersect the resulting match sets, keeping only full matches
188 let mut opt: Option<Matches> = None;
189 for (component, value) in [
190 (&self.location, Some(id.location())),
191 (&self.context, Some(id.context())),
192 (&self.provider, Some(id.provider())),
193 (&self.resource, id.resource()),
194 (&self.fragment, id.fragment()),
195 (&self.variant, id.variant()),
196 ] {
197 // If the component doesn't have a value, we could theoretically
198 // ignore all non-empty patterns and only match the empty ones,
199 // but we would then miss selectors that use explicit `*` or `**`
200 // wildcards. We use the unlikely `U+FFFE` to test for those.
201 let path = value.as_deref().unwrap_or("\u{FFFE}");
202 let matches = component.matches(path);
203
204 // Intersect with or set as tracking match set
205 if let Some(tracked) = &mut opt {
206 tracked.intersect(&matches);
207 } else {
208 opt = Some(matches);
209 }
210 }
211
212 // Return matches
213 Ok(opt.expect("invariant"))
214 }
215}
216
217// ----------------------------------------------------------------------------
218// Trait implementations
219// ----------------------------------------------------------------------------
220
221impl FromStr for Matcher {
222 type Err = Error;
223
224 /// Attempts to create a matcher from a string.
225 ///
226 /// The string must adhere to the following format and include exactly six
227 /// `:` separators, even if some components are empty. All components are
228 /// optional, which means they can be left empty, which is equivalent to
229 /// setting them to a `**` wildcard.
230 ///
231 /// ``` text
232 /// zrs:<provider>:<resource>:<variant>:<context>:<location>:<fragment>
233 /// ```
234 ///
235 /// # Errors
236 ///
237 /// Returns [`Error::Id`] if the given string can't be parsed into a valid
238 /// selector, from which the matcher is then constructed.
239 ///
240 /// # Examples
241 ///
242 /// ```
243 /// # use std::error::Error;
244 /// # fn main() -> Result<(), Box<dyn Error>> {
245 /// use zrx_id::Matcher;
246 ///
247 /// // Create matcher from string
248 /// let matcher: Matcher = "zrs:::::**/*.md:".parse()?;
249 /// # Ok(())
250 /// # }
251 /// ```
252 fn from_str(value: &str) -> Result<Self> {
253 Matcher::builder().with(&value)?.build()
254 }
255}
256
257// ----------------------------------------------------------------------------
258// Tests
259// ----------------------------------------------------------------------------
260
#[cfg(test)]
mod tests {

    mod is_match {
        use crate::id::matcher::{Matcher, Result};

        /// Identifier that every selector is checked against.
        const ID: &str = "zri:file:::docs:index.md:";

        #[test]
        fn handles_selectors() -> Result {
            // Literal components, with more and more of them left empty
            let selectors = [
                "zrs:file:::docs:index.md:",
                "zrs::::docs:index.md:",
                "zrs:::::index.md:",
                "zrs::::::",
            ];
            for selector in selectors {
                let matcher = selector.parse::<Matcher>()?;
                assert!(matcher.is_match(&ID)?);
            }
            Ok(())
        }

        #[test]
        fn handles_wildcards() -> Result {
            // Explicit `*` wildcards in one or more components
            let selectors = [
                "zrs:file:::docs:*.md:",
                "zrs:::::*.md:",
                "zrs:*::::*.md:",
                "zrs:*:*:*:*:*:",
            ];
            for selector in selectors {
                let matcher = selector.parse::<Matcher>()?;
                assert!(matcher.is_match(&ID)?);
            }
            Ok(())
        }

        #[test]
        fn handles_optionals() -> Result {
            // Brace alternations, including a braced wildcard
            let selectors = [
                "zrs:{git,file}:::{docs}:index.md:",
                "zrs::::docs:{index,about}.md:",
                "zrs:::::index.{md,rst}:",
                "zrs:::::{*}:",
            ];
            for selector in selectors {
                let matcher = selector.parse::<Matcher>()?;
                assert!(matcher.is_match(&ID)?);
            }
            Ok(())
        }

        #[test]
        fn handles_non_matches() -> Result {
            // Each selector differs from the identifier in some component
            let selectors = [
                "zrs:file:::{docs}:index.md:anchor",
                "zrs:{git,file}:master::::",
                "zrs:::::about.md:",
                "zrs::::::anchor",
            ];
            for selector in selectors {
                let matcher = selector.parse::<Matcher>()?;
                assert!(!matcher.is_match(&ID)?);
            }
            Ok(())
        }
    }

    mod matches {
        use crate::id::matcher::{Matcher, Matches, Result};

        /// Identifier that every selector is checked against.
        const ID: &str = "zri:file:::docs:index.md:";

        #[test]
        fn handles_selectors() -> Result {
            // Literal components, with more and more of them left empty
            let selectors = [
                "zrs:file:::docs:index.md:",
                "zrs::::docs:index.md:",
                "zrs:::::index.md:",
                "zrs::::::",
            ];
            for selector in selectors {
                let matcher = selector.parse::<Matcher>()?;
                let matches = matcher.matches(&ID)?;
                assert_eq!(matches, Matches::from_iter([0]));
            }
            Ok(())
        }

        #[test]
        fn handles_wildcards() -> Result {
            // Explicit `*` wildcards in one or more components
            let selectors = [
                "zrs:file:::docs:*.md:",
                "zrs:::::*.md:",
                "zrs:*::::*.md:",
                "zrs:*:*:*:*:*:",
            ];
            for selector in selectors {
                let matcher = selector.parse::<Matcher>()?;
                let matches = matcher.matches(&ID)?;
                assert_eq!(matches, Matches::from_iter([0]));
            }
            Ok(())
        }

        #[test]
        fn handles_optionals() -> Result {
            // Brace alternations, including a braced wildcard
            let selectors = [
                "zrs:{git,file}:::{docs}:index.md:",
                "zrs::::docs:{index,about}.md:",
                "zrs:::::index.{md,rst}:",
                "zrs:::::{*}:",
            ];
            for selector in selectors {
                let matcher = selector.parse::<Matcher>()?;
                let matches = matcher.matches(&ID)?;
                assert_eq!(matches, Matches::from_iter([0]));
            }
            Ok(())
        }

        #[test]
        fn handles_non_matches() -> Result {
            // Each selector differs from the identifier in some component
            let selectors = [
                "zrs:file:::{docs}:index.md:anchor",
                "zrs:{git,file}:master::::",
                "zrs:::::about.md:",
                "zrs::::::anchor",
            ];
            for selector in selectors {
                let matcher = selector.parse::<Matcher>()?;
                let matches = matcher.matches(&ID)?;
                assert_eq!(matches, Matches::new());
            }
            Ok(())
        }
    }
}