Skip to main content

zrx_id/id/
matcher.rs

1// Copyright (c) 2025-2026 Zensical and contributors
2
3// SPDX-License-Identifier: MIT
4// All contributions are certified under the DCO
5
6// Permission is hereby granted, free of charge, to any person obtaining a copy
7// of this software and associated documentation files (the "Software"), to
8// deal in the Software without restriction, including without limitation the
9// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10// sell copies of the Software, and to permit persons to whom the Software is
11// furnished to do so, subject to the following conditions:
12
13// The above copyright notice and this permission notice shall be included in
14// all copies or substantial portions of the Software.
15
16// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18// FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
19// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22// IN THE SOFTWARE.
23
24// ----------------------------------------------------------------------------
25
26//! Matcher.
27
28use std::str::FromStr;
29
30use super::convert::TryToId;
31
32mod builder;
33mod component;
34mod error;
35pub mod matches;
36
37pub use builder::Builder;
38use component::Component;
39pub use error::{Error, Result};
40pub use matches::Matches;
41
42// ----------------------------------------------------------------------------
43// Structs
44// ----------------------------------------------------------------------------
45
46/// Matcher.
47///
48/// Matchers provide efficient matching of identifiers against an arbitrary set
49/// of selectors in linear time, implemented through the use of the [`globset`]
50/// crate, which compiles globs into deterministic finite automata (DFA). Each
51/// [`Component`] of the matcher receives its own distinct [`GlobSet`][].
52///
53/// While components are matched one after another, all registered identifiers
54/// in a [`Component`] are matched in linear time, i.e., O(n), where n is the
55/// length of the component value. The [`Matches`] returned by each component
56/// are intersected, leaving only selectors that match all components. There
57/// are theoretical limits on the number of selectors that can be added to a
58/// [`Component`], so it can be necessary to split across multiple matchers if
59/// the number of selectors is high, i.e., 10,000 or more.
60///
61/// [`GlobSet`]: globset::GlobSet
62///
63/// # Examples
64///
65/// ```
66/// # use std::error::Error;
67/// # fn main() -> Result<(), Box<dyn Error>> {
68/// use zrx_id::{Id, Matcher};
69///
70/// // Create matcher builder and add selector
71/// let mut builder = Matcher::builder();
72/// builder.add(&"zrs:::::**/*.md:")?;
73///
74/// // Create matcher from builder
75/// let matcher = builder.build()?;
76///
77/// // Create identifier and match selector
78/// let id: Id = "zri:file:::docs:index.md:".parse()?;
79/// assert!(matcher.is_match(&id)?);
80/// # Ok(())
81/// # }
82/// ```
83#[derive(Clone, Debug, Default)]
84pub struct Matcher {
85    /// Component for provider.
86    provider: Component,
87    /// Component for resource.
88    resource: Component,
89    /// Component for variant.
90    variant: Component,
91    /// Component for context.
92    context: Component,
93    /// Component for location.
94    location: Component,
95    /// Component for selector.
96    fragment: Component,
97}
98
99// ----------------------------------------------------------------------------
100// Implementations
101// ----------------------------------------------------------------------------
102
103impl Matcher {
104    /// Returns whether the given identifier matches any selector.
105    ///
106    /// Components are compared in descending variability and their likelihood
107    /// for mismatch, starting with the `location`. This approach effectively
108    /// tries to short-circuits the comparison. Note that empty components are
109    /// considered wildcards, so they will always match.
110    ///
111    /// # Errors
112    ///
113    /// Returns [`Error::Id`] if the identifier is invalid.
114    ///
115    /// # Examples
116    ///
117    /// ```
118    /// # use std::error::Error;
119    /// # fn main() -> Result<(), Box<dyn Error>> {
120    /// use zrx_id::{Id, Matcher};
121    ///
122    /// // Create matcher builder and add selector
123    /// let mut builder = Matcher::builder();
124    /// builder.add(&"zrs:::::**/*.md:")?;
125    ///
126    /// // Create matcher from builder
127    /// let matcher = builder.build()?;
128    ///
129    /// // Create identifier and match selector
130    /// let id: Id = "zri:file:::docs:index.md:".parse()?;
131    /// assert!(matcher.is_match(&id)?);
132    /// # Ok(())
133    /// # }
134    /// ```
135    #[inline]
136    pub fn is_match<T>(&self, id: &T) -> Result<bool>
137    where
138        T: TryToId,
139    {
140        self.matches(id).map(|matches| !matches.is_empty())
141    }
142
143    /// Returns the indices of selectors that match the identifier.
144    ///
145    /// This method compares each component of the identifier against the
146    /// corresponding component of a selector using the compiled globs, and
147    /// returns the indices of the matching selectors in the order they were
148    /// added to the [`Matcher`].
149    ///
150    /// Components are compared in descending variability and their likelihood
151    /// for mismatch, starting with the `location`. This approach effectively
152    /// tries to short-circuit the comparison. Note that empty components are
153    /// considered wildcards, so they will always match.
154    ///
155    /// # Errors
156    ///
157    /// Returns [`Error::Id`] if the identifier is invalid.
158    ///
159    /// # Examples
160    ///
161    /// ```
162    /// # use std::error::Error;
163    /// # fn main() -> Result<(), Box<dyn Error>> {
164    /// use zrx_id::{Id, Matcher, Matches};
165    ///
166    /// // Create matcher builder and add selector
167    /// let mut builder = Matcher::builder();
168    /// builder.add(&"zrs:::::**/*.md:")?;
169    ///
170    /// // Create matcher from builder
171    /// let matcher = builder.build()?;
172    ///
173    /// // Create identifier and obtain matched selectors
174    /// let id: Id = "zri:file:::docs:index.md:".parse()?;
175    /// assert_eq!(matcher.matches(&id)?, Matches::from_iter([0]));
176    /// # Ok(())
177    /// # }
178    /// ```
179    #[allow(clippy::missing_panics_doc)]
180    pub fn matches<T>(&self, id: &T) -> Result<Matches>
181    where
182        T: TryToId,
183    {
184        let id = id.try_to_id()?;
185
186        // Query all components from highest to lowest variability, and
187        // intersect the resulting match sets, keeping only full matches
188        let mut opt: Option<Matches> = None;
189        for (component, value) in [
190            (&self.location, Some(id.location())),
191            (&self.context, Some(id.context())),
192            (&self.provider, Some(id.provider())),
193            (&self.resource, id.resource()),
194            (&self.fragment, id.fragment()),
195            (&self.variant, id.variant()),
196        ] {
197            // If the component doesn't have a value, we could theoretically
198            // ignore all non-empty patterns and only match the empty ones,
199            // but we would then miss selectors that use explicit `*` or `**`
200            // wildcards. We use the unlikely `U+FFFE` to test for those.
201            let path = value.as_deref().unwrap_or("\u{FFFE}");
202            let matches = component.matches(path);
203
204            // Intersect with or set as tracking match set
205            if let Some(tracked) = &mut opt {
206                tracked.intersect(&matches);
207            } else {
208                opt = Some(matches);
209            }
210        }
211
212        // Return matches
213        Ok(opt.expect("invariant"))
214    }
215}
216
217// ----------------------------------------------------------------------------
218// Trait implementations
219// ----------------------------------------------------------------------------
220
221impl FromStr for Matcher {
222    type Err = Error;
223
224    /// Attempts to create a matcher from a string.
225    ///
226    /// The string must adhere to the following format and include exactly six
227    /// `:` separators, even if some components are empty. All components are
228    /// optional, which means they can be left empty, which is equivalent to
229    /// setting them to a `**` wildcard.
230    ///
231    /// ``` text
232    /// zrs:<provider>:<resource>:<variant>:<context>:<location>:<fragment>
233    /// ```
234    ///
235    /// # Errors
236    ///
237    /// Returns [`Error::Id`] if the given string can't be parsed into a valid
238    /// selector, from which the matcher is then constructed.
239    ///
240    /// # Examples
241    ///
242    /// ```
243    /// # use std::error::Error;
244    /// # fn main() -> Result<(), Box<dyn Error>> {
245    /// use zrx_id::Matcher;
246    ///
247    /// // Create matcher from string
248    /// let matcher: Matcher = "zrs:::::**/*.md:".parse()?;
249    /// # Ok(())
250    /// # }
251    /// ```
252    fn from_str(value: &str) -> Result<Self> {
253        Matcher::builder().with(&value)?.build()
254    }
255}
256
257// ----------------------------------------------------------------------------
258// Tests
259// ----------------------------------------------------------------------------
260
#[cfg(test)]
mod tests {

    mod is_match {
        use crate::id::matcher::{Matcher, Result};

        /// Identifier that every selector in this module is tested against.
        const ID: &str = "zri:file:::docs:index.md:";

        #[test]
        fn handles_selectors() -> Result {
            let selectors = [
                "zrs:file:::docs:index.md:",
                "zrs::::docs:index.md:",
                "zrs:::::index.md:",
                "zrs::::::",
            ];
            for selector in selectors {
                let matcher = selector.parse::<Matcher>()?;
                assert!(matcher.is_match(&ID)?);
            }
            Ok(())
        }

        #[test]
        fn handles_wildcards() -> Result {
            let selectors = [
                "zrs:file:::docs:*.md:",
                "zrs:::::*.md:",
                "zrs:*::::*.md:",
                "zrs:*:*:*:*:*:",
            ];
            for selector in selectors {
                let matcher = selector.parse::<Matcher>()?;
                assert!(matcher.is_match(&ID)?);
            }
            Ok(())
        }

        #[test]
        fn handles_optionals() -> Result {
            let selectors = [
                "zrs:{git,file}:::{docs}:index.md:",
                "zrs::::docs:{index,about}.md:",
                "zrs:::::index.{md,rst}:",
                "zrs:::::{*}:",
            ];
            for selector in selectors {
                let matcher = selector.parse::<Matcher>()?;
                assert!(matcher.is_match(&ID)?);
            }
            Ok(())
        }

        #[test]
        fn handles_non_matches() -> Result {
            let selectors = [
                "zrs:file:::{docs}:index.md:anchor",
                "zrs:{git,file}:master::::",
                "zrs:::::about.md:",
                "zrs::::::anchor",
            ];
            for selector in selectors {
                let matcher = selector.parse::<Matcher>()?;
                assert!(!matcher.is_match(&ID)?);
            }
            Ok(())
        }
    }

    mod matches {
        use crate::id::matcher::{Matcher, Matches, Result};

        /// Identifier that every selector in this module is tested against.
        const ID: &str = "zri:file:::docs:index.md:";

        #[test]
        fn handles_selectors() -> Result {
            let selectors = [
                "zrs:file:::docs:index.md:",
                "zrs::::docs:index.md:",
                "zrs:::::index.md:",
                "zrs::::::",
            ];
            for selector in selectors {
                let matcher = selector.parse::<Matcher>()?;
                assert_eq!(matcher.matches(&ID)?, Matches::from_iter([0]));
            }
            Ok(())
        }

        #[test]
        fn handles_wildcards() -> Result {
            let selectors = [
                "zrs:file:::docs:*.md:",
                "zrs:::::*.md:",
                "zrs:*::::*.md:",
                "zrs:*:*:*:*:*:",
            ];
            for selector in selectors {
                let matcher = selector.parse::<Matcher>()?;
                assert_eq!(matcher.matches(&ID)?, Matches::from_iter([0]));
            }
            Ok(())
        }

        #[test]
        fn handles_optionals() -> Result {
            let selectors = [
                "zrs:{git,file}:::{docs}:index.md:",
                "zrs::::docs:{index,about}.md:",
                "zrs:::::index.{md,rst}:",
                "zrs:::::{*}:",
            ];
            for selector in selectors {
                let matcher = selector.parse::<Matcher>()?;
                assert_eq!(matcher.matches(&ID)?, Matches::from_iter([0]));
            }
            Ok(())
        }

        #[test]
        fn handles_non_matches() -> Result {
            let selectors = [
                "zrs:file:::{docs}:index.md:anchor",
                "zrs:{git,file}:master::::",
                "zrs:::::about.md:",
                "zrs::::::anchor",
            ];
            for selector in selectors {
                let matcher = selector.parse::<Matcher>()?;
                assert_eq!(matcher.matches(&ID)?, Matches::new());
            }
            Ok(())
        }
    }
}