tre_regex/wchar/
approx.rs

1use std::borrow::Cow;
2
3use widestring::WideStr;
4
5use crate::{
6    err::{BindingErrorCode, ErrorKind, RegexError, Result},
7    tre, RegApproxMatch, RegApproxParams, Regex, RegexecFlags,
8};
9
/// Result type of [`Regex::regawexec`] and the free function [`regawexec`].
///
/// It is [`RegApproxMatch`] specialised for wide strings: the first type parameter is the borrowed
/// [`WideStr`] that was searched, and the second is the type of each captured substring, a
/// [`Cow`] over [`WideStr`] tied to the same lifetime `'a`.
10pub type RegApproxMatchWideStr<'a> = RegApproxMatch<&'a WideStr, Cow<'a, WideStr>>;
11
12impl Regex {
13    /// Performs an approximate regex search on the passed wide string, returning `nmatches`
14    /// results.
15    ///
16    /// This function should only be used if you need to match raw wide string. Otherwise,
17    /// [`regaexec`] is recommended instead.
18    ///
19    /// # Arguments
20    /// * `string`: [`WideStr`] to match against `compiled_reg`
21    /// * `params`: see [`RegApproxParams`]
22    /// * `nmatches`: number of matches to return
23    /// * `flags`: [`RegexecFlags`] to pass to [`tre_reganexec`](tre_regex_sys::tre_reganexec).
24    ///
25    /// # Returns
26    /// If no error was found, a [`Vec`] of [`Option`]s will be returned.
27    ///
28    /// If a given match index is empty, The `Option` will be `None`. Otherwise, a [`WideStr`] will
29    /// be returned.
30    ///
31    /// # Errors
32    /// If an error is encountered during matching, it returns a [`RegexError`].
33    ///
34    /// # Caveats
35    /// Unless copied, the match results must live at least as long as `string`. This is because they are
36    /// slices into `string` under the hood, for efficiency.
37    ///
38    /// # Examples
39    /// ```
40    /// # use tre_regex::Result;
41    /// # fn main() -> Result<()> {
42    /// use tre_regex::{RegcompFlags, RegexecFlags, RegApproxParams, Regex};
43    /// use widestring::widestr;
44    ///
45    /// let regcomp_flags = RegcompFlags::new()
46    ///     .add(RegcompFlags::EXTENDED)
47    ///     .add(RegcompFlags::ICASE);
48    /// let regaexec_flags = RegexecFlags::new().add(RegexecFlags::NONE);
49    /// let regaexec_params = RegApproxParams::new()
50    ///     .cost_ins(1)
51    ///     .cost_del(1)
52    ///     .cost_subst(1)
53    ///     .max_cost(2)
54    ///     .max_del(2)
55    ///     .max_ins(2)
56    ///     .max_subst(2)
57    ///     .max_err(2);
58    ///
59    /// let compiled_reg = Regex::new_wide(widestr!("^(hello).*(world)$"), regcomp_flags)?;
60    /// let result = compiled_reg.regawexec(
61    ///     widestr!("hello world"),    // Bytes to match against
62    ///     &regaexec_params,           // Matching parameters
63    ///     3,                          // Number of matches we want
64    ///     regaexec_flags              // Flags
65    /// )?;
66    ///
67    /// for (i, matched) in result.get_matches().into_iter().enumerate() {
68    ///     match matched {
69    ///         Some(substr) => println!("Match {i}: {}", substr.display()),
70    ///         None => println!("Match {i}: <None>"),
71    ///     }
72    /// }
73    /// # Ok(())
74    /// # }
75    /// ```
76    ///
77    /// [`regaexec`]: crate::Regex::regaexec
78    pub fn regawexec<'a>(
79        &self,
80        string: &'a WideStr,
81        params: &RegApproxParams,
82        nmatches: usize,
83        flags: RegexecFlags,
84    ) -> Result<RegApproxMatchWideStr<'a>> {
85        let Some(compiled_reg_obj) = self.get() else {
86            return Err(RegexError::new(
87                ErrorKind::Binding(BindingErrorCode::REGEX_VACANT),
88                "Attempted to unwrap a vacant Regex object",
89            ));
90        };
91        let mut match_vec: Vec<tre::regmatch_t> =
92            vec![tre::regmatch_t { rm_so: 0, rm_eo: 0 }; nmatches];
93        let mut amatch = tre::regamatch_t {
94            nmatch: nmatches,
95            pmatch: match_vec.as_mut_ptr(),
96            ..Default::default()
97        };
98
99        // SAFETY: compiled_reg is a wrapped type (see safety concerns for Regex). string is read-only.
100        // match_vec has enough room for everything. flags also cannot wrap around.
101        #[allow(clippy::cast_possible_wrap)]
102        let result = unsafe {
103            tre::tre_regawnexec(
104                compiled_reg_obj,
105                string.as_ptr().cast(),
106                string.len(),
107                &mut amatch,
108                *params.get(),
109                flags.get(),
110            )
111        };
112        if result != 0 {
113            return Err(self.regerror(result));
114        }
115
116        let mut result: Vec<Option<Cow<'a, WideStr>>> = Vec::with_capacity(nmatches);
117        for pmatch in match_vec {
118            if pmatch.rm_so < 0 || pmatch.rm_eo < 0 {
119                result.push(None);
120                continue;
121            }
122
123            // Wraparound is impossible.
124            #[allow(clippy::cast_sign_loss)]
125            let start_offset = pmatch.rm_so as usize;
126            #[allow(clippy::cast_sign_loss)]
127            let end_offset = pmatch.rm_eo as usize;
128
129            result.push(Some(Cow::Borrowed(&string[start_offset..end_offset])));
130        }
131
132        Ok(RegApproxMatchWideStr::new(string, result, amatch))
133    }
134}
135
136/// Performs an approximate regex search on the passed wide string, returning `nmatches` results.
137///
138/// This is a thin wrapper around [`Regex::regawexec`].
139///
140/// Non-matching subexpressions or patterns will return `None` in the results.
141///
142/// # Arguments
143/// * `compiled_reg`: the compiled [`Regex`] object.
144/// * `string`: [`WideStr`] to match against `compiled_reg`
145/// * `params`: see [`RegApproxParams`]
146/// * `nmatches`: number of matches to return
147/// * `flags`: [`RegexecFlags`] to pass to [`tre_regnexec`](tre_regex_sys::tre_regnexec).
148///
149/// # Returns
150/// If no error was found, a [`Vec`] of [`Option`]s will be returned.
151///
152/// If a given match index is empty, The `Option` will be `None`. Otherwise, a [`WideStr`] will be
153/// returned.
154///
155/// # Errors
156/// If an error is encountered during matching, it returns a [`RegexError`].
157///
158/// # Caveats
159/// Unless copied, the match results must live at least as long as `string`. This is because they
160/// are slices into `string` under the hood, for efficiency.
161///
162/// # Examples
163/// ```
164/// # use tre_regex::Result;
165/// # fn main() -> Result<()> {
166/// use tre_regex::{RegcompFlags, RegexecFlags, RegApproxParams, Regex, regawexec};
167/// use widestring::widestr;
168///
169/// let regcomp_flags = RegcompFlags::new()
170///     .add(RegcompFlags::EXTENDED)
171///     .add(RegcompFlags::ICASE);
172/// let regaexec_flags = RegexecFlags::new().add(RegexecFlags::NONE);
173/// let regaexec_params = RegApproxParams::new()
174///     .cost_ins(1)
175///     .cost_del(1)
176///     .cost_subst(1)
177///     .max_cost(2)
178///     .max_del(2)
179///     .max_ins(2)
180///     .max_subst(2)
181///     .max_err(2);
182///
183/// let compiled_reg = Regex::new_wide(widestr!("^(hello).*(world)$"), regcomp_flags)?;
184/// let result = regawexec(
185///     &compiled_reg,              // Compiled regex
186///     widestr!("hello world"),    // String to match against
187///     &regaexec_params,           // Matching parameters
188///     3,                          // Number of matches we want
189///     regaexec_flags              // Flags
190/// )?;
191///
192/// for (i, matched) in result.get_matches().into_iter().enumerate() {
193///     match matched {
194///         Some(substr) => println!("Match {i}: {}", substr.display()),
195///         None => println!("Match {i}: <None>"),
196///     }
197/// }
198/// # Ok(())
199/// # }
200/// ```
201#[inline]
202pub fn regawexec<'a>(
203    compiled_reg: &Regex,
204    string: &'a WideStr,
205    params: &RegApproxParams,
206    nmatches: usize,
207    flags: RegexecFlags,
208) -> Result<RegApproxMatchWideStr<'a>> {
209    compiled_reg.regawexec(string, params, nmatches, flags)
210}