tre_regex/wchar/
exec.rs

1use std::borrow::Cow;
2
3use widestring::WideStr;
4
5use crate::{err::{BindingErrorCode, ErrorKind, RegexError, Result}, flags::RegexecFlags, tre, Regex};
6
/// Match results produced by the wide-string `regwexec` functions in this module.
///
/// The vector holds one entry per requested match slot. An entry is `None` when the matcher
/// reported a negative offset for that slot; otherwise it is `Some` and wraps the matched
/// [`WideStr`] slice, borrowed from the searched input for the lifetime `'a`.
7pub type RegMatchWideStr<'a> = Vec<Option<Cow<'a, WideStr>>>;
8
9impl Regex {
10    /// Performs a regex search on the passed wide string, returning `nmatches` results.
11    ///
12    /// This function should only be used if you need to match wide strings. Otherwise, [`regexec`]
13    /// is recommended instead.
14    ///
15    /// # Arguments
16    /// * `string`: [`WideStr`] to match against `compiled_reg`
17    /// * `nmatches`: number of matches to return
18    /// * `flags`: [`RegexecFlags`] to pass to [`tre_regnexec`](tre_regex_sys::tre_regnexec).
19    ///
20    /// # Returns
21    /// If no error was found, a [`Vec`] of [`Option`]s will be returned.
22    ///
23    /// If a given match index is empty, The `Option` will be `None`. Otherwise, the `Option` will
24    /// contain a [`WideStr`].
25    ///
26    /// # Errors
27    /// If an error is encountered during matching, it returns a [`RegexError`].
28    ///
29    /// # Caveats
30    /// Unless copied, the match results must live at least as long as `string`. This is because
31    /// they are slices into `string` under the hood, for efficiency.
32    ///
33    /// # Examples
34    /// ```
35    /// # use tre_regex::Result;
36    /// # fn main() -> Result<()> {
37    /// use tre_regex::{RegcompFlags, RegexecFlags, Regex};
38    /// use widestring::widestr;
39    ///
40    /// let regcomp_flags = RegcompFlags::new()
41    ///     .add(RegcompFlags::EXTENDED)
42    ///     .add(RegcompFlags::ICASE);
43    /// let regexec_flags = RegexecFlags::new().add(RegexecFlags::NONE);
44    ///
45    /// let compiled_reg = Regex::new_wide(widestr!("^(hello).*(world)$"), regcomp_flags)?;
46    /// let matches = compiled_reg.regwexec(widestr!("hello world"), 2, regexec_flags)?;
47    ///
48    /// for (i, matched) in matches.into_iter().enumerate() {
49    ///     match matched {
50    ///         Some(substr) => println!("Match {i}: {}", substr.display()),
51    ///         None => println!("Match {i}: <None>"),
52    ///     }
53    /// }
54    /// # Ok(())
55    /// # }
56    /// ```
57    ///
58    /// [`regexec`]: crate::regexec
59    pub fn regwexec<'a>(
60        &self,
61        string: &'a WideStr,
62        nmatches: usize,
63        flags: RegexecFlags,
64    ) -> Result<RegMatchWideStr<'a>> {
65        let Some(compiled_reg_obj) = self.get() else {
66            return Err(RegexError::new(
67                ErrorKind::Binding(BindingErrorCode::REGEX_VACANT),
68                "Attempted to unwrap a vacant Regex object",
69            ));
70        };
71        let mut match_vec: Vec<tre::regmatch_t> =
72            vec![tre::regmatch_t { rm_so: 0, rm_eo: 0 }; nmatches];
73
74        // SAFETY: compiled_reg is a wrapped type (see safety concerns for Regex). string is read-only.
75        // match_vec has enough room for everything. flags also cannot wrap around.
76        #[allow(clippy::cast_possible_wrap)]
77        let result = unsafe {
78            tre::tre_regwnexec(
79                compiled_reg_obj,
80                string.as_ptr().cast(),
81                string.len(),
82                nmatches,
83                match_vec.as_mut_ptr(),
84                flags.get(),
85            )
86        };
87        if result != 0 {
88            return Err(self.regerror(result));
89        }
90
91        let mut result: Vec<Option<Cow<'a, WideStr>>> = Vec::with_capacity(nmatches);
92        for pmatch in match_vec {
93            if pmatch.rm_so < 0 || pmatch.rm_eo < 0 {
94                result.push(None);
95                continue;
96            }
97
98            // Wraparound is impossible.
99            #[allow(clippy::cast_sign_loss)]
100            let start_offset = pmatch.rm_so as usize;
101            #[allow(clippy::cast_sign_loss)]
102            let end_offset = pmatch.rm_eo as usize;
103
104            result.push(Some(Cow::Borrowed(&string[start_offset..end_offset])));
105        }
106
107        Ok(result)
108    }
109}
110
111/// Performs a regex search on the passed wide string, returning `nmatches` results.
112///
113/// This is a thin wrapper around [`Regex::regwexec`].
114///
115/// This function should only be used if you need to match wide strings.
116///
117/// # Arguments
118/// * `compiled_reg`: the compiled [`Regex`] object.
119/// * `string`: [`WideStr`] to match against `compiled_reg`
120/// * `nmatches`: number of matches to return
121/// * `flags`: [`RegexecFlags`] to pass to [`tre_regwnexec`](tre_regex_sys::tre_regwnexec).
122///
123/// # Returns
124/// If no error was found, a [`Vec`] of [`Option`]s will be returned.
125///
126/// If a given match index is empty, The `Option` will be `None`. Otherwise, a [`WideStr`]
127/// will be returned.
128///
129/// # Errors
130/// If an error is encountered during matching, it returns a [`RegexError`].
131///
132/// # Caveats
133/// Unless copied, the match results must live at least as long as `string`. This is because they are
134/// slices into `string` under the hood, for efficiency.
135///
136/// # Examples
137/// ```
138/// # use tre_regex::Result;
139/// # fn main() -> Result<()> {
140/// use tre_regex::{RegcompFlags, RegexecFlags, regwcomp, regwexec};
141/// use widestring::widestr;
142///
143/// let regcomp_flags = RegcompFlags::new()
144///     .add(RegcompFlags::EXTENDED)
145///     .add(RegcompFlags::ICASE)
146///     .add(RegcompFlags::UNGREEDY);
147/// let regexec_flags = RegexecFlags::new().add(RegexecFlags::NONE);
148///
149/// let compiled_reg = regwcomp(widestr!("^(hello).*(world)$"), regcomp_flags)?;
150/// let matches = regwexec(
151///     &compiled_reg,              // Compiled regex
152///     widestr!("hello world"),    // String to match against
153///     2,                          // Number of matches
154///     regexec_flags               // Flags
155/// )?;
156///
157/// for (i, matched) in matches.into_iter().enumerate() {
158///     match matched {
159///         Some(substr) => println!("Match {i}: {}", substr.display()),
160///         None => println!("Match {i}: <None>"),
161///     }
162/// }
163/// # Ok(())
164/// # }
165/// ```
166pub fn regwexec<'a>(
167    compiled_reg: &Regex,
168    string: &'a WideStr,
169    nmatches: usize,
170    flags: RegexecFlags,
171) -> Result<RegMatchWideStr<'a>> {
172    compiled_reg.regwexec(string, nmatches, flags)
173}