tre_regex/wchar/exec.rs
1use std::borrow::Cow;
2
3use widestring::WideStr;
4
5use crate::{err::{BindingErrorCode, ErrorKind, RegexError, Result}, flags::RegexecFlags, tre, Regex};
6
/// Match results produced by the wide-string `regwexec` functions in this module.
///
/// The vector holds one entry per requested match slot. An entry is `None` when the matcher
/// reported no offsets for that slot; otherwise it is a [`Cow`] holding the matched [`WideStr`]
/// (`regwexec` fills these with slices borrowed from the searched string for lifetime `'a`).
pub type RegMatchWideStr<'a> = Vec<Option<Cow<'a, WideStr>>>;
8
9impl Regex {
10 /// Performs a regex search on the passed wide string, returning `nmatches` results.
11 ///
12 /// This function should only be used if you need to match wide strings. Otherwise, [`regexec`]
13 /// is recommended instead.
14 ///
15 /// # Arguments
16 /// * `string`: [`WideStr`] to match against `compiled_reg`
17 /// * `nmatches`: number of matches to return
18 /// * `flags`: [`RegexecFlags`] to pass to [`tre_regnexec`](tre_regex_sys::tre_regnexec).
19 ///
20 /// # Returns
21 /// If no error was found, a [`Vec`] of [`Option`]s will be returned.
22 ///
23 /// If a given match index is empty, The `Option` will be `None`. Otherwise, the `Option` will
24 /// contain a [`WideStr`].
25 ///
26 /// # Errors
27 /// If an error is encountered during matching, it returns a [`RegexError`].
28 ///
29 /// # Caveats
30 /// Unless copied, the match results must live at least as long as `string`. This is because
31 /// they are slices into `string` under the hood, for efficiency.
32 ///
33 /// # Examples
34 /// ```
35 /// # use tre_regex::Result;
36 /// # fn main() -> Result<()> {
37 /// use tre_regex::{RegcompFlags, RegexecFlags, Regex};
38 /// use widestring::widestr;
39 ///
40 /// let regcomp_flags = RegcompFlags::new()
41 /// .add(RegcompFlags::EXTENDED)
42 /// .add(RegcompFlags::ICASE);
43 /// let regexec_flags = RegexecFlags::new().add(RegexecFlags::NONE);
44 ///
45 /// let compiled_reg = Regex::new_wide(widestr!("^(hello).*(world)$"), regcomp_flags)?;
46 /// let matches = compiled_reg.regwexec(widestr!("hello world"), 2, regexec_flags)?;
47 ///
48 /// for (i, matched) in matches.into_iter().enumerate() {
49 /// match matched {
50 /// Some(substr) => println!("Match {i}: {}", substr.display()),
51 /// None => println!("Match {i}: <None>"),
52 /// }
53 /// }
54 /// # Ok(())
55 /// # }
56 /// ```
57 ///
58 /// [`regexec`]: crate::regexec
59 pub fn regwexec<'a>(
60 &self,
61 string: &'a WideStr,
62 nmatches: usize,
63 flags: RegexecFlags,
64 ) -> Result<RegMatchWideStr<'a>> {
65 let Some(compiled_reg_obj) = self.get() else {
66 return Err(RegexError::new(
67 ErrorKind::Binding(BindingErrorCode::REGEX_VACANT),
68 "Attempted to unwrap a vacant Regex object",
69 ));
70 };
71 let mut match_vec: Vec<tre::regmatch_t> =
72 vec![tre::regmatch_t { rm_so: 0, rm_eo: 0 }; nmatches];
73
74 // SAFETY: compiled_reg is a wrapped type (see safety concerns for Regex). string is read-only.
75 // match_vec has enough room for everything. flags also cannot wrap around.
76 #[allow(clippy::cast_possible_wrap)]
77 let result = unsafe {
78 tre::tre_regwnexec(
79 compiled_reg_obj,
80 string.as_ptr().cast(),
81 string.len(),
82 nmatches,
83 match_vec.as_mut_ptr(),
84 flags.get(),
85 )
86 };
87 if result != 0 {
88 return Err(self.regerror(result));
89 }
90
91 let mut result: Vec<Option<Cow<'a, WideStr>>> = Vec::with_capacity(nmatches);
92 for pmatch in match_vec {
93 if pmatch.rm_so < 0 || pmatch.rm_eo < 0 {
94 result.push(None);
95 continue;
96 }
97
98 // Wraparound is impossible.
99 #[allow(clippy::cast_sign_loss)]
100 let start_offset = pmatch.rm_so as usize;
101 #[allow(clippy::cast_sign_loss)]
102 let end_offset = pmatch.rm_eo as usize;
103
104 result.push(Some(Cow::Borrowed(&string[start_offset..end_offset])));
105 }
106
107 Ok(result)
108 }
109}
110
111/// Performs a regex search on the passed wide string, returning `nmatches` results.
112///
113/// This is a thin wrapper around [`Regex::regwexec`].
114///
115/// This function should only be used if you need to match wide strings.
116///
117/// # Arguments
118/// * `compiled_reg`: the compiled [`Regex`] object.
119/// * `string`: [`WideStr`] to match against `compiled_reg`
120/// * `nmatches`: number of matches to return
121/// * `flags`: [`RegexecFlags`] to pass to [`tre_regwnexec`](tre_regex_sys::tre_regwnexec).
122///
123/// # Returns
124/// If no error was found, a [`Vec`] of [`Option`]s will be returned.
125///
126/// If a given match index is empty, The `Option` will be `None`. Otherwise, a [`WideStr`]
127/// will be returned.
128///
129/// # Errors
130/// If an error is encountered during matching, it returns a [`RegexError`].
131///
132/// # Caveats
133/// Unless copied, the match results must live at least as long as `string`. This is because they are
134/// slices into `string` under the hood, for efficiency.
135///
136/// # Examples
137/// ```
138/// # use tre_regex::Result;
139/// # fn main() -> Result<()> {
140/// use tre_regex::{RegcompFlags, RegexecFlags, regwcomp, regwexec};
141/// use widestring::widestr;
142///
143/// let regcomp_flags = RegcompFlags::new()
144/// .add(RegcompFlags::EXTENDED)
145/// .add(RegcompFlags::ICASE)
146/// .add(RegcompFlags::UNGREEDY);
147/// let regexec_flags = RegexecFlags::new().add(RegexecFlags::NONE);
148///
149/// let compiled_reg = regwcomp(widestr!("^(hello).*(world)$"), regcomp_flags)?;
150/// let matches = regwexec(
151/// &compiled_reg, // Compiled regex
152/// widestr!("hello world"), // String to match against
153/// 2, // Number of matches
154/// regexec_flags // Flags
155/// )?;
156///
157/// for (i, matched) in matches.into_iter().enumerate() {
158/// match matched {
159/// Some(substr) => println!("Match {i}: {}", substr.display()),
160/// None => println!("Match {i}: <None>"),
161/// }
162/// }
163/// # Ok(())
164/// # }
165/// ```
166pub fn regwexec<'a>(
167 compiled_reg: &Regex,
168 string: &'a WideStr,
169 nmatches: usize,
170 flags: RegexecFlags,
171) -> Result<RegMatchWideStr<'a>> {
172 compiled_reg.regwexec(string, nmatches, flags)
173}