tre_regex/wchar/approx.rs
1use std::borrow::Cow;
2
3use widestring::WideStr;
4
5use crate::{
6 err::{BindingErrorCode, ErrorKind, RegexError, Result},
7 tre, RegApproxMatch, RegApproxParams, Regex, RegexecFlags,
8};
9
10pub type RegApproxMatchWideStr<'a> = RegApproxMatch<&'a WideStr, Cow<'a, WideStr>>;
11
12impl Regex {
13 /// Performs an approximate regex search on the passed wide string, returning `nmatches`
14 /// results.
15 ///
16 /// This function should only be used if you need to match raw wide string. Otherwise,
17 /// [`regaexec`] is recommended instead.
18 ///
19 /// # Arguments
20 /// * `string`: [`WideStr`] to match against `compiled_reg`
21 /// * `params`: see [`RegApproxParams`]
22 /// * `nmatches`: number of matches to return
23 /// * `flags`: [`RegexecFlags`] to pass to [`tre_reganexec`](tre_regex_sys::tre_reganexec).
24 ///
25 /// # Returns
26 /// If no error was found, a [`Vec`] of [`Option`]s will be returned.
27 ///
28 /// If a given match index is empty, The `Option` will be `None`. Otherwise, a [`WideStr`] will
29 /// be returned.
30 ///
31 /// # Errors
32 /// If an error is encountered during matching, it returns a [`RegexError`].
33 ///
34 /// # Caveats
35 /// Unless copied, the match results must live at least as long as `string`. This is because they are
36 /// slices into `string` under the hood, for efficiency.
37 ///
38 /// # Examples
39 /// ```
40 /// # use tre_regex::Result;
41 /// # fn main() -> Result<()> {
42 /// use tre_regex::{RegcompFlags, RegexecFlags, RegApproxParams, Regex};
43 /// use widestring::widestr;
44 ///
45 /// let regcomp_flags = RegcompFlags::new()
46 /// .add(RegcompFlags::EXTENDED)
47 /// .add(RegcompFlags::ICASE);
48 /// let regaexec_flags = RegexecFlags::new().add(RegexecFlags::NONE);
49 /// let regaexec_params = RegApproxParams::new()
50 /// .cost_ins(1)
51 /// .cost_del(1)
52 /// .cost_subst(1)
53 /// .max_cost(2)
54 /// .max_del(2)
55 /// .max_ins(2)
56 /// .max_subst(2)
57 /// .max_err(2);
58 ///
59 /// let compiled_reg = Regex::new_wide(widestr!("^(hello).*(world)$"), regcomp_flags)?;
60 /// let result = compiled_reg.regawexec(
61 /// widestr!("hello world"), // Bytes to match against
62 /// ®aexec_params, // Matching parameters
63 /// 3, // Number of matches we want
64 /// regaexec_flags // Flags
65 /// )?;
66 ///
67 /// for (i, matched) in result.get_matches().into_iter().enumerate() {
68 /// match matched {
69 /// Some(substr) => println!("Match {i}: {}", substr.display()),
70 /// None => println!("Match {i}: <None>"),
71 /// }
72 /// }
73 /// # Ok(())
74 /// # }
75 /// ```
76 ///
77 /// [`regaexec`]: crate::Regex::regaexec
78 pub fn regawexec<'a>(
79 &self,
80 string: &'a WideStr,
81 params: &RegApproxParams,
82 nmatches: usize,
83 flags: RegexecFlags,
84 ) -> Result<RegApproxMatchWideStr<'a>> {
85 let Some(compiled_reg_obj) = self.get() else {
86 return Err(RegexError::new(
87 ErrorKind::Binding(BindingErrorCode::REGEX_VACANT),
88 "Attempted to unwrap a vacant Regex object",
89 ));
90 };
91 let mut match_vec: Vec<tre::regmatch_t> =
92 vec![tre::regmatch_t { rm_so: 0, rm_eo: 0 }; nmatches];
93 let mut amatch = tre::regamatch_t {
94 nmatch: nmatches,
95 pmatch: match_vec.as_mut_ptr(),
96 ..Default::default()
97 };
98
99 // SAFETY: compiled_reg is a wrapped type (see safety concerns for Regex). string is read-only.
100 // match_vec has enough room for everything. flags also cannot wrap around.
101 #[allow(clippy::cast_possible_wrap)]
102 let result = unsafe {
103 tre::tre_regawnexec(
104 compiled_reg_obj,
105 string.as_ptr().cast(),
106 string.len(),
107 &mut amatch,
108 *params.get(),
109 flags.get(),
110 )
111 };
112 if result != 0 {
113 return Err(self.regerror(result));
114 }
115
116 let mut result: Vec<Option<Cow<'a, WideStr>>> = Vec::with_capacity(nmatches);
117 for pmatch in match_vec {
118 if pmatch.rm_so < 0 || pmatch.rm_eo < 0 {
119 result.push(None);
120 continue;
121 }
122
123 // Wraparound is impossible.
124 #[allow(clippy::cast_sign_loss)]
125 let start_offset = pmatch.rm_so as usize;
126 #[allow(clippy::cast_sign_loss)]
127 let end_offset = pmatch.rm_eo as usize;
128
129 result.push(Some(Cow::Borrowed(&string[start_offset..end_offset])));
130 }
131
132 Ok(RegApproxMatchWideStr::new(string, result, amatch))
133 }
134}
135
136/// Performs an approximate regex search on the passed wide string, returning `nmatches` results.
137///
138/// This is a thin wrapper around [`Regex::regawexec`].
139///
140/// Non-matching subexpressions or patterns will return `None` in the results.
141///
142/// # Arguments
143/// * `compiled_reg`: the compiled [`Regex`] object.
144/// * `string`: [`WideStr`] to match against `compiled_reg`
145/// * `params`: see [`RegApproxParams`]
146/// * `nmatches`: number of matches to return
147/// * `flags`: [`RegexecFlags`] to pass to [`tre_regnexec`](tre_regex_sys::tre_regnexec).
148///
149/// # Returns
150/// If no error was found, a [`Vec`] of [`Option`]s will be returned.
151///
152/// If a given match index is empty, The `Option` will be `None`. Otherwise, a [`WideStr`] will be
153/// returned.
154///
155/// # Errors
156/// If an error is encountered during matching, it returns a [`RegexError`].
157///
158/// # Caveats
159/// Unless copied, the match results must live at least as long as `string`. This is because they
160/// are slices into `string` under the hood, for efficiency.
161///
162/// # Examples
163/// ```
164/// # use tre_regex::Result;
165/// # fn main() -> Result<()> {
166/// use tre_regex::{RegcompFlags, RegexecFlags, RegApproxParams, Regex, regawexec};
167/// use widestring::widestr;
168///
169/// let regcomp_flags = RegcompFlags::new()
170/// .add(RegcompFlags::EXTENDED)
171/// .add(RegcompFlags::ICASE);
172/// let regaexec_flags = RegexecFlags::new().add(RegexecFlags::NONE);
173/// let regaexec_params = RegApproxParams::new()
174/// .cost_ins(1)
175/// .cost_del(1)
176/// .cost_subst(1)
177/// .max_cost(2)
178/// .max_del(2)
179/// .max_ins(2)
180/// .max_subst(2)
181/// .max_err(2);
182///
183/// let compiled_reg = Regex::new_wide(widestr!("^(hello).*(world)$"), regcomp_flags)?;
184/// let result = regawexec(
185/// &compiled_reg, // Compiled regex
186/// widestr!("hello world"), // String to match against
187/// ®aexec_params, // Matching parameters
188/// 3, // Number of matches we want
189/// regaexec_flags // Flags
190/// )?;
191///
192/// for (i, matched) in result.get_matches().into_iter().enumerate() {
193/// match matched {
194/// Some(substr) => println!("Match {i}: {}", substr.display()),
195/// None => println!("Match {i}: <None>"),
196/// }
197/// }
198/// # Ok(())
199/// # }
200/// ```
201#[inline]
202pub fn regawexec<'a>(
203 compiled_reg: &Regex,
204 string: &'a WideStr,
205 params: &RegApproxParams,
206 nmatches: usize,
207 flags: RegexecFlags,
208) -> Result<RegApproxMatchWideStr<'a>> {
209 compiled_reg.regawexec(string, params, nmatches, flags)
210}