openjd_expr/functions/
regex.rs1use crate::error::ExpressionError;
8use crate::function_library::EvalContext;
9use crate::types::ExprType;
10use crate::value::ExprValue;
11
/// Result type returned by every public regex function in this file.
type R = Result<ExprValue, ExpressionError>;
/// Mutable evaluation-context handle taken as the first parameter of every
/// public regex function in this file.
type Ctx<'a> = &'a mut dyn EvalContext;
14
15fn get_two_strings(a: &[ExprValue], name: &str) -> Result<(String, String), ExpressionError> {
16 let s = match &a[0] {
17 ExprValue::String(s) => s.clone(),
18 _ => {
19 return Err(ExpressionError::new(format!(
20 "{name}() requires string arguments"
21 )))
22 }
23 };
24 let p = match &a[1] {
25 ExprValue::String(s) => s.clone(),
26 _ => {
27 return Err(ExpressionError::new(format!(
28 "{name}() requires string arguments"
29 )))
30 }
31 };
32 Ok((s, p))
33}
34
35fn validate_regex_pattern(pattern: &str) -> Result<(), ExpressionError> {
36 if pattern.is_empty() {
37 return Err(ExpressionError::new("Empty regex pattern is not allowed"));
38 }
39
40 reject_rust_only_features(pattern)?;
54
55 let hir = match regex_syntax::Parser::new().parse(pattern) {
61 Ok(h) => h,
62 Err(e) => return Err(translate_parse_error(e)),
63 };
64 check_hir_portability(&hir)
65}
66
67fn reject_rust_only_features(pattern: &str) -> Result<(), ExpressionError> {
72 let bytes = pattern.as_bytes();
73 let mut i = 0;
74 while i < bytes.len() {
75 if bytes[i] == b'\\' {
76 let next = match bytes.get(i + 1) {
78 Some(&b) => b,
79 None => break, };
81 match next {
82 b'z' => {
83 return Err(ExpressionError::new(
84 "Unsupported regex feature: end-of-string anchor \\z",
85 ));
86 }
87 b'x' | b'u' | b'U' => {
91 if matches!(bytes.get(i + 2), Some(b'{')) {
92 return Err(ExpressionError::new(format!(
93 "Unsupported regex feature: Unicode brace syntax \\{}{{...}}",
94 next as char
95 )));
96 }
97 }
98 _ => {}
99 }
100 i += 2;
104 } else {
105 i += 1;
106 }
107 }
108 Ok(())
109}
110
111fn translate_parse_error(err: regex_syntax::Error) -> ExpressionError {
114 let msg = err.to_string();
115 let lower = msg.to_lowercase();
116 let feature = if lower.contains("look-around") || lower.contains("lookaround") {
120 if lower.contains("negative lookahead") || lower.contains("(?!") {
123 "negative lookahead"
124 } else if lower.contains("positive lookahead") || lower.contains("(?=") {
125 "lookahead"
126 } else if lower.contains("negative lookbehind") || lower.contains("(?<!") {
127 "negative lookbehind"
128 } else if lower.contains("positive lookbehind") || lower.contains("(?<=") {
129 "lookbehind"
130 } else {
131 "look-around"
132 }
133 } else if lower.contains("backreference") || lower.contains("back reference") {
134 "backreferences"
135 } else if lower.contains("unrecognized escape") && msg.contains("\\Z") {
136 "end-of-string anchor \\Z"
141 } else {
142 return ExpressionError::new(format!("Invalid regex pattern: {msg}"));
143 };
144 ExpressionError::new(format!("Unsupported regex feature: {feature}"))
145}
146
147fn check_hir_portability(hir: ®ex_syntax::hir::Hir) -> Result<(), ExpressionError> {
154 use regex_syntax::hir::{HirKind, Look};
155 match hir.kind() {
156 HirKind::Look(l) => match l {
157 Look::Start
159 | Look::End
160 | Look::StartLF
161 | Look::EndLF
162 | Look::StartCRLF
163 | Look::EndCRLF
164 | Look::WordAscii
165 | Look::WordAsciiNegate
166 | Look::WordUnicode
167 | Look::WordUnicodeNegate
168 | Look::WordStartAscii
169 | Look::WordEndAscii
170 | Look::WordStartUnicode
171 | Look::WordEndUnicode
172 | Look::WordStartHalfAscii
173 | Look::WordEndHalfAscii
174 | Look::WordStartHalfUnicode
175 | Look::WordEndHalfUnicode => Ok(()),
176 },
177 HirKind::Capture(c) => check_hir_portability(&c.sub),
178 HirKind::Repetition(r) => check_hir_portability(&r.sub),
179 HirKind::Concat(parts) | HirKind::Alternation(parts) => {
180 for p in parts {
181 check_hir_portability(p)?;
182 }
183 Ok(())
184 }
185 HirKind::Empty | HirKind::Literal(_) | HirKind::Class(_) => Ok(()),
186 }
187}
188
189pub fn re_escape_fn(ctx: Ctx, a: &[ExprValue]) -> R {
190 let s = match &a[0] {
191 ExprValue::String(s) => s.clone(),
192 _ => return Err(ExpressionError::new("re_escape() requires string")),
193 };
194 ctx.count_string_ops(s.len())?;
195 Ok(ExprValue::String(regex::escape(&s)))
196}
197
198pub fn re_match_fn(ctx: Ctx, a: &[ExprValue]) -> R {
199 let (s, pat) = get_two_strings(a, "re_match")?;
200 ctx.count_string_ops(s.len())?;
201 validate_regex_pattern(&pat)?;
202 let re = ctx.get_or_compile_regex(&format!("^(?:{})", pat))?;
203 match re.captures(&s) {
204 None => Ok(ExprValue::Null),
205 Some(caps) => {
206 let groups: Vec<ExprValue> = (0..caps.len())
207 .map(|i| {
208 ExprValue::String(
209 caps.get(i)
210 .map(|m| m.as_str().to_string())
211 .unwrap_or_default(),
212 )
213 })
214 .collect();
215 Ok(ExprValue::make_list_checked(ctx, groups, ExprType::STRING)?)
216 }
217 }
218}
219
220pub fn re_search_fn(ctx: Ctx, a: &[ExprValue]) -> R {
221 let (s, pat) = get_two_strings(a, "re_search")?;
222 ctx.count_string_ops(s.len())?;
223 validate_regex_pattern(&pat)?;
224 let re = ctx.get_or_compile_regex(&pat)?;
225 match re.captures(&s) {
226 None => Ok(ExprValue::Null),
227 Some(caps) => {
228 let groups: Vec<ExprValue> = (0..caps.len())
229 .map(|i| {
230 ExprValue::String(
231 caps.get(i)
232 .map(|m| m.as_str().to_string())
233 .unwrap_or_default(),
234 )
235 })
236 .collect();
237 Ok(ExprValue::make_list_checked(ctx, groups, ExprType::STRING)?)
238 }
239 }
240}
241
242pub fn re_findall_fn(ctx: Ctx, a: &[ExprValue]) -> R {
243 let (s, pat) = get_two_strings(a, "re_findall")?;
244 ctx.count_string_ops(s.len())?;
245 validate_regex_pattern(&pat)?;
246 let re = ctx.get_or_compile_regex(&pat)?;
247 let num_groups = re.captures_len() - 1;
248 if num_groups == 0 {
249 let matches: Vec<ExprValue> = re
250 .find_iter(&s)
251 .map(|m| ExprValue::String(m.as_str().to_string()))
252 .collect();
253 Ok(ExprValue::make_list_checked(
254 ctx,
255 matches,
256 ExprType::STRING,
257 )?)
258 } else if num_groups == 1 {
259 let matches: Vec<ExprValue> = re
260 .captures_iter(&s)
261 .map(|c| {
262 ExprValue::String(c.get(1).map(|m| m.as_str().to_string()).unwrap_or_default())
263 })
264 .collect();
265 Ok(ExprValue::make_list_checked(
266 ctx,
267 matches,
268 ExprType::STRING,
269 )?)
270 } else {
271 let matches: Result<Vec<ExprValue>, _> = re
276 .captures_iter(&s)
277 .map(|c| {
278 let groups: Vec<ExprValue> = (1..=num_groups)
279 .map(|i| {
280 ExprValue::String(
281 c.get(i).map(|m| m.as_str().to_string()).unwrap_or_default(),
282 )
283 })
284 .collect();
285 ExprValue::make_list(groups, ExprType::STRING)
286 })
287 .collect();
288 Ok(ExprValue::make_list_checked(
289 ctx,
290 matches?,
291 ExprType::list(ExprType::STRING),
292 )?)
293 }
294}
295
296pub fn re_replace_fn(ctx: Ctx, a: &[ExprValue]) -> R {
297 if a.len() != 3 {
298 return Err(ExpressionError::new("re_replace() takes 3 arguments"));
299 }
300 let s = match &a[0] {
301 ExprValue::String(s) => s.clone(),
302 _ => return Err(ExpressionError::new("re_replace() requires strings")),
303 };
304 ctx.count_string_ops(s.len())?;
305 let pat = match &a[1] {
306 ExprValue::String(s) => s.clone(),
307 _ => return Err(ExpressionError::new("re_replace() requires strings")),
308 };
309 let repl = match &a[2] {
310 ExprValue::String(s) => s.clone(),
311 _ => return Err(ExpressionError::new("re_replace() requires strings")),
312 };
313 validate_regex_pattern(&pat)?;
314 validate_regex_replacement(&repl)?;
315 let re = ctx.get_or_compile_regex(&pat)?;
316 let result = re.replace_all(&s, regex::NoExpand(&repl));
317 Ok(ExprValue::String(result.into_owned()))
318}
319
320fn validate_regex_replacement(repl: &str) -> Result<(), ExpressionError> {
321 let bytes = repl.as_bytes();
322 for i in 0..bytes.len() {
323 if bytes[i] == b'\\' && i + 1 < bytes.len() {
325 if bytes[i + 1].is_ascii_digit() {
326 return Err(ExpressionError::new(
327 "Group references in replacement strings are not supported",
328 ));
329 }
330 if bytes[i + 1] == b'g' && i + 2 < bytes.len() && bytes[i + 2] == b'<' {
331 return Err(ExpressionError::new(
332 "Group references in replacement strings are not supported",
333 ));
334 }
335 }
336 if bytes[i] == b'$'
338 && i + 1 < bytes.len()
339 && (bytes[i + 1].is_ascii_digit() || bytes[i + 1] == b'{')
340 {
341 return Err(ExpressionError::new(
342 "Group references in replacement strings are not supported",
343 ));
344 }
345 }
346 Ok(())
347}
348
349pub fn re_split_fn(ctx: Ctx, a: &[ExprValue]) -> R {
350 if a.len() < 2 || a.len() > 3 {
351 return Err(ExpressionError::new("re_split() takes 2-3 arguments"));
352 }
353 let (s, pat) = get_two_strings(a, "re_split")?;
354 ctx.count_string_ops(s.len())?;
355 validate_regex_pattern(&pat)?;
356 let maxsplit = a.get(2).and_then(|v| match v {
357 ExprValue::Int(n) => Some(*n as usize),
358 _ => None,
359 });
360 let re = ctx.get_or_compile_regex(&pat)?;
361 let parts: Vec<ExprValue> = match maxsplit {
362 Some(n) => re
363 .splitn(&s, n + 1)
364 .map(|p| ExprValue::String(p.to_string()))
365 .collect(),
366 None => re
367 .split(&s)
368 .map(|p| ExprValue::String(p.to_string()))
369 .collect(),
370 };
371 ExprValue::make_list(parts, ExprType::STRING)
372}