use crate::error::ExpressionError;
use crate::function_library::EvalContext;
use crate::types::ExprType;
use crate::value::ExprValue;
/// Result type returned by the public `re_*` functions below.
type R = Result<ExprValue, ExpressionError>;
/// Mutable borrow of the evaluation-context trait object passed to each `re_*` function.
type Ctx<'a> = &'a mut dyn EvalContext;
/// Extracts the first two arguments as owned strings.
///
/// `name` is the user-facing function name; it is only used to build the
/// error message.
///
/// # Errors
///
/// Returns an `ExpressionError` when either of the first two arguments is
/// missing or is not an `ExprValue::String`.
fn get_two_strings(a: &[ExprValue], name: &str) -> Result<(String, String), ExpressionError> {
    // Checked access: a too-short argument slice produces the same error as a
    // wrongly typed argument instead of an out-of-bounds panic.
    let string_at = |idx: usize| match a.get(idx) {
        Some(ExprValue::String(s)) => Ok(s.clone()),
        _ => Err(ExpressionError::new(format!(
            "{name}() requires string arguments"
        ))),
    };
    let subject = string_at(0)?;
    let pattern = string_at(1)?;
    Ok((subject, pattern))
}
fn validate_regex_pattern(pattern: &str) -> Result<(), ExpressionError> {
if pattern.is_empty() {
return Err(ExpressionError::new("Empty regex pattern is not allowed"));
}
reject_rust_only_features(pattern)?;
let hir = match regex_syntax::Parser::new().parse(pattern) {
Ok(h) => h,
Err(e) => return Err(translate_parse_error(e)),
};
check_hir_portability(&hir)
}
/// Scans the raw pattern text for escape forms that are refused outright.
///
/// Two forms are rejected: the `\z` anchor, and `\x`, `\u` or `\U`
/// immediately followed by `{`. Every other escape is skipped as a two-byte
/// unit, so an escaped backslash never makes the following byte look like the
/// start of an escape.
///
/// # Errors
///
/// Returns an `ExpressionError` naming the first rejected form found.
fn reject_rust_only_features(pattern: &str) -> Result<(), ExpressionError> {
    let raw = pattern.as_bytes();
    let mut pos = 0;
    while let Some(&current) = raw.get(pos) {
        if current != b'\\' {
            pos += 1;
            continue;
        }
        // A lone trailing backslash has nothing after it to inspect.
        let Some(&escaped) = raw.get(pos + 1) else {
            break;
        };
        if escaped == b'z' {
            return Err(ExpressionError::new(
                "Unsupported regex feature: end-of-string anchor \\z",
            ));
        }
        let brace_follows = raw.get(pos + 2) == Some(&b'{');
        if matches!(escaped, b'x' | b'u' | b'U') && brace_follows {
            return Err(ExpressionError::new(format!(
                "Unsupported regex feature: Unicode brace syntax \\{}{{...}}",
                escaped as char
            )));
        }
        // Step over the backslash and the byte it escapes.
        pos += 2;
    }
    Ok(())
}
/// Converts a `regex_syntax` parse error into an `ExpressionError`.
///
/// The parser's message text is inspected (case-insensitively, except for the
/// literal `\Z` check) for wording about look-around, backreferences or the
/// `\Z` escape. When one is recognised the result is an
/// "Unsupported regex feature: ..." error; otherwise the original message is
/// wrapped as "Invalid regex pattern: ...".
fn translate_parse_error(err: regex_syntax::Error) -> ExpressionError {
    /// True when `haystack` contains at least one of `needles`.
    fn contains_any(haystack: &str, needles: &[&str]) -> bool {
        needles.iter().any(|needle| haystack.contains(*needle))
    }

    let msg = err.to_string();
    let lower = msg.to_lowercase();

    let feature: Option<&str> = if contains_any(&lower, &["look-around", "lookaround"]) {
        // Narrow down which kind of look-around was used; the order of these
        // checks decides the label when several phrases are present.
        let kind = if contains_any(&lower, &["negative lookahead", "(?!"]) {
            "negative lookahead"
        } else if contains_any(&lower, &["positive lookahead", "(?="]) {
            "lookahead"
        } else if contains_any(&lower, &["negative lookbehind", "(?<!"]) {
            "negative lookbehind"
        } else if contains_any(&lower, &["positive lookbehind", "(?<="]) {
            "lookbehind"
        } else {
            "look-around"
        };
        Some(kind)
    } else if contains_any(&lower, &["backreference", "back reference"]) {
        Some("backreferences")
    } else if lower.contains("unrecognized escape") && msg.contains("\\Z") {
        // `\Z` is checked against the original-case message so it is not
        // confused with a lowercase `\z`.
        Some("end-of-string anchor \\Z")
    } else {
        None
    };

    match feature {
        Some(feature) => ExpressionError::new(format!("Unsupported regex feature: {feature}")),
        None => ExpressionError::new(format!("Invalid regex pattern: {msg}")),
    }
}
/// Recursively walks a parsed regex HIR and vets every node.
///
/// Captures, repetitions, concatenations and alternations are descended into;
/// empty nodes, literals and classes are accepted as-is. Every `Look` variant
/// listed below is accepted, so as written this function returns `Ok(())` for
/// any input. Both matches are deliberately exhaustive with no wildcard arm:
/// a `HirKind` or `Look` variant added by a newer `regex_syntax` becomes a
/// compile error here rather than being silently accepted.
///
/// # Errors
///
/// Propagates any error returned for a sub-expression.
fn check_hir_portability(hir: &regex_syntax::hir::Hir) -> Result<(), ExpressionError> {
    use regex_syntax::hir::{HirKind, Look};
    match hir.kind() {
        HirKind::Look(l) => match l {
            Look::Start
            | Look::End
            | Look::StartLF
            | Look::EndLF
            | Look::StartCRLF
            | Look::EndCRLF
            | Look::WordAscii
            | Look::WordAsciiNegate
            | Look::WordUnicode
            | Look::WordUnicodeNegate
            | Look::WordStartAscii
            | Look::WordEndAscii
            | Look::WordStartUnicode
            | Look::WordEndUnicode
            | Look::WordStartHalfAscii
            | Look::WordEndHalfAscii
            | Look::WordStartHalfUnicode
            | Look::WordEndHalfUnicode => Ok(()),
        },
        HirKind::Capture(c) => check_hir_portability(&c.sub),
        HirKind::Repetition(r) => check_hir_portability(&r.sub),
        HirKind::Concat(parts) | HirKind::Alternation(parts) => {
            for p in parts {
                check_hir_portability(p)?;
            }
            Ok(())
        }
        HirKind::Empty | HirKind::Literal(_) | HirKind::Class(_) => Ok(()),
    }
}
/// `re_escape(s)`: returns `s` with regex metacharacters escaped via `regex::escape`.
///
/// The input length is reported to `ctx.count_string_ops` before escaping.
///
/// # Errors
///
/// Returns an `ExpressionError` when the first argument is missing or is not
/// a string, or when `count_string_ops` fails.
pub fn re_escape_fn(ctx: Ctx, a: &[ExprValue]) -> R {
    // `first()` instead of `a[0]`: an empty argument slice becomes an error
    // rather than an out-of-bounds panic. Borrowing also avoids cloning the
    // input just to read it.
    let Some(ExprValue::String(text)) = a.first() else {
        return Err(ExpressionError::new("re_escape() requires string"));
    };
    ctx.count_string_ops(text.len())?;
    Ok(ExprValue::String(regex::escape(text)))
}
/// `re_match(s, pattern)`: matches `pattern` at the start of `s`.
///
/// The pattern is wrapped as `^(?:pattern)` so that it can only match at the
/// beginning of the input. Returns `Null` when there is no match; otherwise a
/// list of strings holding the whole match followed by each capture group,
/// with the empty string standing in for groups that did not participate.
///
/// # Errors
///
/// Propagates argument, accounting, validation, compilation and
/// list-construction errors.
pub fn re_match_fn(ctx: Ctx, a: &[ExprValue]) -> R {
    let (subject, pattern) = get_two_strings(a, "re_match")?;
    ctx.count_string_ops(subject.len())?;
    validate_regex_pattern(&pattern)?;
    let anchored = format!("^(?:{})", pattern);
    let re = ctx.get_or_compile_regex(&anchored)?;
    let Some(caps) = re.captures(&subject) else {
        return Ok(ExprValue::Null);
    };
    let mut groups: Vec<ExprValue> = Vec::with_capacity(caps.len());
    for idx in 0..caps.len() {
        let text = match caps.get(idx) {
            Some(m) => m.as_str().to_string(),
            None => String::new(),
        };
        groups.push(ExprValue::String(text));
    }
    Ok(ExprValue::make_list_checked(ctx, groups, ExprType::STRING)?)
}
/// `re_search(s, pattern)`: finds the first match of `pattern` anywhere in `s`.
///
/// Returns `Null` when nothing matches; otherwise a list of strings holding
/// the whole match followed by each capture group, with the empty string
/// standing in for groups that did not participate.
///
/// # Errors
///
/// Propagates argument, accounting, validation, compilation and
/// list-construction errors.
pub fn re_search_fn(ctx: Ctx, a: &[ExprValue]) -> R {
    let (haystack, pattern) = get_two_strings(a, "re_search")?;
    ctx.count_string_ops(haystack.len())?;
    validate_regex_pattern(&pattern)?;
    let re = ctx.get_or_compile_regex(&pattern)?;
    let Some(found) = re.captures(&haystack) else {
        return Ok(ExprValue::Null);
    };
    // Text of group `idx`, or "" when that group took no part in the match.
    let group_text = |idx: usize| match found.get(idx) {
        Some(m) => m.as_str().to_string(),
        None => String::new(),
    };
    let groups: Vec<ExprValue> = (0..found.len())
        .map(|idx| ExprValue::String(group_text(idx)))
        .collect();
    Ok(ExprValue::make_list_checked(ctx, groups, ExprType::STRING)?)
}
/// `re_findall(s, pattern)`: collects every non-overlapping match of `pattern` in `s`.
///
/// The shape of the result depends on how many capture groups the pattern
/// declares:
/// - none: a list of the full match texts;
/// - exactly one: a list of that group's text per match;
/// - several: a list of lists, one inner list of group texts per match.
///
/// Groups that did not participate in a match contribute the empty string.
///
/// # Errors
///
/// Propagates argument, accounting, validation, compilation and
/// list-construction errors.
pub fn re_findall_fn(ctx: Ctx, a: &[ExprValue]) -> R {
    let (text, pattern) = get_two_strings(a, "re_findall")?;
    ctx.count_string_ops(text.len())?;
    validate_regex_pattern(&pattern)?;
    let re = ctx.get_or_compile_regex(&pattern)?;
    // `captures_len()` includes the implicit whole-match group 0.
    let explicit_groups = re.captures_len() - 1;
    match explicit_groups {
        0 => {
            let mut whole: Vec<ExprValue> = Vec::new();
            for m in re.find_iter(&text) {
                whole.push(ExprValue::String(m.as_str().to_string()));
            }
            Ok(ExprValue::make_list_checked(ctx, whole, ExprType::STRING)?)
        }
        1 => {
            let mut firsts: Vec<ExprValue> = Vec::new();
            for caps in re.captures_iter(&text) {
                let piece = match caps.get(1) {
                    Some(m) => m.as_str().to_string(),
                    None => String::new(),
                };
                firsts.push(ExprValue::String(piece));
            }
            Ok(ExprValue::make_list_checked(ctx, firsts, ExprType::STRING)?)
        }
        group_count => {
            let mut rows: Vec<ExprValue> = Vec::new();
            for caps in re.captures_iter(&text) {
                let row: Vec<ExprValue> = (1..=group_count)
                    .map(|idx| {
                        let piece = match caps.get(idx) {
                            Some(m) => m.as_str().to_string(),
                            None => String::new(),
                        };
                        ExprValue::String(piece)
                    })
                    .collect();
                // Stop at the first inner list that fails to build.
                rows.push(ExprValue::make_list(row, ExprType::STRING)?);
            }
            Ok(ExprValue::make_list_checked(
                ctx,
                rows,
                ExprType::list(ExprType::STRING),
            )?)
        }
    }
}
/// `re_replace(s, pattern, replacement)`: replaces every match of `pattern` in `s`.
///
/// The replacement is inserted literally (`regex::NoExpand`); replacement
/// strings that look like group references are refused up front by
/// `validate_regex_replacement`.
///
/// # Errors
///
/// Returns an `ExpressionError` when the argument count is not exactly three
/// or when any argument is not a string, and propagates accounting,
/// validation and compilation errors.
pub fn re_replace_fn(ctx: Ctx, a: &[ExprValue]) -> R {
    let [subject_arg, pattern_arg, replacement_arg] = a else {
        return Err(ExpressionError::new("re_replace() takes 3 arguments"));
    };
    let ExprValue::String(subject) = subject_arg else {
        return Err(ExpressionError::new("re_replace() requires strings"));
    };
    // The subject is accounted for before the remaining arguments are type-checked.
    ctx.count_string_ops(subject.len())?;
    let ExprValue::String(pattern) = pattern_arg else {
        return Err(ExpressionError::new("re_replace() requires strings"));
    };
    let ExprValue::String(replacement) = replacement_arg else {
        return Err(ExpressionError::new("re_replace() requires strings"));
    };
    validate_regex_pattern(pattern)?;
    validate_regex_replacement(replacement)?;
    let re = ctx.get_or_compile_regex(pattern)?;
    let replaced = re.replace_all(subject, regex::NoExpand(replacement));
    Ok(ExprValue::String(replaced.into_owned()))
}
/// Refuses replacement strings that contain something shaped like a group reference.
///
/// The following byte sequences are rejected wherever they occur: a backslash
/// followed by an ASCII digit, the sequence `\g<`, and `$` followed by an
/// ASCII digit or `{`. Backslash escaping is not interpreted, so the second
/// backslash of `\\1` still counts as introducing `\1`.
///
/// # Errors
///
/// Returns an `ExpressionError` when any such sequence is present.
fn validate_regex_replacement(repl: &str) -> Result<(), ExpressionError> {
    let bytes = repl.as_bytes();
    // Look at every adjacent byte pair; the final byte has no successor and
    // therefore can never start a reference.
    let has_group_ref = bytes.windows(2).enumerate().any(|(idx, pair)| {
        let (lead, next) = (pair[0], pair[1]);
        match lead {
            b'\\' => {
                next.is_ascii_digit() || (next == b'g' && bytes.get(idx + 2) == Some(&b'<'))
            }
            b'$' => next.is_ascii_digit() || next == b'{',
            _ => false,
        }
    });
    if has_group_ref {
        return Err(ExpressionError::new(
            "Group references in replacement strings are not supported",
        ));
    }
    Ok(())
}
/// `re_split(s, pattern[, maxsplit])`: splits `s` around matches of `pattern`.
///
/// When the optional third argument is an `Int`, at most that many splits are
/// performed, so the result has at most `maxsplit + 1` pieces. A third
/// argument of any other kind is ignored and the split is unlimited.
///
/// # Errors
///
/// Returns an `ExpressionError` when the argument count is not 2 or 3 or when
/// the first two arguments are not strings, and propagates accounting,
/// validation, compilation and list-construction errors.
pub fn re_split_fn(ctx: Ctx, a: &[ExprValue]) -> R {
    if a.len() < 2 || a.len() > 3 {
        return Err(ExpressionError::new("re_split() takes 2-3 arguments"));
    }
    let (s, pat) = get_two_strings(a, "re_split")?;
    ctx.count_string_ops(s.len())?;
    validate_regex_pattern(&pat)?;
    // `splitn` takes the maximum number of pieces, which is splits + 1.
    // The `as usize` cast turns a negative maxsplit into a very large value;
    // `saturating_add` keeps the `+ 1` from overflowing in that case (most
    // notably for -1, which previously wrapped to a limit of 0 or panicked in
    // overflow-checked builds). Negative values therefore behave as
    // "effectively unlimited".
    let piece_limit = match a.get(2) {
        Some(ExprValue::Int(n)) => Some((*n as usize).saturating_add(1)),
        _ => None,
    };
    let re = ctx.get_or_compile_regex(&pat)?;
    let parts: Vec<ExprValue> = match piece_limit {
        Some(limit) => re
            .splitn(&s, limit)
            .map(|p| ExprValue::String(p.to_string()))
            .collect(),
        None => re
            .split(&s)
            .map(|p| ExprValue::String(p.to_string()))
            .collect(),
    };
    // NOTE(review): sibling functions build their results with
    // `make_list_checked(ctx, ..)`; confirm whether the unchecked constructor
    // is intentional here.
    ExprValue::make_list(parts, ExprType::STRING)
}