use std::fmt;
use std::sync::{Arc, OnceLock};
use serde::{Deserialize, Serialize};
/// Returns a copy of `value` in which every regex-significant character is
/// preceded by a backslash.
///
/// The escaped set is `- \ { } * + ? | ^ $ . , [ ] ( ) #` plus every character
/// for which `char::is_whitespace` is true. All other characters are copied
/// through unchanged.
pub fn escape_regexp_characters(value: &str) -> String {
    let mut escaped = String::new();
    for ch in value.chars() {
        let is_special = matches!(
            ch,
            '-' | '\\'
                | '{'
                | '}'
                | '*'
                | '+'
                | '?'
                | '|'
                | '^'
                | '$'
                | '.'
                | ','
                | '['
                | ']'
                | '('
                | ')'
                | '#'
        );
        if is_special || ch.is_whitespace() {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
/// Expands `\N` back-references in `pattern` using text captured from `input`.
///
/// Every backslash that is immediately followed by one or more ASCII digits is
/// replaced by the text of capture group `N`, passed through
/// `escape_regexp_characters`. `captures_pos[N]` is the byte range
/// `(start, end)` of that group within `input`.
///
/// A reference resolves to the empty string when the group is `None`, when `N`
/// is past the end of `captures_pos`, or when the recorded range is not a
/// valid slice of `input` (out of bounds, inverted, or not on a character
/// boundary). A digit run too large for `usize` is copied through verbatim,
/// as is a backslash that is not followed by a digit.
pub fn resolve_backreferences(
    pattern: &str,
    input: &str,
    captures_pos: &[Option<(usize, usize)>],
) -> String {
    let mut result = String::with_capacity(pattern.len());
    let mut chars = pattern.chars().peekable();

    while let Some(c) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }

        // Gather the decimal group number that follows the backslash, if any.
        let mut digits = String::new();
        while let Some(digit) = chars.next_if(char::is_ascii_digit) {
            digits.push(digit);
        }

        if digits.is_empty() {
            // Not a back-reference: keep the backslash; the next character is
            // handled by the following loop iteration.
            result.push('\\');
            continue;
        }

        match digits.parse::<usize>() {
            Ok(index) => {
                // Checked lookup: `str::get` returns `None` for a bad range
                // instead of panicking the way `input[start..end]` would.
                let captured = captures_pos
                    .get(index)
                    .copied()
                    .flatten()
                    .and_then(|(start, end)| input.get(start..end))
                    .unwrap_or("");
                result.push_str(&escape_regexp_characters(captured));
            }
            Err(_) => {
                // The number does not fit in `usize`; emit the text unchanged.
                result.push('\\');
                result.push_str(&digits);
            }
        }
    }

    result
}
/// Rewrites every `\z` anchor in `pattern` as `$(?!\n)(?<!\n)`.
///
/// The pattern is read as a sequence of backslash escape pairs, so an escaped
/// backslash followed by a plain `z` (`\\z`) is left untouched, while an
/// escaped backslash followed by a real anchor (`\\\z`) has the anchor
/// rewritten. No placeholder text is involved, so the pattern may contain any
/// literal text. A trailing lone backslash is copied through unchanged.
fn transform_z_anchor(pattern: &str) -> String {
    const Z_REPLACEMENT: &str = "$(?!\\n)(?<!\\n)";

    let mut out = String::with_capacity(pattern.len());
    let mut chars = pattern.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        // Consume the escaped character together with its backslash so that
        // the second half of `\\` can never be mistaken for the start of `\z`.
        match chars.next() {
            Some('z') => out.push_str(Z_REPLACEMENT),
            Some(escaped) => {
                out.push('\\');
                out.push(escaped);
            }
            None => out.push('\\'),
        }
    }
    out
}
/// A regular-expression pattern together with a lazily filled cache for its
/// compiled `onig::Regex`.
pub struct Regex {
// Pattern text; `Regex::new` stores it after applying `transform_z_anchor`.
pattern: String,
// Compilation cache filled on the first `compiled()` call; it holds `None`
// when compiling the pattern failed.
compiled: OnceLock<Option<Arc<onig::Regex>>>,
}
impl Clone for Regex {
fn clone(&self) -> Self {
Regex::new(self.pattern.clone())
}
}
impl fmt::Debug for Regex {
    /// Writes the stored pattern text verbatim, with no quoting and no struct
    /// decoration.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(&self.pattern)
    }
}
impl PartialEq for Regex {
    /// Two values are equal when their stored pattern strings are equal; the
    /// compilation cache takes no part in the comparison.
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.pattern.as_str(), other.pattern.as_str());
        lhs == rhs
    }
}
impl Regex {
    /// Creates a regex from `pattern`.
    ///
    /// The text is passed through `transform_z_anchor` before being stored.
    /// Nothing is compiled here; the cache starts out empty.
    pub fn new(pattern: String) -> Self {
        Self {
            pattern: transform_z_anchor(&pattern),
            compiled: OnceLock::new(),
        }
    }

    /// Returns the stored pattern text.
    pub fn pattern(&self) -> &str {
        self.pattern.as_str()
    }

    /// Returns the compiled regex, compiling the stored pattern on first use.
    ///
    /// The outcome is cached, including failure: when the pattern does not
    /// compile, this call and every later one return `None`.
    pub fn compiled(&self) -> Option<&Arc<onig::Regex>> {
        let cached = self
            .compiled
            .get_or_init(|| match onig::Regex::new(&self.pattern) {
                Ok(regex) => Some(Arc::new(regex)),
                Err(_) => None,
            });
        cached.as_ref()
    }

    /// Compiles the stored pattern and reports the compile error, if any.
    ///
    /// The regex built here is discarded; the cache is neither read nor
    /// written.
    pub fn validate(&self) -> Result<(), onig::Error> {
        match onig::Regex::new(&self.pattern) {
            Ok(_) => Ok(()),
            Err(error) => Err(error),
        }
    }
}
impl Serialize for Regex {
    /// Serializes the regex as a plain string holding the stored pattern text.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text: &str = self.pattern.as_str();
        serializer.serialize_str(text)
    }
}
impl<'de> Deserialize<'de> for Regex {
    /// Reads a string from `deserializer` and builds the regex from it with
    /// `Regex::new`; a failure to read the string is returned unchanged.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        String::deserialize(deserializer).map(Regex::new)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `transform_z_anchor` against a table of `(input, expected)` pairs.
    #[test]
    fn test_transform_z_anchor() {
        let cases: [(&str, &str); 9] = [
            // A bare anchor, and anchors at the end or start of a pattern.
            ("\\z", "$(?!\\n)(?<!\\n)"),
            ("^start\\z", "^start$(?!\\n)(?<!\\n)"),
            ("\\zmiddle", "$(?!\\n)(?<!\\n)middle"),
            // Several anchors in one pattern.
            ("\\z.*\\z", "$(?!\\n)(?<!\\n).*$(?!\\n)(?<!\\n)"),
            // Patterns without the anchor come back unchanged.
            ("^normal$", "^normal$"),
            // An escaped backslash followed by `z` is not an anchor.
            ("\\\\z", "\\\\z"),
            // Other escapes are left alone.
            ("\\A\\G\\n\\t", "\\A\\G\\n\\t"),
            ("", ""),
            // A longer pattern that ends with the anchor.
            (
                "^(?:(?=(msg(?:id(_plural)?|ctxt))\\s*\"[^\"])|\\s*$).*\\z",
                "^(?:(?=(msg(?:id(_plural)?|ctxt))\\s*\"[^\"])|\\s*$).*$(?!\\n)(?<!\\n)",
            ),
        ];

        for &(input, expected) in &cases {
            assert_eq!(transform_z_anchor(input), expected);
        }
    }
}