#![allow(unused_variables)]
use crate::diagnostics::{Error as DiagnosticError, Result};
use crate::eval::value::{Value, PrimitiveProcedure, PrimitiveImpl, ThreadSafeEnvironment};
use crate::effects::Effect;
use crate::stdlib::text::Text;
use std::sync::Arc;
use crate::regex::compat::{LightRegex, Captures as LightCaptures};
use std::collections::HashMap;
/// A compiled regular expression that operates on [`Text`] values.
///
/// Keeps the source pattern and the flags it was built with alongside the
/// compiled [`LightRegex`], so both can be read back through the accessors.
#[derive(Debug, Clone)]
pub struct TextRegex {
/// Pattern source exactly as passed to the constructor.
pattern: String,
/// Flags the expression was compiled with.
flags: RegexFlags,
/// Compiled engine object that performs the actual matching.
regex: LightRegex,
}
/// Compilation options for a [`TextRegex`].
///
/// `TextRegex::with_flags` forwards `case_insensitive`, `multiline`,
/// `dot_matches_newline` and `unicode` to the regex builder. `extended` and
/// `swap_greed` are stored but are not forwarded anywhere in this file.
#[derive(Debug, Clone, Copy)]
pub struct RegexFlags {
/// Passed to the builder's `case_insensitive` option.
pub case_insensitive: bool,
/// Passed to the builder's `multi_line` option.
pub multiline: bool,
/// Passed to the builder's `dot_matches_new_line` option.
pub dot_matches_newline: bool,
/// Passed to the builder's `unicode` option.
pub unicode: bool,
/// NOTE(review): not consumed by `TextRegex::with_flags` — presumably meant
/// to enable whitespace-insensitive pattern syntax; confirm.
pub extended: bool,
/// NOTE(review): not consumed by `TextRegex::with_flags` — presumably meant
/// to invert quantifier greediness; confirm.
pub swap_greed: bool,
}
/// A single match produced by a [`TextRegex`].
///
/// `start` and `end` are character offsets (not byte offsets): the producers
/// in this file convert the engine's byte positions by counting `char`s.
#[derive(Debug, Clone)]
pub struct TextMatchResult {
/// The matched slice of the searched text.
pub matched_text: Text,
/// Character offset at which the match begins.
pub start: usize,
/// Character offset just past the end of the match.
pub end: usize,
/// Positional capture groups. The `TextRegex` methods in this file always
/// leave this empty.
pub groups: Vec<Option<Text>>,
/// Named capture groups. The `TextRegex` methods in this file always leave
/// this empty.
pub named_groups: HashMap<String, Option<Text>>,
}
/// Lazy iterator over successive matches of a [`TextRegex`] in a [`Text`].
///
/// Created by `TextRegex::find_iter`; borrows both the regex and the text.
pub struct TextMatchIter<'t> {
/// Expression used for every search.
regex: &'t TextRegex,
/// Text being searched.
text: &'t Text,
/// Character offset from which the next search starts; begins at 0.
last_end: usize,
}
impl TextRegex {
    /// Compiles `pattern` with [`RegexFlags::default`].
    ///
    /// # Errors
    /// Returns a runtime error when the pattern does not compile.
    pub fn new(pattern: &str) -> Result<Self> {
        Self::with_flags(pattern, RegexFlags::default())
    }

    /// Compiles `pattern` with the given `flags`.
    ///
    /// Only `case_insensitive`, `multiline`, `dot_matches_newline` and
    /// `unicode` are forwarded to the builder; `extended` and `swap_greed`
    /// are stored on the result but do not influence compilation here.
    ///
    /// # Errors
    /// Returns a runtime error ("Invalid regex pattern: …") when the builder
    /// rejects the pattern.
    pub fn with_flags(pattern: &str, flags: RegexFlags) -> Result<Self> {
        let builder = crate::regex::compat::RegexBuilder::new(pattern)
            .case_insensitive(flags.case_insensitive)
            .multi_line(flags.multiline)
            .dot_matches_new_line(flags.dot_matches_newline)
            .unicode(flags.unicode);
        let regex = builder.build().map_err(|e| {
            DiagnosticError::runtime_error(format!("Invalid regex pattern: {e}"), None)
        })?;
        Ok(Self {
            pattern: pattern.to_string(),
            flags,
            regex,
        })
    }

    /// Returns the pattern source this expression was compiled from.
    pub fn pattern(&self) -> &str {
        &self.pattern
    }

    /// Returns the flags this expression was compiled with.
    pub fn flags(&self) -> RegexFlags {
        self.flags
    }

    /// Reports whether the expression matches anywhere in `text`.
    pub fn is_match(&self, text: &Text) -> bool {
        let haystack = text.to_string();
        self.regex.is_match(&haystack)
    }

    /// Returns the first match in `text`, with character-based offsets.
    ///
    /// Returns `None` when there is no match or when `Text::substring`
    /// rejects the computed range. Capture groups are not populated.
    pub fn find(&self, text: &Text) -> Option<TextMatchResult> {
        let haystack = text.to_string();
        let m = self.regex.find(&haystack)?;
        Self::match_from_byte_span(text, &haystack, m.start(), m.end())
    }

    /// Returns every match in `text`, in the order the engine reports them.
    ///
    /// Matches whose range `Text::substring` rejects are skipped. Capture
    /// groups are not populated.
    pub fn find_all(&self, text: &Text) -> Vec<TextMatchResult> {
        let haystack = text.to_string();
        // Running byte/char cursor so each match only counts the characters
        // since the previous one instead of rescanning from the beginning.
        let mut byte_cursor = 0usize;
        let mut char_cursor = 0usize;
        self.regex
            .find_iter(&haystack)
            .filter_map(|m| {
                let (byte_start, byte_end) = (m.start(), m.end());
                if byte_start < byte_cursor {
                    // Defensive: if the engine ever reports a match that
                    // starts before the previous one ended, recount from zero.
                    byte_cursor = 0;
                    char_cursor = 0;
                }
                let start = char_cursor + haystack[byte_cursor..byte_start].chars().count();
                let end = start + haystack[byte_start..byte_end].chars().count();
                byte_cursor = byte_end;
                char_cursor = end;
                let matched_text = text.substring(start, end)?;
                Some(TextMatchResult {
                    matched_text,
                    start,
                    end,
                    groups: vec![],
                    named_groups: HashMap::new(),
                })
            })
            .collect()
    }

    /// Returns a lazy iterator over the matches in `text`.
    pub fn find_iter<'t>(&'t self, text: &'t Text) -> TextMatchIter<'t> {
        TextMatchIter {
            regex: self,
            text,
            last_end: 0,
        }
    }

    /// Replaces the first match in `text` with `replacement`.
    ///
    /// The replacement string is handed to the engine unchanged.
    pub fn replace(&self, text: &Text, replacement: &Text) -> Text {
        let haystack = text.to_string();
        let replacement_str = replacement.to_string();
        let result = self.regex.replace(&haystack, &replacement_str);
        Text::from_string(result.into_owned())
    }

    /// Replaces every match in `text` with `replacement`.
    ///
    /// The replacement string is handed to the engine unchanged.
    pub fn replace_all(&self, text: &Text, replacement: &Text) -> Text {
        let haystack = text.to_string();
        let replacement_str = replacement.to_string();
        let result = self.regex.replace_all(&haystack, &replacement_str);
        Text::from_string(result.into_owned())
    }

    /// Replaces every match in `text` with the value `replacer` returns for it.
    ///
    /// `replacer` receives a [`TextMatchResult`] with character offsets and
    /// empty capture groups. A match whose range `Text::substring` rejects is
    /// replaced with the empty string.
    pub fn replace_all_with<F>(&self, text: &Text, replacer: F) -> Text
    where
        F: Fn(&TextMatchResult) -> Text,
    {
        let haystack = text.to_string();
        let result = self.regex.replace_all_fn(&haystack, |m| {
            match Self::match_from_byte_span(text, &haystack, m.start(), m.end()) {
                Some(match_result) => replacer(&match_result).to_string(),
                None => String::new(),
            }
        });
        Text::from_string(result.into_owned())
    }

    /// Splits `text` around every match of the expression.
    pub fn split(&self, text: &Text) -> Vec<Text> {
        let haystack = text.to_string();
        self.regex
            .split(&haystack)
            .map(|part| Text::from_string(part.to_string()))
            .collect()
    }

    /// Splits `text` around matches, passing `limit` through to the engine's
    /// `splitn`.
    pub fn splitn(&self, text: &Text, limit: usize) -> Vec<Text> {
        let haystack = text.to_string();
        self.regex
            .splitn(&haystack, limit)
            .map(|part| Text::from_string(part.to_string()))
            .collect()
    }

    /// Builds a [`TextMatchResult`] for the byte range
    /// `byte_start..byte_end` of `haystack`, converting it to character
    /// offsets into `text`.
    ///
    /// `haystack` must be the string form of `text`. Returns `None` when
    /// `Text::substring` rejects the converted range.
    fn match_from_byte_span(
        text: &Text,
        haystack: &str,
        byte_start: usize,
        byte_end: usize,
    ) -> Option<TextMatchResult> {
        let start = haystack[..byte_start].chars().count();
        let end = start + haystack[byte_start..byte_end].chars().count();
        let matched_text = text.substring(start, end)?;
        Some(TextMatchResult {
            matched_text,
            start,
            end,
            groups: vec![],
            named_groups: HashMap::new(),
        })
    }
}
impl<'t> Iterator for TextMatchIter<'t> {
    type Item = TextMatchResult;

    /// Returns the next match at or after the current cursor, with offsets
    /// relative to the start of the whole text.
    ///
    /// Each step searches the remaining tail of the text as a standalone
    /// value, so position-sensitive constructs in the pattern are evaluated
    /// against that tail rather than the full text.
    fn next(&mut self) -> Option<Self::Item> {
        let total_chars = self.text.char_length();
        if self.last_end > total_chars {
            return None;
        }
        let remaining_text = self.text.substring(self.last_end, total_chars)?;
        let found = self.regex.find(&remaining_text)?;
        let start = found.start + self.last_end;
        let end = found.end + self.last_end;
        // An empty match consumes no input. Without stepping the cursor one
        // character forward the same empty match would be returned forever.
        // Stepping past the end of the text is what terminates the iteration
        // on the following call.
        self.last_end = if end == start { end + 1 } else { end };
        Some(TextMatchResult {
            matched_text: found.matched_text,
            start,
            end,
            groups: found.groups,
            named_groups: found.named_groups,
        })
    }
}
impl Default for RegexFlags {
fn default() -> Self {
Self {
case_insensitive: false,
multiline: false,
dot_matches_newline: false,
unicode: true,
extended: false,
swap_greed: false,
}
}
}
impl RegexFlags {
pub fn case_insensitive() -> Self {
Self {
case_insensitive: true,
..Default::default()
}
}
pub fn multiline() -> Self {
Self {
multiline: true,
..Default::default()
}
}
pub fn extended() -> Self {
Self {
case_insensitive: true,
multiline: true,
dot_matches_newline: true,
unicode: true,
extended: true,
swap_greed: false,
}
}
}
pub fn create_regex_bindings(env: &Arc<ThreadSafeEnvironment>) {
bind_regex_construction(env);
bind_regex_matching(env);
bind_regex_replacement(env);
bind_regex_splitting(env);
}
/// Defines `regex-compile` (1–2 arguments) and `regex-compile-ci`
/// (exactly 1 argument) in `env`, both marked as pure.
fn bind_regex_construction(env: &Arc<ThreadSafeEnvironment>) {
    let primitives = [
        (
            "regex-compile",
            Some(2),
            PrimitiveImpl::RustFn(primitive_regex_compile),
        ),
        (
            "regex-compile-ci",
            Some(1),
            PrimitiveImpl::RustFn(primitive_regex_compile_ci),
        ),
    ];
    for (name, arity_max, implementation) in primitives {
        let procedure = PrimitiveProcedure {
            name: name.to_string(),
            arity_min: 1,
            arity_max,
            implementation,
            effects: vec![Effect::Pure],
        };
        env.define(name.to_string(), Value::Primitive(Arc::new(procedure)));
    }
}
/// Defines `regex-match?`, `regex-search` and `regex-search-all` in `env`.
///
/// All three take exactly two arguments and are marked as pure.
fn bind_regex_matching(env: &Arc<ThreadSafeEnvironment>) {
    let primitives = [
        ("regex-match?", PrimitiveImpl::RustFn(primitive_regex_match_p)),
        ("regex-search", PrimitiveImpl::RustFn(primitive_regex_search)),
        (
            "regex-search-all",
            PrimitiveImpl::RustFn(primitive_regex_search_all),
        ),
    ];
    for (name, implementation) in primitives {
        let procedure = PrimitiveProcedure {
            name: name.to_string(),
            arity_min: 2,
            arity_max: Some(2),
            implementation,
            effects: vec![Effect::Pure],
        };
        env.define(name.to_string(), Value::Primitive(Arc::new(procedure)));
    }
}
/// Defines `regex-replace` and `regex-replace-all` in `env`.
///
/// Both take exactly three arguments and are marked as pure.
fn bind_regex_replacement(env: &Arc<ThreadSafeEnvironment>) {
    let primitives = [
        ("regex-replace", PrimitiveImpl::RustFn(primitive_regex_replace)),
        (
            "regex-replace-all",
            PrimitiveImpl::RustFn(primitive_regex_replace_all),
        ),
    ];
    for (name, implementation) in primitives {
        let procedure = PrimitiveProcedure {
            name: name.to_string(),
            arity_min: 3,
            arity_max: Some(3),
            implementation,
            effects: vec![Effect::Pure],
        };
        env.define(name.to_string(), Value::Primitive(Arc::new(procedure)));
    }
}
/// Defines `regex-split` (2–3 arguments, pure) in `env`.
fn bind_regex_splitting(env: &Arc<ThreadSafeEnvironment>) {
    let name = "regex-split";
    let procedure = PrimitiveProcedure {
        name: name.to_string(),
        arity_min: 2,
        arity_max: Some(3),
        implementation: PrimitiveImpl::RustFn(primitive_regex_split),
        effects: vec![Effect::Pure],
    };
    env.define(name.to_string(), Value::Primitive(Arc::new(procedure)));
}
/// `regex-compile`: validates a pattern and returns a tagged string for it.
///
/// `args[0]` must be a string pattern. An optional second argument is
/// accepted but currently ignored: the pattern is always compiled with
/// [`RegexFlags::default`]. The compiled expression is used only to validate
/// the pattern and is then discarded; the result is the string
/// `"regex:<pattern>"`.
///
/// # Errors
/// Returns a runtime error when the argument count is not 1–2, when the
/// pattern is not a string, or when the pattern fails to compile.
fn primitive_regex_compile(args: &[Value]) -> Result<Value> {
    if args.is_empty() || args.len() > 2 {
        return Err(Box::new(DiagnosticError::runtime_error(
            format!("regex-compile expects 1-2 arguments, got {}", args.len()),
            None,
        )));
    }
    let pattern = args[0].as_string().ok_or_else(|| {
        Box::new(DiagnosticError::runtime_error(
            "regex-compile pattern must be a string".to_string(),
            None,
        ))
    })?;
    // Compile purely for validation; a bad pattern surfaces here via `?`.
    TextRegex::with_flags(pattern, RegexFlags::default())?;
    Ok(Value::string(format!("regex:{pattern}")))
}
/// `regex-compile-ci`: validates a pattern under case-insensitive flags and
/// returns the string `"regex-ci:<pattern>"`.
///
/// The compiled expression is used only to validate the pattern.
///
/// # Errors
/// Returns a runtime error when the argument count is not 1, when the
/// pattern is not a string, or when the pattern fails to compile.
fn primitive_regex_compile_ci(args: &[Value]) -> Result<Value> {
    let pattern_arg = match args {
        [pattern_arg] => pattern_arg,
        _ => {
            return Err(Box::new(DiagnosticError::runtime_error(
                format!("regex-compile-ci expects 1 argument, got {}", args.len()),
                None,
            )));
        }
    };
    let pattern = match pattern_arg.as_string() {
        Some(pattern) => pattern,
        None => {
            return Err(Box::new(DiagnosticError::runtime_error(
                "regex-compile-ci pattern must be a string".to_string(),
                None,
            )));
        }
    };
    TextRegex::with_flags(pattern, RegexFlags::case_insensitive())?;
    Ok(Value::string(format!("regex-ci:{pattern}")))
}
/// `regex-match?`: returns a boolean telling whether the pattern in
/// `args[0]` matches anywhere in the text in `args[1]`.
///
/// The pattern is compiled with default flags on every call.
///
/// # Errors
/// Returns a runtime error when the argument count is not 2, when the
/// pattern is not a string, when the second argument cannot be converted to
/// [`Text`], or when the pattern fails to compile.
fn primitive_regex_match_p(args: &[Value]) -> Result<Value> {
    let (pattern_arg, subject_arg) = match args {
        [pattern_arg, subject_arg] => (pattern_arg, subject_arg),
        _ => {
            return Err(Box::new(DiagnosticError::runtime_error(
                format!("regex-match? expects 2 arguments, got {}", args.len()),
                None,
            )));
        }
    };
    let pattern = match pattern_arg.as_string() {
        Some(pattern) => pattern,
        None => {
            return Err(Box::new(DiagnosticError::runtime_error(
                "regex-match? pattern must be a string".to_string(),
                None,
            )));
        }
    };
    let subject = Text::try_from(subject_arg)?;
    let compiled = TextRegex::new(pattern)?;
    let matched = compiled.is_match(&subject);
    Ok(Value::boolean(matched))
}
/// `regex-search`: returns the first match of the pattern in `args[0]`
/// within the text in `args[1]`, or boolean false when there is none.
///
/// The pattern is compiled with default flags on every call.
///
/// # Errors
/// Returns a runtime error when the argument count is not 2, when the
/// pattern is not a string, when the second argument cannot be converted to
/// [`Text`], or when the pattern fails to compile.
fn primitive_regex_search(args: &[Value]) -> Result<Value> {
    let (pattern_arg, subject_arg) = match args {
        [pattern_arg, subject_arg] => (pattern_arg, subject_arg),
        _ => {
            return Err(Box::new(DiagnosticError::runtime_error(
                format!("regex-search expects 2 arguments, got {}", args.len()),
                None,
            )));
        }
    };
    let pattern = match pattern_arg.as_string() {
        Some(pattern) => pattern,
        None => {
            return Err(Box::new(DiagnosticError::runtime_error(
                "regex-search pattern must be a string".to_string(),
                None,
            )));
        }
    };
    let subject = Text::try_from(subject_arg)?;
    let compiled = TextRegex::new(pattern)?;
    if let Some(found) = compiled.find(&subject) {
        let value: Value = found.matched_text.into();
        return Ok(value);
    }
    Ok(Value::boolean(false))
}
/// `regex-search-all`: returns a list holding the text of every match of the
/// pattern in `args[0]` within the text in `args[1]`.
///
/// The pattern is compiled with default flags on every call.
///
/// # Errors
/// Returns a runtime error when the argument count is not 2, when the
/// pattern is not a string, when the second argument cannot be converted to
/// [`Text`], or when the pattern fails to compile.
fn primitive_regex_search_all(args: &[Value]) -> Result<Value> {
    let (pattern_arg, subject_arg) = match args {
        [pattern_arg, subject_arg] => (pattern_arg, subject_arg),
        _ => {
            return Err(Box::new(DiagnosticError::runtime_error(
                format!("regex-search-all expects 2 arguments, got {}", args.len()),
                None,
            )));
        }
    };
    let pattern = match pattern_arg.as_string() {
        Some(pattern) => pattern,
        None => {
            return Err(Box::new(DiagnosticError::runtime_error(
                "regex-search-all pattern must be a string".to_string(),
                None,
            )));
        }
    };
    let subject = Text::try_from(subject_arg)?;
    let compiled = TextRegex::new(pattern)?;
    let found = compiled.find_all(&subject);
    let mut values: Vec<Value> = Vec::with_capacity(found.len());
    for hit in found {
        values.push(hit.matched_text.into());
    }
    Ok(Value::list(values))
}
/// `regex-replace`: replaces the first match of the pattern in `args[0]`
/// within the text in `args[1]` by the replacement in `args[2]`.
///
/// The pattern is compiled with default flags on every call.
///
/// # Errors
/// Returns a runtime error when the argument count is not 3, when the
/// pattern is not a string, when the text or replacement cannot be converted
/// to [`Text`], or when the pattern fails to compile.
fn primitive_regex_replace(args: &[Value]) -> Result<Value> {
    let (pattern_arg, subject_arg, replacement_arg) = match args {
        [pattern_arg, subject_arg, replacement_arg] => (pattern_arg, subject_arg, replacement_arg),
        _ => {
            return Err(Box::new(DiagnosticError::runtime_error(
                format!("regex-replace expects 3 arguments, got {}", args.len()),
                None,
            )));
        }
    };
    let pattern = match pattern_arg.as_string() {
        Some(pattern) => pattern,
        None => {
            return Err(Box::new(DiagnosticError::runtime_error(
                "regex-replace pattern must be a string".to_string(),
                None,
            )));
        }
    };
    let subject = Text::try_from(subject_arg)?;
    let substitute = Text::try_from(replacement_arg)?;
    let compiled = TextRegex::new(pattern)?;
    Ok(compiled.replace(&subject, &substitute).into())
}
/// `regex-replace-all`: replaces every match of the pattern in `args[0]`
/// within the text in `args[1]` by the replacement in `args[2]`.
///
/// The pattern is compiled with default flags on every call.
///
/// # Errors
/// Returns a runtime error when the argument count is not 3, when the
/// pattern is not a string, when the text or replacement cannot be converted
/// to [`Text`], or when the pattern fails to compile.
fn primitive_regex_replace_all(args: &[Value]) -> Result<Value> {
    let (pattern_arg, subject_arg, replacement_arg) = match args {
        [pattern_arg, subject_arg, replacement_arg] => (pattern_arg, subject_arg, replacement_arg),
        _ => {
            return Err(Box::new(DiagnosticError::runtime_error(
                format!("regex-replace-all expects 3 arguments, got {}", args.len()),
                None,
            )));
        }
    };
    let pattern = match pattern_arg.as_string() {
        Some(pattern) => pattern,
        None => {
            return Err(Box::new(DiagnosticError::runtime_error(
                "regex-replace-all pattern must be a string".to_string(),
                None,
            )));
        }
    };
    let subject = Text::try_from(subject_arg)?;
    let substitute = Text::try_from(replacement_arg)?;
    let compiled = TextRegex::new(pattern)?;
    Ok(compiled.replace_all(&subject, &substitute).into())
}
/// `regex-split`: splits the text in `args[1]` around matches of the pattern
/// in `args[0]` and returns the pieces as a list.
///
/// An optional third argument is an integer limit passed to
/// [`TextRegex::splitn`]. The pattern is compiled with default flags on
/// every call.
///
/// # Errors
/// Returns a runtime error when the argument count is not 2–3, when the
/// pattern is not a string, when the text cannot be converted to [`Text`],
/// when the pattern fails to compile, or when the limit is not a
/// non-negative integer.
fn primitive_regex_split(args: &[Value]) -> Result<Value> {
    let (pattern_arg, subject_arg, limit_arg) = match args {
        [pattern_arg, subject_arg] => (pattern_arg, subject_arg, None),
        [pattern_arg, subject_arg, limit_arg] => (pattern_arg, subject_arg, Some(limit_arg)),
        _ => {
            return Err(Box::new(DiagnosticError::runtime_error(
                format!("regex-split expects 2-3 arguments, got {}", args.len()),
                None,
            )));
        }
    };
    let pattern = pattern_arg.as_string().ok_or_else(|| {
        Box::new(DiagnosticError::runtime_error(
            "regex-split pattern must be a string".to_string(),
            None,
        ))
    })?;
    let text = Text::try_from(subject_arg)?;
    let regex = TextRegex::new(pattern)?;
    let parts = match limit_arg {
        Some(limit_value) => {
            let raw_limit = limit_value.as_integer().ok_or_else(|| {
                Box::new(DiagnosticError::runtime_error(
                    "regex-split limit must be an integer".to_string(),
                    None,
                ))
            })?;
            // A plain `as usize` cast would wrap a negative limit into a huge
            // positive one; reject it explicitly instead.
            let limit = usize::try_from(raw_limit).map_err(|_| {
                Box::new(DiagnosticError::runtime_error(
                    "regex-split limit must be a non-negative integer".to_string(),
                    None,
                ))
            })?;
            regex.splitn(&text, limit)
        }
        None => regex.split(&text),
    };
    let part_values: Vec<Value> = parts.into_iter().map(|p| p.into()).collect();
    Ok(Value::list(part_values))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Compiles the digit-run pattern shared by several tests.
    fn digits() -> TextRegex {
        TextRegex::new(r"\d+").unwrap()
    }

    /// The source pattern is retained verbatim after compilation.
    #[test]
    fn test_regex_compilation() {
        assert_eq!(digits().pattern(), r"\d+");
    }

    /// `is_match` and `find` locate the first digit run with char offsets.
    #[test]
    fn test_regex_matching() {
        let re = digits();
        let subject = Text::from_string_slice("abc123def");
        assert!(re.is_match(&subject));

        let found = re.find(&subject).unwrap();
        assert_eq!(found.matched_text.to_string(), "123");
        assert_eq!(found.start, 3);
        assert_eq!(found.end, 6);
    }

    /// `replace` touches only the first match; `replace_all` touches all.
    #[test]
    fn test_regex_replacement() {
        let re = digits();
        let subject = Text::from_string_slice("abc123def456");
        let mask = Text::from_string_slice("XXX");

        let first_only = re.replace(&subject, &mask);
        assert_eq!(first_only.to_string(), "abcXXXdef456");

        let everywhere = re.replace_all(&subject, &mask);
        assert_eq!(everywhere.to_string(), "abcXXXdefXXX");
    }

    /// Splitting on comma plus optional whitespace yields the bare items.
    #[test]
    fn test_regex_splitting() {
        let separator = TextRegex::new(r",\s*").unwrap();
        let subject = Text::from_string_slice("a, b, c, d");
        let rendered: Vec<String> = separator
            .split(&subject)
            .iter()
            .map(|part| part.to_string())
            .collect();
        assert_eq!(rendered, ["a", "b", "c", "d"]);
    }

    /// The case-insensitive preset matches text that differs only in case.
    #[test]
    fn test_case_insensitive_regex() {
        let re = TextRegex::with_flags(r"hello", RegexFlags::case_insensitive()).unwrap();
        let subject = Text::from_string_slice("Hello World");
        assert!(re.is_match(&subject));

        let found = re.find(&subject).unwrap();
        assert_eq!(found.matched_text.to_string(), "Hello");
    }

    /// Named groups are expected to be exposed through `named_groups`.
    ///
    /// NOTE(review): `TextRegex::find` in this file builds its result with an
    /// empty `named_groups` map, so the `contains_key` assertions below look
    /// like they cannot pass as written — confirm.
    #[test]
    fn test_named_groups() {
        let re = TextRegex::new(r"(?P<word>\w+)\s+(?P<number>\d+)").unwrap();
        let subject = Text::from_string_slice("hello 123");
        let found = re.find(&subject).unwrap();

        assert!(found.named_groups.contains_key("word"));
        assert!(found.named_groups.contains_key("number"));

        if let Some(Some(word)) = found.named_groups.get("word") {
            assert_eq!(word.to_string(), "hello");
        }
        if let Some(Some(number)) = found.named_groups.get("number") {
            assert_eq!(number.to_string(), "123");
        }
    }
}