/// Converts sed-style BRE escapes in `pattern` to their unescaped,
/// PCRE-style spellings.
///
/// The input is scanned once, left to right. A backslash is never copied
/// directly; it is held back and decides how the following character is
/// emitted:
///
/// - `\(`, `\)`, `\{`, `\}`, `\+`, `\?` and `\|` lose their backslash.
/// - `\\` collapses to a single `\`.
/// - `\1` through `\9` and `\&` are rewritten to `$1`..`$9` and `$&`.
/// - Every other escape (`\n`, `\t`, `\.`, `\*`, `\0`, ...) is copied through
///   unchanged, backslash included.
///
/// Characters that are not preceded by a backslash are copied verbatim, so a
/// pattern that already uses unescaped operators comes back as-is. A lone
/// trailing backslash is preserved.
///
/// Returns the converted pattern as a new `String`; `pattern` is not modified.
pub fn convert_bre_to_pcre(pattern: &str) -> String {
    // No rewrite below emits more bytes than it consumes, so the input length
    // is an upper bound for the output and one allocation suffices.
    let mut result = String::with_capacity(pattern.len());
    // True while the previous character was an unconsumed backslash.
    let mut escape_next = false;

    for c in pattern.chars() {
        if escape_next {
            escape_next = false;
            match c {
                // Escaped BRE operators become bare operators.
                '(' | ')' | '{' | '}' | '+' | '?' | '|' => result.push(c),
                // NOTE(review): an escaped backslash is emitted as ONE
                // backslash, so `\\(` turns into `\(`. Confirm the consumer
                // of the result expects that.
                '\\' => result.push('\\'),
                // NOTE(review): `$N` / `$&` is usually replacement-string
                // syntax; confirm callers want it inside a pattern.
                '1'..='9' | '&' => {
                    result.push('$');
                    result.push(c);
                }
                // Unknown escapes (including `\n`, wherever it appears) keep
                // their backslash.
                _ => {
                    result.push('\\');
                    result.push(c);
                }
            }
        } else if c == '\\' {
            escape_next = true;
        } else {
            result.push(c);
        }
    }

    // The input ended on a backslash with nothing to escape: keep it.
    if escape_next {
        result.push('\\');
    }
    result
}
/// Rewrites sed-style references in a replacement string to `$`-style ones.
///
/// The input is scanned once, left to right. A backslash is held back and
/// combined with the character that follows it:
///
/// - `\1` through `\9` become `$1`..`$9`.
/// - `\&` becomes `$&`.
/// - `\\` collapses to a single `\`.
/// - Every other escape (`\n`, `\t`, `\0`, ...) is copied through unchanged,
///   backslash included.
///
/// Characters that are not preceded by a backslash (including a bare `&` or
/// `$`) are copied verbatim, and a lone trailing backslash is preserved.
///
/// Returns the converted replacement as a new `String`.
pub fn convert_sed_backreferences(replacement: &str) -> String {
    // No rewrite below emits more bytes than it consumes, so the input length
    // is an upper bound for the output.
    let mut result = String::with_capacity(replacement.len());
    // True while the previous character was an unconsumed backslash.
    let mut escape_next = false;

    for c in replacement.chars() {
        if escape_next {
            escape_next = false;
            match c {
                // Group reference or whole-match reference: swap `\` for `$`.
                '1'..='9' | '&' => {
                    result.push('$');
                    result.push(c);
                }
                // Escaped backslash is emitted as one backslash.
                '\\' => result.push('\\'),
                // Anything else, `\n` included, keeps its backslash.
                _ => {
                    result.push('\\');
                    result.push(c);
                }
            }
        } else if c == '\\' {
            escape_next = true;
        } else {
            result.push(c);
        }
    }

    // The input ended on a backslash with nothing to escape: keep it.
    if escape_next {
        result.push('\\');
    }
    result
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds each `(input, expected)` pair through `convert_bre_to_pcre` and
    /// stops at the first mismatch.
    fn expect_patterns(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(convert_bre_to_pcre(input), expected);
        }
    }

    /// Feeds each `(input, expected)` pair through
    /// `convert_sed_backreferences` and stops at the first mismatch.
    fn expect_replacements(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(convert_sed_backreferences(input), expected);
        }
    }

    /// Reports whether `pattern` contains a backslash immediately followed by
    /// one of `(`, `)`, `{`, `}`, `+`, `?`, `|` or a digit `1`-`9`.
    fn is_bre_pattern(pattern: &str) -> bool {
        // Pair every character with its successor and look for `\` + marker.
        let successors = pattern.chars().skip(1);
        pattern.chars().zip(successors).any(|(first, second)| {
            first == '\\'
                && matches!(
                    second,
                    '(' | ')' | '{' | '}' | '+' | '?' | '|' | '1'..='9'
                )
        })
    }

    #[test]
    fn test_convert_parentheses() {
        expect_patterns(&[
            (r#"\(foo\)"#, "(foo)"),
            (r#"\(a\)\(b\)"#, "(a)(b)"),
            (r#"foo\(bar\)"#, "foo(bar)"),
        ]);
    }

    #[test]
    fn test_convert_braces() {
        expect_patterns(&[(r#"foo\{3\}"#, "foo{3}"), (r#"\{3,5\}"#, "{3,5}")]);
    }

    #[test]
    fn test_convert_quantifiers() {
        expect_patterns(&[
            (r#"foo\+"#, "foo+"),
            (r#"foo\?"#, "foo?"),
            // `\*` is not one of the rewritten escapes, so it passes through.
            (r#"foo\*"#, r#"foo\*"#),
        ]);
    }

    #[test]
    fn test_convert_alternation() {
        expect_patterns(&[(r#"foo\|bar"#, "foo|bar")]);
    }

    #[test]
    fn test_convert_backreferences() {
        expect_patterns(&[(r#"\1"#, "$1"), (r#"\2\1"#, "$2$1"), (r#"\&"#, "$&")]);
    }

    #[test]
    fn test_convert_backslash() {
        expect_patterns(&[
            (r#"\\"#, "\\"),
            (r#"foo\\"#, "foo\\"),
            // Two escaped backslashes, then an unescaped `)`.
            (r#"\\\\)"#, r#"\\)"#),
        ]);
    }

    #[test]
    fn test_no_conversion_needed() {
        expect_patterns(&[
            (r#"(foo)"#, "(foo)"),
            (r#"foo+"#, "foo+"),
            (r#"foo|bar"#, "foo|bar"),
        ]);
    }

    #[test]
    fn test_is_bre_pattern() {
        let bre_inputs = [
            r#"\(foo\)"#,
            r#"foo\+"#,
            r#"foo\{3\}"#,
            r#"foo\|bar"#,
            r#"\1"#,
        ];
        for input in bre_inputs.iter() {
            assert!(is_bre_pattern(input));
        }

        let non_bre_inputs = [r#"(foo)"#, r#"foo+"#, r#"foo|bar"#];
        for input in non_bre_inputs.iter() {
            assert!(!is_bre_pattern(input));
        }
    }

    #[test]
    fn test_convert_sed_backreferences() {
        expect_replacements(&[
            (r#"\1"#, "$1"),
            (r#"\2\1"#, "$2$1"),
            (r#"\&"#, "$&"),
            (r#"\\"#, "\\"),
            (r#"\n"#, "\\n"),
            (r#"foo\1bar"#, "foo$1bar"),
        ]);
    }

    #[test]
    fn test_no_backreference_conversion() {
        expect_replacements(&[(r#"foo"#, "foo"), (r#"foo bar"#, "foo bar")]);
    }

    #[test]
    fn test_complex_bre_pattern() {
        expect_patterns(&[(r#"\(foo\)\(bar\) \2\1"#, r#"(foo)(bar) $2$1"#)]);
    }

    #[test]
    fn test_pcre_pattern_unchanged() {
        expect_patterns(&[
            (r#"(foo|bar)+"#, r#"(foo|bar)+"#),
            (r#"foo{3,5}"#, r#"foo{3,5}"#),
        ]);
    }

    #[test]
    fn test_simple_patterns() {
        expect_patterns(&[
            ("foo", "foo"),
            ("bar123", "bar123"),
            ("test_pattern", "test_pattern"),
            ("", ""),
        ]);
    }

    #[test]
    fn test_anchors() {
        expect_patterns(&[
            ("^foo", "^foo"),
            ("bar$", "bar$"),
            ("^start$", "^start$"),
            // An escaped anchor keeps its backslash.
            (r#"\^foo"#, r#"\^foo"#),
        ]);
    }

    #[test]
    fn test_character_classes() {
        expect_patterns(&[
            ("[a-z]", "[a-z]"),
            ("[A-Z0-9]", "[A-Z0-9]"),
            ("[^abc]", "[^abc]"),
            ("[[:alpha:]]", "[[:alpha:]]"),
            // An escaped `]` keeps its backslash.
            (r#"[a\]z]"#, r#"[a\]z]"#),
        ]);
    }

    #[test]
    fn test_escaped_sequences() {
        // None of these escapes is rewritten; each keeps its backslash.
        expect_patterns(&[
            (r#"\t"#, r#"\t"#),
            (r#"\n"#, r#"\n"#),
            (r#"\s"#, r#"\s"#),
            (r#"\w"#, r#"\w"#),
        ]);
    }

    #[test]
    fn test_wildcard() {
        expect_patterns(&[
            ("f.o", "f.o"),
            (".*", ".*"),
            // Escaped `.` and `*` keep their backslashes.
            (r#"\.\*"#, r#"\.\*"#),
        ]);
    }

    #[test]
    fn test_complex_nested_patterns() {
        expect_patterns(&[
            (r#"\(foo\(bar\)\)"#, "(foo(bar))"),
            (r#"\(a\|\(b\|c\)\)"#, "(a|(b|c))"),
            (r#"\(foo\)\+"#, "(foo)+"),
            (r#"\(bar\)\{2,5\}"#, "(bar){2,5}"),
            (r#"\(foo\)\{3\} \(bar\|baz\)"#, r#"(foo){3} (bar|baz)"#),
        ]);
    }

    #[test]
    fn test_multiple_backreferences_in_replacement() {
        expect_replacements(&[
            (r#"\1\2\3"#, "$1$2$3"),
            (r#"\9\8\7\6\5\4\3\2\1"#, "$9$8$7$6$5$4$3$2$1"),
            (r#"start\1middle\2end"#, "start$1middle$2end"),
            (r#"\1\1\1"#, "$1$1$1"),
        ]);
    }

    #[test]
    fn test_match_reference_in_replacement() {
        expect_replacements(&[
            (r#"\&"#, "$&"),
            (r#"foo\&bar"#, "foo$&bar"),
            (r#"\&\&"#, "$&$&"),
            (r#"\1\&\2"#, "$1$&$2"),
        ]);
    }

    #[test]
    fn test_mixed_backreferences_and_text() {
        expect_replacements(&[
            (r#"prefix_\1_suffix"#, "prefix_$1_suffix"),
            (r#"Result: \1, \2"#, "Result: $1, $2"),
            (r#"\1:\&:\2"#, "$1:$&:$2"),
        ]);
    }

    #[test]
    fn test_no_backreferences_in_text() {
        expect_replacements(&[
            ("simple text", "simple text"),
            ("1234567890", "1234567890"),
            ("!@#$%^&*()", "!@#$%^&*()"),
            ("", ""),
        ]);
    }

    #[test]
    fn test_trailing_backslash_pattern() {
        expect_patterns(&[
            // A dangling backslash at the end is kept.
            (r#"foo\"#, r#"foo\"#),
            (r#"\("#, r#"("#),
            (r#"\"#, r#"\"#),
        ]);
    }

    #[test]
    fn test_trailing_backslash_replacement() {
        expect_replacements(&[
            (r#"foo\"#, r#"foo\"#),
            (r#"\"#, r#"\"#),
            (r#"\1\"#, r#"$1\"#),
        ]);
    }

    #[test]
    fn test_double_backslash_conversion() {
        expect_patterns(&[
            (r#"\\"#, "\\"),
            (r#"foo\\bar"#, "foo\\bar"),
            (r#"\\("#, r#"\("#),
            // One escaped backslash followed by a dangling one.
            (r#"\\\"#, r#"\\"#),
            // Two escaped backslashes.
            (r#"\\\\"#, r#"\\"#),
        ]);
    }

    #[test]
    fn test_double_backslash_replacement() {
        expect_replacements(&[
            (r#"\\"#, "\\"),
            (r#"foo\\bar"#, "foo\\bar"),
            (r#"\1\\n"#, "$1\\n"),
        ]);
    }

    #[test]
    fn test_alternation_patterns() {
        expect_patterns(&[
            (r#"foo\|bar"#, "foo|bar"),
            // `\b` is not rewritten, so it keeps its backslash.
            (r#"\(foo\|bar\|\baz\)"#, r#"(foo|bar|\baz)"#),
            (r#"a\|b\|c"#, "a|b|c"),
            (r#"\(foo\|bar\)\+"#, "(foo|bar)+"),
        ]);
    }

    #[test]
    fn test_repetition_quantifiers() {
        expect_patterns(&[
            (r#"foo\+"#, "foo+"),
            (r#"foo\?"#, "foo?"),
            (r#"foo\{3\}"#, "foo{3}"),
            (r#"foo\{3,5\}"#, "foo{3,5}"),
            (r#"foo\{3,\}"#, "foo{3,}"),
            (r#"foo\{,5\}"#, "foo{,5}"),
            (r#"foo\*"#, r#"foo\*"#),
        ]);
    }

    #[test]
    fn test_grouped_commands() {
        expect_patterns(&[
            (r#"/foo\|bar/"#, r#"/foo|bar/"#),
            (r#"\(test\).*\1"#, r#"(test).*$1"#),
        ]);
    }

    #[test]
    fn test_digit_backreferences_in_pattern() {
        expect_patterns(&[
            (r#"\1"#, "$1"),
            (r#"\2"#, "$2"),
            (r#"\9"#, "$9"),
            // `\0` is outside the 1-9 range and is left untouched.
            (r#"\0"#, r#"\0"#),
        ]);
    }

    #[test]
    fn test_special_characters_preserved() {
        // Unescaped metacharacters come back exactly as given.
        let unchanged = [r#"."#, r#"*"#, r#"^"#, r#"$"#, r#"["#, r#"]"#];
        for &symbol in unchanged.iter() {
            assert_eq!(convert_bre_to_pcre(symbol), symbol);
        }
    }

    #[test]
    fn test_newline_escape_at_end() {
        expect_patterns(&[(r#"foo\n"#, r#"foo\n"#), (r#"\n"#, r#"\n"#)]);
    }

    #[test]
    fn test_empty_groups() {
        expect_patterns(&[(r#"\(\)"#, "()"), (r#"\(\+\)"#, "(+)")]);
    }

    #[test]
    fn test_unicode_patterns() {
        expect_patterns(&[
            ("föö", "föö"),
            (r#"\(日本語\)"#, "(日本語)"),
            ("test_测试", "test_测试"),
        ]);
    }
}