use std::borrow::Cow;
/// Splits a regex-like token (`qr…`, `m…`, or a bare delimited pattern) into its parts.
///
/// Returns `(pattern, body, modifiers)`:
/// * `pattern` — the body wrapped in the opening delimiter and its matching closing delimiter
///   (the closing delimiter is appended even when the input was unterminated),
/// * `body` — the text between the delimiters,
/// * `modifiers` — everything after the closing delimiter, unfiltered.
///
/// All three strings are empty when nothing follows the operator prefix.
pub fn extract_regex_parts(text: &str) -> (String, String, String) {
    // A leading `m` is only treated as the match operator when the character right after it
    // is not a letter; otherwise the text is scanned as-is.
    let has_match_prefix =
        text.starts_with('m') && text.chars().nth(1).is_some_and(|next| !next.is_alphabetic());
    let content = match text.strip_prefix("qr") {
        Some(after_qr) => after_qr,
        None if has_match_prefix => &text[1..],
        None => text,
    };

    let Some(open) = content.chars().next() else {
        return (String::new(), String::new(), String::new());
    };
    let close = get_closing_delimiter(open);
    let (body, trailing) = extract_delimited_content(content, open, close);
    let pattern = format!("{open}{body}{close}");
    (pattern, body, trailing.to_owned())
}
/// Failure reasons reported by `extract_substitution_parts_strict`.
#[derive(Debug, Clone, PartialEq)]
pub enum SubstitutionError {
/// The text after the replacement contains a character that
/// `validate_substitution_modifiers` rejects; the offending character is carried along.
InvalidModifier(char),
/// Nothing follows the optional `s` prefix (and any whitespace after it).
MissingDelimiter,
/// NOTE(review): no code path visible in this file constructs this variant — confirm
/// whether it is still needed by callers elsewhere.
MissingPattern,
/// The pattern was closed but no replacement section follows it, or (for bracket-style
/// delimiters) the replacement does not start with a usable delimiter.
MissingReplacement,
/// The pattern or the replacement ran to the end of the input without its closing delimiter.
MissingClosingDelimiter,
}
/// Failure reasons reported by `extract_transliteration_parts_strict`.
#[derive(Debug, Clone, PartialEq)]
pub enum TransliterationError {
/// A character in the leading alphanumeric run after the replacement is not one of
/// `c`, `d`, `s`, `r`; the offending character is carried along.
InvalidModifier(char),
/// The search or replacement section starts with an ASCII alphanumeric or whitespace
/// character, which is not accepted as a delimiter; the character is carried along.
InvalidDelimiter(char),
/// Nothing follows the optional `tr`/`y` prefix (and any whitespace after it).
MissingDelimiter,
/// Both sections were closed properly but the search list is empty.
MissingSearch,
/// The search list was closed but no replacement section follows it.
MissingReplacement,
/// The search list or the replacement ran to the end of the input without its closing
/// delimiter.
MissingClosingDelimiter,
}
/// Strictly parses a substitution operator such as `s/pattern/replacement/flags`.
///
/// An optional leading `s` and any whitespace after it are skipped; the next character is the
/// pattern delimiter. Bracket-style delimiters (`()`, `[]`, `{}`, `<>`) nest and let the
/// replacement use its own delimiter; any other delimiter is shared by both sections.
///
/// Returns `(pattern, replacement, modifiers)` on success.
///
/// # Errors
///
/// * [`SubstitutionError::MissingDelimiter`] — no delimiter character is present.
/// * [`SubstitutionError::MissingClosingDelimiter`] — the pattern or the replacement is not
///   terminated.
/// * [`SubstitutionError::MissingReplacement`] — nothing follows the closed pattern, or a
///   bracket-style pattern is followed by an ASCII alphanumeric character instead of a
///   delimiter.
/// * [`SubstitutionError::InvalidModifier`] — `validate_substitution_modifiers` rejected a
///   character after the replacement.
pub fn extract_substitution_parts_strict(
    text: &str,
) -> Result<(String, String, String), SubstitutionError> {
    let content = text.strip_prefix('s').unwrap_or(text).trim_start();
    let open = content
        .chars()
        .next()
        .ok_or(SubstitutionError::MissingDelimiter)?;
    let close = get_closing_delimiter(open);

    let (pattern, after_pattern, pattern_closed) =
        extract_delimited_content_strict(content, open, close);
    if !pattern_closed {
        return Err(SubstitutionError::MissingClosingDelimiter);
    }

    let (replacement, trailing, replacement_closed) = if open == close {
        // Shared delimiter: the replacement begins immediately after the pattern's terminator.
        if after_pattern.is_empty() {
            return Err(SubstitutionError::MissingReplacement);
        }
        extract_unpaired_body_skip_strings(after_pattern, close)
    } else {
        // Bracket delimiters: the replacement brings its own delimiter, optionally preceded
        // by whitespace.
        let candidate = after_pattern.trim_start();
        match candidate.chars().next() {
            Some(repl_open)
                if !(repl_open.is_ascii_alphanumeric() || repl_open.is_whitespace()) =>
            {
                let repl_close = get_closing_delimiter(repl_open);
                extract_delimited_content_strict(candidate, repl_open, repl_close)
            }
            _ => return Err(SubstitutionError::MissingReplacement),
        }
    };
    if !replacement_closed {
        return Err(SubstitutionError::MissingClosingDelimiter);
    }

    let modifiers =
        validate_substitution_modifiers(trailing).map_err(SubstitutionError::InvalidModifier)?;
    Ok((pattern, replacement, modifiers))
}
/// Reads one delimited section from the start of `text` and reports whether it was closed.
///
/// `text` must begin with `open`. When `open` and `close` differ, nested `open`/`close` pairs
/// are balanced and kept in the body. A backslash keeps itself and the following character in
/// the body without interpreting either.
///
/// Returns `(body, rest, closed)`: the text between the delimiters, the input after the
/// closing delimiter, and whether that closing delimiter was found. If `text` does not start
/// with `open` the body is empty and `rest` is the whole input; an unterminated section yields
/// everything scanned as `body` with an empty `rest`.
fn extract_delimited_content_strict(text: &str, open: char, close: char) -> (String, &str, bool) {
    let mut cursor = text.char_indices();
    match cursor.next() {
        None => return (String::new(), "", false),
        Some((_, first)) if first != open => return (String::new(), text, false),
        Some(_) => {}
    }

    let nests = open != close;
    // Only meaningful for nesting delimiters; the opening delimiter itself counts as level 1.
    let mut nesting = 1usize;
    let mut body = String::new();
    let mut after_backslash = false;

    for (idx, ch) in cursor {
        if after_backslash {
            body.push(ch);
            after_backslash = false;
        } else if ch == '\\' {
            body.push(ch);
            after_backslash = true;
        } else if nests && ch == open {
            body.push(ch);
            nesting += 1;
        } else if ch == close {
            if nests {
                nesting -= 1;
            }
            if !nests || nesting == 0 {
                return (body, &text[idx + ch.len_utf8()..], true);
            }
            body.push(ch);
        } else {
            body.push(ch);
        }
    }
    (body, &text[text.len()..], false)
}
pub fn extract_substitution_parts(text: &str) -> (String, String, String) {
let content = text.strip_prefix('s').unwrap_or(text);
let delimiter = match content.chars().next() {
Some(d) => d,
None => return (String::new(), String::new(), String::new()),
};
if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
if let Some((pattern, replacement, modifiers_str)) = split_on_last_paired_delimiter(content)
{
let modifiers = extract_substitution_modifiers(&modifiers_str);
return (pattern, replacement, modifiers);
}
return (String::new(), String::new(), String::new());
}
let closing = get_closing_delimiter(delimiter);
let is_paired = delimiter != closing;
let (mut pattern, rest1, pattern_closed) = if is_paired {
extract_substitution_pattern_with_replacement_hint(content, delimiter, closing)
} else {
extract_delimited_content_strict(content, delimiter, closing)
};
let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
let (body, rest, _found) = extract_unpaired_body_skip_strings(rest1, closing);
(body, Cow::Borrowed(rest))
} else if !is_paired && !pattern_closed {
if let Some((fallback_pattern, fallback_replacement, fallback_modifiers)) =
split_unclosed_substitution_pattern(&pattern)
{
pattern = fallback_pattern;
(fallback_replacement, Cow::Owned(fallback_modifiers))
} else {
(String::new(), Cow::Borrowed(rest1))
}
} else if is_paired {
let trimmed = rest1.trim_start();
if let Some(rd) = trimmed.chars().next() {
if rd.is_ascii_alphanumeric() || rd.is_whitespace() {
(String::new(), Cow::Borrowed(trimmed))
} else {
let repl_closing = get_closing_delimiter(rd);
let (body, rest) = extract_delimited_content(trimmed, rd, repl_closing);
(body, Cow::Borrowed(rest))
}
} else {
(String::new(), Cow::Borrowed(trimmed))
}
} else {
(String::new(), Cow::Borrowed(rest1))
};
let modifiers = extract_substitution_modifiers(modifiers_str.as_ref());
(pattern, replacement, modifiers)
}
/// Best-effort parse of a transliteration operator (`tr/search/replace/flags` or `y/…/…/…`).
///
/// An optional `tr` or `y` prefix and any whitespace after it are skipped. Returns
/// `(search, replacement, modifiers)`; `modifiers` keeps only `c`, `d`, `s` and `r` from the
/// leading alphabetic run after the replacement. All three are empty when no delimiter is
/// present or the delimiter is an ASCII alphanumeric or whitespace character. Unterminated
/// sections are not errors: whatever was scanned is returned.
pub fn extract_transliteration_parts(text: &str) -> (String, String, String) {
    let after_op = text
        .strip_prefix("tr")
        .or_else(|| text.strip_prefix('y'))
        .unwrap_or(text);
    let content = after_op.trim_start();
    let delimiter = match content.chars().next() {
        Some(d) if !(d.is_ascii_alphanumeric() || d.is_whitespace()) => d,
        _ => return (String::new(), String::new(), String::new()),
    };
    let closing = get_closing_delimiter(delimiter);
    let is_paired = delimiter != closing;
    let (search, rest1) = extract_delimited_content(content, delimiter, closing);

    let (replacement, modifiers_str) = if is_paired {
        // Bracket delimiters: the replacement has its own delimiter, possibly after
        // whitespace. `get_closing_delimiter` returns the same character for non-bracket
        // delimiters, so one call covers both the bracketed and the plain case.
        let rest2 = rest1.trim_start();
        match rest2.chars().next() {
            Some(rd) if !(rd.is_ascii_alphanumeric() || rd.is_whitespace()) => {
                extract_delimited_content(rest2, rd, get_closing_delimiter(rd))
            }
            _ => (String::new(), rest2),
        }
    } else {
        // Shared delimiter: the replacement starts right after the search list's terminator
        // and runs to the next unescaped delimiter (or to the end of the input).
        let mut body = String::new();
        let mut escaped = false;
        let mut end_pos = rest1.len();
        for (i, ch) in rest1.char_indices() {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == closing {
                end_pos = i + ch.len_utf8();
                break;
            }
            body.push(ch);
        }
        (body, &rest1[end_pos..])
    };

    let modifiers = modifiers_str
        .chars()
        .take_while(|c| c.is_ascii_alphabetic())
        .filter(|&c| matches!(c, 'c' | 'd' | 's' | 'r'))
        .collect();
    (search, replacement, modifiers)
}
/// Strictly parses a transliteration operator (`tr/search/replace/flags` or `y/…/…/…`).
///
/// An optional `tr` or `y` prefix and any whitespace after it are skipped. Returns
/// `(search, replacement, modifiers)` on success.
///
/// # Errors
///
/// * [`TransliterationError::MissingDelimiter`] — no delimiter character is present.
/// * [`TransliterationError::InvalidDelimiter`] — the search or (for bracket delimiters) the
///   replacement starts with an ASCII alphanumeric or whitespace character.
/// * [`TransliterationError::MissingClosingDelimiter`] — a section is not terminated.
/// * [`TransliterationError::MissingReplacement`] — nothing follows the closed search list.
/// * [`TransliterationError::MissingSearch`] — both sections are closed but the search list is
///   empty.
/// * [`TransliterationError::InvalidModifier`] — the alphanumeric run after the replacement
///   contains a character other than `c`, `d`, `s`, `r`.
pub fn extract_transliteration_parts_strict(
    text: &str,
) -> Result<(String, String, String), TransliterationError> {
    let content = text
        .strip_prefix("tr")
        .or_else(|| text.strip_prefix('y'))
        .unwrap_or(text)
        .trim_start();
    let open = content
        .chars()
        .next()
        .ok_or(TransliterationError::MissingDelimiter)?;
    if open.is_ascii_alphanumeric() || open.is_whitespace() {
        return Err(TransliterationError::InvalidDelimiter(open));
    }
    let close = get_closing_delimiter(open);

    let (search, after_search, search_closed) =
        extract_delimited_content_strict(content, open, close);
    if !search_closed {
        return Err(TransliterationError::MissingClosingDelimiter);
    }

    let (replacement, trailing, replacement_closed) = if open == close {
        // Shared delimiter: the replacement begins right after the search list's terminator.
        if after_search.is_empty() {
            return Err(TransliterationError::MissingReplacement);
        }
        extract_unpaired_body_skip_strings(after_search, close)
    } else {
        // Bracket delimiters: the replacement brings its own delimiter.
        let candidate = after_search.trim_start();
        let repl_open = candidate
            .chars()
            .next()
            .ok_or(TransliterationError::MissingReplacement)?;
        if repl_open.is_ascii_alphanumeric() || repl_open.is_whitespace() {
            return Err(TransliterationError::InvalidDelimiter(repl_open));
        }
        extract_delimited_content_strict(candidate, repl_open, get_closing_delimiter(repl_open))
    };
    if !replacement_closed {
        return Err(TransliterationError::MissingClosingDelimiter);
    }
    // Checked only after both sections are known to be well-formed.
    if search.is_empty() {
        return Err(TransliterationError::MissingSearch);
    }

    let modifiers = trailing
        .chars()
        .take_while(|c| c.is_ascii_alphanumeric())
        .map(|flag| {
            if matches!(flag, 'c' | 'd' | 's' | 'r') {
                Ok(flag)
            } else {
                Err(TransliterationError::InvalidModifier(flag))
            }
        })
        .collect::<Result<String, _>>()?;
    Ok((search, replacement, modifiers))
}
/// Maps an opening delimiter to the character that closes it.
///
/// The four bracket openers `(`, `[`, `{`, `<` map to their counterparts; every other
/// character closes itself.
fn get_closing_delimiter(open: char) -> char {
    const BRACKET_PAIRS: [(char, char); 4] = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')];
    BRACKET_PAIRS
        .iter()
        .find(|(opener, _)| *opener == open)
        .map_or(open, |&(_, closer)| closer)
}
/// Returns `true` when `ch` is one of the bracket openers `{`, `[`, `(`, `<`.
fn is_paired_open(ch: char) -> bool {
    ['{', '[', '(', '<'].contains(&ch)
}
/// Returns the first non-whitespace character of `text` if it is a bracket opener.
///
/// Yields `None` when `text` is empty or blank, or when its first non-whitespace character is
/// anything other than `{`, `[`, `(` or `<`.
fn starts_with_paired_delimiter(text: &str) -> Option<char> {
    text.trim_start()
        .chars()
        .next()
        .filter(|&first| is_paired_open(first))
}
/// Reads one delimited section from the start of `text`.
///
/// `text` must begin with `open`. When `open` and `close` differ, nested `open`/`close` pairs
/// are balanced and kept in the body. A backslash keeps itself and the following character in
/// the body without interpreting either.
///
/// Returns `(body, rest)`: the text between the delimiters and the input after the closing
/// delimiter. If `text` does not start with `open` the body is empty and `rest` is the whole
/// input; an unterminated section yields everything scanned as `body` with an empty `rest`.
fn extract_delimited_content(text: &str, open: char, close: char) -> (String, &str) {
    let mut cursor = text.char_indices();
    let Some((_, first)) = cursor.next() else {
        return (String::new(), "");
    };
    if first != open {
        return (String::new(), text);
    }

    let nests = open != close;
    // Only meaningful for nesting delimiters; the opening delimiter itself counts as level 1.
    let mut nesting = 1usize;
    let mut body = String::new();
    let mut after_backslash = false;
    let mut tail_start = text.len();

    for (idx, ch) in cursor {
        if after_backslash {
            after_backslash = false;
        } else if ch == '\\' {
            after_backslash = true;
        } else if nests && ch == open {
            nesting += 1;
        } else if ch == close {
            if nests {
                nesting -= 1;
            }
            if !nests || nesting == 0 {
                // The terminating delimiter is the only character left out of the body.
                tail_start = idx + ch.len_utf8();
                break;
            }
        }
        body.push(ch);
    }
    (body, &text[tail_start..])
}
/// Scans a quoted string that opens at byte offset `pos` of `text`.
///
/// `quote` is the quote character sitting at `pos`. Backslash escapes are skipped, so an
/// escaped quote does not end the string.
///
/// Returns `Some((end, contains_delimiter))` where `end` is the byte offset just past the
/// closing quote and `contains_delimiter` tells whether `delimiter` occurred unescaped inside
/// the string. Returns `None` when the string is not closed before an unescaped newline or the
/// end of the input, or when the offset after the opening quote is not a valid slice start.
fn scan_inner_string(
    text: &str,
    pos: usize,
    quote: char,
    delimiter: char,
) -> Option<(usize, bool)> {
    let body_start = pos + quote.len_utf8();
    let body = text.get(body_start..)?;
    let mut saw_delimiter = false;
    let mut after_backslash = false;

    for (offset, ch) in body.char_indices() {
        if after_backslash {
            after_backslash = false;
            continue;
        }
        match ch {
            '\\' => after_backslash = true,
            '\n' => return None,
            _ => {
                if ch == delimiter {
                    saw_delimiter = true;
                }
                if ch == quote {
                    return Some((body_start + offset + ch.len_utf8(), saw_delimiter));
                }
            }
        }
    }
    None
}
/// Reads a section that ends at the next unescaped `closing` character, without letting a
/// delimiter inside a quoted string end it early.
///
/// `text` starts at the first character of the section (there is no opening delimiter to
/// skip). A backslash keeps itself and the following character in the body. When a `'` or `"`
/// (other than `closing` itself) opens a string that `scan_inner_string` reports as closed and
/// containing `closing`, the whole quoted string is copied verbatim; otherwise the quote is
/// treated as an ordinary character.
///
/// Returns `(body, rest, closed)`: the section text, the input after the closing delimiter,
/// and whether that delimiter was found. An unterminated section yields an empty `rest`.
fn extract_unpaired_body_skip_strings(text: &str, closing: char) -> (String, &str, bool) {
    let mut body = String::new();
    let mut after_backslash = false;
    // Characters before this byte offset were already copied as part of a quoted string.
    let mut resume_at = 0usize;

    for (idx, ch) in text.char_indices() {
        if idx < resume_at {
            continue;
        }
        if after_backslash {
            body.push(ch);
            after_backslash = false;
            continue;
        }
        if ch == '\\' {
            body.push(ch);
            after_backslash = true;
            continue;
        }
        if ch != closing && (ch == '"' || ch == '\'') {
            if let Some((string_end, true)) = scan_inner_string(text, idx, ch, closing) {
                body.push_str(&text[idx..string_end]);
                resume_at = string_end;
            } else {
                body.push(ch);
            }
            continue;
        }
        if ch == closing {
            return (body, &text[idx + ch.len_utf8()..], true);
        }
        body.push(ch);
    }
    (body, &text[text.len()..], false)
}
/// Reads a bracket-delimited substitution pattern, preferring a closing delimiter that is
/// followed by the start of a bracketed replacement.
///
/// `text` must begin with `open`. Nested `open`/`close` pairs are balanced, and a backslash
/// keeps itself and the following character in the body. Each time a top-level `close` is
/// reached:
/// * if the remaining text (ignoring leading whitespace) starts with a bracket opener, the
///   pattern ends there;
/// * otherwise the `close` is kept in the body and scanning continues.
///
/// If no such position exists, the pattern ends at the first top-level `close` seen.
///
/// Returns `(body, rest, closed)`. `closed` is `false` only when `text` does not start with
/// `open` (then `rest` is the whole input) or no top-level `close` was found (then `rest` is
/// empty).
fn extract_substitution_pattern_with_replacement_hint(
    text: &str,
    open: char,
    close: char,
) -> (String, &str, bool) {
    let mut cursor = text.char_indices();
    match cursor.next() {
        None => return (String::new(), "", false),
        Some((_, first)) if first != open => return (String::new(), text, false),
        Some(_) => {}
    }

    let mut body = String::new();
    let mut nesting = 1usize;
    let mut after_backslash = false;
    // (byte offset just past the first top-level close, body length at that moment)
    let mut fallback: Option<(usize, usize)> = None;

    for (idx, ch) in cursor {
        if after_backslash {
            after_backslash = false;
        } else if ch == '\\' {
            after_backslash = true;
        } else if ch == open {
            nesting += 1;
        } else if ch == close {
            if nesting > 1 {
                nesting -= 1;
            } else {
                let after_close = idx + ch.len_utf8();
                let rest = &text[after_close..];
                if fallback.is_none() {
                    fallback = Some((after_close, body.len()));
                }
                if starts_with_paired_delimiter(rest).is_some() {
                    return (body, rest, true);
                }
            }
        }
        body.push(ch);
    }

    match fallback {
        Some((after_close, kept_len)) => {
            // Drop everything that was speculatively appended after the first close.
            body.truncate(kept_len);
            (body, &text[after_close..], true)
        }
        None => (body, "", false),
    }
}
/// Tries to recover a bracketed replacement from inside an unterminated pattern.
///
/// Walks `pattern` left to right, skipping backslash-escaped characters, and at each bracket
/// opener checks whether a properly closed bracketed section starts there. The first one that
/// closes wins.
///
/// Returns `Some((leading, replacement, trailing))` — the text before the opener, the
/// bracketed section's content, and the text after it — or `None` when no opener starts a
/// closed section.
fn split_unclosed_substitution_pattern(pattern: &str) -> Option<(String, String, String)> {
    let mut after_backslash = false;
    pattern.char_indices().find_map(|(idx, ch)| {
        if after_backslash {
            after_backslash = false;
            return None;
        }
        if ch == '\\' {
            after_backslash = true;
            return None;
        }
        if !is_paired_open(ch) {
            return None;
        }
        let (replacement, trailing, closed) =
            extract_delimited_content_strict(&pattern[idx..], ch, get_closing_delimiter(ch));
        closed.then(|| (pattern[..idx].to_string(), replacement, trailing.to_string()))
    })
}
/// Splits `text` at the last bracket opener that starts a properly closed section.
///
/// All unescaped bracket openers are collected first, then tried from the rightmost to the
/// leftmost; the first one whose bracketed section closes wins.
///
/// Returns `Some((leading, replacement, trailing))` — the text before the opener, the
/// bracketed section's content, and the text after it — or `None` when no opener starts a
/// closed section.
fn split_on_last_paired_delimiter(text: &str) -> Option<(String, String, String)> {
    let mut after_backslash = false;
    let openers: Vec<(usize, char)> = text
        .char_indices()
        .filter(|&(_, ch)| {
            if after_backslash {
                after_backslash = false;
                return false;
            }
            if ch == '\\' {
                after_backslash = true;
                return false;
            }
            is_paired_open(ch)
        })
        .collect();

    openers.into_iter().rev().find_map(|(idx, ch)| {
        let (replacement, trailing, closed) =
            extract_delimited_content_strict(&text[idx..], ch, get_closing_delimiter(ch));
        closed.then(|| (text[..idx].to_string(), replacement, trailing.to_string()))
    })
}
/// Collects the recognised substitution flags from the start of `text`.
///
/// Only the leading run of ASCII letters is considered; within it, letters that are not one of
/// `g i m s x o e r a d l u n p c` are silently dropped.
fn extract_substitution_modifiers(text: &str) -> String {
    const KNOWN_FLAGS: &str = "gimsxoeradlunpc";
    text.chars()
        .take_while(char::is_ascii_alphabetic)
        .filter(|&flag| KNOWN_FLAGS.contains(flag))
        .collect()
}
/// Validates the flag letters that follow a substitution's replacement.
///
/// Characters are read until the first whitespace character or `;`, which ends the flag list.
/// Every character before that point must be one of `g i m s x o e r a d l u n p c`.
///
/// # Errors
///
/// Returns the first character that is neither a recognised flag nor a terminator.
pub fn validate_substitution_modifiers(modifiers_str: &str) -> Result<String, char> {
    const KNOWN_FLAGS: &str = "gimsxoeradlunpc";
    modifiers_str
        .chars()
        .take_while(|&ch| !(ch.is_whitespace() || ch == ';'))
        .map(|ch| {
            if KNOWN_FLAGS.contains(ch) {
                Ok(ch)
            } else {
                Err(ch)
            }
        })
        .collect()
}