1use std::borrow::Cow;
7
8pub fn extract_regex_parts(text: &str) -> (String, String, String) {
10 let content = if let Some(stripped) = text.strip_prefix("qr") {
12 stripped
13 } else if text.starts_with('m')
14 && text.len() > 1
15 && text.chars().nth(1).is_some_and(|c| !c.is_alphabetic())
16 {
17 &text[1..]
18 } else {
19 text
20 };
21
22 let delimiter = match content.chars().next() {
24 Some(d) => d,
25 None => return (String::new(), String::new(), String::new()),
26 };
27 let closing = get_closing_delimiter(delimiter);
28
29 let (body, modifiers) = extract_delimited_content(content, delimiter, closing);
31
32 let pattern = format!("{}{}{}", delimiter, body, closing);
34
35 (pattern, body, modifiers.to_string())
36}
37
/// Reasons a substitution operator can be rejected by
/// `extract_substitution_parts_strict`.
///
/// `Eq` is derived alongside `PartialEq` because every payload is a `char` or
/// absent, so equality is total.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubstitutionError {
    /// A character in the modifier position is not an accepted modifier; the
    /// offending character is carried along.
    InvalidModifier(char),
    /// Nothing follows the optional `s` prefix, so no delimiter could be read.
    MissingDelimiter,
    /// The pattern part is missing.
    /// NOTE(review): no code in this section constructs this variant — confirm
    /// whether other callers still rely on it.
    MissingPattern,
    /// The replacement part is absent or does not start where one is required.
    MissingReplacement,
    /// The pattern or the replacement was never terminated by its closing
    /// delimiter.
    MissingClosingDelimiter,
}
52
53pub fn extract_substitution_parts_strict(
68 text: &str,
69) -> Result<(String, String, String), SubstitutionError> {
70 let after_s = text.strip_prefix('s').unwrap_or(text);
72 let content = after_s.trim_start();
74
75 let delimiter = match content.chars().next() {
77 Some(d) => d,
78 None => return Err(SubstitutionError::MissingDelimiter),
79 };
80 let closing = get_closing_delimiter(delimiter);
81 let is_paired = delimiter != closing;
82
83 let (pattern, rest1, pattern_closed) =
85 extract_delimited_content_strict(content, delimiter, closing);
86
87 if !is_paired && !pattern_closed {
89 return Err(SubstitutionError::MissingClosingDelimiter);
90 }
91
92 if is_paired && !pattern_closed {
94 return Err(SubstitutionError::MissingClosingDelimiter);
95 }
96
97 let (replacement, modifiers_str, replacement_closed) = if !is_paired {
101 if rest1.is_empty() {
103 return Err(SubstitutionError::MissingReplacement);
104 }
105
106 let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
109 (body, rest, found_closing)
110 } else {
111 let trimmed = rest1.trim_start();
113 if let Some(rd) = trimmed.chars().next() {
117 if rd == '{' || rd == '[' || rd == '(' || rd == '<' {
119 let repl_closing = get_closing_delimiter(rd);
120 extract_delimited_content_strict(trimmed, rd, repl_closing)
121 } else {
122 return Err(SubstitutionError::MissingReplacement);
124 }
125 } else {
126 return Err(SubstitutionError::MissingReplacement);
128 }
129 };
130
131 if !is_paired && !replacement_closed {
133 return Err(SubstitutionError::MissingClosingDelimiter);
134 }
135
136 if is_paired && !replacement_closed {
138 return Err(SubstitutionError::MissingClosingDelimiter);
139 }
140
141 let modifiers = validate_substitution_modifiers(modifiers_str)
143 .map_err(SubstitutionError::InvalidModifier)?;
144
145 Ok((pattern, replacement, modifiers))
146}
147
/// Reads one delimited section from the start of `text` and reports whether it
/// was terminated.
///
/// `text` must begin with `open`. The section ends at the matching `close`:
/// when `open` and `close` differ, nested pairs are balanced; when they are the
/// same character, the first unescaped occurrence ends the section. A backslash
/// keeps the following character in the body verbatim (the backslash itself is
/// kept too) and prevents it from being treated as a delimiter.
///
/// Returns `(body, rest, found_closing)`:
/// * `body` – text between the delimiters, escapes preserved;
/// * `rest` – text after the closing delimiter (empty if none was found);
/// * `found_closing` – `true` only when the closing delimiter was seen.
///
/// If `text` is empty the result is `("", "", false)`; if it does not start
/// with `open` the result is `("", text, false)`.
fn extract_delimited_content_strict(text: &str, open: char, close: char) -> (String, &str, bool) {
    let nests = open != close;
    let mut iter = text.char_indices();

    match iter.next() {
        None => return (String::new(), "", false),
        Some((_, first)) if first != open => return (String::new(), text, false),
        Some(_) => {}
    }

    let mut body = String::new();
    // Number of sections still open. Without nesting this never rises above 1,
    // so the first unescaped `close` ends the scan.
    let mut open_sections = 1usize;
    let mut after_backslash = false;

    for (offset, ch) in iter {
        if after_backslash {
            after_backslash = false;
        } else if ch == '\\' {
            after_backslash = true;
        } else if nests && ch == open {
            open_sections += 1;
        } else if ch == close {
            open_sections -= 1;
            if open_sections == 0 {
                return (body, &text[offset + ch.len_utf8()..], true);
            }
        }
        body.push(ch);
    }

    (body, &text[text.len()..], false)
}
206
207pub fn extract_substitution_parts(text: &str) -> (String, String, String) {
220 let content = text.strip_prefix('s').unwrap_or(text);
222
223 let delimiter = match content.chars().next() {
225 Some(d) => d,
226 None => return (String::new(), String::new(), String::new()),
227 };
228 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
229 if let Some((pattern, replacement, modifiers_str)) = split_on_last_paired_delimiter(content)
230 {
231 let modifiers = extract_substitution_modifiers(&modifiers_str);
232 return (pattern, replacement, modifiers);
233 }
234
235 return (String::new(), String::new(), String::new());
236 }
237 let closing = get_closing_delimiter(delimiter);
238 let is_paired = delimiter != closing;
239
240 let (mut pattern, rest1, pattern_closed) = if is_paired {
242 extract_substitution_pattern_with_replacement_hint(content, delimiter, closing)
243 } else {
244 extract_delimited_content_strict(content, delimiter, closing)
245 };
246
247 let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
251 let (body, rest, _found) = extract_unpaired_body_skip_strings(rest1, closing);
254 (body, Cow::Borrowed(rest))
255 } else if !is_paired && !pattern_closed {
256 if let Some((fallback_pattern, fallback_replacement, fallback_modifiers)) =
257 split_unclosed_substitution_pattern(&pattern)
258 {
259 pattern = fallback_pattern;
260 (fallback_replacement, Cow::Owned(fallback_modifiers))
261 } else {
262 (String::new(), Cow::Borrowed(rest1))
263 }
264 } else if is_paired {
265 let trimmed = rest1.trim_start();
266 if let Some(rd) = starts_with_paired_delimiter(trimmed) {
270 let repl_closing = get_closing_delimiter(rd);
271 let (body, rest) = extract_delimited_content(trimmed, rd, repl_closing);
272 (body, Cow::Borrowed(rest))
273 } else {
274 let (body, rest) = extract_unpaired_body(rest1, closing);
275 (body, Cow::Borrowed(rest))
276 }
277 } else {
278 (String::new(), Cow::Borrowed(rest1))
279 };
280
281 let modifiers = extract_substitution_modifiers(modifiers_str.as_ref());
283
284 (pattern, replacement, modifiers)
285}
286
287pub fn extract_transliteration_parts(text: &str) -> (String, String, String) {
289 let content = if let Some(stripped) = text.strip_prefix("tr") {
291 stripped
292 } else if let Some(stripped) = text.strip_prefix('y') {
293 stripped
294 } else {
295 text
296 };
297
298 let delimiter = match content.chars().next() {
300 Some(d) => d,
301 None => return (String::new(), String::new(), String::new()),
302 };
303 let closing = get_closing_delimiter(delimiter);
304 let is_paired = delimiter != closing;
305
306 let (search, rest1) = extract_delimited_content(content, delimiter, closing);
308
309 let rest2_owned;
312 let rest2 = if is_paired {
313 rest1.trim_start()
314 } else {
315 rest2_owned = format!("{}{}", delimiter, rest1);
316 &rest2_owned
317 };
318
319 let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
321 let chars = rest1.char_indices();
323 let mut body = String::new();
324 let mut escaped = false;
325 let mut end_pos = rest1.len();
326
327 for (i, ch) in chars {
328 if escaped {
329 body.push(ch);
330 escaped = false;
331 continue;
332 }
333
334 match ch {
335 '\\' => {
336 body.push(ch);
337 escaped = true;
338 }
339 c if c == closing => {
340 end_pos = i + ch.len_utf8();
341 break;
342 }
343 _ => body.push(ch),
344 }
345 }
346
347 (body, &rest1[end_pos..])
348 } else if is_paired {
349 if let Some(repl_delimiter) = starts_with_paired_delimiter(rest2) {
350 let repl_closing = get_closing_delimiter(repl_delimiter);
351 extract_delimited_content(rest2, repl_delimiter, repl_closing)
352 } else {
353 (String::new(), rest2)
354 }
355 } else {
356 (String::new(), rest1)
357 };
358
359 let modifiers = modifiers_str
362 .chars()
363 .take_while(|c| c.is_ascii_alphabetic())
364 .filter(|&c| matches!(c, 'c' | 'd' | 's' | 'r'))
365 .collect();
366
367 (search, replacement, modifiers)
368}
369
/// Returns the delimiter that closes `open`.
///
/// The four bracket openers `(`, `[`, `{`, `<` map to their counterparts; any
/// other character closes itself.
fn get_closing_delimiter(open: char) -> char {
    const BRACKET_PAIRS: [(char, char); 4] = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')];

    BRACKET_PAIRS
        .iter()
        .find(|&&(opener, _)| opener == open)
        .map_or(open, |&(_, closer)| closer)
}
380
/// Reports whether `ch` is one of the bracket openers `{`, `[`, `(` or `<`.
fn is_paired_open(ch: char) -> bool {
    ['{', '[', '(', '<'].contains(&ch)
}
384
385fn starts_with_paired_delimiter(text: &str) -> Option<char> {
386 let trimmed = text.trim_start();
387 match trimmed.chars().next() {
388 Some(ch) if is_paired_open(ch) => Some(ch),
389 _ => None,
390 }
391}
392
393fn extract_delimited_content(text: &str, open: char, close: char) -> (String, &str) {
395 let mut chars = text.char_indices();
396 let is_paired = open != close;
397
398 if let Some((_, c)) = chars.next() {
400 if c != open {
401 return (String::new(), text);
402 }
403 } else {
404 return (String::new(), "");
405 }
406
407 let mut body = String::new();
408 let mut depth = if is_paired { 1 } else { 0 };
409 let mut escaped = false;
410 let mut end_pos = text.len();
411
412 for (i, ch) in chars {
413 if escaped {
414 body.push(ch);
415 escaped = false;
416 continue;
417 }
418
419 match ch {
420 '\\' => {
421 body.push(ch);
422 escaped = true;
423 }
424 c if c == open && is_paired => {
425 body.push(ch);
426 depth += 1;
427 }
428 c if c == close => {
429 if is_paired {
430 depth -= 1;
431 if depth == 0 {
432 end_pos = i + ch.len_utf8();
433 break;
434 }
435 body.push(ch);
436 } else {
437 end_pos = i + ch.len_utf8();
438 break;
439 }
440 }
441 _ => body.push(ch),
442 }
443 }
444
445 (body, &text[end_pos..])
446}
447
/// Collects text up to the first unescaped `closing` character.
///
/// Unlike the delimited readers, `text` is expected to start *inside* the
/// section (no opening delimiter). A backslash is kept in the body together
/// with the character that follows it, and that character is never treated as
/// the terminator.
///
/// Returns `(body, rest)` where `rest` is the text after the terminator, or
/// empty when no terminator was found.
fn extract_unpaired_body(text: &str, closing: char) -> (String, &str) {
    let mut collected = String::new();
    let mut after_backslash = false;

    for (offset, ch) in text.char_indices() {
        if after_backslash {
            after_backslash = false;
        } else if ch == '\\' {
            after_backslash = true;
        } else if ch == closing {
            return (collected, &text[offset + ch.len_utf8()..]);
        }
        collected.push(ch);
    }

    (collected, &text[text.len()..])
}
475
/// Scans a quoted string that starts at byte offset `pos` in `text`.
///
/// `pos` must point at the opening `quote`; scanning begins right after it.
/// Backslash-escaped characters are skipped. The scan gives up at a newline.
///
/// Returns `Some((end, contains_delim))` when the closing `quote` is found on
/// the same line, where `end` is the byte offset just past the closing quote
/// and `contains_delim` tells whether an unescaped `delimiter` occurred inside
/// the string. Returns `None` when the string is not closed before a newline
/// or the end of `text`, or when the offset after the opening quote is not a
/// valid slice start.
fn scan_inner_string(
    text: &str,
    pos: usize,
    quote: char,
    delimiter: char,
) -> Option<(usize, bool)> {
    let body_start = pos + quote.len_utf8();
    let tail = text.get(body_start..)?;

    let mut saw_delimiter = false;
    let mut skip_next = false;

    for (offset, ch) in tail.char_indices() {
        if skip_next {
            skip_next = false;
            continue;
        }

        match ch {
            '\\' => skip_next = true,
            '\n' => return None,
            _ => {
                if ch == delimiter {
                    saw_delimiter = true;
                }
                if ch == quote {
                    return Some((body_start + offset + ch.len_utf8(), saw_delimiter));
                }
            }
        }
    }

    None
}
527
528fn extract_unpaired_body_skip_strings(text: &str, closing: char) -> (String, &str, bool) {
538 let mut body = String::new();
539 let mut end_pos = text.len();
540 let mut found_closing = false;
541 let mut pos = 0usize;
542 let mut escaped = false;
543
544 while let Some(ch) = text.get(pos..).and_then(|s| s.chars().next()) {
545 if escaped {
546 body.push(ch);
547 escaped = false;
548 pos += ch.len_utf8();
549 continue;
550 }
551
552 match ch {
553 '\\' => {
554 body.push(ch);
555 escaped = true;
556 pos += ch.len_utf8();
557 }
558 '"' | '\'' if ch != closing => {
564 let quote = ch;
565 match scan_inner_string(text, pos, quote, closing) {
566 Some((string_end, true)) => {
567 let string_text = &text[pos..string_end];
569 body.push_str(string_text);
570 pos = string_end;
571 }
572 _ => {
573 body.push(ch);
576 pos += ch.len_utf8();
577 }
578 }
579 }
580 c if c == closing => {
581 end_pos = pos + ch.len_utf8();
582 found_closing = true;
583 break;
584 }
585 _ => {
586 body.push(ch);
587 pos += ch.len_utf8();
588 }
589 }
590 }
591
592 (body, &text[end_pos..], found_closing)
593}
594
595fn extract_substitution_pattern_with_replacement_hint(
596 text: &str,
597 open: char,
598 close: char,
599) -> (String, &str, bool) {
600 let mut chars = text.char_indices();
601
602 if let Some((_, c)) = chars.next() {
604 if c != open {
605 return (String::new(), text, false);
606 }
607 } else {
608 return (String::new(), "", false);
609 }
610
611 let mut body = String::new();
612 let mut depth = 1usize;
613 let mut escaped = false;
614 let mut first_close_pos: Option<usize> = None;
615 let mut first_body_len: usize = 0;
616
617 for (i, ch) in chars {
618 if escaped {
619 body.push(ch);
620 escaped = false;
621 continue;
622 }
623
624 match ch {
625 '\\' => {
626 body.push(ch);
627 escaped = true;
628 }
629 c if c == open => {
630 body.push(ch);
631 depth += 1;
632 }
633 c if c == close => {
634 if depth > 1 {
635 depth -= 1;
636 body.push(ch);
637 continue;
638 }
639
640 let rest = &text[i + ch.len_utf8()..];
641 if first_close_pos.is_none() {
642 first_close_pos = Some(i + ch.len_utf8());
643 first_body_len = body.len();
644 }
645
646 if starts_with_paired_delimiter(rest).is_some() {
647 return (body, rest, true);
648 }
649
650 body.push(ch);
651 }
652 _ => body.push(ch),
653 }
654 }
655
656 if let Some(pos) = first_close_pos {
657 body.truncate(first_body_len);
658 return (body, &text[pos..], true);
659 }
660
661 (body, "", false)
662}
663
664fn split_unclosed_substitution_pattern(pattern: &str) -> Option<(String, String, String)> {
665 let mut escaped = false;
666
667 for (idx, ch) in pattern.char_indices() {
668 if escaped {
669 escaped = false;
670 continue;
671 }
672
673 if ch == '\\' {
674 escaped = true;
675 continue;
676 }
677
678 if is_paired_open(ch) {
679 let closing = get_closing_delimiter(ch);
680 let (replacement, rest, found_closing) =
681 extract_delimited_content_strict(&pattern[idx..], ch, closing);
682 if found_closing {
683 let leading = pattern[..idx].to_string();
684 return Some((leading, replacement, rest.to_string()));
685 }
686 }
687 }
688
689 None
690}
691
692fn split_on_last_paired_delimiter(text: &str) -> Option<(String, String, String)> {
693 let mut escaped = false;
694 let mut candidates = Vec::new();
695
696 for (idx, ch) in text.char_indices() {
697 if escaped {
698 escaped = false;
699 continue;
700 }
701
702 if ch == '\\' {
703 escaped = true;
704 continue;
705 }
706
707 if is_paired_open(ch) {
708 candidates.push((idx, ch));
709 }
710 }
711
712 for (idx, ch) in candidates.into_iter().rev() {
713 let closing = get_closing_delimiter(ch);
714 let (replacement, rest, found_closing) =
715 extract_delimited_content_strict(&text[idx..], ch, closing);
716 if found_closing {
717 let leading = text[..idx].to_string();
718 return Some((leading, replacement, rest.to_string()));
719 }
720 }
721
722 None
723}
724
/// Leniently extracts substitution modifiers from the start of `text`.
///
/// Reads the leading run of ASCII letters and keeps only those that are
/// recognised modifier flags, silently dropping the others. Reading stops at
/// the first character that is not an ASCII letter.
fn extract_substitution_modifiers(text: &str) -> String {
    const KNOWN_FLAGS: &str = "gimsxoeradlunpc";

    let mut flags = String::new();
    for ch in text.chars() {
        if !ch.is_ascii_alphabetic() {
            break;
        }
        if KNOWN_FLAGS.contains(ch) {
            flags.push(ch);
        }
    }
    flags
}
758
/// Strictly validates the modifiers that follow a substitution operator.
///
/// Characters are read from the start of `modifiers_str`. Recognised modifier
/// flags are accumulated; whitespace or `;` ends the modifier run and the rest
/// of the input is ignored.
///
/// # Errors
///
/// Returns `Err(c)` with the offending character as soon as one is met that is
/// neither a recognised flag nor a terminator (whitespace or `;`).
pub fn validate_substitution_modifiers(modifiers_str: &str) -> Result<String, char> {
    let mut accepted = String::new();

    for ch in modifiers_str.chars() {
        match ch {
            'g' | 'i' | 'm' | 's' | 'x' | 'o' | 'e' | 'r' | 'a' | 'd' | 'l' | 'u' | 'n' | 'p'
            | 'c' => accepted.push(ch),
            ';' => break,
            other if other.is_whitespace() => break,
            other => return Err(other),
        }
    }

    Ok(accepted)
}