1use std::borrow::Cow;
7
8pub fn extract_regex_parts(text: &str) -> (String, String, String) {
10 let content = if let Some(stripped) = text.strip_prefix("qr") {
12 stripped
13 } else if text.starts_with('m')
14 && text.len() > 1
15 && text.chars().nth(1).is_some_and(|c| !c.is_alphabetic())
16 {
17 &text[1..]
18 } else {
19 text
20 };
21
22 let delimiter = match content.chars().next() {
24 Some(d) => d,
25 None => return (String::new(), String::new(), String::new()),
26 };
27 let closing = get_closing_delimiter(delimiter);
28
29 let (body, modifiers) = extract_delimited_content(content, delimiter, closing);
31
32 let pattern = format!("{}{}{}", delimiter, body, closing);
34
35 (pattern, body, modifiers.to_string())
36}
37
/// Reasons `extract_substitution_parts_strict` can reject its input.
///
/// Every payload is a plain `char`, so equality is total and `Eq` is derived
/// alongside `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubstitutionError {
    /// A character in the modifier position was not an accepted substitution
    /// flag; carries the offending character.
    InvalidModifier(char),
    /// Nothing follows the optional leading `s` (after trimming whitespace),
    /// so there is no delimiter to read.
    MissingDelimiter,
    /// NOTE(review): not produced by the parsing code in this file; presumably
    /// reserved for an empty pattern. Confirm against callers.
    MissingPattern,
    /// The pattern was closed but no replacement section follows it, or the
    /// replacement would start with an alphanumeric character.
    MissingReplacement,
    /// The pattern or the replacement section was never closed.
    MissingClosingDelimiter,
}
52
/// Reasons `extract_transliteration_parts_strict` can reject its input.
///
/// Every payload is a plain `char`, so equality is total and `Eq` is derived
/// alongside `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransliterationError {
    /// A character in the modifier position was not one of `c`, `d`, `s`, `r`;
    /// carries the offending character.
    InvalidModifier(char),
    /// A delimiter position held an ASCII alphanumeric or whitespace
    /// character; carries that character.
    InvalidDelimiter(char),
    /// Nothing follows the optional leading `tr` / `y` (after trimming
    /// whitespace), so there is no delimiter to read.
    MissingDelimiter,
    /// Both sections were closed but the search list is empty.
    MissingSearch,
    /// The search list was closed but no replacement section follows it.
    MissingReplacement,
    /// The search list or the replacement section was never closed.
    MissingClosingDelimiter,
}
69
70pub fn extract_substitution_parts_strict(
85 text: &str,
86) -> Result<(String, String, String), SubstitutionError> {
87 let after_s = text.strip_prefix('s').unwrap_or(text);
89 let content = after_s.trim_start();
91
92 let delimiter = match content.chars().next() {
94 Some(d) => d,
95 None => return Err(SubstitutionError::MissingDelimiter),
96 };
97 let closing = get_closing_delimiter(delimiter);
98 let is_paired = delimiter != closing;
99
100 let (pattern, rest1, pattern_closed) =
102 extract_delimited_content_strict(content, delimiter, closing);
103
104 if !is_paired && !pattern_closed {
106 return Err(SubstitutionError::MissingClosingDelimiter);
107 }
108
109 if is_paired && !pattern_closed {
111 return Err(SubstitutionError::MissingClosingDelimiter);
112 }
113
114 let (replacement, modifiers_str, replacement_closed) = if !is_paired {
118 if rest1.is_empty() {
120 return Err(SubstitutionError::MissingReplacement);
121 }
122
123 let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
126 (body, rest, found_closing)
127 } else {
128 let trimmed = rest1.trim_start();
131 if let Some(rd) = trimmed.chars().next() {
132 if rd.is_ascii_alphanumeric() || rd.is_whitespace() {
133 return Err(SubstitutionError::MissingReplacement);
134 }
135 let repl_closing = get_closing_delimiter(rd);
136 extract_delimited_content_strict(trimmed, rd, repl_closing)
137 } else {
138 return Err(SubstitutionError::MissingReplacement);
140 }
141 };
142
143 if !is_paired && !replacement_closed {
145 return Err(SubstitutionError::MissingClosingDelimiter);
146 }
147
148 if is_paired && !replacement_closed {
150 return Err(SubstitutionError::MissingClosingDelimiter);
151 }
152
153 let modifiers = validate_substitution_modifiers(modifiers_str)
155 .map_err(SubstitutionError::InvalidModifier)?;
156
157 Ok((pattern, replacement, modifiers))
158}
159
/// Reads one delimited section from the start of `text` and reports whether
/// its closing delimiter was actually found.
///
/// Returns `(body, rest, closed)`:
///
/// * `body` - the characters between the delimiters. Backslashes and the
///   characters they escape are copied verbatim; nested `open`/`close` pairs
///   are kept when the two delimiters differ.
/// * `rest` - the text after the closing delimiter, or `""` if the section
///   never closes.
/// * `closed` - `true` only when the matching closer was seen.
///
/// If `text` does not begin with `open`, the result is `("", text, false)`.
fn extract_delimited_content_strict(text: &str, open: char, close: char) -> (String, &str, bool) {
    let Some(inner) = text.strip_prefix(open) else {
        return (String::new(), text, false);
    };

    let nested = open != close;
    let header = open.len_utf8();
    let mut body = String::new();
    // Only tracked for distinct open/close pairs; stays 0 otherwise.
    let mut depth = usize::from(nested);
    let mut after_backslash = false;

    for (offset, ch) in inner.char_indices() {
        if after_backslash {
            after_backslash = false;
        } else if ch == '\\' {
            after_backslash = true;
        } else if nested && ch == open {
            depth += 1;
        } else if ch == close {
            if nested {
                depth -= 1;
            }
            if depth == 0 {
                let end = header + offset + ch.len_utf8();
                return (body, &text[end..], true);
            }
        }
        // Every character that does not terminate the section is part of it.
        body.push(ch);
    }

    (body, "", false)
}
218
219pub fn extract_substitution_parts(text: &str) -> (String, String, String) {
232 let content = text.strip_prefix('s').unwrap_or(text);
234
235 let delimiter = match content.chars().next() {
237 Some(d) => d,
238 None => return (String::new(), String::new(), String::new()),
239 };
240 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
241 if let Some((pattern, replacement, modifiers_str)) = split_on_last_paired_delimiter(content)
242 {
243 let modifiers = extract_substitution_modifiers(&modifiers_str);
244 return (pattern, replacement, modifiers);
245 }
246
247 return (String::new(), String::new(), String::new());
248 }
249 let closing = get_closing_delimiter(delimiter);
250 let is_paired = delimiter != closing;
251
252 let (mut pattern, rest1, pattern_closed) = if is_paired {
254 extract_substitution_pattern_with_replacement_hint(content, delimiter, closing)
255 } else {
256 extract_delimited_content_strict(content, delimiter, closing)
257 };
258
259 let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
263 let (body, rest, _found) = extract_unpaired_body_skip_strings(rest1, closing);
266 (body, Cow::Borrowed(rest))
267 } else if !is_paired && !pattern_closed {
268 if let Some((fallback_pattern, fallback_replacement, fallback_modifiers)) =
269 split_unclosed_substitution_pattern(&pattern)
270 {
271 pattern = fallback_pattern;
272 (fallback_replacement, Cow::Owned(fallback_modifiers))
273 } else {
274 (String::new(), Cow::Borrowed(rest1))
275 }
276 } else if is_paired {
277 let trimmed = rest1.trim_start();
278 if let Some(rd) = trimmed.chars().next() {
279 if rd.is_ascii_alphanumeric() || rd.is_whitespace() {
280 (String::new(), Cow::Borrowed(trimmed))
281 } else {
282 let repl_closing = get_closing_delimiter(rd);
283 let (body, rest) = extract_delimited_content(trimmed, rd, repl_closing);
284 (body, Cow::Borrowed(rest))
285 }
286 } else {
287 (String::new(), Cow::Borrowed(trimmed))
288 }
289 } else {
290 (String::new(), Cow::Borrowed(rest1))
291 };
292
293 let modifiers = extract_substitution_modifiers(modifiers_str.as_ref());
295
296 (pattern, replacement, modifiers)
297}
298
299pub fn extract_transliteration_parts(text: &str) -> (String, String, String) {
301 let after_op = if let Some(stripped) = text.strip_prefix("tr") {
303 stripped
304 } else if let Some(stripped) = text.strip_prefix('y') {
305 stripped
306 } else {
307 text
308 };
309 let content = after_op.trim_start();
310
311 let delimiter = match content.chars().next() {
313 Some(d) => d,
314 None => return (String::new(), String::new(), String::new()),
315 };
316 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
317 return (String::new(), String::new(), String::new());
318 }
319 let closing = get_closing_delimiter(delimiter);
320 let is_paired = delimiter != closing;
321
322 let (search, rest1) = extract_delimited_content(content, delimiter, closing);
324
325 let rest2_owned;
328 let rest2 = if is_paired {
329 rest1.trim_start()
330 } else {
331 rest2_owned = format!("{}{}", delimiter, rest1);
332 &rest2_owned
333 };
334
335 let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
337 let chars = rest1.char_indices();
339 let mut body = String::new();
340 let mut escaped = false;
341 let mut end_pos = rest1.len();
342
343 for (i, ch) in chars {
344 if escaped {
345 body.push(ch);
346 escaped = false;
347 continue;
348 }
349
350 match ch {
351 '\\' => {
352 body.push(ch);
353 escaped = true;
354 }
355 c if c == closing => {
356 end_pos = i + ch.len_utf8();
357 break;
358 }
359 _ => body.push(ch),
360 }
361 }
362
363 (body, &rest1[end_pos..])
364 } else if is_paired {
365 if let Some(repl_delimiter) = starts_with_paired_delimiter(rest2) {
366 let repl_closing = get_closing_delimiter(repl_delimiter);
367 extract_delimited_content(rest2, repl_delimiter, repl_closing)
368 } else if let Some(repl_delimiter) = rest2.chars().next() {
369 if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
370 (String::new(), rest2)
371 } else {
372 extract_delimited_content(rest2, repl_delimiter, repl_delimiter)
373 }
374 } else {
375 (String::new(), rest2)
376 }
377 } else {
378 (String::new(), rest1)
379 };
380
381 let modifiers = modifiers_str
384 .chars()
385 .take_while(|c| c.is_ascii_alphabetic())
386 .filter(|&c| matches!(c, 'c' | 'd' | 's' | 'r'))
387 .collect();
388
389 (search, replacement, modifiers)
390}
391
392pub fn extract_transliteration_parts_strict(
402 text: &str,
403) -> Result<(String, String, String), TransliterationError> {
404 let after_op = if let Some(stripped) = text.strip_prefix("tr") {
406 stripped
407 } else if let Some(stripped) = text.strip_prefix('y') {
408 stripped
409 } else {
410 text
411 };
412 let content = after_op.trim_start();
413
414 let delimiter = match content.chars().next() {
416 Some(d) => d,
417 None => return Err(TransliterationError::MissingDelimiter),
418 };
419 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
420 return Err(TransliterationError::InvalidDelimiter(delimiter));
421 }
422 let closing = get_closing_delimiter(delimiter);
423 let is_paired = delimiter != closing;
424
425 let (search, rest1, search_closed) =
427 extract_delimited_content_strict(content, delimiter, closing);
428 if !search_closed {
429 return Err(TransliterationError::MissingClosingDelimiter);
430 }
431
432 let (replacement, modifiers_str, replacement_closed) = if !is_paired {
434 if rest1.is_empty() {
435 return Err(TransliterationError::MissingReplacement);
436 }
437 let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
438 (body, rest, found_closing)
439 } else {
440 let trimmed = rest1.trim_start();
441 if let Some(repl_delimiter) = trimmed.chars().next() {
442 if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
447 return Err(TransliterationError::InvalidDelimiter(repl_delimiter));
448 }
449 let repl_closing = get_closing_delimiter(repl_delimiter);
450 let (body, rest, found_closing) =
451 extract_delimited_content_strict(trimmed, repl_delimiter, repl_closing);
452 (body, rest, found_closing)
453 } else {
454 return Err(TransliterationError::MissingReplacement);
455 }
456 };
457
458 if !replacement_closed {
459 return Err(TransliterationError::MissingClosingDelimiter);
460 }
461
462 if search.is_empty() {
463 return Err(TransliterationError::MissingSearch);
464 }
465
466 let mut modifiers = String::new();
468 for modifier in modifiers_str.chars().take_while(|c: &char| c.is_ascii_alphanumeric()) {
469 if matches!(modifier, 'c' | 'd' | 's' | 'r') {
470 modifiers.push(modifier);
471 } else {
472 return Err(TransliterationError::InvalidModifier(modifier));
473 }
474 }
475
476 Ok((search, replacement, modifiers))
477}
478
/// Maps an opening delimiter to the character that closes it.
///
/// The four bracket-style openers `(`, `[`, `{`, `<` map to `)`, `]`, `}`,
/// `>`; every other character closes itself.
fn get_closing_delimiter(open: char) -> char {
    const BRACKET_PAIRS: [(char, char); 4] = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')];

    BRACKET_PAIRS
        .iter()
        .find(|&&(opener, _)| opener == open)
        .map_or(open, |&(_, closer)| closer)
}
489
/// Returns `true` when `ch` is one of the bracket-style openers `{`, `[`,
/// `(` or `<`.
fn is_paired_open(ch: char) -> bool {
    ['{', '[', '(', '<'].contains(&ch)
}
493
494fn starts_with_paired_delimiter(text: &str) -> Option<char> {
495 let trimmed = text.trim_start();
496 match trimmed.chars().next() {
497 Some(ch) if is_paired_open(ch) => Some(ch),
498 _ => None,
499 }
500}
501
/// Reads one delimited section from the start of `text`.
///
/// Returns `(body, rest)` where `body` is the text between the delimiters and
/// `rest` is whatever follows the closing delimiter. Backslashes and the
/// characters they escape are copied into `body` verbatim, and nested
/// `open`/`close` pairs are kept when the two delimiters differ.
///
/// If `text` does not begin with `open`, the result is `("", text)`. If the
/// section never closes, `body` holds everything after the opener and `rest`
/// is empty; callers that need to tell this apart from a closed section
/// cannot do so from this return value.
fn extract_delimited_content(text: &str, open: char, close: char) -> (String, &str) {
    let Some(inner) = text.strip_prefix(open) else {
        return (String::new(), text);
    };

    let nested = open != close;
    let header = open.len_utf8();
    let mut body = String::new();
    // Only tracked for distinct open/close pairs; stays 0 otherwise.
    let mut depth = usize::from(nested);
    let mut after_backslash = false;

    for (offset, ch) in inner.char_indices() {
        if after_backslash {
            after_backslash = false;
        } else if ch == '\\' {
            after_backslash = true;
        } else if nested && ch == open {
            depth += 1;
        } else if ch == close {
            if nested {
                depth -= 1;
            }
            if depth == 0 {
                return (body, &text[header + offset + ch.len_utf8()..]);
            }
        }
        // Every character that does not terminate the section is part of it.
        body.push(ch);
    }

    (body, "")
}
556
/// Scans a quoted string that starts at byte offset `pos` of `text`.
///
/// `pos` must be the offset of the opening `quote`. Scanning begins just
/// after it and skips any character that follows a backslash.
///
/// Returns `Some((end, contains_delim))` when an unescaped closing `quote` is
/// found on the same line: `end` is the byte offset just past that quote and
/// `contains_delim` tells whether an unescaped `delimiter` appeared before it.
/// Returns `None` if a newline or the end of `text` is reached first, or if
/// the offset after the opening quote is not a valid slice start.
fn scan_inner_string(
    text: &str,
    pos: usize,
    quote: char,
    delimiter: char,
) -> Option<(usize, bool)> {
    let body_start = pos + quote.len_utf8();
    let tail = text.get(body_start..)?;

    let mut skip_next = false;
    let mut saw_delimiter = false;

    for (offset, ch) in tail.char_indices() {
        if skip_next {
            skip_next = false;
            continue;
        }
        match ch {
            '\\' => skip_next = true,
            // A string literal may not span lines for this heuristic.
            '\n' => return None,
            _ => {
                if ch == delimiter {
                    saw_delimiter = true;
                }
                if ch == quote {
                    return Some((body_start + offset + ch.len_utf8(), saw_delimiter));
                }
            }
        }
    }

    None
}
608
609fn extract_unpaired_body_skip_strings(text: &str, closing: char) -> (String, &str, bool) {
619 let mut body = String::new();
620 let mut end_pos = text.len();
621 let mut found_closing = false;
622 let mut pos = 0usize;
623 let mut escaped = false;
624
625 while let Some(ch) = text.get(pos..).and_then(|s| s.chars().next()) {
626 if escaped {
627 body.push(ch);
628 escaped = false;
629 pos += ch.len_utf8();
630 continue;
631 }
632
633 match ch {
634 '\\' => {
635 body.push(ch);
636 escaped = true;
637 pos += ch.len_utf8();
638 }
639 '"' | '\'' if ch != closing => {
645 let quote = ch;
646 match scan_inner_string(text, pos, quote, closing) {
647 Some((string_end, true)) => {
648 let string_text = &text[pos..string_end];
650 body.push_str(string_text);
651 pos = string_end;
652 }
653 _ => {
654 body.push(ch);
657 pos += ch.len_utf8();
658 }
659 }
660 }
661 c if c == closing => {
662 end_pos = pos + ch.len_utf8();
663 found_closing = true;
664 break;
665 }
666 _ => {
667 body.push(ch);
668 pos += ch.len_utf8();
669 }
670 }
671 }
672
673 (body, &text[end_pos..], found_closing)
674}
675
676fn extract_substitution_pattern_with_replacement_hint(
677 text: &str,
678 open: char,
679 close: char,
680) -> (String, &str, bool) {
681 let mut chars = text.char_indices();
682
683 if let Some((_, c)) = chars.next() {
685 if c != open {
686 return (String::new(), text, false);
687 }
688 } else {
689 return (String::new(), "", false);
690 }
691
692 let mut body = String::new();
693 let mut depth = 1usize;
694 let mut escaped = false;
695 let mut first_close_pos: Option<usize> = None;
696 let mut first_body_len: usize = 0;
697
698 for (i, ch) in chars {
699 if escaped {
700 body.push(ch);
701 escaped = false;
702 continue;
703 }
704
705 match ch {
706 '\\' => {
707 body.push(ch);
708 escaped = true;
709 }
710 c if c == open => {
711 body.push(ch);
712 depth += 1;
713 }
714 c if c == close => {
715 if depth > 1 {
716 depth -= 1;
717 body.push(ch);
718 continue;
719 }
720
721 let rest = &text[i + ch.len_utf8()..];
722 if first_close_pos.is_none() {
723 first_close_pos = Some(i + ch.len_utf8());
724 first_body_len = body.len();
725 }
726
727 if starts_with_paired_delimiter(rest).is_some() {
728 return (body, rest, true);
729 }
730
731 body.push(ch);
732 }
733 _ => body.push(ch),
734 }
735 }
736
737 if let Some(pos) = first_close_pos {
738 body.truncate(first_body_len);
739 return (body, &text[pos..], true);
740 }
741
742 (body, "", false)
743}
744
745fn split_unclosed_substitution_pattern(pattern: &str) -> Option<(String, String, String)> {
746 let mut escaped = false;
747
748 for (idx, ch) in pattern.char_indices() {
749 if escaped {
750 escaped = false;
751 continue;
752 }
753
754 if ch == '\\' {
755 escaped = true;
756 continue;
757 }
758
759 if is_paired_open(ch) {
760 let closing = get_closing_delimiter(ch);
761 let (replacement, rest, found_closing) =
762 extract_delimited_content_strict(&pattern[idx..], ch, closing);
763 if found_closing {
764 let leading = pattern[..idx].to_string();
765 return Some((leading, replacement, rest.to_string()));
766 }
767 }
768 }
769
770 None
771}
772
773fn split_on_last_paired_delimiter(text: &str) -> Option<(String, String, String)> {
774 let mut escaped = false;
775 let mut candidates = Vec::new();
776
777 for (idx, ch) in text.char_indices() {
778 if escaped {
779 escaped = false;
780 continue;
781 }
782
783 if ch == '\\' {
784 escaped = true;
785 continue;
786 }
787
788 if is_paired_open(ch) {
789 candidates.push((idx, ch));
790 }
791 }
792
793 for (idx, ch) in candidates.into_iter().rev() {
794 let closing = get_closing_delimiter(ch);
795 let (replacement, rest, found_closing) =
796 extract_delimited_content_strict(&text[idx..], ch, closing);
797 if found_closing {
798 let leading = text[..idx].to_string();
799 return Some((leading, replacement, rest.to_string()));
800 }
801 }
802
803 None
804}
805
/// Collects the recognised substitution flags from the start of `text`.
///
/// Reading stops at the first character that is not an ASCII letter. Letters
/// outside the accepted set (`g i m s x o e r a d l u n p c`) are silently
/// dropped rather than reported.
fn extract_substitution_modifiers(text: &str) -> String {
    const ACCEPTED_FLAGS: &str = "gimsxoeradlunpc";

    let mut flags = String::new();
    for candidate in text.chars() {
        if !candidate.is_ascii_alphabetic() {
            break;
        }
        if ACCEPTED_FLAGS.contains(candidate) {
            flags.push(candidate);
        }
    }
    flags
}
839
/// Validates the flag string that follows a substitution's replacement.
///
/// Characters are read until the first whitespace character or `;`, which
/// ends the flag list. Every character before that point must be one of
/// `g i m s x o e r a d l u n p c`.
///
/// # Errors
///
/// Returns `Err(c)` with the first character that is not an accepted flag.
pub fn validate_substitution_modifiers(modifiers_str: &str) -> Result<String, char> {
    modifiers_str
        .chars()
        // Whitespace and `;` end the flag list without being an error.
        .take_while(|&c| !(c.is_whitespace() || c == ';'))
        .map(|c| {
            let accepted = matches!(
                c,
                'g' | 'i'
                    | 'm'
                    | 's'
                    | 'x'
                    | 'o'
                    | 'e'
                    | 'r'
                    | 'a'
                    | 'd'
                    | 'l'
                    | 'u'
                    | 'n'
                    | 'p'
                    | 'c'
            );
            if accepted {
                Ok(c)
            } else {
                Err(c)
            }
        })
        .collect()
}