1use std::borrow::Cow;
7
8pub fn extract_regex_parts(text: &str) -> (String, String, String) {
10 let content = if let Some(stripped) = text.strip_prefix("qr") {
12 stripped
13 } else if let Some(stripped) = strip_match_prefix(text) {
14 stripped
15 } else {
16 text
17 };
18
19 let delimiter = match content.chars().next() {
21 Some(d) => d,
22 None => return (String::new(), String::new(), String::new()),
23 };
24 let closing = get_closing_delimiter(delimiter);
25
26 let (body, modifiers) = extract_delimited_content(content, delimiter, closing);
28
29 let pattern = format!("{}{}{}", delimiter, body, closing);
31
32 (pattern, body, modifiers.to_string())
33}
34
/// Removes a leading `m` when the character after it is not alphabetic.
///
/// Returns `None` when `text` does not start with `m`, when nothing follows
/// the `m`, or when the following character is alphabetic (so words such as
/// `my` are not mistaken for a match operator).
fn strip_match_prefix(text: &str) -> Option<&str> {
    text.strip_prefix('m').filter(|rest| {
        rest.chars()
            .next()
            .is_some_and(|first| !first.is_alphabetic())
    })
}
40
/// Errors reported while strictly parsing a substitution (`s///`) operator.
///
/// Every payload is a plain `char`, so the type is fully `Eq` as well as
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubstitutionError {
    /// A character in the modifier list is not an accepted substitution
    /// modifier; carries the offending character.
    InvalidModifier(char),
    /// The input ended before any delimiter character was found.
    MissingDelimiter,
    /// The pattern section is missing.
    // NOTE(review): no code visible in this part of the file constructs this
    // variant — confirm where (or whether) it is produced.
    MissingPattern,
    /// The pattern was terminated but no replacement section followed it.
    MissingReplacement,
    /// The pattern or the replacement section was never terminated.
    MissingClosingDelimiter,
}
55
/// Errors reported while strictly parsing a transliteration (`tr///` / `y///`)
/// operator.
///
/// Every payload is a plain `char`, so the type is fully `Eq` as well as
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransliterationError {
    /// A character in the modifier list is not one of `c`, `d`, `s`, `r`;
    /// carries the offending character.
    InvalidModifier(char),
    /// The delimiter is ASCII-alphanumeric or whitespace; carries the
    /// rejected character.
    InvalidDelimiter(char),
    /// The input ended before any delimiter character was found.
    MissingDelimiter,
    /// The search list between the first pair of delimiters is empty.
    MissingSearch,
    /// The search list was terminated but no replacement section followed it.
    MissingReplacement,
    /// The search list or the replacement section was never terminated.
    MissingClosingDelimiter,
}
72
73pub fn extract_substitution_parts_strict(
88 text: &str,
89) -> Result<(String, String, String), SubstitutionError> {
90 let after_s = text.strip_prefix('s').unwrap_or(text);
92 let content = after_s.trim_start();
94
95 let delimiter = match content.chars().next() {
97 Some(d) => d,
98 None => return Err(SubstitutionError::MissingDelimiter),
99 };
100 let closing = get_closing_delimiter(delimiter);
101 let is_paired = delimiter != closing;
102
103 let (pattern, rest1, pattern_closed) =
105 extract_delimited_content_strict(content, delimiter, closing);
106
107 if !is_paired && !pattern_closed {
109 return Err(SubstitutionError::MissingClosingDelimiter);
110 }
111
112 if is_paired && !pattern_closed {
114 return Err(SubstitutionError::MissingClosingDelimiter);
115 }
116
117 let (replacement, modifiers_str, replacement_closed) = if !is_paired {
121 if rest1.is_empty() {
123 return Err(SubstitutionError::MissingReplacement);
124 }
125
126 let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
129 (body, rest, found_closing)
130 } else {
131 let trimmed = skip_paired_replacement_gap(rest1);
134 if let Some(rd) = trimmed.chars().next() {
135 let repl_closing = get_closing_delimiter(rd);
136 extract_delimited_content_strict(trimmed, rd, repl_closing)
137 } else {
138 return Err(SubstitutionError::MissingReplacement);
140 }
141 };
142
143 if !is_paired && !replacement_closed {
145 return Err(SubstitutionError::MissingClosingDelimiter);
146 }
147
148 if is_paired && !replacement_closed {
150 return Err(SubstitutionError::MissingClosingDelimiter);
151 }
152
153 let modifiers = validate_substitution_modifiers(modifiers_str)
155 .map_err(SubstitutionError::InvalidModifier)?;
156
157 Ok((pattern, replacement, modifiers))
158}
159
160fn skip_paired_replacement_gap(mut text: &str) -> &str {
161 let mut comment_eligible = false;
162 loop {
163 let trimmed = text.trim_start_matches(char::is_whitespace);
164 let saw_whitespace = trimmed.len() != text.len();
165 text = trimmed;
166 comment_eligible |= saw_whitespace;
167
168 if comment_eligible && text.starts_with('#') {
169 text = after_line_comment(text);
170 comment_eligible = true;
171 continue;
172 }
173
174 return text;
175 }
176}
177
/// Returns the text following the first `\n` or `\r` in `text`.
///
/// Only a single line-break character is consumed, so for `\r\n` the result
/// starts at the `\n`. Returns `""` when `text` contains no line break.
fn after_line_comment(text: &str) -> &str {
    match text.find(|ch: char| ch == '\n' || ch == '\r') {
        // Both line-break characters are one byte wide in UTF-8.
        Some(line_break) => &text[line_break + 1..],
        None => "",
    }
}
186
/// Extracts the body of a delimited section and reports whether it was closed.
///
/// `text` must begin with `open`; otherwise `(String::new(), text, false)` is
/// returned. When `open != close` nested pairs are balanced. A backslash keeps
/// itself and the following character in the body without interpreting them.
///
/// # Returns
/// `(body, rest, found_closing)` where `rest` is the text after the closing
/// delimiter, or `""` (with `found_closing == false`) when the section is
/// unterminated.
fn extract_delimited_content_strict(text: &str, open: char, close: char) -> (String, &str, bool) {
    let Some(inner) = text.strip_prefix(open) else {
        return (String::new(), text, false);
    };

    let nests = open != close;
    let mut level = 1usize;
    let mut after_backslash = false;
    let mut body = String::new();

    for (offset, ch) in inner.char_indices() {
        if after_backslash {
            after_backslash = false;
        } else if ch == '\\' {
            after_backslash = true;
        } else if nests && ch == open {
            level += 1;
        } else if ch == close {
            if !nests || level == 1 {
                // This is the terminator; it is not part of the body.
                return (body, &inner[offset + ch.len_utf8()..], true);
            }
            level -= 1;
        }
        body.push(ch);
    }

    (body, "", false)
}
245
246pub fn extract_substitution_parts(text: &str) -> (String, String, String) {
259 let content = text.strip_prefix('s').unwrap_or(text);
261
262 let delimiter = match content.chars().next() {
264 Some(d) => d,
265 None => return (String::new(), String::new(), String::new()),
266 };
267 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
268 if let Some((pattern, replacement, modifiers_str)) = split_on_last_paired_delimiter(content)
269 {
270 let modifiers = extract_substitution_modifiers(&modifiers_str);
271 return (pattern, replacement, modifiers);
272 }
273
274 return (String::new(), String::new(), String::new());
275 }
276 let closing = get_closing_delimiter(delimiter);
277 let is_paired = delimiter != closing;
278
279 let (mut pattern, rest1, pattern_closed) = if is_paired {
281 extract_substitution_pattern_with_replacement_hint(content, delimiter, closing)
282 } else {
283 extract_delimited_content_strict(content, delimiter, closing)
284 };
285
286 let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
290 let (body, rest, _found) = extract_unpaired_body_skip_strings(rest1, closing);
293 (body, Cow::Borrowed(rest))
294 } else if !is_paired && !pattern_closed {
295 if let Some((fallback_pattern, fallback_replacement, fallback_modifiers)) =
296 split_unclosed_substitution_pattern(&pattern)
297 {
298 pattern = fallback_pattern;
299 (fallback_replacement, Cow::Owned(fallback_modifiers))
300 } else {
301 (String::new(), Cow::Borrowed(rest1))
302 }
303 } else if is_paired {
304 let trimmed = rest1.trim_start();
305 if let Some(rd) = trimmed.chars().next() {
306 if rd.is_ascii_alphanumeric() || rd.is_whitespace() {
307 (String::new(), Cow::Borrowed(trimmed))
308 } else {
309 let repl_closing = get_closing_delimiter(rd);
310 let (body, rest) = extract_delimited_content(trimmed, rd, repl_closing);
311 (body, Cow::Borrowed(rest))
312 }
313 } else {
314 (String::new(), Cow::Borrowed(trimmed))
315 }
316 } else {
317 (String::new(), Cow::Borrowed(rest1))
318 };
319
320 let modifiers = extract_substitution_modifiers(modifiers_str.as_ref());
322
323 (pattern, replacement, modifiers)
324}
325
326pub fn extract_transliteration_parts(text: &str) -> (String, String, String) {
328 let after_op = if let Some(stripped) = text.strip_prefix("tr") {
330 stripped
331 } else if let Some(stripped) = text.strip_prefix('y') {
332 stripped
333 } else {
334 text
335 };
336 let content = after_op.trim_start();
337
338 let delimiter = match content.chars().next() {
340 Some(d) => d,
341 None => return (String::new(), String::new(), String::new()),
342 };
343 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
344 return (String::new(), String::new(), String::new());
345 }
346 let closing = get_closing_delimiter(delimiter);
347 let is_paired = delimiter != closing;
348
349 let (search, rest1) = extract_delimited_content(content, delimiter, closing);
351
352 let rest2_owned;
355 let rest2 = if is_paired {
356 rest1.trim_start()
357 } else {
358 rest2_owned = format!("{}{}", delimiter, rest1);
359 &rest2_owned
360 };
361
362 let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
364 let chars = rest1.char_indices();
366 let mut body = String::new();
367 let mut escaped = false;
368 let mut end_pos = rest1.len();
369
370 for (i, ch) in chars {
371 if escaped {
372 body.push(ch);
373 escaped = false;
374 continue;
375 }
376
377 match ch {
378 '\\' => {
379 body.push(ch);
380 escaped = true;
381 }
382 c if c == closing => {
383 end_pos = i + ch.len_utf8();
384 break;
385 }
386 _ => body.push(ch),
387 }
388 }
389
390 (body, &rest1[end_pos..])
391 } else if is_paired {
392 if let Some(repl_delimiter) = starts_with_paired_delimiter(rest2) {
393 let repl_closing = get_closing_delimiter(repl_delimiter);
394 extract_delimited_content(rest2, repl_delimiter, repl_closing)
395 } else if let Some(repl_delimiter) = rest2.chars().next() {
396 if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
397 (String::new(), rest2)
398 } else {
399 extract_delimited_content(rest2, repl_delimiter, repl_delimiter)
400 }
401 } else {
402 (String::new(), rest2)
403 }
404 } else {
405 (String::new(), rest1)
406 };
407
408 let modifiers = modifiers_str
411 .chars()
412 .take_while(|c| c.is_ascii_alphabetic())
413 .filter(|&c| matches!(c, 'c' | 'd' | 's' | 'r'))
414 .collect();
415
416 (search, replacement, modifiers)
417}
418
419pub fn extract_transliteration_parts_strict(
429 text: &str,
430) -> Result<(String, String, String), TransliterationError> {
431 let after_op = if let Some(stripped) = text.strip_prefix("tr") {
433 stripped
434 } else if let Some(stripped) = text.strip_prefix('y') {
435 stripped
436 } else {
437 text
438 };
439 let content = after_op.trim_start();
440
441 let delimiter = match content.chars().next() {
443 Some(d) => d,
444 None => return Err(TransliterationError::MissingDelimiter),
445 };
446 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
447 return Err(TransliterationError::InvalidDelimiter(delimiter));
448 }
449 let closing = get_closing_delimiter(delimiter);
450 let is_paired = delimiter != closing;
451
452 let (search, rest1, search_closed) =
454 extract_delimited_content_strict(content, delimiter, closing);
455 if !search_closed {
456 return Err(TransliterationError::MissingClosingDelimiter);
457 }
458
459 let (replacement, modifiers_str, replacement_closed) = if !is_paired {
461 if rest1.is_empty() {
462 return Err(TransliterationError::MissingReplacement);
463 }
464 let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
465 (body, rest, found_closing)
466 } else {
467 let trimmed = skip_paired_replacement_gap(rest1);
468 if let Some(repl_delimiter) = trimmed.chars().next() {
469 if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
474 return Err(TransliterationError::InvalidDelimiter(repl_delimiter));
475 }
476 let repl_closing = get_closing_delimiter(repl_delimiter);
477 let (body, rest, found_closing) =
478 extract_delimited_content_strict(trimmed, repl_delimiter, repl_closing);
479 (body, rest, found_closing)
480 } else {
481 return Err(TransliterationError::MissingReplacement);
482 }
483 };
484
485 if !replacement_closed {
486 return Err(TransliterationError::MissingClosingDelimiter);
487 }
488
489 if search.is_empty() {
490 return Err(TransliterationError::MissingSearch);
491 }
492
493 let mut modifiers = String::new();
495 for modifier in modifiers_str.chars().take_while(|c: &char| c.is_ascii_alphanumeric()) {
496 if matches!(modifier, 'c' | 'd' | 's' | 'r') {
497 modifiers.push(modifier);
498 } else {
499 return Err(TransliterationError::InvalidModifier(modifier));
500 }
501 }
502
503 Ok((search, replacement, modifiers))
504}
505
/// Returns the delimiter that closes `open`.
///
/// The four bracket openers `(`, `[`, `{`, `<` map to their counterparts;
/// every other character closes itself.
fn get_closing_delimiter(open: char) -> char {
    const BRACKET_PAIRS: [(char, char); 4] = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')];

    BRACKET_PAIRS
        .iter()
        .find(|&&(opener, _)| opener == open)
        .map_or(open, |&(_, closer)| closer)
}
516
/// Reports whether `ch` is one of the bracket openers `{`, `[`, `(`, `<`.
fn is_paired_open(ch: char) -> bool {
    ['{', '[', '(', '<'].contains(&ch)
}
520
521fn starts_with_paired_delimiter(text: &str) -> Option<char> {
522 let trimmed = text.trim_start();
523 match trimmed.chars().next() {
524 Some(ch) if is_paired_open(ch) => Some(ch),
525 _ => None,
526 }
527}
528
/// Extracts the body of a delimited section and the text that follows it.
///
/// `text` must begin with `open`; otherwise `(String::new(), text)` is
/// returned. When `open != close` nested pairs are balanced. A backslash keeps
/// itself and the following character in the body without interpreting them.
/// An unterminated section yields everything after `open` as the body and
/// `""` as the rest.
fn extract_delimited_content(text: &str, open: char, close: char) -> (String, &str) {
    let Some(inner) = text.strip_prefix(open) else {
        return (String::new(), text);
    };

    let nests = open != close;
    let mut level = 1usize;
    let mut after_backslash = false;
    let mut body = String::new();

    for (offset, ch) in inner.char_indices() {
        if after_backslash {
            after_backslash = false;
        } else if ch == '\\' {
            after_backslash = true;
        } else if nests && ch == open {
            level += 1;
        } else if ch == close {
            if !nests || level == 1 {
                // This is the terminator; it is not part of the body.
                return (body, &inner[offset + ch.len_utf8()..]);
            }
            level -= 1;
        }
        body.push(ch);
    }

    (body, "")
}
583
584fn scan_inner_string(
598 text: &str,
599 pos: usize,
600 quote: char,
601 delimiter: char,
602) -> Option<(usize, bool)> {
603 if is_word_apostrophe(text, pos, quote) {
604 return None;
605 }
606 if text.get(..pos).and_then(|prefix| prefix.chars().next_back()) == Some(quote) {
609 return None;
610 }
611 let start = pos + quote.len_utf8();
612 let rest = text.get(start..)?;
613 if rest.starts_with(quote) {
614 return None;
615 }
616 let mut escaped = false;
617 let mut contains_delim = false;
618 let mut end_of_string = None;
619 let mut local_pos = start;
620 for ch in rest.chars() {
621 if escaped {
622 escaped = false;
623 local_pos += ch.len_utf8();
624 continue;
625 }
626 if ch == '\\' {
627 escaped = true;
628 local_pos += ch.len_utf8();
629 continue;
630 }
631 if ch == '\n' {
633 return None;
634 }
635 if ch == delimiter {
636 contains_delim = true;
637 }
638 if ch == quote {
639 end_of_string = Some(local_pos + ch.len_utf8());
640 break;
641 }
642 local_pos += ch.len_utf8();
643 }
644 end_of_string.map(|end| (end, contains_delim))
645}
646
/// Reports whether the `'` at byte offset `pos` directly follows a word
/// character (ASCII letter, digit, or `_`), as in `don't`.
///
/// Always `false` for any other `quote`, when `pos` is `0`, or when `pos` is
/// not a valid slice boundary of `text`.
fn is_word_apostrophe(text: &str, pos: usize, quote: char) -> bool {
    if quote != '\'' {
        return false;
    }
    let Some(before) = text.get(..pos) else {
        return false;
    };
    matches!(
        before.chars().next_back(),
        Some(prev) if prev == '_' || prev.is_ascii_alphanumeric()
    )
}
654
655fn extract_unpaired_body_skip_strings(text: &str, closing: char) -> (String, &str, bool) {
665 let mut body = String::new();
666 let mut end_pos = text.len();
667 let mut found_closing = false;
668 let mut pos = 0usize;
669 let mut escaped = false;
670
671 while let Some(ch) = text.get(pos..).and_then(|s| s.chars().next()) {
672 if escaped {
673 body.push(ch);
674 escaped = false;
675 pos += ch.len_utf8();
676 continue;
677 }
678
679 match ch {
680 '\\' => {
681 body.push(ch);
682 escaped = true;
683 pos += ch.len_utf8();
684 }
685 '"' | '\'' if ch != closing => {
691 let quote = ch;
692 match scan_inner_string(text, pos, quote, closing) {
693 Some((string_end, true)) => {
694 let string_text = &text[pos..string_end];
696 body.push_str(string_text);
697 pos = string_end;
698 }
699 _ => {
700 body.push(ch);
703 pos += ch.len_utf8();
704 }
705 }
706 }
707 c if c == closing => {
708 end_pos = pos + ch.len_utf8();
709 found_closing = true;
710 break;
711 }
712 _ => {
713 body.push(ch);
714 pos += ch.len_utf8();
715 }
716 }
717 }
718
719 (body, &text[end_pos..], found_closing)
720}
721
722fn extract_substitution_pattern_with_replacement_hint(
723 text: &str,
724 open: char,
725 close: char,
726) -> (String, &str, bool) {
727 let mut chars = text.char_indices();
728
729 if let Some((_, c)) = chars.next() {
731 if c != open {
732 return (String::new(), text, false);
733 }
734 } else {
735 return (String::new(), "", false);
736 }
737
738 let mut body = String::new();
739 let mut depth = 1usize;
740 let mut escaped = false;
741 let mut first_close_pos: Option<usize> = None;
742 let mut first_body_len: usize = 0;
743
744 for (i, ch) in chars {
745 if escaped {
746 body.push(ch);
747 escaped = false;
748 continue;
749 }
750
751 match ch {
752 '\\' => {
753 body.push(ch);
754 escaped = true;
755 }
756 c if c == open => {
757 body.push(ch);
758 depth += 1;
759 }
760 c if c == close => {
761 if depth > 1 {
762 depth -= 1;
763 body.push(ch);
764 continue;
765 }
766
767 let rest = &text[i + ch.len_utf8()..];
768 if first_close_pos.is_none() {
769 first_close_pos = Some(i + ch.len_utf8());
770 first_body_len = body.len();
771 }
772
773 if starts_with_paired_delimiter(rest).is_some() {
774 return (body, rest, true);
775 }
776
777 body.push(ch);
778 }
779 _ => body.push(ch),
780 }
781 }
782
783 if let Some(pos) = first_close_pos {
784 body.truncate(first_body_len);
785 return (body, &text[pos..], true);
786 }
787
788 (body, "", false)
789}
790
791fn split_unclosed_substitution_pattern(pattern: &str) -> Option<(String, String, String)> {
792 let mut escaped = false;
793
794 for (idx, ch) in pattern.char_indices() {
795 if escaped {
796 escaped = false;
797 continue;
798 }
799
800 if ch == '\\' {
801 escaped = true;
802 continue;
803 }
804
805 if is_paired_open(ch) {
806 let closing = get_closing_delimiter(ch);
807 let (replacement, rest, found_closing) =
808 extract_delimited_content_strict(&pattern[idx..], ch, closing);
809 if found_closing {
810 let leading = pattern[..idx].to_string();
811 return Some((leading, replacement, rest.to_string()));
812 }
813 }
814 }
815
816 None
817}
818
819fn split_on_last_paired_delimiter(text: &str) -> Option<(String, String, String)> {
820 let mut escaped = false;
821 let mut candidates = Vec::new();
822
823 for (idx, ch) in text.char_indices() {
824 if escaped {
825 escaped = false;
826 continue;
827 }
828
829 if ch == '\\' {
830 escaped = true;
831 continue;
832 }
833
834 if is_paired_open(ch) {
835 candidates.push((idx, ch));
836 }
837 }
838
839 for (idx, ch) in candidates.into_iter().rev() {
840 let closing = get_closing_delimiter(ch);
841 let (replacement, rest, found_closing) =
842 extract_delimited_content_strict(&text[idx..], ch, closing);
843 if found_closing {
844 let leading = text[..idx].to_string();
845 return Some((leading, replacement, rest.to_string()));
846 }
847 }
848
849 None
850}
851
/// Collects the accepted substitution modifiers from the start of `text`.
///
/// Only the leading run of ASCII letters is examined; within it, letters that
/// are not accepted modifiers are dropped silently rather than reported.
fn extract_substitution_modifiers(text: &str) -> String {
    const ACCEPTED: &str = "gimsxoeradlunpc";

    let run_len = text
        .find(|c: char| !c.is_ascii_alphabetic())
        .unwrap_or(text.len());

    text[..run_len]
        .chars()
        .filter(|&c| ACCEPTED.contains(c))
        .collect()
}
885
/// Validates the modifier list that follows a substitution operator.
///
/// Characters are read until the end of the string, a whitespace character,
/// or `;` — whichever comes first.
///
/// # Errors
/// Returns the first character that is neither an accepted modifier nor one
/// of the terminators above.
pub fn validate_substitution_modifiers(modifiers_str: &str) -> Result<String, char> {
    const ACCEPTED: &str = "gimsxoeradlunpc";

    let mut accepted = String::new();
    for flag in modifiers_str.chars() {
        match flag {
            ';' => break,
            blank if blank.is_whitespace() => break,
            known if ACCEPTED.contains(known) => accepted.push(known),
            rejected => return Err(rejected),
        }
    }

    Ok(accepted)
}