1use std::borrow::Cow;
7
8pub fn extract_regex_parts(text: &str) -> (String, String, String) {
10 let content = if let Some(stripped) = text.strip_prefix("qr") {
12 stripped
13 } else if let Some(stripped) = strip_match_prefix(text) {
14 stripped
15 } else {
16 text
17 };
18
19 let delimiter = match content.chars().next() {
21 Some(d) => d,
22 None => return (String::new(), String::new(), String::new()),
23 };
24 let closing = get_closing_delimiter(delimiter);
25
26 let (body, modifiers) = extract_delimited_content(content, delimiter, closing);
28
29 let pattern = format!("{}{}{}", delimiter, body, closing);
31
32 (pattern, body, modifiers.to_string())
33}
34
/// Removes a leading `m` when it is followed by a non-alphabetic character.
///
/// Returns `None` when `text` does not start with `m`, when nothing follows
/// the `m`, or when the following character is alphabetic (so the `m` is
/// treated as the start of a word rather than an operator).
fn strip_match_prefix(text: &str) -> Option<&str> {
    match text.strip_prefix('m') {
        Some(rest) if rest.chars().next().is_some_and(|d| !d.is_alphabetic()) => Some(rest),
        _ => None,
    }
}
40
/// Reasons strict parsing of an `s` substitution operator can fail.
///
/// `Eq` is derived alongside `PartialEq` because every payload (`char`) has
/// total equality; this lets the error be used wherever `Eq` is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubstitutionError {
    /// A character in the modifier position was rejected; carries that character.
    InvalidModifier(char),
    /// Nothing followed the operator prefix, so no delimiter could be read.
    MissingDelimiter,
    /// The pattern part is missing.
    MissingPattern,
    /// The pattern was closed but no replacement part followed it.
    MissingReplacement,
    /// The pattern or the replacement was never terminated by its closing delimiter.
    MissingClosingDelimiter,
}
55
/// Reasons strict parsing of a `tr`/`y` transliteration operator can fail.
///
/// `Eq` is derived alongside `PartialEq` because every payload (`char`) has
/// total equality; this lets the error be used wherever `Eq` is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransliterationError {
    /// A character in the modifier position was rejected; carries that character.
    InvalidModifier(char),
    /// A delimiter was alphanumeric or whitespace; carries that character.
    InvalidDelimiter(char),
    /// Nothing followed the operator prefix, so no delimiter could be read.
    MissingDelimiter,
    /// The search list is missing.
    MissingSearch,
    /// The search list was closed but no replacement list followed it.
    MissingReplacement,
    /// The search or replacement list was never terminated by its closing delimiter.
    MissingClosingDelimiter,
}
72
73pub fn extract_substitution_parts_strict(
88 text: &str,
89) -> Result<(String, String, String), SubstitutionError> {
90 let after_s = text.strip_prefix('s').unwrap_or(text);
92 let content = after_s.trim_start();
94
95 let delimiter = match content.chars().next() {
97 Some(d) => d,
98 None => return Err(SubstitutionError::MissingDelimiter),
99 };
100 let closing = get_closing_delimiter(delimiter);
101 let is_paired = delimiter != closing;
102
103 let (pattern, rest1, pattern_closed) =
105 extract_delimited_content_strict(content, delimiter, closing);
106
107 if !is_paired && !pattern_closed {
109 return Err(SubstitutionError::MissingClosingDelimiter);
110 }
111
112 if is_paired && !pattern_closed {
114 return Err(SubstitutionError::MissingClosingDelimiter);
115 }
116
117 let (replacement, modifiers_str, replacement_closed) = if !is_paired {
121 if rest1.is_empty() {
123 return Err(SubstitutionError::MissingReplacement);
124 }
125
126 let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
129 (body, rest, found_closing)
130 } else {
131 let trimmed = skip_paired_replacement_gap(rest1);
134 if let Some(rd) = trimmed.chars().next() {
135 let repl_closing = get_closing_delimiter(rd);
136 extract_delimited_content_strict(trimmed, rd, repl_closing)
137 } else {
138 return Err(SubstitutionError::MissingReplacement);
140 }
141 };
142
143 if !is_paired && !replacement_closed {
145 return Err(SubstitutionError::MissingClosingDelimiter);
146 }
147
148 if is_paired && !replacement_closed {
150 return Err(SubstitutionError::MissingClosingDelimiter);
151 }
152
153 let modifiers = validate_substitution_modifiers(modifiers_str)
155 .map_err(SubstitutionError::InvalidModifier)?;
156
157 Ok((pattern, replacement, modifiers))
158}
159
160fn skip_paired_replacement_gap(mut text: &str) -> &str {
161 let mut comment_eligible = false;
162 loop {
163 let trimmed = text.trim_start_matches(char::is_whitespace);
164 let saw_whitespace = trimmed.len() != text.len();
165 text = trimmed;
166 comment_eligible |= saw_whitespace;
167
168 if comment_eligible && text.starts_with('#') {
169 text = after_line_comment(text);
170 comment_eligible = true;
171 continue;
172 }
173
174 return text;
175 }
176}
177
/// Returns the text following the first `\n` or `\r` in `text`.
///
/// Only a single terminator character is consumed, so for a `\r\n` sequence
/// the returned slice still starts with `\n`. Returns `""` when `text`
/// contains no line terminator.
fn after_line_comment(text: &str) -> &str {
    text.split_once(|c: char| c == '\n' || c == '\r')
        .map_or("", |(_, tail)| tail)
}
186
/// Reads one delimited section from the start of `text`.
///
/// `text` must begin with `open`. When `open != close` the delimiters nest,
/// and the section ends at the `close` that balances the first `open`;
/// otherwise it ends at the first unescaped `close`. A backslash and the
/// character after it are copied into the body verbatim and never treated as
/// delimiters.
///
/// # Returns
///
/// `(body, rest, found_closing)`:
/// * `body` – text between the delimiters (delimiters excluded).
/// * `rest` – text after the closing delimiter, or `""` if it was never found.
/// * `found_closing` – whether the section was properly terminated.
///
/// If `text` is empty the result is `("", "", false)`; if it does not start
/// with `open` the result is `("", text, false)`.
fn extract_delimited_content_strict(text: &str, open: char, close: char) -> (String, &str, bool) {
    let mut cursor = text.char_indices();
    match cursor.next() {
        None => return (String::new(), "", false),
        Some((_, first)) if first != open => return (String::new(), text, false),
        Some(_) => {}
    }

    let nests = open != close;
    // Only consulted when `nests` is true; counts currently open brackets.
    let mut nesting = 1usize;
    let mut body = String::new();

    while let Some((idx, ch)) = cursor.next() {
        if ch == '\\' {
            // Keep the escape pair intact and skip delimiter checks for it.
            body.push(ch);
            if let Some((_, literal)) = cursor.next() {
                body.push(literal);
            }
        } else if nests && ch == open {
            nesting += 1;
            body.push(ch);
        } else if ch == close {
            if nests {
                nesting -= 1;
                if nesting > 0 {
                    body.push(ch);
                    continue;
                }
            }
            return (body, &text[idx + ch.len_utf8()..], true);
        } else {
            body.push(ch);
        }
    }

    (body, &text[text.len()..], false)
}
245
246pub fn extract_substitution_parts(text: &str) -> (String, String, String) {
259 let content = text.strip_prefix('s').unwrap_or(text);
261
262 let delimiter = match content.chars().next() {
264 Some(d) => d,
265 None => return (String::new(), String::new(), String::new()),
266 };
267 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
268 if let Some((pattern, replacement, modifiers_str)) = split_on_last_paired_delimiter(content)
269 {
270 let modifiers = extract_substitution_modifiers(&modifiers_str);
271 return (pattern, replacement, modifiers);
272 }
273
274 return (String::new(), String::new(), String::new());
275 }
276 let closing = get_closing_delimiter(delimiter);
277 let is_paired = delimiter != closing;
278
279 let (mut pattern, rest1, pattern_closed) = if is_paired {
281 extract_substitution_pattern_with_replacement_hint(content, delimiter, closing)
282 } else {
283 extract_delimited_content_strict(content, delimiter, closing)
284 };
285
286 let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
290 let (body, rest, _found) = extract_unpaired_body_skip_strings(rest1, closing);
293 (body, Cow::Borrowed(rest))
294 } else if !is_paired && !pattern_closed {
295 if let Some((fallback_pattern, fallback_replacement, fallback_modifiers)) =
296 split_unclosed_substitution_pattern(&pattern)
297 {
298 pattern = fallback_pattern;
299 (fallback_replacement, Cow::Owned(fallback_modifiers))
300 } else {
301 (String::new(), Cow::Borrowed(rest1))
302 }
303 } else if is_paired {
304 let trimmed = rest1.trim_start();
305 if let Some(rd) = trimmed.chars().next() {
306 if rd.is_ascii_alphanumeric() || rd.is_whitespace() {
307 (String::new(), Cow::Borrowed(trimmed))
308 } else {
309 let repl_closing = get_closing_delimiter(rd);
310 let (body, rest) = extract_delimited_content(trimmed, rd, repl_closing);
311 (body, Cow::Borrowed(rest))
312 }
313 } else {
314 (String::new(), Cow::Borrowed(trimmed))
315 }
316 } else {
317 (String::new(), Cow::Borrowed(rest1))
318 };
319
320 let modifiers = extract_substitution_modifiers(modifiers_str.as_ref());
322
323 (pattern, replacement, modifiers)
324}
325
326pub fn extract_transliteration_parts(text: &str) -> (String, String, String) {
328 let after_op = if let Some(stripped) = text.strip_prefix("tr") {
330 stripped
331 } else if let Some(stripped) = text.strip_prefix('y') {
332 stripped
333 } else {
334 text
335 };
336 let content = after_op.trim_start();
337
338 let delimiter = match content.chars().next() {
340 Some(d) => d,
341 None => return (String::new(), String::new(), String::new()),
342 };
343 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
344 return (String::new(), String::new(), String::new());
345 }
346 let closing = get_closing_delimiter(delimiter);
347 let is_paired = delimiter != closing;
348
349 let (search, rest1) = extract_delimited_content(content, delimiter, closing);
351
352 let rest2_owned;
355 let rest2 = if is_paired {
356 rest1.trim_start()
357 } else {
358 rest2_owned = format!("{}{}", delimiter, rest1);
359 &rest2_owned
360 };
361
362 let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
364 let chars = rest1.char_indices();
366 let mut body = String::new();
367 let mut escaped = false;
368 let mut end_pos = rest1.len();
369
370 for (i, ch) in chars {
371 if escaped {
372 body.push(ch);
373 escaped = false;
374 continue;
375 }
376
377 match ch {
378 '\\' => {
379 body.push(ch);
380 escaped = true;
381 }
382 c if c == closing => {
383 end_pos = i + ch.len_utf8();
384 break;
385 }
386 _ => body.push(ch),
387 }
388 }
389
390 (body, &rest1[end_pos..])
391 } else if is_paired {
392 if let Some(repl_delimiter) = starts_with_paired_delimiter(rest2) {
393 let repl_closing = get_closing_delimiter(repl_delimiter);
394 extract_delimited_content(rest2, repl_delimiter, repl_closing)
395 } else if let Some(repl_delimiter) = rest2.chars().next() {
396 if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
397 (String::new(), rest2)
398 } else {
399 extract_delimited_content(rest2, repl_delimiter, repl_delimiter)
400 }
401 } else {
402 (String::new(), rest2)
403 }
404 } else {
405 (String::new(), rest1)
406 };
407
408 let modifiers = modifiers_str
411 .chars()
412 .take_while(|c| c.is_ascii_alphabetic())
413 .filter(|&c| matches!(c, 'c' | 'd' | 's' | 'r'))
414 .collect();
415
416 (search, replacement, modifiers)
417}
418
419pub fn extract_transliteration_parts_strict(
429 text: &str,
430) -> Result<(String, String, String), TransliterationError> {
431 let after_op = if let Some(stripped) = text.strip_prefix("tr") {
433 stripped
434 } else if let Some(stripped) = text.strip_prefix('y') {
435 stripped
436 } else {
437 text
438 };
439 let content = after_op.trim_start();
440
441 let delimiter = match content.chars().next() {
443 Some(d) => d,
444 None => return Err(TransliterationError::MissingDelimiter),
445 };
446 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
447 return Err(TransliterationError::InvalidDelimiter(delimiter));
448 }
449 let closing = get_closing_delimiter(delimiter);
450 let is_paired = delimiter != closing;
451
452 let (search, rest1, search_closed) =
454 extract_delimited_content_strict(content, delimiter, closing);
455 if !search_closed {
456 return Err(TransliterationError::MissingClosingDelimiter);
457 }
458
459 let (replacement, modifiers_str, replacement_closed) = if !is_paired {
461 if rest1.is_empty() {
462 return Err(TransliterationError::MissingReplacement);
463 }
464 let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
465 (body, rest, found_closing)
466 } else {
467 let trimmed = skip_paired_replacement_gap(rest1);
468 if let Some(repl_delimiter) = trimmed.chars().next() {
469 if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
474 return Err(TransliterationError::InvalidDelimiter(repl_delimiter));
475 }
476 let repl_closing = get_closing_delimiter(repl_delimiter);
477 let (body, rest, found_closing) =
478 extract_delimited_content_strict(trimmed, repl_delimiter, repl_closing);
479 (body, rest, found_closing)
480 } else {
481 return Err(TransliterationError::MissingReplacement);
482 }
483 };
484
485 if !replacement_closed {
486 return Err(TransliterationError::MissingClosingDelimiter);
487 }
488
489 let mut modifiers = String::new();
494 for modifier in modifiers_str.chars().take_while(|c: &char| c.is_ascii_alphanumeric()) {
495 if matches!(modifier, 'c' | 'd' | 's' | 'r') {
496 modifiers.push(modifier);
497 } else {
498 return Err(TransliterationError::InvalidModifier(modifier));
499 }
500 }
501
502 Ok((search, replacement, modifiers))
503}
504
/// Maps an opening delimiter to the character that closes it.
///
/// The four bracket openers `(`, `[`, `{` and `<` map to their counterparts;
/// every other character closes itself.
fn get_closing_delimiter(open: char) -> char {
    const PAIRS: [(char, char); 4] = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')];

    PAIRS
        .iter()
        .find(|(opener, _)| *opener == open)
        .map_or(open, |&(_, closer)| closer)
}
515
/// Reports whether `ch` is one of the bracket openers `{`, `[`, `(` or `<`.
fn is_paired_open(ch: char) -> bool {
    "{[(<".contains(ch)
}
519
520fn starts_with_paired_delimiter(text: &str) -> Option<char> {
521 let trimmed = text.trim_start();
522 match trimmed.chars().next() {
523 Some(ch) if is_paired_open(ch) => Some(ch),
524 _ => None,
525 }
526}
527
/// Reads one delimited section from the start of `text`, without reporting
/// whether it was terminated.
///
/// `text` must begin with `open`. When `open != close` the delimiters nest;
/// otherwise the section ends at the first unescaped `close`. A backslash and
/// the character after it are copied into the body verbatim.
///
/// # Returns
///
/// `(body, rest)` where `rest` is the text after the closing delimiter, or
/// `""` when the section runs to the end of `text`. If `text` is empty the
/// result is `("", "")`; if it does not start with `open` the result is
/// `("", text)`.
fn extract_delimited_content(text: &str, open: char, close: char) -> (String, &str) {
    let mut cursor = text.char_indices();
    let starts_with_open = match cursor.next() {
        Some((_, first)) => first == open,
        None => return (String::new(), ""),
    };
    if !starts_with_open {
        return (String::new(), text);
    }

    let nests = open != close;
    // Only consulted when `nests` is true; counts currently open brackets.
    let mut nesting = 1usize;
    let mut body = String::new();

    while let Some((idx, ch)) = cursor.next() {
        let past = idx + ch.len_utf8();
        match ch {
            '\\' => {
                // Keep the escape pair intact; the escaped character can
                // never act as a delimiter.
                body.push(ch);
                if let Some((_, literal)) = cursor.next() {
                    body.push(literal);
                }
            }
            _ if nests && ch == open => {
                nesting += 1;
                body.push(ch);
            }
            _ if ch == close && !nests => return (body, &text[past..]),
            _ if ch == close => {
                nesting -= 1;
                if nesting == 0 {
                    return (body, &text[past..]);
                }
                body.push(ch);
            }
            _ => body.push(ch),
        }
    }

    (body, &text[text.len()..])
}
582
583fn scan_inner_string(
597 text: &str,
598 pos: usize,
599 quote: char,
600 delimiter: char,
601) -> Option<(usize, bool)> {
602 if is_word_apostrophe(text, pos, quote) {
603 return None;
604 }
605 if text.get(..pos).and_then(|prefix| prefix.chars().next_back()) == Some(quote) {
608 return None;
609 }
610 let start = pos + quote.len_utf8();
611 let rest = text.get(start..)?;
612 if rest.starts_with(quote) {
613 return None;
614 }
615 let mut escaped = false;
616 let mut contains_delim = false;
617 let mut end_of_string = None;
618 let mut local_pos = start;
619 for ch in rest.chars() {
620 if escaped {
621 escaped = false;
622 local_pos += ch.len_utf8();
623 continue;
624 }
625 if ch == '\\' {
626 escaped = true;
627 local_pos += ch.len_utf8();
628 continue;
629 }
630 if ch == '\n' {
632 return None;
633 }
634 if ch == delimiter {
635 contains_delim = true;
636 }
637 if ch == quote {
638 end_of_string = Some(local_pos + ch.len_utf8());
639 break;
640 }
641 local_pos += ch.len_utf8();
642 }
643 end_of_string.map(|end| (end, contains_delim))
644}
645
/// Reports whether the quote at byte offset `pos` is an apostrophe that
/// directly follows an ASCII alphanumeric character or `_`.
///
/// Always `false` for quotes other than `'`, when `pos` is at the start of
/// `text`, or when `pos` is not a valid slice boundary.
fn is_word_apostrophe(text: &str, pos: usize, quote: char) -> bool {
    if quote != '\'' {
        return false;
    }
    match text.get(..pos).and_then(|head| head.chars().next_back()) {
        Some(previous) => previous == '_' || previous.is_ascii_alphanumeric(),
        None => false,
    }
}
653
654fn extract_unpaired_body_skip_strings(text: &str, closing: char) -> (String, &str, bool) {
664 let mut body = String::new();
665 let mut end_pos = text.len();
666 let mut found_closing = false;
667 let mut pos = 0usize;
668 let mut escaped = false;
669
670 while let Some(ch) = text.get(pos..).and_then(|s| s.chars().next()) {
671 if escaped {
672 body.push(ch);
673 escaped = false;
674 pos += ch.len_utf8();
675 continue;
676 }
677
678 match ch {
679 '\\' => {
680 body.push(ch);
681 escaped = true;
682 pos += ch.len_utf8();
683 }
684 '"' | '\'' if ch != closing => {
690 let quote = ch;
691 match scan_inner_string(text, pos, quote, closing) {
692 Some((string_end, true)) => {
693 let string_text = &text[pos..string_end];
695 body.push_str(string_text);
696 pos = string_end;
697 }
698 _ => {
699 body.push(ch);
702 pos += ch.len_utf8();
703 }
704 }
705 }
706 c if c == closing => {
707 end_pos = pos + ch.len_utf8();
708 found_closing = true;
709 break;
710 }
711 _ => {
712 body.push(ch);
713 pos += ch.len_utf8();
714 }
715 }
716 }
717
718 (body, &text[end_pos..], found_closing)
719}
720
721fn extract_substitution_pattern_with_replacement_hint(
722 text: &str,
723 open: char,
724 close: char,
725) -> (String, &str, bool) {
726 let mut chars = text.char_indices();
727
728 if let Some((_, c)) = chars.next() {
730 if c != open {
731 return (String::new(), text, false);
732 }
733 } else {
734 return (String::new(), "", false);
735 }
736
737 let mut body = String::new();
738 let mut depth = 1usize;
739 let mut escaped = false;
740 let mut first_close_pos: Option<usize> = None;
741 let mut first_body_len: usize = 0;
742
743 for (i, ch) in chars {
744 if escaped {
745 body.push(ch);
746 escaped = false;
747 continue;
748 }
749
750 match ch {
751 '\\' => {
752 body.push(ch);
753 escaped = true;
754 }
755 c if c == open => {
756 body.push(ch);
757 depth += 1;
758 }
759 c if c == close => {
760 if depth > 1 {
761 depth -= 1;
762 body.push(ch);
763 continue;
764 }
765
766 let rest = &text[i + ch.len_utf8()..];
767 if first_close_pos.is_none() {
768 first_close_pos = Some(i + ch.len_utf8());
769 first_body_len = body.len();
770 }
771
772 if starts_with_paired_delimiter(rest).is_some() {
773 return (body, rest, true);
774 }
775
776 body.push(ch);
777 }
778 _ => body.push(ch),
779 }
780 }
781
782 if let Some(pos) = first_close_pos {
783 body.truncate(first_body_len);
784 return (body, &text[pos..], true);
785 }
786
787 (body, "", false)
788}
789
790fn split_unclosed_substitution_pattern(pattern: &str) -> Option<(String, String, String)> {
791 let mut escaped = false;
792
793 for (idx, ch) in pattern.char_indices() {
794 if escaped {
795 escaped = false;
796 continue;
797 }
798
799 if ch == '\\' {
800 escaped = true;
801 continue;
802 }
803
804 if is_paired_open(ch) {
805 let closing = get_closing_delimiter(ch);
806 let (replacement, rest, found_closing) =
807 extract_delimited_content_strict(&pattern[idx..], ch, closing);
808 if found_closing {
809 let leading = pattern[..idx].to_string();
810 return Some((leading, replacement, rest.to_string()));
811 }
812 }
813 }
814
815 None
816}
817
818fn split_on_last_paired_delimiter(text: &str) -> Option<(String, String, String)> {
819 let mut escaped = false;
820 let mut candidates = Vec::new();
821
822 for (idx, ch) in text.char_indices() {
823 if escaped {
824 escaped = false;
825 continue;
826 }
827
828 if ch == '\\' {
829 escaped = true;
830 continue;
831 }
832
833 if is_paired_open(ch) {
834 candidates.push((idx, ch));
835 }
836 }
837
838 for (idx, ch) in candidates.into_iter().rev() {
839 let closing = get_closing_delimiter(ch);
840 let (replacement, rest, found_closing) =
841 extract_delimited_content_strict(&text[idx..], ch, closing);
842 if found_closing {
843 let leading = text[..idx].to_string();
844 return Some((leading, replacement, rest.to_string()));
845 }
846 }
847
848 None
849}
850
/// Collects the recognised substitution modifiers from the start of `text`.
///
/// Only the leading run of ASCII letters is examined; within that run,
/// letters that are not in the recognised set are silently dropped.
fn extract_substitution_modifiers(text: &str) -> String {
    const RECOGNISED: &str = "gimsxoeradlunpc";

    let run_end = text
        .find(|c: char| !c.is_ascii_alphabetic())
        .unwrap_or(text.len());

    text[..run_end]
        .chars()
        .filter(|&c| RECOGNISED.contains(c))
        .collect()
}
884
/// Validates the modifier text that follows a substitution operator.
///
/// Characters are read from the start of `modifiers_str`. Recognised modifier
/// letters are collected; reading stops quietly at the first whitespace
/// character or `;`.
///
/// # Errors
///
/// Returns `Err(c)` with the offending character as soon as anything else is
/// met: an unrecognised ASCII letter, or any other non-letter character.
pub fn validate_substitution_modifiers(modifiers_str: &str) -> Result<String, char> {
    const RECOGNISED: &str = "gimsxoeradlunpc";

    let mut accepted = String::new();
    for flag in modifiers_str.chars() {
        if RECOGNISED.contains(flag) {
            accepted.push(flag);
        } else if flag.is_whitespace() || flag == ';' {
            // End of the modifier list; whatever follows is not ours to judge.
            break;
        } else {
            return Err(flag);
        }
    }

    Ok(accepted)
}