1use std::borrow::Cow;
7
8pub fn extract_regex_parts(text: &str) -> (String, String, String) {
10 let content = if let Some(stripped) = text.strip_prefix("qr") {
12 stripped
13 } else if text.starts_with('m')
14 && text.len() > 1
15 && text.chars().nth(1).is_some_and(|c| !c.is_alphabetic())
16 {
17 &text[1..]
18 } else {
19 text
20 };
21
22 let delimiter = match content.chars().next() {
24 Some(d) => d,
25 None => return (String::new(), String::new(), String::new()),
26 };
27 let closing = get_closing_delimiter(delimiter);
28
29 let (body, modifiers) = extract_delimited_content(content, delimiter, closing);
31
32 let pattern = format!("{}{}{}", delimiter, body, closing);
34
35 (pattern, body, modifiers.to_string())
36}
37
/// Failure modes reported by `extract_substitution_parts_strict`.
#[derive(Debug, Clone, PartialEq)]
pub enum SubstitutionError {
    /// A character in the modifier suffix was rejected by
    /// `validate_substitution_modifiers`; carries the offending character.
    InvalidModifier(char),
    /// Nothing follows the optional `s` prefix and leading whitespace, so
    /// there is no delimiter character to read.
    MissingDelimiter,
    /// The pattern part is absent.
    /// NOTE(review): no code visible in this section constructs this variant;
    /// confirm whether it is still produced elsewhere.
    MissingPattern,
    /// The pattern was closed but no replacement part follows it.
    MissingReplacement,
    /// The pattern or the replacement was never terminated by its closing
    /// delimiter.
    MissingClosingDelimiter,
}
52
/// Failure modes reported by `extract_transliteration_parts_strict`.
#[derive(Debug, Clone, PartialEq)]
pub enum TransliterationError {
    /// A character in the leading alphanumeric run after the replacement is
    /// not one of `c`, `d`, `s`, `r`; carries the offending character.
    InvalidModifier(char),
    /// The character in delimiter position (for the search list or for a
    /// paired replacement list) is ASCII alphanumeric or whitespace; carries
    /// that character.
    InvalidDelimiter(char),
    /// Nothing follows the optional `tr`/`y` prefix and leading whitespace.
    MissingDelimiter,
    /// The search list between the delimiters is empty.
    MissingSearch,
    /// The search list was closed but no replacement part follows it.
    MissingReplacement,
    /// The search list or the replacement list was never terminated by its
    /// closing delimiter.
    MissingClosingDelimiter,
}
69
70pub fn extract_substitution_parts_strict(
85 text: &str,
86) -> Result<(String, String, String), SubstitutionError> {
87 let after_s = text.strip_prefix('s').unwrap_or(text);
89 let content = after_s.trim_start();
91
92 let delimiter = match content.chars().next() {
94 Some(d) => d,
95 None => return Err(SubstitutionError::MissingDelimiter),
96 };
97 let closing = get_closing_delimiter(delimiter);
98 let is_paired = delimiter != closing;
99
100 let (pattern, rest1, pattern_closed) =
102 extract_delimited_content_strict(content, delimiter, closing);
103
104 if !is_paired && !pattern_closed {
106 return Err(SubstitutionError::MissingClosingDelimiter);
107 }
108
109 if is_paired && !pattern_closed {
111 return Err(SubstitutionError::MissingClosingDelimiter);
112 }
113
114 let (replacement, modifiers_str, replacement_closed) = if !is_paired {
118 if rest1.is_empty() {
120 return Err(SubstitutionError::MissingReplacement);
121 }
122
123 let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
126 (body, rest, found_closing)
127 } else {
128 let trimmed = skip_paired_replacement_gap(rest1);
131 if let Some(rd) = trimmed.chars().next() {
132 let repl_closing = get_closing_delimiter(rd);
133 extract_delimited_content_strict(trimmed, rd, repl_closing)
134 } else {
135 return Err(SubstitutionError::MissingReplacement);
137 }
138 };
139
140 if !is_paired && !replacement_closed {
142 return Err(SubstitutionError::MissingClosingDelimiter);
143 }
144
145 if is_paired && !replacement_closed {
147 return Err(SubstitutionError::MissingClosingDelimiter);
148 }
149
150 let modifiers = validate_substitution_modifiers(modifiers_str)
152 .map_err(SubstitutionError::InvalidModifier)?;
153
154 Ok((pattern, replacement, modifiers))
155}
156
157fn skip_paired_replacement_gap(mut text: &str) -> &str {
158 let mut comment_eligible = false;
159 loop {
160 let trimmed = text.trim_start_matches(char::is_whitespace);
161 let saw_whitespace = trimmed.len() != text.len();
162 text = trimmed;
163 comment_eligible |= saw_whitespace;
164
165 if comment_eligible && text.starts_with('#') {
166 text = after_line_comment(text);
167 comment_eligible = true;
168 continue;
169 }
170
171 return text;
172 }
173}
174
/// Returns the text following the first `\n` or `\r` in `text`, or an empty
/// string when `text` contains no line terminator.
///
/// Only a single terminator character is consumed, so for `\r\n` the result
/// still starts with `\n`.
fn after_line_comment(text: &str) -> &str {
    text.split_once(|ch: char| ch == '\n' || ch == '\r')
        .map_or("", |(_, remainder)| remainder)
}
183
/// Extracts the body enclosed by `open` ... `close` at the start of `text`.
///
/// Returns `(body, rest, found_closing)`:
/// * `body` - the text between the delimiters, escape sequences kept verbatim;
/// * `rest` - the text after the closing delimiter (empty when unterminated);
/// * `found_closing` - whether the terminating delimiter was seen.
///
/// When `open != close` the delimiters nest, and only the closer that balances
/// the first opener terminates the body. A backslash makes the following
/// character literal. If `text` does not begin with `open`, the result is
/// `("", text, false)`.
fn extract_delimited_content_strict(text: &str, open: char, close: char) -> (String, &str, bool) {
    let Some(inner) = text.strip_prefix(open) else {
        return (String::new(), text, false);
    };

    let nests = open != close;
    let mut nesting = 1usize;
    let mut body = String::new();
    let mut cursor = inner.chars();

    while let Some(ch) = cursor.next() {
        if ch == '\\' {
            // Keep the backslash and whatever it escapes, without interpreting it.
            body.push(ch);
            if let Some(literal) = cursor.next() {
                body.push(literal);
            }
        } else if nests && ch == open {
            nesting += 1;
            body.push(ch);
        } else if ch == close {
            if nests {
                nesting -= 1;
            }
            if !nests || nesting == 0 {
                return (body, cursor.as_str(), true);
            }
            body.push(ch);
        } else {
            body.push(ch);
        }
    }

    (body, "", false)
}
242
243pub fn extract_substitution_parts(text: &str) -> (String, String, String) {
256 let content = text.strip_prefix('s').unwrap_or(text);
258
259 let delimiter = match content.chars().next() {
261 Some(d) => d,
262 None => return (String::new(), String::new(), String::new()),
263 };
264 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
265 if let Some((pattern, replacement, modifiers_str)) = split_on_last_paired_delimiter(content)
266 {
267 let modifiers = extract_substitution_modifiers(&modifiers_str);
268 return (pattern, replacement, modifiers);
269 }
270
271 return (String::new(), String::new(), String::new());
272 }
273 let closing = get_closing_delimiter(delimiter);
274 let is_paired = delimiter != closing;
275
276 let (mut pattern, rest1, pattern_closed) = if is_paired {
278 extract_substitution_pattern_with_replacement_hint(content, delimiter, closing)
279 } else {
280 extract_delimited_content_strict(content, delimiter, closing)
281 };
282
283 let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
287 let (body, rest, _found) = extract_unpaired_body_skip_strings(rest1, closing);
290 (body, Cow::Borrowed(rest))
291 } else if !is_paired && !pattern_closed {
292 if let Some((fallback_pattern, fallback_replacement, fallback_modifiers)) =
293 split_unclosed_substitution_pattern(&pattern)
294 {
295 pattern = fallback_pattern;
296 (fallback_replacement, Cow::Owned(fallback_modifiers))
297 } else {
298 (String::new(), Cow::Borrowed(rest1))
299 }
300 } else if is_paired {
301 let trimmed = rest1.trim_start();
302 if let Some(rd) = trimmed.chars().next() {
303 if rd.is_ascii_alphanumeric() || rd.is_whitespace() {
304 (String::new(), Cow::Borrowed(trimmed))
305 } else {
306 let repl_closing = get_closing_delimiter(rd);
307 let (body, rest) = extract_delimited_content(trimmed, rd, repl_closing);
308 (body, Cow::Borrowed(rest))
309 }
310 } else {
311 (String::new(), Cow::Borrowed(trimmed))
312 }
313 } else {
314 (String::new(), Cow::Borrowed(rest1))
315 };
316
317 let modifiers = extract_substitution_modifiers(modifiers_str.as_ref());
319
320 (pattern, replacement, modifiers)
321}
322
323pub fn extract_transliteration_parts(text: &str) -> (String, String, String) {
325 let after_op = if let Some(stripped) = text.strip_prefix("tr") {
327 stripped
328 } else if let Some(stripped) = text.strip_prefix('y') {
329 stripped
330 } else {
331 text
332 };
333 let content = after_op.trim_start();
334
335 let delimiter = match content.chars().next() {
337 Some(d) => d,
338 None => return (String::new(), String::new(), String::new()),
339 };
340 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
341 return (String::new(), String::new(), String::new());
342 }
343 let closing = get_closing_delimiter(delimiter);
344 let is_paired = delimiter != closing;
345
346 let (search, rest1) = extract_delimited_content(content, delimiter, closing);
348
349 let rest2_owned;
352 let rest2 = if is_paired {
353 rest1.trim_start()
354 } else {
355 rest2_owned = format!("{}{}", delimiter, rest1);
356 &rest2_owned
357 };
358
359 let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
361 let chars = rest1.char_indices();
363 let mut body = String::new();
364 let mut escaped = false;
365 let mut end_pos = rest1.len();
366
367 for (i, ch) in chars {
368 if escaped {
369 body.push(ch);
370 escaped = false;
371 continue;
372 }
373
374 match ch {
375 '\\' => {
376 body.push(ch);
377 escaped = true;
378 }
379 c if c == closing => {
380 end_pos = i + ch.len_utf8();
381 break;
382 }
383 _ => body.push(ch),
384 }
385 }
386
387 (body, &rest1[end_pos..])
388 } else if is_paired {
389 if let Some(repl_delimiter) = starts_with_paired_delimiter(rest2) {
390 let repl_closing = get_closing_delimiter(repl_delimiter);
391 extract_delimited_content(rest2, repl_delimiter, repl_closing)
392 } else if let Some(repl_delimiter) = rest2.chars().next() {
393 if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
394 (String::new(), rest2)
395 } else {
396 extract_delimited_content(rest2, repl_delimiter, repl_delimiter)
397 }
398 } else {
399 (String::new(), rest2)
400 }
401 } else {
402 (String::new(), rest1)
403 };
404
405 let modifiers = modifiers_str
408 .chars()
409 .take_while(|c| c.is_ascii_alphabetic())
410 .filter(|&c| matches!(c, 'c' | 'd' | 's' | 'r'))
411 .collect();
412
413 (search, replacement, modifiers)
414}
415
416pub fn extract_transliteration_parts_strict(
426 text: &str,
427) -> Result<(String, String, String), TransliterationError> {
428 let after_op = if let Some(stripped) = text.strip_prefix("tr") {
430 stripped
431 } else if let Some(stripped) = text.strip_prefix('y') {
432 stripped
433 } else {
434 text
435 };
436 let content = after_op.trim_start();
437
438 let delimiter = match content.chars().next() {
440 Some(d) => d,
441 None => return Err(TransliterationError::MissingDelimiter),
442 };
443 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
444 return Err(TransliterationError::InvalidDelimiter(delimiter));
445 }
446 let closing = get_closing_delimiter(delimiter);
447 let is_paired = delimiter != closing;
448
449 let (search, rest1, search_closed) =
451 extract_delimited_content_strict(content, delimiter, closing);
452 if !search_closed {
453 return Err(TransliterationError::MissingClosingDelimiter);
454 }
455
456 let (replacement, modifiers_str, replacement_closed) = if !is_paired {
458 if rest1.is_empty() {
459 return Err(TransliterationError::MissingReplacement);
460 }
461 let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
462 (body, rest, found_closing)
463 } else {
464 let trimmed = skip_paired_replacement_gap(rest1);
465 if let Some(repl_delimiter) = trimmed.chars().next() {
466 if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
471 return Err(TransliterationError::InvalidDelimiter(repl_delimiter));
472 }
473 let repl_closing = get_closing_delimiter(repl_delimiter);
474 let (body, rest, found_closing) =
475 extract_delimited_content_strict(trimmed, repl_delimiter, repl_closing);
476 (body, rest, found_closing)
477 } else {
478 return Err(TransliterationError::MissingReplacement);
479 }
480 };
481
482 if !replacement_closed {
483 return Err(TransliterationError::MissingClosingDelimiter);
484 }
485
486 if search.is_empty() {
487 return Err(TransliterationError::MissingSearch);
488 }
489
490 let mut modifiers = String::new();
492 for modifier in modifiers_str.chars().take_while(|c: &char| c.is_ascii_alphanumeric()) {
493 if matches!(modifier, 'c' | 'd' | 's' | 'r') {
494 modifiers.push(modifier);
495 } else {
496 return Err(TransliterationError::InvalidModifier(modifier));
497 }
498 }
499
500 Ok((search, replacement, modifiers))
501}
502
/// Maps an opening delimiter to its closing counterpart.
///
/// The four bracket openers `(`, `[`, `{`, `<` map to `)`, `]`, `}`, `>`;
/// every other character closes itself.
fn get_closing_delimiter(open: char) -> char {
    const BRACKET_PAIRS: [(char, char); 4] = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')];

    BRACKET_PAIRS
        .iter()
        .find(|&&(opener, _)| opener == open)
        .map_or(open, |&(_, closer)| closer)
}
513
/// Reports whether `ch` is one of the bracket openers `{`, `[`, `(`, `<`.
fn is_paired_open(ch: char) -> bool {
    ['{', '[', '(', '<'].contains(&ch)
}
517
518fn starts_with_paired_delimiter(text: &str) -> Option<char> {
519 let trimmed = text.trim_start();
520 match trimmed.chars().next() {
521 Some(ch) if is_paired_open(ch) => Some(ch),
522 _ => None,
523 }
524}
525
/// Extracts the body enclosed by `open` ... `close` at the start of `text`.
///
/// Returns `(body, rest)`, where `body` is the text between the delimiters
/// with escape sequences kept verbatim and `rest` is whatever follows the
/// closing delimiter. An unterminated body consumes the whole input and
/// leaves `rest` empty. When `open != close` the delimiters nest. If `text`
/// does not begin with `open`, the result is `("", text)`.
fn extract_delimited_content(text: &str, open: char, close: char) -> (String, &str) {
    let Some(inner) = text.strip_prefix(open) else {
        return (String::new(), text);
    };

    let nests = open != close;
    let mut open_count = 1usize;
    let mut body = String::new();
    let mut scanner = inner.char_indices();

    while let Some((offset, ch)) = scanner.next() {
        if ch == '\\' {
            // The escaped character is copied as-is and never acts as a delimiter.
            body.push(ch);
            if let Some((_, literal)) = scanner.next() {
                body.push(literal);
            }
            continue;
        }

        if nests && ch == open {
            open_count += 1;
        } else if ch == close {
            if nests {
                open_count -= 1;
            }
            if !nests || open_count == 0 {
                return (body, &inner[offset + ch.len_utf8()..]);
            }
        }
        body.push(ch);
    }

    (body, "")
}
580
581fn scan_inner_string(
595 text: &str,
596 pos: usize,
597 quote: char,
598 delimiter: char,
599) -> Option<(usize, bool)> {
600 if is_word_apostrophe(text, pos, quote) {
601 return None;
602 }
603 if text.get(..pos).and_then(|prefix| prefix.chars().next_back()) == Some(quote) {
606 return None;
607 }
608 let start = pos + quote.len_utf8();
609 let rest = text.get(start..)?;
610 if rest.starts_with(quote) {
611 return None;
612 }
613 let mut escaped = false;
614 let mut contains_delim = false;
615 let mut end_of_string = None;
616 let mut local_pos = start;
617 for ch in rest.chars() {
618 if escaped {
619 escaped = false;
620 local_pos += ch.len_utf8();
621 continue;
622 }
623 if ch == '\\' {
624 escaped = true;
625 local_pos += ch.len_utf8();
626 continue;
627 }
628 if ch == '\n' {
630 return None;
631 }
632 if ch == delimiter {
633 contains_delim = true;
634 }
635 if ch == quote {
636 end_of_string = Some(local_pos + ch.len_utf8());
637 break;
638 }
639 local_pos += ch.len_utf8();
640 }
641 end_of_string.map(|end| (end, contains_delim))
642}
643
/// Reports whether a `'` at byte offset `pos` directly follows an ASCII
/// alphanumeric character or `_`, as in `don't`.
///
/// Always `false` for any other `quote`, at the start of `text`, or when
/// `pos` is not a valid slice boundary.
fn is_word_apostrophe(text: &str, pos: usize, quote: char) -> bool {
    if quote != '\'' {
        return false;
    }
    match text.get(..pos).and_then(|head| head.chars().next_back()) {
        Some(previous) => previous == '_' || previous.is_ascii_alphanumeric(),
        None => false,
    }
}
651
652fn extract_unpaired_body_skip_strings(text: &str, closing: char) -> (String, &str, bool) {
662 let mut body = String::new();
663 let mut end_pos = text.len();
664 let mut found_closing = false;
665 let mut pos = 0usize;
666 let mut escaped = false;
667
668 while let Some(ch) = text.get(pos..).and_then(|s| s.chars().next()) {
669 if escaped {
670 body.push(ch);
671 escaped = false;
672 pos += ch.len_utf8();
673 continue;
674 }
675
676 match ch {
677 '\\' => {
678 body.push(ch);
679 escaped = true;
680 pos += ch.len_utf8();
681 }
682 '"' | '\'' if ch != closing => {
688 let quote = ch;
689 match scan_inner_string(text, pos, quote, closing) {
690 Some((string_end, true)) => {
691 let string_text = &text[pos..string_end];
693 body.push_str(string_text);
694 pos = string_end;
695 }
696 _ => {
697 body.push(ch);
700 pos += ch.len_utf8();
701 }
702 }
703 }
704 c if c == closing => {
705 end_pos = pos + ch.len_utf8();
706 found_closing = true;
707 break;
708 }
709 _ => {
710 body.push(ch);
711 pos += ch.len_utf8();
712 }
713 }
714 }
715
716 (body, &text[end_pos..], found_closing)
717}
718
/// Extracts the pattern of a paired-delimiter substitution, preferring a
/// closing delimiter that is followed by the opener of a replacement.
///
/// Returns `(body, rest, found_closing)`:
/// * `body` - the pattern text without its outer delimiters;
/// * `rest` - the text after the chosen closing delimiter;
/// * `found_closing` - whether any top-level closing delimiter was seen.
///
/// If `text` does not start with `open` the result is `("", text, false)`,
/// and `("", "", false)` for empty input.
fn extract_substitution_pattern_with_replacement_hint(
    text: &str,
    open: char,
    close: char,
) -> (String, &str, bool) {
    let mut chars = text.char_indices();

    // The first character must be the opening delimiter.
    if let Some((_, c)) = chars.next() {
        if c != open {
            return (String::new(), text, false);
        }
    } else {
        return (String::new(), "", false);
    }

    let mut body = String::new();
    let mut depth = 1usize;
    let mut escaped = false;
    // Byte offset just past the first top-level closer, and the body length
    // at that moment; used as the fallback split point after the loop.
    let mut first_close_pos: Option<usize> = None;
    let mut first_body_len: usize = 0;

    for (i, ch) in chars {
        // A character after a backslash is copied verbatim.
        if escaped {
            body.push(ch);
            escaped = false;
            continue;
        }

        match ch {
            '\\' => {
                body.push(ch);
                escaped = true;
            }
            c if c == open => {
                body.push(ch);
                depth += 1;
            }
            c if c == close => {
                // A closer for a nested opener is ordinary body content.
                if depth > 1 {
                    depth -= 1;
                    body.push(ch);
                    continue;
                }

                // Top-level closer: remember the first one as a fallback.
                let rest = &text[i + ch.len_utf8()..];
                if first_close_pos.is_none() {
                    first_close_pos = Some(i + ch.len_utf8());
                    first_body_len = body.len();
                }

                // Accept this closer at once if a bracket opener follows
                // (leading whitespace is ignored by the helper).
                if starts_with_paired_delimiter(rest).is_some() {
                    return (body, rest, true);
                }

                // Otherwise keep scanning and treat the closer as body text;
                // depth is left at 1, so a later closer is top-level again.
                body.push(ch);
            }
            _ => body.push(ch),
        }
    }

    // No closer was followed by a bracket opener: fall back to the first
    // top-level closer and drop everything scanned after it from the body.
    if let Some(pos) = first_close_pos {
        body.truncate(first_body_len);
        return (body, &text[pos..], true);
    }

    // The pattern was never closed at the top level.
    (body, "", false)
}
787
788fn split_unclosed_substitution_pattern(pattern: &str) -> Option<(String, String, String)> {
789 let mut escaped = false;
790
791 for (idx, ch) in pattern.char_indices() {
792 if escaped {
793 escaped = false;
794 continue;
795 }
796
797 if ch == '\\' {
798 escaped = true;
799 continue;
800 }
801
802 if is_paired_open(ch) {
803 let closing = get_closing_delimiter(ch);
804 let (replacement, rest, found_closing) =
805 extract_delimited_content_strict(&pattern[idx..], ch, closing);
806 if found_closing {
807 let leading = pattern[..idx].to_string();
808 return Some((leading, replacement, rest.to_string()));
809 }
810 }
811 }
812
813 None
814}
815
816fn split_on_last_paired_delimiter(text: &str) -> Option<(String, String, String)> {
817 let mut escaped = false;
818 let mut candidates = Vec::new();
819
820 for (idx, ch) in text.char_indices() {
821 if escaped {
822 escaped = false;
823 continue;
824 }
825
826 if ch == '\\' {
827 escaped = true;
828 continue;
829 }
830
831 if is_paired_open(ch) {
832 candidates.push((idx, ch));
833 }
834 }
835
836 for (idx, ch) in candidates.into_iter().rev() {
837 let closing = get_closing_delimiter(ch);
838 let (replacement, rest, found_closing) =
839 extract_delimited_content_strict(&text[idx..], ch, closing);
840 if found_closing {
841 let leading = text[..idx].to_string();
842 return Some((leading, replacement, rest.to_string()));
843 }
844 }
845
846 None
847}
848
/// Collects the recognised substitution modifiers from the start of `text`.
///
/// Reading stops at the first character that is not an ASCII letter. Letters
/// outside the recognised set (`g i m s x o e r a d l u n p c`) are dropped
/// silently and do not stop the scan.
fn extract_substitution_modifiers(text: &str) -> String {
    const RECOGNISED: &str = "gimsxoeradlunpc";

    let mut kept = String::new();
    for flag in text.chars() {
        if !flag.is_ascii_alphabetic() {
            break;
        }
        if RECOGNISED.contains(flag) {
            kept.push(flag);
        }
    }
    kept
}
882
/// Validates a substitution modifier suffix.
///
/// Characters are read until whitespace or `;` is met, which ends the
/// modifier run. Every character before that must be one of
/// `g i m s x o e r a d l u n p c`.
///
/// # Errors
///
/// Returns the first offending character: any letter outside the recognised
/// set, or any other character that is neither whitespace nor `;`.
pub fn validate_substitution_modifiers(modifiers_str: &str) -> Result<String, char> {
    let mut accepted = String::new();

    for flag in modifiers_str.chars() {
        match flag {
            'g' | 'i' | 'm' | 's' | 'x' | 'o' | 'e' | 'r' | 'a' | 'd' | 'l' | 'u' | 'n' | 'p'
            | 'c' => accepted.push(flag),
            ';' => break,
            terminator if terminator.is_whitespace() => break,
            rejected => return Err(rejected),
        }
    }

    Ok(accepted)
}