1use std::borrow::Cow;
7
8pub fn extract_regex_parts(text: &str) -> (String, String, String) {
10 let content = if let Some(stripped) = text.strip_prefix("qr") {
12 stripped
13 } else if text.starts_with('m')
14 && text.len() > 1
15 && text.chars().nth(1).is_some_and(|c| !c.is_alphabetic())
16 {
17 &text[1..]
18 } else {
19 text
20 };
21
22 let delimiter = match content.chars().next() {
24 Some(d) => d,
25 None => return (String::new(), String::new(), String::new()),
26 };
27 let closing = get_closing_delimiter(delimiter);
28
29 let (body, modifiers) = extract_delimited_content(content, delimiter, closing);
31
32 let pattern = format!("{}{}{}", delimiter, body, closing);
34
35 (pattern, body, modifiers.to_string())
36}
37
/// Reasons why strict parsing of an `s///` substitution can fail.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubstitutionError {
    /// The modifier text contained this character, which is not an accepted
    /// substitution modifier.
    InvalidModifier(char),
    /// Nothing follows the (optional) `s` prefix, so there is no delimiter.
    MissingDelimiter,
    /// The pattern section is missing.
    // NOTE(review): no code path visible in this file constructs this
    // variant; confirm whether external callers rely on it.
    MissingPattern,
    /// No replacement section could be located after the pattern.
    MissingReplacement,
    /// The pattern or the replacement section was never terminated.
    MissingClosingDelimiter,
}
52
/// Reasons why strict parsing of a `tr///` / `y///` transliteration can fail.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransliterationError {
    /// The modifier text contained this character, which is not one of the
    /// accepted transliteration modifiers (`c`, `d`, `s`, `r`).
    InvalidModifier(char),
    /// This character was found where a delimiter is required but is ASCII
    /// alphanumeric or whitespace.
    InvalidDelimiter(char),
    /// Nothing follows the operator prefix, so there is no delimiter.
    MissingDelimiter,
    /// The search list is empty.
    MissingSearch,
    /// No replacement section could be located after the search list.
    MissingReplacement,
    /// The search list or the replacement section was never terminated.
    MissingClosingDelimiter,
}
69
70pub fn extract_substitution_parts_strict(
85 text: &str,
86) -> Result<(String, String, String), SubstitutionError> {
87 let after_s = text.strip_prefix('s').unwrap_or(text);
89 let content = after_s.trim_start();
91
92 let delimiter = match content.chars().next() {
94 Some(d) => d,
95 None => return Err(SubstitutionError::MissingDelimiter),
96 };
97 let closing = get_closing_delimiter(delimiter);
98 let is_paired = delimiter != closing;
99
100 let (pattern, rest1, pattern_closed) =
102 extract_delimited_content_strict(content, delimiter, closing);
103
104 if !is_paired && !pattern_closed {
106 return Err(SubstitutionError::MissingClosingDelimiter);
107 }
108
109 if is_paired && !pattern_closed {
111 return Err(SubstitutionError::MissingClosingDelimiter);
112 }
113
114 let (replacement, modifiers_str, replacement_closed) = if !is_paired {
118 if rest1.is_empty() {
120 return Err(SubstitutionError::MissingReplacement);
121 }
122
123 let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
126 (body, rest, found_closing)
127 } else {
128 let trimmed = skip_paired_substitution_replacement_gap(rest1);
131 if let Some(rd) = trimmed.chars().next() {
132 if rd.is_ascii_alphanumeric() || rd.is_whitespace() {
133 return Err(SubstitutionError::MissingReplacement);
134 }
135 let repl_closing = get_closing_delimiter(rd);
136 extract_delimited_content_strict(trimmed, rd, repl_closing)
137 } else {
138 return Err(SubstitutionError::MissingReplacement);
140 }
141 };
142
143 if !is_paired && !replacement_closed {
145 return Err(SubstitutionError::MissingClosingDelimiter);
146 }
147
148 if is_paired && !replacement_closed {
150 return Err(SubstitutionError::MissingClosingDelimiter);
151 }
152
153 let modifiers = validate_substitution_modifiers(modifiers_str)
155 .map_err(SubstitutionError::InvalidModifier)?;
156
157 Ok((pattern, replacement, modifiers))
158}
159
160fn skip_paired_substitution_replacement_gap(mut text: &str) -> &str {
161 let mut comment_eligible = false;
162 loop {
163 let trimmed = text.trim_start_matches(char::is_whitespace);
164 let saw_whitespace = trimmed.len() != text.len();
165 text = trimmed;
166 comment_eligible |= saw_whitespace;
167
168 if comment_eligible && text.starts_with('#') {
169 text = after_line_comment(text);
170 comment_eligible = true;
171 continue;
172 }
173
174 return text;
175 }
176}
177
/// Returns the text following the first `\n` or `\r` in `text`.
///
/// Only the single terminator character is consumed, so for `\r\n` the
/// result still starts with `\n`. Returns `""` when no terminator exists.
fn after_line_comment(text: &str) -> &str {
    match text.find(|c: char| c == '\n' || c == '\r') {
        // Both terminators are one byte wide in UTF-8.
        Some(terminator) => &text[terminator + 1..],
        None => "",
    }
}
186
/// Extracts the body of a delimited section and reports whether it was closed.
///
/// `text` must start with `open`; otherwise `(String::new(), text, false)` is
/// returned. When `open != close` the delimiters nest. A backslash keeps
/// itself and the following character in the body without interpreting
/// either as a delimiter.
///
/// Returns `(body, rest, closed)` where `rest` is the text after the closing
/// delimiter (empty when the section is unterminated).
fn extract_delimited_content_strict(text: &str, open: char, close: char) -> (String, &str, bool) {
    let Some(inner) = text.strip_prefix(open) else {
        return (String::new(), text, false);
    };

    let nests = open != close;
    let mut body = String::new();
    // Number of currently open delimiters, counting the outermost one.
    let mut open_count = 1usize;
    let mut chars = inner.char_indices();

    while let Some((offset, ch)) = chars.next() {
        if ch == '\\' {
            // Copy the escape pair verbatim.
            body.push(ch);
            if let Some((_, escaped)) = chars.next() {
                body.push(escaped);
            }
            continue;
        }

        if ch == close {
            open_count -= 1;
            if open_count == 0 {
                return (body, &inner[offset + ch.len_utf8()..], true);
            }
        } else if nests && ch == open {
            open_count += 1;
        }
        body.push(ch);
    }

    (body, "", false)
}
245
246pub fn extract_substitution_parts(text: &str) -> (String, String, String) {
259 let content = text.strip_prefix('s').unwrap_or(text);
261
262 let delimiter = match content.chars().next() {
264 Some(d) => d,
265 None => return (String::new(), String::new(), String::new()),
266 };
267 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
268 if let Some((pattern, replacement, modifiers_str)) = split_on_last_paired_delimiter(content)
269 {
270 let modifiers = extract_substitution_modifiers(&modifiers_str);
271 return (pattern, replacement, modifiers);
272 }
273
274 return (String::new(), String::new(), String::new());
275 }
276 let closing = get_closing_delimiter(delimiter);
277 let is_paired = delimiter != closing;
278
279 let (mut pattern, rest1, pattern_closed) = if is_paired {
281 extract_substitution_pattern_with_replacement_hint(content, delimiter, closing)
282 } else {
283 extract_delimited_content_strict(content, delimiter, closing)
284 };
285
286 let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
290 let (body, rest, _found) = extract_unpaired_body_skip_strings(rest1, closing);
293 (body, Cow::Borrowed(rest))
294 } else if !is_paired && !pattern_closed {
295 if let Some((fallback_pattern, fallback_replacement, fallback_modifiers)) =
296 split_unclosed_substitution_pattern(&pattern)
297 {
298 pattern = fallback_pattern;
299 (fallback_replacement, Cow::Owned(fallback_modifiers))
300 } else {
301 (String::new(), Cow::Borrowed(rest1))
302 }
303 } else if is_paired {
304 let trimmed = rest1.trim_start();
305 if let Some(rd) = trimmed.chars().next() {
306 if rd.is_ascii_alphanumeric() || rd.is_whitespace() {
307 (String::new(), Cow::Borrowed(trimmed))
308 } else {
309 let repl_closing = get_closing_delimiter(rd);
310 let (body, rest) = extract_delimited_content(trimmed, rd, repl_closing);
311 (body, Cow::Borrowed(rest))
312 }
313 } else {
314 (String::new(), Cow::Borrowed(trimmed))
315 }
316 } else {
317 (String::new(), Cow::Borrowed(rest1))
318 };
319
320 let modifiers = extract_substitution_modifiers(modifiers_str.as_ref());
322
323 (pattern, replacement, modifiers)
324}
325
326pub fn extract_transliteration_parts(text: &str) -> (String, String, String) {
328 let after_op = if let Some(stripped) = text.strip_prefix("tr") {
330 stripped
331 } else if let Some(stripped) = text.strip_prefix('y') {
332 stripped
333 } else {
334 text
335 };
336 let content = after_op.trim_start();
337
338 let delimiter = match content.chars().next() {
340 Some(d) => d,
341 None => return (String::new(), String::new(), String::new()),
342 };
343 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
344 return (String::new(), String::new(), String::new());
345 }
346 let closing = get_closing_delimiter(delimiter);
347 let is_paired = delimiter != closing;
348
349 let (search, rest1) = extract_delimited_content(content, delimiter, closing);
351
352 let rest2_owned;
355 let rest2 = if is_paired {
356 rest1.trim_start()
357 } else {
358 rest2_owned = format!("{}{}", delimiter, rest1);
359 &rest2_owned
360 };
361
362 let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
364 let chars = rest1.char_indices();
366 let mut body = String::new();
367 let mut escaped = false;
368 let mut end_pos = rest1.len();
369
370 for (i, ch) in chars {
371 if escaped {
372 body.push(ch);
373 escaped = false;
374 continue;
375 }
376
377 match ch {
378 '\\' => {
379 body.push(ch);
380 escaped = true;
381 }
382 c if c == closing => {
383 end_pos = i + ch.len_utf8();
384 break;
385 }
386 _ => body.push(ch),
387 }
388 }
389
390 (body, &rest1[end_pos..])
391 } else if is_paired {
392 if let Some(repl_delimiter) = starts_with_paired_delimiter(rest2) {
393 let repl_closing = get_closing_delimiter(repl_delimiter);
394 extract_delimited_content(rest2, repl_delimiter, repl_closing)
395 } else if let Some(repl_delimiter) = rest2.chars().next() {
396 if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
397 (String::new(), rest2)
398 } else {
399 extract_delimited_content(rest2, repl_delimiter, repl_delimiter)
400 }
401 } else {
402 (String::new(), rest2)
403 }
404 } else {
405 (String::new(), rest1)
406 };
407
408 let modifiers = modifiers_str
411 .chars()
412 .take_while(|c| c.is_ascii_alphabetic())
413 .filter(|&c| matches!(c, 'c' | 'd' | 's' | 'r'))
414 .collect();
415
416 (search, replacement, modifiers)
417}
418
419pub fn extract_transliteration_parts_strict(
429 text: &str,
430) -> Result<(String, String, String), TransliterationError> {
431 let after_op = if let Some(stripped) = text.strip_prefix("tr") {
433 stripped
434 } else if let Some(stripped) = text.strip_prefix('y') {
435 stripped
436 } else {
437 text
438 };
439 let content = after_op.trim_start();
440
441 let delimiter = match content.chars().next() {
443 Some(d) => d,
444 None => return Err(TransliterationError::MissingDelimiter),
445 };
446 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
447 return Err(TransliterationError::InvalidDelimiter(delimiter));
448 }
449 let closing = get_closing_delimiter(delimiter);
450 let is_paired = delimiter != closing;
451
452 let (search, rest1, search_closed) =
454 extract_delimited_content_strict(content, delimiter, closing);
455 if !search_closed {
456 return Err(TransliterationError::MissingClosingDelimiter);
457 }
458
459 let (replacement, modifiers_str, replacement_closed) = if !is_paired {
461 if rest1.is_empty() {
462 return Err(TransliterationError::MissingReplacement);
463 }
464 let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
465 (body, rest, found_closing)
466 } else {
467 let trimmed = rest1.trim_start();
468 if let Some(repl_delimiter) = trimmed.chars().next() {
469 if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
474 return Err(TransliterationError::InvalidDelimiter(repl_delimiter));
475 }
476 let repl_closing = get_closing_delimiter(repl_delimiter);
477 let (body, rest, found_closing) =
478 extract_delimited_content_strict(trimmed, repl_delimiter, repl_closing);
479 (body, rest, found_closing)
480 } else {
481 return Err(TransliterationError::MissingReplacement);
482 }
483 };
484
485 if !replacement_closed {
486 return Err(TransliterationError::MissingClosingDelimiter);
487 }
488
489 if search.is_empty() {
490 return Err(TransliterationError::MissingSearch);
491 }
492
493 let mut modifiers = String::new();
495 for modifier in modifiers_str.chars().take_while(|c: &char| c.is_ascii_alphanumeric()) {
496 if matches!(modifier, 'c' | 'd' | 's' | 'r') {
497 modifiers.push(modifier);
498 } else {
499 return Err(TransliterationError::InvalidModifier(modifier));
500 }
501 }
502
503 Ok((search, replacement, modifiers))
504}
505
/// Maps an opening delimiter to its closing counterpart.
///
/// The four bracket openers `(`, `[`, `{`, `<` map to `)`, `]`, `}`, `>`;
/// every other character closes itself.
fn get_closing_delimiter(open: char) -> char {
    const BRACKET_PAIRS: [(char, char); 4] = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')];

    BRACKET_PAIRS
        .iter()
        .find(|&&(opener, _)| opener == open)
        .map_or(open, |&(_, closer)| closer)
}
516
/// Returns `true` when `ch` is one of the bracket openers `{`, `[`, `(`, `<`.
fn is_paired_open(ch: char) -> bool {
    "{[(<".contains(ch)
}
520
521fn starts_with_paired_delimiter(text: &str) -> Option<char> {
522 let trimmed = text.trim_start();
523 match trimmed.chars().next() {
524 Some(ch) if is_paired_open(ch) => Some(ch),
525 _ => None,
526 }
527}
528
/// Extracts the body of a delimited section, tolerating a missing terminator.
///
/// `text` must start with `open`; otherwise `(String::new(), text)` is
/// returned. When `open != close` the delimiters nest. A backslash keeps
/// itself and the following character in the body without interpreting
/// either as a delimiter.
///
/// Returns `(body, rest)` where `rest` is the text after the closing
/// delimiter, or `""` when the section runs to the end of the input.
fn extract_delimited_content(text: &str, open: char, close: char) -> (String, &str) {
    let mut iter = text.char_indices();
    match iter.next() {
        Some((_, first)) if first == open => {}
        _ => return (String::new(), text),
    }

    let nested = open != close;
    // Open delimiters still awaiting their partner, outermost included.
    let mut level = 1usize;
    let mut body = String::new();
    let mut take_literally = false;

    for (idx, ch) in iter {
        if take_literally {
            // Character right after a backslash: never a delimiter.
            take_literally = false;
            body.push(ch);
            continue;
        }

        if ch == '\\' {
            take_literally = true;
        } else if ch == close {
            level -= 1;
            if level == 0 {
                return (body, &text[idx + ch.len_utf8()..]);
            }
        } else if nested && ch == open {
            level += 1;
        }
        body.push(ch);
    }

    (body, "")
}
583
/// Scans a quoted string that starts at byte offset `pos` in `text`.
///
/// `quote` is the quote character located at `pos`; scanning begins right
/// after it. A backslash skips the following character. The scan gives up
/// (returns `None`) on an unescaped `\n`, when the closing quote is never
/// found, or when the start offset is not a valid slice position.
///
/// On success returns `(end, contains_delim)`: `end` is the byte offset just
/// past the closing quote and `contains_delim` tells whether an unescaped
/// `delimiter` was seen before (or at) the closing quote.
fn scan_inner_string(
    text: &str,
    pos: usize,
    quote: char,
    delimiter: char,
) -> Option<(usize, bool)> {
    let start = pos + quote.len_utf8();
    let mut chars = text.get(start..)?.char_indices();
    let mut saw_delimiter = false;

    while let Some((offset, ch)) = chars.next() {
        match ch {
            '\\' => {
                // Swallow the escaped character, whatever it is.
                chars.next();
            }
            '\n' => return None,
            _ => {
                saw_delimiter |= ch == delimiter;
                if ch == quote {
                    return Some((start + offset + ch.len_utf8(), saw_delimiter));
                }
            }
        }
    }

    None
}
635
636fn extract_unpaired_body_skip_strings(text: &str, closing: char) -> (String, &str, bool) {
646 let mut body = String::new();
647 let mut end_pos = text.len();
648 let mut found_closing = false;
649 let mut pos = 0usize;
650 let mut escaped = false;
651
652 while let Some(ch) = text.get(pos..).and_then(|s| s.chars().next()) {
653 if escaped {
654 body.push(ch);
655 escaped = false;
656 pos += ch.len_utf8();
657 continue;
658 }
659
660 match ch {
661 '\\' => {
662 body.push(ch);
663 escaped = true;
664 pos += ch.len_utf8();
665 }
666 '"' | '\'' if ch != closing => {
672 let quote = ch;
673 match scan_inner_string(text, pos, quote, closing) {
674 Some((string_end, true)) => {
675 let string_text = &text[pos..string_end];
677 body.push_str(string_text);
678 pos = string_end;
679 }
680 _ => {
681 body.push(ch);
684 pos += ch.len_utf8();
685 }
686 }
687 }
688 c if c == closing => {
689 end_pos = pos + ch.len_utf8();
690 found_closing = true;
691 break;
692 }
693 _ => {
694 body.push(ch);
695 pos += ch.len_utf8();
696 }
697 }
698 }
699
700 (body, &text[end_pos..], found_closing)
701}
702
703fn extract_substitution_pattern_with_replacement_hint(
704 text: &str,
705 open: char,
706 close: char,
707) -> (String, &str, bool) {
708 let mut chars = text.char_indices();
709
710 if let Some((_, c)) = chars.next() {
712 if c != open {
713 return (String::new(), text, false);
714 }
715 } else {
716 return (String::new(), "", false);
717 }
718
719 let mut body = String::new();
720 let mut depth = 1usize;
721 let mut escaped = false;
722 let mut first_close_pos: Option<usize> = None;
723 let mut first_body_len: usize = 0;
724
725 for (i, ch) in chars {
726 if escaped {
727 body.push(ch);
728 escaped = false;
729 continue;
730 }
731
732 match ch {
733 '\\' => {
734 body.push(ch);
735 escaped = true;
736 }
737 c if c == open => {
738 body.push(ch);
739 depth += 1;
740 }
741 c if c == close => {
742 if depth > 1 {
743 depth -= 1;
744 body.push(ch);
745 continue;
746 }
747
748 let rest = &text[i + ch.len_utf8()..];
749 if first_close_pos.is_none() {
750 first_close_pos = Some(i + ch.len_utf8());
751 first_body_len = body.len();
752 }
753
754 if starts_with_paired_delimiter(rest).is_some() {
755 return (body, rest, true);
756 }
757
758 body.push(ch);
759 }
760 _ => body.push(ch),
761 }
762 }
763
764 if let Some(pos) = first_close_pos {
765 body.truncate(first_body_len);
766 return (body, &text[pos..], true);
767 }
768
769 (body, "", false)
770}
771
772fn split_unclosed_substitution_pattern(pattern: &str) -> Option<(String, String, String)> {
773 let mut escaped = false;
774
775 for (idx, ch) in pattern.char_indices() {
776 if escaped {
777 escaped = false;
778 continue;
779 }
780
781 if ch == '\\' {
782 escaped = true;
783 continue;
784 }
785
786 if is_paired_open(ch) {
787 let closing = get_closing_delimiter(ch);
788 let (replacement, rest, found_closing) =
789 extract_delimited_content_strict(&pattern[idx..], ch, closing);
790 if found_closing {
791 let leading = pattern[..idx].to_string();
792 return Some((leading, replacement, rest.to_string()));
793 }
794 }
795 }
796
797 None
798}
799
800fn split_on_last_paired_delimiter(text: &str) -> Option<(String, String, String)> {
801 let mut escaped = false;
802 let mut candidates = Vec::new();
803
804 for (idx, ch) in text.char_indices() {
805 if escaped {
806 escaped = false;
807 continue;
808 }
809
810 if ch == '\\' {
811 escaped = true;
812 continue;
813 }
814
815 if is_paired_open(ch) {
816 candidates.push((idx, ch));
817 }
818 }
819
820 for (idx, ch) in candidates.into_iter().rev() {
821 let closing = get_closing_delimiter(ch);
822 let (replacement, rest, found_closing) =
823 extract_delimited_content_strict(&text[idx..], ch, closing);
824 if found_closing {
825 let leading = text[..idx].to_string();
826 return Some((leading, replacement, rest.to_string()));
827 }
828 }
829
830 None
831}
832
/// Collects the recognised substitution modifiers from the start of `text`.
///
/// Scanning stops at the first character that is not an ASCII letter.
/// Letters outside the recognised set (`g i m s x o e r a d l u n p c`) are
/// silently dropped rather than reported.
fn extract_substitution_modifiers(text: &str) -> String {
    const RECOGNISED: &str = "gimsxoeradlunpc";

    let mut kept = String::new();
    for flag in text.chars() {
        if !flag.is_ascii_alphabetic() {
            break;
        }
        if RECOGNISED.contains(flag) {
            kept.push(flag);
        }
    }
    kept
}
866
/// Validates the modifier text that follows a substitution.
///
/// Recognised modifiers (`g i m s x o e r a d l u n p c`) are accumulated in
/// order. Validation stops successfully at the first whitespace character or
/// `;`; anything after that is not inspected.
///
/// # Errors
///
/// Returns the offending character when an unrecognised letter, or any other
/// character that is neither whitespace nor `;`, is encountered.
pub fn validate_substitution_modifiers(modifiers_str: &str) -> Result<String, char> {
    let mut accepted = String::new();

    for flag in modifiers_str.chars() {
        match flag {
            'g' | 'i' | 'm' | 's' | 'x' | 'o' | 'e' | 'r' | 'a' | 'd' | 'l' | 'u' | 'n' | 'p'
            | 'c' => accepted.push(flag),
            // End of the modifier run.
            ';' => break,
            other if other.is_whitespace() => break,
            other => return Err(other),
        }
    }

    Ok(accepted)
}