1use std::borrow::Cow;
7
8pub fn extract_regex_parts(text: &str) -> (String, String, String) {
10 let content = if let Some(stripped) = text.strip_prefix("qr") {
12 stripped
13 } else if text.starts_with('m')
14 && text.len() > 1
15 && text.chars().nth(1).is_some_and(|c| !c.is_alphabetic())
16 {
17 &text[1..]
18 } else {
19 text
20 };
21
22 let delimiter = match content.chars().next() {
24 Some(d) => d,
25 None => return (String::new(), String::new(), String::new()),
26 };
27 let closing = get_closing_delimiter(delimiter);
28
29 let (body, modifiers) = extract_delimited_content(content, delimiter, closing);
31
32 let pattern = format!("{}{}{}", delimiter, body, closing);
34
35 (pattern, body, modifiers.to_string())
36}
37
/// Reasons why strict parsing of a substitution (`s...`) can fail.
#[derive(Debug, Clone, PartialEq)]
pub enum SubstitutionError {
    /// A character in the modifier list was rejected by
    /// `validate_substitution_modifiers`; carries the offending character.
    InvalidModifier(char),
    /// Nothing follows the optional leading `s`, so no delimiter could be read.
    MissingDelimiter,
    /// NOTE(review): not constructed anywhere in the code visible here;
    /// presumably reserved for an empty-pattern check - confirm against callers.
    MissingPattern,
    /// The replacement part is absent: either nothing follows the pattern, or
    /// (for bracketed patterns) the next non-blank character is not one of
    /// `{`, `[`, `(`, `<`.
    MissingReplacement,
    /// The pattern or the replacement was opened but never closed.
    MissingClosingDelimiter,
}
52
53pub fn extract_substitution_parts_strict(
68 text: &str,
69) -> Result<(String, String, String), SubstitutionError> {
70 let content = text.strip_prefix('s').unwrap_or(text);
72
73 let delimiter = match content.chars().next() {
75 Some(d) => d,
76 None => return Err(SubstitutionError::MissingDelimiter),
77 };
78 let closing = get_closing_delimiter(delimiter);
79 let is_paired = delimiter != closing;
80
81 let (pattern, rest1, pattern_closed) =
83 extract_delimited_content_strict(content, delimiter, closing);
84
85 if !is_paired && !pattern_closed {
87 return Err(SubstitutionError::MissingClosingDelimiter);
88 }
89
90 if is_paired && !pattern_closed {
92 return Err(SubstitutionError::MissingClosingDelimiter);
93 }
94
95 let (replacement, modifiers_str, replacement_closed) = if !is_paired {
99 if rest1.is_empty() {
101 return Err(SubstitutionError::MissingReplacement);
102 }
103
104 let chars = rest1.char_indices();
106 let mut body = String::new();
107 let mut escaped = false;
108 let mut end_pos = rest1.len();
109 let mut found_closing = false;
110
111 for (i, ch) in chars {
112 if escaped {
113 body.push(ch);
114 escaped = false;
115 continue;
116 }
117
118 match ch {
119 '\\' => {
120 body.push(ch);
121 escaped = true;
122 }
123 c if c == closing => {
124 end_pos = i + ch.len_utf8();
125 found_closing = true;
126 break;
127 }
128 _ => body.push(ch),
129 }
130 }
131
132 (body, &rest1[end_pos..], found_closing)
133 } else {
134 let trimmed = rest1.trim_start();
136 if let Some(rd) = trimmed.chars().next() {
140 if rd == '{' || rd == '[' || rd == '(' || rd == '<' {
142 let repl_closing = get_closing_delimiter(rd);
143 extract_delimited_content_strict(trimmed, rd, repl_closing)
144 } else {
145 return Err(SubstitutionError::MissingReplacement);
147 }
148 } else {
149 return Err(SubstitutionError::MissingReplacement);
151 }
152 };
153
154 if !is_paired && !replacement_closed {
156 return Err(SubstitutionError::MissingClosingDelimiter);
157 }
158
159 if is_paired && !replacement_closed {
161 return Err(SubstitutionError::MissingClosingDelimiter);
162 }
163
164 let modifiers = validate_substitution_modifiers(modifiers_str)
166 .map_err(SubstitutionError::InvalidModifier)?;
167
168 Ok((pattern, replacement, modifiers))
169}
170
/// Reads one delimited section from the start of `text` and reports whether
/// its closing delimiter was actually found.
///
/// Returns `(body, rest, closed)`:
/// * `body` - the text between the delimiters; backslash escapes are copied
///   through unchanged (backslash included).
/// * `rest` - the text after the closing delimiter, or `""` when unclosed.
/// * `closed` - `true` only when the closing delimiter was found.
///
/// When `open != close` nested pairs are balanced. If `text` does not begin
/// with `open`, the result is `("", text, false)`.
fn extract_delimited_content_strict(text: &str, open: char, close: char) -> (String, &str, bool) {
    let Some(inner) = text.strip_prefix(open) else {
        return (String::new(), text, false);
    };

    let nested = open != close;
    let mut captured = String::new();
    let mut level: usize = 1;
    let mut skip_next = false;

    for (offset, ch) in inner.char_indices() {
        if skip_next {
            // The character after a backslash is never treated as a delimiter.
            skip_next = false;
            captured.push(ch);
            continue;
        }

        match ch {
            '\\' => skip_next = true,
            _ if nested && ch == open => level += 1,
            _ if ch == close => {
                if nested {
                    level -= 1;
                }
                if level == 0 || !nested {
                    return (captured, &inner[offset + ch.len_utf8()..], true);
                }
            }
            _ => {}
        }
        captured.push(ch);
    }

    (captured, "", false)
}
229
230pub fn extract_substitution_parts(text: &str) -> (String, String, String) {
243 let content = text.strip_prefix('s').unwrap_or(text);
245
246 let delimiter = match content.chars().next() {
248 Some(d) => d,
249 None => return (String::new(), String::new(), String::new()),
250 };
251 if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
252 if let Some((pattern, replacement, modifiers_str)) = split_on_last_paired_delimiter(content)
253 {
254 let modifiers = extract_substitution_modifiers(&modifiers_str);
255 return (pattern, replacement, modifiers);
256 }
257
258 return (String::new(), String::new(), String::new());
259 }
260 let closing = get_closing_delimiter(delimiter);
261 let is_paired = delimiter != closing;
262
263 let (mut pattern, rest1, pattern_closed) = if is_paired {
265 extract_substitution_pattern_with_replacement_hint(content, delimiter, closing)
266 } else {
267 extract_delimited_content_strict(content, delimiter, closing)
268 };
269
270 let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
274 let (body, rest) = extract_unpaired_body(rest1, closing);
276 (body, Cow::Borrowed(rest))
277 } else if !is_paired && !pattern_closed {
278 if let Some((fallback_pattern, fallback_replacement, fallback_modifiers)) =
279 split_unclosed_substitution_pattern(&pattern)
280 {
281 pattern = fallback_pattern;
282 (fallback_replacement, Cow::Owned(fallback_modifiers))
283 } else {
284 (String::new(), Cow::Borrowed(rest1))
285 }
286 } else if is_paired {
287 let trimmed = rest1.trim_start();
288 if let Some(rd) = starts_with_paired_delimiter(trimmed) {
292 let repl_closing = get_closing_delimiter(rd);
293 let (body, rest) = extract_delimited_content(trimmed, rd, repl_closing);
294 (body, Cow::Borrowed(rest))
295 } else {
296 let (body, rest) = extract_unpaired_body(rest1, closing);
297 (body, Cow::Borrowed(rest))
298 }
299 } else {
300 (String::new(), Cow::Borrowed(rest1))
301 };
302
303 let modifiers = extract_substitution_modifiers(modifiers_str.as_ref());
305
306 (pattern, replacement, modifiers)
307}
308
309pub fn extract_transliteration_parts(text: &str) -> (String, String, String) {
311 let content = if let Some(stripped) = text.strip_prefix("tr") {
313 stripped
314 } else if let Some(stripped) = text.strip_prefix('y') {
315 stripped
316 } else {
317 text
318 };
319
320 let delimiter = match content.chars().next() {
322 Some(d) => d,
323 None => return (String::new(), String::new(), String::new()),
324 };
325 let closing = get_closing_delimiter(delimiter);
326 let is_paired = delimiter != closing;
327
328 let (search, rest1) = extract_delimited_content(content, delimiter, closing);
330
331 let rest2_owned;
333 let rest2 = if is_paired {
334 let trimmed = rest1.trim_start();
335 if trimmed.starts_with(delimiter) {
337 trimmed
339 } else {
340 ""
342 }
343 } else {
344 rest2_owned = format!("{}{}", delimiter, rest1);
345 &rest2_owned
346 };
347
348 let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
350 let chars = rest1.char_indices();
352 let mut body = String::new();
353 let mut escaped = false;
354 let mut end_pos = rest1.len();
355
356 for (i, ch) in chars {
357 if escaped {
358 body.push(ch);
359 escaped = false;
360 continue;
361 }
362
363 match ch {
364 '\\' => {
365 body.push(ch);
366 escaped = true;
367 }
368 c if c == closing => {
369 end_pos = i + ch.len_utf8();
370 break;
371 }
372 _ => body.push(ch),
373 }
374 }
375
376 (body, &rest1[end_pos..])
377 } else if is_paired {
378 extract_delimited_content(rest2, delimiter, closing)
379 } else {
380 (String::new(), rest1)
381 };
382
383 let modifiers = modifiers_str
386 .chars()
387 .take_while(|c| c.is_ascii_alphabetic())
388 .filter(|&c| matches!(c, 'c' | 'd' | 's' | 'r'))
389 .collect();
390
391 (search, replacement, modifiers)
392}
393
/// Returns the closing counterpart of an opening delimiter.
///
/// The four bracketing openers map to their partners; every other character
/// closes itself.
fn get_closing_delimiter(open: char) -> char {
    const PAIRS: [(char, char); 4] = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')];

    PAIRS
        .iter()
        .find(|&&(opener, _)| opener == open)
        .map_or(open, |&(_, closer)| closer)
}
404
/// Reports whether `ch` is one of the bracketing openers `{`, `[`, `(`, `<`.
fn is_paired_open(ch: char) -> bool {
    ['{', '[', '(', '<'].contains(&ch)
}
408
409fn starts_with_paired_delimiter(text: &str) -> Option<char> {
410 let trimmed = text.trim_start();
411 match trimmed.chars().next() {
412 Some(ch) if is_paired_open(ch) => Some(ch),
413 _ => None,
414 }
415}
416
/// Reads one delimited section from the start of `text`.
///
/// Returns `(body, rest)`, where `body` is the text between the delimiters
/// (backslash escapes copied through unchanged) and `rest` is whatever follows
/// the closing delimiter. When `open != close` nested pairs are balanced.
///
/// If the section is never closed, `body` holds everything after the opener
/// and `rest` is empty. If `text` does not begin with `open`, the result is
/// `("", text)`.
fn extract_delimited_content(text: &str, open: char, close: char) -> (String, &str) {
    let mut iter = text.char_indices();
    match iter.next() {
        None => return (String::new(), ""),
        Some((_, first)) if first != open => return (String::new(), text),
        Some(_) => {}
    }

    let nests = open != close;
    let mut out = String::new();
    let mut open_count: usize = 1;
    let mut after_backslash = false;
    let mut tail_start = text.len();

    for (pos, ch) in iter {
        if after_backslash {
            // An escaped character is always literal content.
            after_backslash = false;
            out.push(ch);
        } else if ch == '\\' {
            after_backslash = true;
            out.push(ch);
        } else if nests && ch == open {
            open_count += 1;
            out.push(ch);
        } else if ch == close {
            if nests {
                open_count -= 1;
            }
            if !nests || open_count == 0 {
                tail_start = pos + ch.len_utf8();
                break;
            }
            out.push(ch);
        } else {
            out.push(ch);
        }
    }

    (out, &text[tail_start..])
}
471
/// Collects text up to the first unescaped `closing` character.
///
/// Returns `(body, rest)`: `body` is everything before that character
/// (backslash escapes copied through unchanged) and `rest` is everything after
/// it. When `closing` never appears, `body` is the whole input and `rest` is
/// empty.
fn extract_unpaired_body(text: &str, closing: char) -> (String, &str) {
    let mut collected = String::new();
    let mut after_backslash = false;

    for (offset, ch) in text.char_indices() {
        if after_backslash {
            after_backslash = false;
        } else if ch == '\\' {
            after_backslash = true;
        } else if ch == closing {
            return (collected, &text[offset + ch.len_utf8()..]);
        }
        collected.push(ch);
    }

    (collected, &text[text.len()..])
}
499
500fn extract_substitution_pattern_with_replacement_hint(
501 text: &str,
502 open: char,
503 close: char,
504) -> (String, &str, bool) {
505 let mut chars = text.char_indices();
506
507 if let Some((_, c)) = chars.next() {
509 if c != open {
510 return (String::new(), text, false);
511 }
512 } else {
513 return (String::new(), "", false);
514 }
515
516 let mut body = String::new();
517 let mut depth = 1usize;
518 let mut escaped = false;
519 let mut first_close_pos: Option<usize> = None;
520 let mut first_body_len: usize = 0;
521
522 for (i, ch) in chars {
523 if escaped {
524 body.push(ch);
525 escaped = false;
526 continue;
527 }
528
529 match ch {
530 '\\' => {
531 body.push(ch);
532 escaped = true;
533 }
534 c if c == open => {
535 body.push(ch);
536 depth += 1;
537 }
538 c if c == close => {
539 if depth > 1 {
540 depth -= 1;
541 body.push(ch);
542 continue;
543 }
544
545 let rest = &text[i + ch.len_utf8()..];
546 if first_close_pos.is_none() {
547 first_close_pos = Some(i + ch.len_utf8());
548 first_body_len = body.len();
549 }
550
551 if starts_with_paired_delimiter(rest).is_some() {
552 return (body, rest, true);
553 }
554
555 body.push(ch);
556 }
557 _ => body.push(ch),
558 }
559 }
560
561 if let Some(pos) = first_close_pos {
562 body.truncate(first_body_len);
563 return (body, &text[pos..], true);
564 }
565
566 (body, "", false)
567}
568
569fn split_unclosed_substitution_pattern(pattern: &str) -> Option<(String, String, String)> {
570 let mut escaped = false;
571
572 for (idx, ch) in pattern.char_indices() {
573 if escaped {
574 escaped = false;
575 continue;
576 }
577
578 if ch == '\\' {
579 escaped = true;
580 continue;
581 }
582
583 if is_paired_open(ch) {
584 let closing = get_closing_delimiter(ch);
585 let (replacement, rest, found_closing) =
586 extract_delimited_content_strict(&pattern[idx..], ch, closing);
587 if found_closing {
588 let leading = pattern[..idx].to_string();
589 return Some((leading, replacement, rest.to_string()));
590 }
591 }
592 }
593
594 None
595}
596
597fn split_on_last_paired_delimiter(text: &str) -> Option<(String, String, String)> {
598 let mut escaped = false;
599 let mut candidates = Vec::new();
600
601 for (idx, ch) in text.char_indices() {
602 if escaped {
603 escaped = false;
604 continue;
605 }
606
607 if ch == '\\' {
608 escaped = true;
609 continue;
610 }
611
612 if is_paired_open(ch) {
613 candidates.push((idx, ch));
614 }
615 }
616
617 for (idx, ch) in candidates.into_iter().rev() {
618 let closing = get_closing_delimiter(ch);
619 let (replacement, rest, found_closing) =
620 extract_delimited_content_strict(&text[idx..], ch, closing);
621 if found_closing {
622 let leading = text[..idx].to_string();
623 return Some((leading, replacement, rest.to_string()));
624 }
625 }
626
627 None
628}
629
/// Leniently collects substitution modifiers from the start of `text`.
///
/// Reads the leading run of ASCII letters and keeps only recognised modifier
/// letters; unrecognised letters inside that run are dropped silently.
/// Reading stops at the first character that is not an ASCII letter.
fn extract_substitution_modifiers(text: &str) -> String {
    const KNOWN: &str = "gimsxoeradlunpc";

    let mut flags = String::new();
    for ch in text.chars() {
        if !ch.is_ascii_alphabetic() {
            break;
        }
        if KNOWN.contains(ch) {
            flags.push(ch);
        }
    }
    flags
}
663
/// Strictly validates the modifier text that follows a substitution.
///
/// Characters are read until whitespace or `;` is reached (or the input ends).
/// Every character before that point must be a recognised modifier letter.
///
/// # Errors
///
/// Returns the first offending character: any letter that is not a recognised
/// modifier, or any non-letter other than whitespace and `;`.
pub fn validate_substitution_modifiers(modifiers_str: &str) -> Result<String, char> {
    const ALLOWED: &str = "gimsxoeradlunpc";

    let mut accepted = String::new();
    for ch in modifiers_str.chars() {
        // Whitespace or a statement terminator ends the modifier list.
        if ch.is_whitespace() || ch == ';' {
            break;
        }
        if !ALLOWED.contains(ch) {
            return Err(ch);
        }
        accepted.push(ch);
    }

    Ok(accepted)
}