Skip to main content

perl_parser_core/syntax/
quote.rs

1//! Uniform quote operator parsing for the Perl parser.
2//!
3//! This module provides consistent parsing for quote-like operators,
4//! properly extracting patterns, bodies, and modifiers.
5
6use std::borrow::Cow;
7
8/// Extract pattern and modifiers from a regex-like token (qr, m, or bare //)
9pub fn extract_regex_parts(text: &str) -> (String, String, String) {
10    // Handle different prefixes
11    let content = if let Some(stripped) = text.strip_prefix("qr") {
12        stripped
13    } else if text.starts_with('m')
14        && text.len() > 1
15        && text.chars().nth(1).is_some_and(|c| !c.is_alphabetic())
16    {
17        &text[1..]
18    } else {
19        text
20    };
21
22    // Get delimiter - content must be non-empty to have a delimiter
23    let delimiter = match content.chars().next() {
24        Some(d) => d,
25        None => return (String::new(), String::new(), String::new()),
26    };
27    let closing = get_closing_delimiter(delimiter);
28
29    // Extract body and modifiers
30    let (body, modifiers) = extract_delimited_content(content, delimiter, closing);
31
32    // Include delimiters in the pattern string for compatibility
33    let pattern = format!("{}{}{}", delimiter, body, closing);
34
35    (pattern, body, modifiers.to_string())
36}
37
/// Error type for substitution operator parsing failures.
///
/// Every payload is a plain `char`, so the type derives `Eq` in addition to
/// `PartialEq`; this lets it be used wherever full equivalence is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubstitutionError {
    /// Invalid modifier character found
    InvalidModifier(char),
    /// Missing delimiter after 's'
    MissingDelimiter,
    /// Pattern is missing or empty (just `s/`)
    MissingPattern,
    /// Replacement section is missing (e.g., `s/pattern` without replacement part)
    MissingReplacement,
    /// Closing delimiter is missing after replacement (e.g., `s/pattern/replacement` without final `/`)
    MissingClosingDelimiter,
}
52
/// Error type for transliteration operator parsing failures.
///
/// Every payload is a plain `char`, so the type derives `Eq` in addition to
/// `PartialEq`; this lets it be used wherever full equivalence is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransliterationError {
    /// Invalid modifier character found
    InvalidModifier(char),
    /// Invalid delimiter after `tr`/`y`
    InvalidDelimiter(char),
    /// Missing delimiter after `tr`/`y`
    MissingDelimiter,
    /// Search list section is missing
    MissingSearch,
    /// Replacement list section is missing
    MissingReplacement,
    /// Closing delimiter is missing
    MissingClosingDelimiter,
}
69
70/// Extract pattern, replacement, and modifiers from a substitution token with strict validation
71///
72/// This function parses substitution operators like s/pattern/replacement/flags
73/// and handles various delimiter forms including:
74/// - Non-paired delimiters: s/pattern/replacement/ (same delimiter for all parts)
75/// - Paired delimiters: s{pattern}{replacement} (different open/close delimiters)
76///
77/// Unlike `extract_substitution_parts`, this function returns an error if invalid modifiers
78/// are present instead of silently filtering them.
79///
80/// # Errors
81///
82/// Returns `Err(SubstitutionError::InvalidModifier(c))` if an invalid modifier character is found.
83/// Valid modifiers are: g, i, m, s, x, o, e, r
84pub fn extract_substitution_parts_strict(
85    text: &str,
86) -> Result<(String, String, String), SubstitutionError> {
87    // Skip 's' prefix
88    let after_s = text.strip_prefix('s').unwrap_or(text);
89    // Perl allows whitespace between 's' and its delimiter (e.g. `s { pattern } { replacement }g`)
90    let content = after_s.trim_start();
91
92    // Get delimiter - check for missing delimiter (just 's' or 's' followed by nothing)
93    let delimiter = match content.chars().next() {
94        Some(d) => d,
95        None => return Err(SubstitutionError::MissingDelimiter),
96    };
97    let closing = get_closing_delimiter(delimiter);
98    let is_paired = delimiter != closing;
99
100    // Parse first body (pattern) with strict validation
101    let (pattern, rest1, pattern_closed) =
102        extract_delimited_content_strict(content, delimiter, closing);
103
104    // For non-paired delimiters: if pattern wasn't closed, missing closing delimiter
105    if !is_paired && !pattern_closed {
106        return Err(SubstitutionError::MissingClosingDelimiter);
107    }
108
109    // For paired delimiters: if pattern wasn't closed, missing closing delimiter
110    if is_paired && !pattern_closed {
111        return Err(SubstitutionError::MissingClosingDelimiter);
112    }
113
114    // Parse second body (replacement)
115    // For paired delimiters, the replacement may use a different delimiter than the pattern
116    // e.g., s[pattern]{replacement} is valid Perl
117    let (replacement, modifiers_str, replacement_closed) = if !is_paired {
118        // Non-paired delimiters: must have replacement section
119        if rest1.is_empty() {
120            return Err(SubstitutionError::MissingReplacement);
121        }
122
123        // Parse replacement, skipping string literals so that delimiter chars
124        // inside "foo/bar" or 'a/b' don't terminate the replacement early.
125        let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
126        (body, rest, found_closing)
127    } else {
128        // Paired pattern delimiters still allow either paired or non-paired delimiters
129        // for the replacement side (e.g. s{foo}/bar/ and s[foo]{bar}).
130        let trimmed = skip_paired_substitution_replacement_gap(rest1);
131        if let Some(rd) = trimmed.chars().next() {
132            if rd.is_ascii_alphanumeric() || rd.is_whitespace() {
133                return Err(SubstitutionError::MissingReplacement);
134            }
135            let repl_closing = get_closing_delimiter(rd);
136            extract_delimited_content_strict(trimmed, rd, repl_closing)
137        } else {
138            // No more content - missing replacement
139            return Err(SubstitutionError::MissingReplacement);
140        }
141    };
142
143    // For non-paired delimiters, must have found the closing delimiter for replacement
144    if !is_paired && !replacement_closed {
145        return Err(SubstitutionError::MissingClosingDelimiter);
146    }
147
148    // For paired delimiters, must have found the closing delimiter for replacement
149    if is_paired && !replacement_closed {
150        return Err(SubstitutionError::MissingClosingDelimiter);
151    }
152
153    // Validate modifiers strictly - reject if any invalid modifiers present
154    let modifiers = validate_substitution_modifiers(modifiers_str)
155        .map_err(SubstitutionError::InvalidModifier)?;
156
157    Ok((pattern, replacement, modifiers))
158}
159
160fn skip_paired_substitution_replacement_gap(mut text: &str) -> &str {
161    let mut comment_eligible = false;
162    loop {
163        let trimmed = text.trim_start_matches(char::is_whitespace);
164        let saw_whitespace = trimmed.len() != text.len();
165        text = trimmed;
166        comment_eligible |= saw_whitespace;
167
168        if comment_eligible && text.starts_with('#') {
169            text = after_line_comment(text);
170            comment_eligible = true;
171            continue;
172        }
173
174        return text;
175    }
176}
177
/// Return the text that follows the first `\n` or `\r` in `text`.
///
/// Only that single terminator character is consumed (for `\r\n` the result still
/// starts with `\n`). If `text` contains no line terminator, the result is empty.
fn after_line_comment(text: &str) -> &str {
    match text.find(['\n', '\r']) {
        // Both terminators are one byte wide in UTF-8.
        Some(line_end) => &text[line_end + 1..],
        None => "",
    }
}
186
/// Extract the content between `open` and `close`, reporting whether it was closed.
///
/// `text` must start with `open`; otherwise `(String::new(), text, false)` is
/// returned (or an empty rest when `text` itself is empty). Returns
/// `(content, rest, found_closing)` where `rest` is the text after the closing
/// delimiter, or empty when no closing delimiter was found.
///
/// A backslash keeps the following character in the content verbatim, so escaped
/// delimiters never open or close anything. When `open != close`, nested
/// `open`/`close` pairs are balanced and kept in the content.
fn extract_delimited_content_strict(text: &str, open: char, close: char) -> (String, &str, bool) {
    match text.chars().next() {
        None => return (String::new(), "", false),
        Some(first) if first != open => return (String::new(), text, false),
        Some(_) => {}
    }

    let nests = open != close;
    let body_start = open.len_utf8();
    let mut content = String::new();
    // Nesting level; only meaningful for paired delimiters.
    let mut level = 1usize;
    let mut after_backslash = false;

    for (offset, ch) in text[body_start..].char_indices() {
        if after_backslash {
            content.push(ch);
            after_backslash = false;
        } else if ch == '\\' {
            content.push(ch);
            after_backslash = true;
        } else if nests && ch == open {
            content.push(ch);
            level += 1;
        } else if ch == close {
            if nests {
                level -= 1;
            }
            if !nests || level == 0 {
                // This is the terminating delimiter; it is not part of the content.
                let resume = body_start + offset + ch.len_utf8();
                return (content, &text[resume..], true);
            }
            // Closes an inner pair: keep it.
            content.push(ch);
        } else {
            content.push(ch);
        }
    }

    (content, "", false)
}
245
246/// Extract pattern, replacement, and modifiers from a substitution token
247///
248/// This function parses substitution operators like s/pattern/replacement/flags
249/// and handles various delimiter forms including:
250/// - Non-paired delimiters: s/pattern/replacement/ (same delimiter for all parts)
251/// - Paired delimiters: s{pattern}{replacement} (different open/close delimiters)
252///
253/// For paired delimiters, properly handles nested delimiters within the pattern
254/// or replacement parts. Returns (pattern, replacement, modifiers) as strings.
255///
256/// Note: This function silently filters invalid modifiers. For strict validation,
257/// use `extract_substitution_parts_strict` instead.
258pub fn extract_substitution_parts(text: &str) -> (String, String, String) {
259    // Skip 's' prefix
260    let content = text.strip_prefix('s').unwrap_or(text);
261
262    // Get delimiter - content must be non-empty to have a delimiter
263    let delimiter = match content.chars().next() {
264        Some(d) => d,
265        None => return (String::new(), String::new(), String::new()),
266    };
267    if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
268        if let Some((pattern, replacement, modifiers_str)) = split_on_last_paired_delimiter(content)
269        {
270            let modifiers = extract_substitution_modifiers(&modifiers_str);
271            return (pattern, replacement, modifiers);
272        }
273
274        return (String::new(), String::new(), String::new());
275    }
276    let closing = get_closing_delimiter(delimiter);
277    let is_paired = delimiter != closing;
278
279    // Parse first body (pattern)
280    let (mut pattern, rest1, pattern_closed) = if is_paired {
281        extract_substitution_pattern_with_replacement_hint(content, delimiter, closing)
282    } else {
283        extract_delimited_content_strict(content, delimiter, closing)
284    };
285
286    // Parse second body (replacement)
287    // For paired delimiters, the replacement may use a different delimiter than the pattern
288    // e.g., s[pattern]{replacement} is valid Perl
289    let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
290        // Non-paired delimiters: manually parse the replacement, skipping string literals
291        // so that delimiter chars inside "foo/bar" or 'a/b' don't end the replacement early.
292        let (body, rest, _found) = extract_unpaired_body_skip_strings(rest1, closing);
293        (body, Cow::Borrowed(rest))
294    } else if !is_paired && !pattern_closed {
295        if let Some((fallback_pattern, fallback_replacement, fallback_modifiers)) =
296            split_unclosed_substitution_pattern(&pattern)
297        {
298            pattern = fallback_pattern;
299            (fallback_replacement, Cow::Owned(fallback_modifiers))
300        } else {
301            (String::new(), Cow::Borrowed(rest1))
302        }
303    } else if is_paired {
304        let trimmed = rest1.trim_start();
305        if let Some(rd) = trimmed.chars().next() {
306            if rd.is_ascii_alphanumeric() || rd.is_whitespace() {
307                (String::new(), Cow::Borrowed(trimmed))
308            } else {
309                let repl_closing = get_closing_delimiter(rd);
310                let (body, rest) = extract_delimited_content(trimmed, rd, repl_closing);
311                (body, Cow::Borrowed(rest))
312            }
313        } else {
314            (String::new(), Cow::Borrowed(trimmed))
315        }
316    } else {
317        (String::new(), Cow::Borrowed(rest1))
318    };
319
320    // Extract and validate only valid substitution modifiers
321    let modifiers = extract_substitution_modifiers(modifiers_str.as_ref());
322
323    (pattern, replacement, modifiers)
324}
325
326/// Extract search, replace, and modifiers from a transliteration token
327pub fn extract_transliteration_parts(text: &str) -> (String, String, String) {
328    // Skip 'tr' or 'y' prefix
329    let after_op = if let Some(stripped) = text.strip_prefix("tr") {
330        stripped
331    } else if let Some(stripped) = text.strip_prefix('y') {
332        stripped
333    } else {
334        text
335    };
336    let content = after_op.trim_start();
337
338    // Get delimiter - content must be non-empty to have a delimiter
339    let delimiter = match content.chars().next() {
340        Some(d) => d,
341        None => return (String::new(), String::new(), String::new()),
342    };
343    if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
344        return (String::new(), String::new(), String::new());
345    }
346    let closing = get_closing_delimiter(delimiter);
347    let is_paired = delimiter != closing;
348
349    // Parse first body (search pattern)
350    let (search, rest1) = extract_delimited_content(content, delimiter, closing);
351
352    // For paired delimiters, skip whitespace and allow any paired opening delimiter for the
353    // replacement list. Perl accepts forms like tr[abc]{xyz} in addition to tr[abc][xyz].
354    let rest2_owned;
355    let rest2 = if is_paired {
356        rest1.trim_start()
357    } else {
358        rest2_owned = format!("{}{}", delimiter, rest1);
359        &rest2_owned
360    };
361
362    // Parse second body (replacement pattern)
363    let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
364        // Manually parse the replacement for non-paired delimiters
365        let chars = rest1.char_indices();
366        let mut body = String::new();
367        let mut escaped = false;
368        let mut end_pos = rest1.len();
369
370        for (i, ch) in chars {
371            if escaped {
372                body.push(ch);
373                escaped = false;
374                continue;
375            }
376
377            match ch {
378                '\\' => {
379                    body.push(ch);
380                    escaped = true;
381                }
382                c if c == closing => {
383                    end_pos = i + ch.len_utf8();
384                    break;
385                }
386                _ => body.push(ch),
387            }
388        }
389
390        (body, &rest1[end_pos..])
391    } else if is_paired {
392        if let Some(repl_delimiter) = starts_with_paired_delimiter(rest2) {
393            let repl_closing = get_closing_delimiter(repl_delimiter);
394            extract_delimited_content(rest2, repl_delimiter, repl_closing)
395        } else if let Some(repl_delimiter) = rest2.chars().next() {
396            if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
397                (String::new(), rest2)
398            } else {
399                extract_delimited_content(rest2, repl_delimiter, repl_delimiter)
400            }
401        } else {
402            (String::new(), rest2)
403        }
404    } else {
405        (String::new(), rest1)
406    };
407
408    // Extract and validate only valid transliteration modifiers
409    // Security fix: Apply consistent validation for all delimiter types
410    let modifiers = modifiers_str
411        .chars()
412        .take_while(|c| c.is_ascii_alphabetic())
413        .filter(|&c| matches!(c, 'c' | 'd' | 's' | 'r'))
414        .collect();
415
416    (search, replacement, modifiers)
417}
418
419/// Extract search, replace, and modifiers from a transliteration token with strict validation.
420///
421/// Supports both `tr///` and `y///` syntax, including optional whitespace between
422/// the operator and delimiter (e.g. `tr /a/b/`).
423///
424/// # Errors
425///
426/// Returns `Err(TransliterationError::InvalidModifier(c))` if an invalid modifier
427/// character is encountered. Valid modifiers are: `c`, `d`, `s`, `r`.
428pub fn extract_transliteration_parts_strict(
429    text: &str,
430) -> Result<(String, String, String), TransliterationError> {
431    // Skip `tr` or `y` prefix, then allow optional whitespace before delimiter.
432    let after_op = if let Some(stripped) = text.strip_prefix("tr") {
433        stripped
434    } else if let Some(stripped) = text.strip_prefix('y') {
435        stripped
436    } else {
437        text
438    };
439    let content = after_op.trim_start();
440
441    // Get delimiter.
442    let delimiter = match content.chars().next() {
443        Some(d) => d,
444        None => return Err(TransliterationError::MissingDelimiter),
445    };
446    if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
447        return Err(TransliterationError::InvalidDelimiter(delimiter));
448    }
449    let closing = get_closing_delimiter(delimiter);
450    let is_paired = delimiter != closing;
451
452    // Parse first body (search).
453    let (search, rest1, search_closed) =
454        extract_delimited_content_strict(content, delimiter, closing);
455    if !search_closed {
456        return Err(TransliterationError::MissingClosingDelimiter);
457    }
458
459    // Parse second body (replacement).
460    let (replacement, modifiers_str, replacement_closed) = if !is_paired {
461        if rest1.is_empty() {
462            return Err(TransliterationError::MissingReplacement);
463        }
464        let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
465        (body, rest, found_closing)
466    } else {
467        let trimmed = rest1.trim_start();
468        if let Some(repl_delimiter) = trimmed.chars().next() {
469            // After a paired search delimiter (e.g. `{...}`), the replacement must
470            // also start with a valid non-alphanumeric, non-whitespace delimiter.
471            // An alphanumeric character here (e.g. `tr{abc}xyz`) is an invalid
472            // delimiter, not merely a missing replacement section.
473            if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
474                return Err(TransliterationError::InvalidDelimiter(repl_delimiter));
475            }
476            let repl_closing = get_closing_delimiter(repl_delimiter);
477            let (body, rest, found_closing) =
478                extract_delimited_content_strict(trimmed, repl_delimiter, repl_closing);
479            (body, rest, found_closing)
480        } else {
481            return Err(TransliterationError::MissingReplacement);
482        }
483    };
484
485    if !replacement_closed {
486        return Err(TransliterationError::MissingClosingDelimiter);
487    }
488
489    if search.is_empty() {
490        return Err(TransliterationError::MissingSearch);
491    }
492
493    // Validate transliteration modifiers strictly.
494    let mut modifiers = String::new();
495    for modifier in modifiers_str.chars().take_while(|c: &char| c.is_ascii_alphanumeric()) {
496        if matches!(modifier, 'c' | 'd' | 's' | 'r') {
497            modifiers.push(modifier);
498        } else {
499            return Err(TransliterationError::InvalidModifier(modifier));
500        }
501    }
502
503    Ok((search, replacement, modifiers))
504}
505
/// Map an opening delimiter to its closing counterpart.
///
/// The four bracket openers `(`, `[`, `{`, `<` map to `)`, `]`, `}`, `>`; every
/// other character closes itself.
fn get_closing_delimiter(open: char) -> char {
    const BRACKET_PAIRS: [(char, char); 4] = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')];

    BRACKET_PAIRS
        .iter()
        .find(|(opener, _)| *opener == open)
        .map_or(open, |&(_, closer)| closer)
}
516
/// Whether `ch` is one of the bracket characters that opens a paired delimiter:
/// `{`, `[`, `(`, or `<`.
fn is_paired_open(ch: char) -> bool {
    ['{', '[', '(', '<'].contains(&ch)
}
520
521fn starts_with_paired_delimiter(text: &str) -> Option<char> {
522    let trimmed = text.trim_start();
523    match trimmed.chars().next() {
524        Some(ch) if is_paired_open(ch) => Some(ch),
525        _ => None,
526    }
527}
528
/// Extract the content between `open` and `close` and return `(content, rest)`.
///
/// `text` must start with `open`; otherwise the content is empty and `rest` is
/// `text` unchanged. `rest` is the text after the closing delimiter, or empty when
/// the closing delimiter is never found (the content then runs to the end).
///
/// A backslash keeps the following character in the content verbatim, so escaped
/// delimiters never open or close anything. When `open != close`, nested
/// `open`/`close` pairs are balanced and kept in the content.
fn extract_delimited_content(text: &str, open: char, close: char) -> (String, &str) {
    match text.chars().next() {
        None => return (String::new(), ""),
        Some(first) if first != open => return (String::new(), text),
        Some(_) => {}
    }

    let balanced = open != close;
    let inner_start = open.len_utf8();
    let mut collected = String::new();
    // Count of currently open pairs; only consulted for paired delimiters.
    let mut open_pairs = 1usize;
    let mut take_next_verbatim = false;

    for (offset, ch) in text[inner_start..].char_indices() {
        if take_next_verbatim {
            take_next_verbatim = false;
            collected.push(ch);
            continue;
        }

        match ch {
            '\\' => take_next_verbatim = true,
            c if balanced && c == open => open_pairs += 1,
            c if c == close => {
                if balanced {
                    open_pairs -= 1;
                }
                if !balanced || open_pairs == 0 {
                    // Terminating delimiter: excluded from the content.
                    let resume = inner_start + offset + ch.len_utf8();
                    return (collected, &text[resume..]);
                }
            }
            _ => {}
        }
        collected.push(ch);
    }

    (collected, "")
}
583
/// Lookahead helper: inspect the candidate string literal that opens with `quote`
/// at byte offset `pos` of `text`.
///
/// Returns `Some((end_pos, contains_delimiter))` when a matching unescaped `quote`
/// is found before any unescaped `\n`:
///   - `end_pos` is the byte offset just after the closing `quote`;
///   - `contains_delimiter` tells whether an unescaped `delimiter` character was
///     seen between the two quotes.
///
/// Returns `None` when an unescaped newline or the end of `text` is reached first,
/// or when the position just past the opening quote is not a valid slice start.
///
/// Stopping at newlines prevents cross-statement false positives in multiline source.
fn scan_inner_string(
    text: &str,
    pos: usize,
    quote: char,
    delimiter: char,
) -> Option<(usize, bool)> {
    let body_start = pos + quote.len_utf8();
    let tail = text.get(body_start..)?;

    let mut skip_next = false;
    let mut saw_delimiter = false;

    for (offset, ch) in tail.char_indices() {
        if skip_next {
            // Character following a backslash: never significant.
            skip_next = false;
            continue;
        }
        match ch {
            '\\' => skip_next = true,
            // Inner string literals do not span lines.
            '\n' => return None,
            _ => {
                if ch == delimiter {
                    saw_delimiter = true;
                }
                if ch == quote {
                    return Some((body_start + offset + ch.len_utf8(), saw_delimiter));
                }
            }
        }
    }

    None
}
635
636/// Like `extract_unpaired_body` but skips over string literals (`"..."` / `'...'`)
637/// so that the closing delimiter character inside a string is not mistaken for the
638/// end of the replacement section.  Returns `(body, rest, found_closing)`.
639///
640/// Uses lookahead to determine whether a `'` or `"` is actually an inner string:
641/// only enters string-skip mode when the candidate string (a) has a matching closing
642/// quote on the same line AND (b) contains the closing delimiter in its content.
643/// This prevents lone apostrophes (e.g. the `'` in `s/''/'/g`) from triggering
644/// string-skip, which would cause replacement scanning to cross statement boundaries.
645fn extract_unpaired_body_skip_strings(text: &str, closing: char) -> (String, &str, bool) {
646    let mut body = String::new();
647    let mut end_pos = text.len();
648    let mut found_closing = false;
649    let mut pos = 0usize;
650    let mut escaped = false;
651
652    while let Some(ch) = text.get(pos..).and_then(|s| s.chars().next()) {
653        if escaped {
654            body.push(ch);
655            escaped = false;
656            pos += ch.len_utf8();
657            continue;
658        }
659
660        match ch {
661            '\\' => {
662                body.push(ch);
663                escaped = true;
664                pos += ch.len_utf8();
665            }
666            // Skip over string literals to avoid treating delimiter chars inside
667            // "foo/bar" or 'a/b' as the closing delimiter of the replacement.
668            //
669            // Guard: only enter string-skip when lookahead confirms a matching closing
670            // quote exists on the same line AND the content contains the closing delimiter.
671            '"' | '\'' if ch != closing => {
672                let quote = ch;
673                match scan_inner_string(text, pos, quote, closing) {
674                    Some((string_end, true)) => {
675                        // String content contains the closing delimiter → skip the string.
676                        let string_text = &text[pos..string_end];
677                        body.push_str(string_text);
678                        pos = string_end;
679                    }
680                    _ => {
681                        // No closing quote on same line, or content has no delimiter:
682                        // treat the opening quote as a literal character.
683                        body.push(ch);
684                        pos += ch.len_utf8();
685                    }
686                }
687            }
688            c if c == closing => {
689                end_pos = pos + ch.len_utf8();
690                found_closing = true;
691                break;
692            }
693            _ => {
694                body.push(ch);
695                pos += ch.len_utf8();
696            }
697        }
698    }
699
700    (body, &text[end_pos..], found_closing)
701}
702
703fn extract_substitution_pattern_with_replacement_hint(
704    text: &str,
705    open: char,
706    close: char,
707) -> (String, &str, bool) {
708    let mut chars = text.char_indices();
709
710    // Skip opening delimiter
711    if let Some((_, c)) = chars.next() {
712        if c != open {
713            return (String::new(), text, false);
714        }
715    } else {
716        return (String::new(), "", false);
717    }
718
719    let mut body = String::new();
720    let mut depth = 1usize;
721    let mut escaped = false;
722    let mut first_close_pos: Option<usize> = None;
723    let mut first_body_len: usize = 0;
724
725    for (i, ch) in chars {
726        if escaped {
727            body.push(ch);
728            escaped = false;
729            continue;
730        }
731
732        match ch {
733            '\\' => {
734                body.push(ch);
735                escaped = true;
736            }
737            c if c == open => {
738                body.push(ch);
739                depth += 1;
740            }
741            c if c == close => {
742                if depth > 1 {
743                    depth -= 1;
744                    body.push(ch);
745                    continue;
746                }
747
748                let rest = &text[i + ch.len_utf8()..];
749                if first_close_pos.is_none() {
750                    first_close_pos = Some(i + ch.len_utf8());
751                    first_body_len = body.len();
752                }
753
754                if starts_with_paired_delimiter(rest).is_some() {
755                    return (body, rest, true);
756                }
757
758                body.push(ch);
759            }
760            _ => body.push(ch),
761        }
762    }
763
764    if let Some(pos) = first_close_pos {
765        body.truncate(first_body_len);
766        return (body, &text[pos..], true);
767    }
768
769    (body, "", false)
770}
771
772fn split_unclosed_substitution_pattern(pattern: &str) -> Option<(String, String, String)> {
773    let mut escaped = false;
774
775    for (idx, ch) in pattern.char_indices() {
776        if escaped {
777            escaped = false;
778            continue;
779        }
780
781        if ch == '\\' {
782            escaped = true;
783            continue;
784        }
785
786        if is_paired_open(ch) {
787            let closing = get_closing_delimiter(ch);
788            let (replacement, rest, found_closing) =
789                extract_delimited_content_strict(&pattern[idx..], ch, closing);
790            if found_closing {
791                let leading = pattern[..idx].to_string();
792                return Some((leading, replacement, rest.to_string()));
793            }
794        }
795    }
796
797    None
798}
799
800fn split_on_last_paired_delimiter(text: &str) -> Option<(String, String, String)> {
801    let mut escaped = false;
802    let mut candidates = Vec::new();
803
804    for (idx, ch) in text.char_indices() {
805        if escaped {
806            escaped = false;
807            continue;
808        }
809
810        if ch == '\\' {
811            escaped = true;
812            continue;
813        }
814
815        if is_paired_open(ch) {
816            candidates.push((idx, ch));
817        }
818    }
819
820    for (idx, ch) in candidates.into_iter().rev() {
821        let closing = get_closing_delimiter(ch);
822        let (replacement, rest, found_closing) =
823            extract_delimited_content_strict(&text[idx..], ch, closing);
824        if found_closing {
825            let leading = text[..idx].to_string();
826            return Some((leading, replacement, rest.to_string()));
827        }
828    }
829
830    None
831}
832
/// Extract substitution modifiers from `text`, keeping only the valid ones.
///
/// Reads the leading run of ASCII letters and drops every letter that is not a
/// recognised modifier; scanning stops at the first non-letter.
///
/// Recognised modifiers:
/// - Core modifiers: g, i, m, s, x, o, e, r
/// - Charset modifiers (Perl 5.14+): a, d, l, u
/// - Additional modifiers: n (5.22+), p, c
///
/// This never panics and never fails: invalid modifiers are silently filtered out.
fn extract_substitution_modifiers(text: &str) -> String {
    const RECOGNISED: &str = "gimsxoeradlunpc";

    text.chars()
        .take_while(char::is_ascii_alphabetic)
        .filter(|&letter| RECOGNISED.contains(letter))
        .collect()
}
866
/// Validate substitution modifiers, failing on the first invalid character.
///
/// Valid Perl substitution modifiers include:
/// - Core modifiers: g, i, m, s, x, o, e, r
/// - Charset modifiers (Perl 5.14+): a, d, l, u
/// - Additional modifiers: n (5.22+), p, c
///
/// Scanning stops successfully at the first whitespace character or `;`. Any other
/// character that is not a valid modifier is reported as an error.
///
/// # Arguments
///
/// * `modifiers_str` - The raw text following the closing delimiter of the operator
///
/// # Returns
///
/// * `Ok(String)` - The modifiers read before the end of input or a terminator
/// * `Err(char)` - The first invalid character encountered
///
/// # Examples
///
/// ```ignore
/// assert!(validate_substitution_modifiers("gi").is_ok());
/// assert!(validate_substitution_modifiers("gia").is_ok());  // 'a' for ASCII mode
/// assert!(validate_substitution_modifiers("giz").is_err()); // 'z' is invalid
/// ```
pub fn validate_substitution_modifiers(modifiers_str: &str) -> Result<String, char> {
    const ALLOWED: &str = "gimsxoeradlunpc";

    let mut accepted = String::new();
    for candidate in modifiers_str.chars() {
        if ALLOWED.contains(candidate) {
            accepted.push(candidate);
        } else if candidate.is_whitespace() || candidate == ';' {
            // Whitespace (including line breaks) or a statement terminator ends the
            // modifier list.
            break;
        } else {
            // Either an unknown letter or some other character in modifier position.
            return Err(candidate);
        }
    }

    Ok(accepted)
}