Skip to main content

perl_parser_core/syntax/
quote.rs

1//! Uniform quote operator parsing for the Perl parser.
2//!
3//! This module provides consistent parsing for quote-like operators,
4//! properly extracting patterns, bodies, and modifiers.
5
6use std::borrow::Cow;
7
8/// Extract pattern and modifiers from a regex-like token (qr, m, or bare //)
9pub fn extract_regex_parts(text: &str) -> (String, String, String) {
10    // Handle different prefixes
11    let content = if let Some(stripped) = text.strip_prefix("qr") {
12        stripped
13    } else if text.starts_with('m')
14        && text.len() > 1
15        && text.chars().nth(1).is_some_and(|c| !c.is_alphabetic())
16    {
17        &text[1..]
18    } else {
19        text
20    };
21
22    // Get delimiter - content must be non-empty to have a delimiter
23    let delimiter = match content.chars().next() {
24        Some(d) => d,
25        None => return (String::new(), String::new(), String::new()),
26    };
27    let closing = get_closing_delimiter(delimiter);
28
29    // Extract body and modifiers
30    let (body, modifiers) = extract_delimited_content(content, delimiter, closing);
31
32    // Include delimiters in the pattern string for compatibility
33    let pattern = format!("{}{}{}", delimiter, body, closing);
34
35    (pattern, body, modifiers.to_string())
36}
37
/// Reasons a substitution operator (`s///`) can fail strict parsing.
///
/// Equality on this type is total (the only payload is a `char`), so `Eq` is
/// derived alongside `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubstitutionError {
    /// A character in modifier position is not an accepted substitution modifier;
    /// carries the offending character.
    InvalidModifier(char),
    /// Nothing follows the `s` operator, so there is no delimiter to parse.
    MissingDelimiter,
    /// Pattern is missing or empty (just `s/`).
    ///
    /// NOTE(review): no function in this file constructs this variant; it may be
    /// produced elsewhere or be reserved — confirm before relying on it.
    MissingPattern,
    /// Replacement section is missing (e.g., `s/pattern` without replacement part).
    MissingReplacement,
    /// A pattern or replacement section is not terminated by its closing delimiter
    /// (e.g., `s/pattern/replacement` without the final `/`).
    MissingClosingDelimiter,
}
52
/// Reasons a transliteration operator (`tr///` or `y///`) can fail strict parsing.
///
/// Equality on this type is total (the only payloads are `char`s), so `Eq` is
/// derived alongside `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransliterationError {
    /// A character in modifier position is not one of `c`, `d`, `s`, `r`;
    /// carries the offending character.
    InvalidModifier(char),
    /// The character where a delimiter is expected is ASCII alphanumeric or
    /// whitespace; carries that character.
    InvalidDelimiter(char),
    /// Nothing follows the `tr`/`y` operator, so there is no delimiter to parse.
    MissingDelimiter,
    /// Search list section is missing (empty).
    MissingSearch,
    /// Replacement list section is missing.
    MissingReplacement,
    /// A search or replacement section is not terminated by its closing delimiter.
    MissingClosingDelimiter,
}
69
70/// Extract pattern, replacement, and modifiers from a substitution token with strict validation
71///
72/// This function parses substitution operators like s/pattern/replacement/flags
73/// and handles various delimiter forms including:
74/// - Non-paired delimiters: s/pattern/replacement/ (same delimiter for all parts)
75/// - Paired delimiters: s{pattern}{replacement} (different open/close delimiters)
76///
77/// Unlike `extract_substitution_parts`, this function returns an error if invalid modifiers
78/// are present instead of silently filtering them.
79///
80/// # Errors
81///
82/// Returns `Err(SubstitutionError::InvalidModifier(c))` if an invalid modifier character is found.
83/// Valid modifiers are: g, i, m, s, x, o, e, r
84pub fn extract_substitution_parts_strict(
85    text: &str,
86) -> Result<(String, String, String), SubstitutionError> {
87    // Skip 's' prefix
88    let after_s = text.strip_prefix('s').unwrap_or(text);
89    // Perl allows whitespace between 's' and its delimiter (e.g. `s { pattern } { replacement }g`)
90    let content = after_s.trim_start();
91
92    // Get delimiter - check for missing delimiter (just 's' or 's' followed by nothing)
93    let delimiter = match content.chars().next() {
94        Some(d) => d,
95        None => return Err(SubstitutionError::MissingDelimiter),
96    };
97    let closing = get_closing_delimiter(delimiter);
98    let is_paired = delimiter != closing;
99
100    // Parse first body (pattern) with strict validation
101    let (pattern, rest1, pattern_closed) =
102        extract_delimited_content_strict(content, delimiter, closing);
103
104    // For non-paired delimiters: if pattern wasn't closed, missing closing delimiter
105    if !is_paired && !pattern_closed {
106        return Err(SubstitutionError::MissingClosingDelimiter);
107    }
108
109    // For paired delimiters: if pattern wasn't closed, missing closing delimiter
110    if is_paired && !pattern_closed {
111        return Err(SubstitutionError::MissingClosingDelimiter);
112    }
113
114    // Parse second body (replacement)
115    // For paired delimiters, the replacement may use a different delimiter than the pattern
116    // e.g., s[pattern]{replacement} is valid Perl
117    let (replacement, modifiers_str, replacement_closed) = if !is_paired {
118        // Non-paired delimiters: must have replacement section
119        if rest1.is_empty() {
120            return Err(SubstitutionError::MissingReplacement);
121        }
122
123        // Parse replacement, skipping string literals so that delimiter chars
124        // inside "foo/bar" or 'a/b' don't terminate the replacement early.
125        let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
126        (body, rest, found_closing)
127    } else {
128        // Paired pattern delimiters still allow either paired or non-paired delimiters
129        // for the replacement side (e.g. s{foo}/bar/ and s[foo]{bar}).
130        let trimmed = rest1.trim_start();
131        if let Some(rd) = trimmed.chars().next() {
132            if rd.is_ascii_alphanumeric() || rd.is_whitespace() {
133                return Err(SubstitutionError::MissingReplacement);
134            }
135            let repl_closing = get_closing_delimiter(rd);
136            extract_delimited_content_strict(trimmed, rd, repl_closing)
137        } else {
138            // No more content - missing replacement
139            return Err(SubstitutionError::MissingReplacement);
140        }
141    };
142
143    // For non-paired delimiters, must have found the closing delimiter for replacement
144    if !is_paired && !replacement_closed {
145        return Err(SubstitutionError::MissingClosingDelimiter);
146    }
147
148    // For paired delimiters, must have found the closing delimiter for replacement
149    if is_paired && !replacement_closed {
150        return Err(SubstitutionError::MissingClosingDelimiter);
151    }
152
153    // Validate modifiers strictly - reject if any invalid modifiers present
154    let modifiers = validate_substitution_modifiers(modifiers_str)
155        .map_err(SubstitutionError::InvalidModifier)?;
156
157    Ok((pattern, replacement, modifiers))
158}
159
/// Read one delimited section from the front of `text` and report whether it was terminated.
///
/// `text` is expected to begin with `open`. When `open` and `close` differ, nested
/// `open`/`close` pairs are balanced and kept in the content. A backslash protects
/// the character after it from being treated as a delimiter; both characters are
/// kept in the content.
///
/// Returns `(content, rest, found_closing)`:
/// - `content` — the text between the delimiters;
/// - `rest` — whatever follows the closing delimiter (empty when none was found);
/// - `found_closing` — `true` only if the section was properly terminated.
///
/// If `text` is empty or does not start with `open`, `content` is empty, `rest` is
/// the untouched input, and `found_closing` is `false`.
fn extract_delimited_content_strict(text: &str, open: char, close: char) -> (String, &str, bool) {
    let mut scanner = text.char_indices();
    match scanner.next() {
        None => return (String::new(), "", false),
        Some((_, lead)) if lead != open => return (String::new(), text, false),
        Some(_) => {}
    }

    let nests = open != close;
    let mut content = String::new();
    // Number of currently open pairs; only meaningful when `nests` is true.
    let mut level = 1usize;
    let mut after_backslash = false;

    for (offset, current) in scanner {
        if after_backslash {
            after_backslash = false;
            content.push(current);
        } else if current == '\\' {
            after_backslash = true;
            content.push(current);
        } else if nests && current == open {
            level += 1;
            content.push(current);
        } else if current == close {
            if !nests || level == 1 {
                // Outermost closing delimiter: everything after it is the remainder.
                return (content, &text[offset + current.len_utf8()..], true);
            }
            level -= 1;
            content.push(current);
        } else {
            content.push(current);
        }
    }

    // Ran off the end without seeing the closing delimiter.
    (content, &text[text.len()..], false)
}
218
219/// Extract pattern, replacement, and modifiers from a substitution token
220///
221/// This function parses substitution operators like s/pattern/replacement/flags
222/// and handles various delimiter forms including:
223/// - Non-paired delimiters: s/pattern/replacement/ (same delimiter for all parts)
224/// - Paired delimiters: s{pattern}{replacement} (different open/close delimiters)
225///
226/// For paired delimiters, properly handles nested delimiters within the pattern
227/// or replacement parts. Returns (pattern, replacement, modifiers) as strings.
228///
229/// Note: This function silently filters invalid modifiers. For strict validation,
230/// use `extract_substitution_parts_strict` instead.
231pub fn extract_substitution_parts(text: &str) -> (String, String, String) {
232    // Skip 's' prefix
233    let content = text.strip_prefix('s').unwrap_or(text);
234
235    // Get delimiter - content must be non-empty to have a delimiter
236    let delimiter = match content.chars().next() {
237        Some(d) => d,
238        None => return (String::new(), String::new(), String::new()),
239    };
240    if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
241        if let Some((pattern, replacement, modifiers_str)) = split_on_last_paired_delimiter(content)
242        {
243            let modifiers = extract_substitution_modifiers(&modifiers_str);
244            return (pattern, replacement, modifiers);
245        }
246
247        return (String::new(), String::new(), String::new());
248    }
249    let closing = get_closing_delimiter(delimiter);
250    let is_paired = delimiter != closing;
251
252    // Parse first body (pattern)
253    let (mut pattern, rest1, pattern_closed) = if is_paired {
254        extract_substitution_pattern_with_replacement_hint(content, delimiter, closing)
255    } else {
256        extract_delimited_content_strict(content, delimiter, closing)
257    };
258
259    // Parse second body (replacement)
260    // For paired delimiters, the replacement may use a different delimiter than the pattern
261    // e.g., s[pattern]{replacement} is valid Perl
262    let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
263        // Non-paired delimiters: manually parse the replacement, skipping string literals
264        // so that delimiter chars inside "foo/bar" or 'a/b' don't end the replacement early.
265        let (body, rest, _found) = extract_unpaired_body_skip_strings(rest1, closing);
266        (body, Cow::Borrowed(rest))
267    } else if !is_paired && !pattern_closed {
268        if let Some((fallback_pattern, fallback_replacement, fallback_modifiers)) =
269            split_unclosed_substitution_pattern(&pattern)
270        {
271            pattern = fallback_pattern;
272            (fallback_replacement, Cow::Owned(fallback_modifiers))
273        } else {
274            (String::new(), Cow::Borrowed(rest1))
275        }
276    } else if is_paired {
277        let trimmed = rest1.trim_start();
278        if let Some(rd) = trimmed.chars().next() {
279            if rd.is_ascii_alphanumeric() || rd.is_whitespace() {
280                (String::new(), Cow::Borrowed(trimmed))
281            } else {
282                let repl_closing = get_closing_delimiter(rd);
283                let (body, rest) = extract_delimited_content(trimmed, rd, repl_closing);
284                (body, Cow::Borrowed(rest))
285            }
286        } else {
287            (String::new(), Cow::Borrowed(trimmed))
288        }
289    } else {
290        (String::new(), Cow::Borrowed(rest1))
291    };
292
293    // Extract and validate only valid substitution modifiers
294    let modifiers = extract_substitution_modifiers(modifiers_str.as_ref());
295
296    (pattern, replacement, modifiers)
297}
298
299/// Extract search, replace, and modifiers from a transliteration token
300pub fn extract_transliteration_parts(text: &str) -> (String, String, String) {
301    // Skip 'tr' or 'y' prefix
302    let after_op = if let Some(stripped) = text.strip_prefix("tr") {
303        stripped
304    } else if let Some(stripped) = text.strip_prefix('y') {
305        stripped
306    } else {
307        text
308    };
309    let content = after_op.trim_start();
310
311    // Get delimiter - content must be non-empty to have a delimiter
312    let delimiter = match content.chars().next() {
313        Some(d) => d,
314        None => return (String::new(), String::new(), String::new()),
315    };
316    if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
317        return (String::new(), String::new(), String::new());
318    }
319    let closing = get_closing_delimiter(delimiter);
320    let is_paired = delimiter != closing;
321
322    // Parse first body (search pattern)
323    let (search, rest1) = extract_delimited_content(content, delimiter, closing);
324
325    // For paired delimiters, skip whitespace and allow any paired opening delimiter for the
326    // replacement list. Perl accepts forms like tr[abc]{xyz} in addition to tr[abc][xyz].
327    let rest2_owned;
328    let rest2 = if is_paired {
329        rest1.trim_start()
330    } else {
331        rest2_owned = format!("{}{}", delimiter, rest1);
332        &rest2_owned
333    };
334
335    // Parse second body (replacement pattern)
336    let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
337        // Manually parse the replacement for non-paired delimiters
338        let chars = rest1.char_indices();
339        let mut body = String::new();
340        let mut escaped = false;
341        let mut end_pos = rest1.len();
342
343        for (i, ch) in chars {
344            if escaped {
345                body.push(ch);
346                escaped = false;
347                continue;
348            }
349
350            match ch {
351                '\\' => {
352                    body.push(ch);
353                    escaped = true;
354                }
355                c if c == closing => {
356                    end_pos = i + ch.len_utf8();
357                    break;
358                }
359                _ => body.push(ch),
360            }
361        }
362
363        (body, &rest1[end_pos..])
364    } else if is_paired {
365        if let Some(repl_delimiter) = starts_with_paired_delimiter(rest2) {
366            let repl_closing = get_closing_delimiter(repl_delimiter);
367            extract_delimited_content(rest2, repl_delimiter, repl_closing)
368        } else if let Some(repl_delimiter) = rest2.chars().next() {
369            if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
370                (String::new(), rest2)
371            } else {
372                extract_delimited_content(rest2, repl_delimiter, repl_delimiter)
373            }
374        } else {
375            (String::new(), rest2)
376        }
377    } else {
378        (String::new(), rest1)
379    };
380
381    // Extract and validate only valid transliteration modifiers
382    // Security fix: Apply consistent validation for all delimiter types
383    let modifiers = modifiers_str
384        .chars()
385        .take_while(|c| c.is_ascii_alphabetic())
386        .filter(|&c| matches!(c, 'c' | 'd' | 's' | 'r'))
387        .collect();
388
389    (search, replacement, modifiers)
390}
391
392/// Extract search, replace, and modifiers from a transliteration token with strict validation.
393///
394/// Supports both `tr///` and `y///` syntax, including optional whitespace between
395/// the operator and delimiter (e.g. `tr /a/b/`).
396///
397/// # Errors
398///
399/// Returns `Err(TransliterationError::InvalidModifier(c))` if an invalid modifier
400/// character is encountered. Valid modifiers are: `c`, `d`, `s`, `r`.
401pub fn extract_transliteration_parts_strict(
402    text: &str,
403) -> Result<(String, String, String), TransliterationError> {
404    // Skip `tr` or `y` prefix, then allow optional whitespace before delimiter.
405    let after_op = if let Some(stripped) = text.strip_prefix("tr") {
406        stripped
407    } else if let Some(stripped) = text.strip_prefix('y') {
408        stripped
409    } else {
410        text
411    };
412    let content = after_op.trim_start();
413
414    // Get delimiter.
415    let delimiter = match content.chars().next() {
416        Some(d) => d,
417        None => return Err(TransliterationError::MissingDelimiter),
418    };
419    if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
420        return Err(TransliterationError::InvalidDelimiter(delimiter));
421    }
422    let closing = get_closing_delimiter(delimiter);
423    let is_paired = delimiter != closing;
424
425    // Parse first body (search).
426    let (search, rest1, search_closed) =
427        extract_delimited_content_strict(content, delimiter, closing);
428    if !search_closed {
429        return Err(TransliterationError::MissingClosingDelimiter);
430    }
431
432    // Parse second body (replacement).
433    let (replacement, modifiers_str, replacement_closed) = if !is_paired {
434        if rest1.is_empty() {
435            return Err(TransliterationError::MissingReplacement);
436        }
437        let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
438        (body, rest, found_closing)
439    } else {
440        let trimmed = rest1.trim_start();
441        if let Some(repl_delimiter) = trimmed.chars().next() {
442            // After a paired search delimiter (e.g. `{...}`), the replacement must
443            // also start with a valid non-alphanumeric, non-whitespace delimiter.
444            // An alphanumeric character here (e.g. `tr{abc}xyz`) is an invalid
445            // delimiter, not merely a missing replacement section.
446            if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
447                return Err(TransliterationError::InvalidDelimiter(repl_delimiter));
448            }
449            let repl_closing = get_closing_delimiter(repl_delimiter);
450            let (body, rest, found_closing) =
451                extract_delimited_content_strict(trimmed, repl_delimiter, repl_closing);
452            (body, rest, found_closing)
453        } else {
454            return Err(TransliterationError::MissingReplacement);
455        }
456    };
457
458    if !replacement_closed {
459        return Err(TransliterationError::MissingClosingDelimiter);
460    }
461
462    if search.is_empty() {
463        return Err(TransliterationError::MissingSearch);
464    }
465
466    // Validate transliteration modifiers strictly.
467    let mut modifiers = String::new();
468    for modifier in modifiers_str.chars().take_while(|c: &char| c.is_ascii_alphanumeric()) {
469        if matches!(modifier, 'c' | 'd' | 's' | 'r') {
470            modifiers.push(modifier);
471        } else {
472            return Err(TransliterationError::InvalidModifier(modifier));
473        }
474    }
475
476    Ok((search, replacement, modifiers))
477}
478
/// Map an opening delimiter to the character that closes it.
///
/// The four bracket openers `(`, `[`, `{` and `<` map to their mirrored
/// counterparts; every other character closes itself.
fn get_closing_delimiter(open: char) -> char {
    const BRACKETS: [(char, char); 4] = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')];

    BRACKETS
        .iter()
        .find(|&&(opener, _)| opener == open)
        .map_or(open, |&(_, closer)| closer)
}
489
/// Whether `ch` is one of the four bracket characters (`{`, `[`, `(`, `<`) that
/// open a paired delimiter.
fn is_paired_open(ch: char) -> bool {
    ['{', '[', '(', '<'].contains(&ch)
}
493
494fn starts_with_paired_delimiter(text: &str) -> Option<char> {
495    let trimmed = text.trim_start();
496    match trimmed.chars().next() {
497        Some(ch) if is_paired_open(ch) => Some(ch),
498        _ => None,
499    }
500}
501
502/// Extract content between delimiters and return (content, rest)
503fn extract_delimited_content(text: &str, open: char, close: char) -> (String, &str) {
504    let mut chars = text.char_indices();
505    let is_paired = open != close;
506
507    // Skip opening delimiter
508    if let Some((_, c)) = chars.next() {
509        if c != open {
510            return (String::new(), text);
511        }
512    } else {
513        return (String::new(), "");
514    }
515
516    let mut body = String::new();
517    let mut depth = if is_paired { 1 } else { 0 };
518    let mut escaped = false;
519    let mut end_pos = text.len();
520
521    for (i, ch) in chars {
522        if escaped {
523            body.push(ch);
524            escaped = false;
525            continue;
526        }
527
528        match ch {
529            '\\' => {
530                body.push(ch);
531                escaped = true;
532            }
533            c if c == open && is_paired => {
534                body.push(ch);
535                depth += 1;
536            }
537            c if c == close => {
538                if is_paired {
539                    depth -= 1;
540                    if depth == 0 {
541                        end_pos = i + ch.len_utf8();
542                        break;
543                    }
544                    body.push(ch);
545                } else {
546                    end_pos = i + ch.len_utf8();
547                    break;
548                }
549            }
550            _ => body.push(ch),
551        }
552    }
553
554    (body, &text[end_pos..])
555}
556
/// Lookahead helper: inspect the candidate string literal that opens with `quote`
/// at byte offset `pos` of `text`.
///
/// Scanning starts just after the opening `quote`. A backslash skips the character
/// that follows it, so escaped quotes and escaped delimiters are ignored.
///
/// Returns `Some((end_pos, contains_delimiter))` when an unescaped matching `quote`
/// is found before any unescaped newline:
/// - `end_pos` is the byte offset just past that closing `quote`;
/// - `contains_delimiter` is `true` if an unescaped `delimiter` character was seen
///   between the quotes.
///
/// Returns `None` when an unescaped newline or the end of `text` is reached first,
/// or when the position after the opening quote is not a valid offset into `text`.
///
/// Stopping at newlines keeps a stray quote from pairing up with one in a later
/// statement of multiline source.
fn scan_inner_string(
    text: &str,
    pos: usize,
    quote: char,
    delimiter: char,
) -> Option<(usize, bool)> {
    let body_start = pos + quote.len_utf8();
    let tail = text.get(body_start..)?;

    let mut saw_delimiter = false;
    let mut after_backslash = false;

    for (offset, current) in tail.char_indices() {
        if after_backslash {
            after_backslash = false;
            continue;
        }
        match current {
            '\\' => after_backslash = true,
            // Inner string literals don't span lines.
            '\n' => return None,
            _ => {
                if current == delimiter {
                    saw_delimiter = true;
                }
                if current == quote {
                    let end = body_start + offset + current.len_utf8();
                    return Some((end, saw_delimiter));
                }
            }
        }
    }

    None
}
608
609/// Like `extract_unpaired_body` but skips over string literals (`"..."` / `'...'`)
610/// so that the closing delimiter character inside a string is not mistaken for the
611/// end of the replacement section.  Returns `(body, rest, found_closing)`.
612///
613/// Uses lookahead to determine whether a `'` or `"` is actually an inner string:
614/// only enters string-skip mode when the candidate string (a) has a matching closing
615/// quote on the same line AND (b) contains the closing delimiter in its content.
616/// This prevents lone apostrophes (e.g. the `'` in `s/''/'/g`) from triggering
617/// string-skip, which would cause replacement scanning to cross statement boundaries.
618fn extract_unpaired_body_skip_strings(text: &str, closing: char) -> (String, &str, bool) {
619    let mut body = String::new();
620    let mut end_pos = text.len();
621    let mut found_closing = false;
622    let mut pos = 0usize;
623    let mut escaped = false;
624
625    while let Some(ch) = text.get(pos..).and_then(|s| s.chars().next()) {
626        if escaped {
627            body.push(ch);
628            escaped = false;
629            pos += ch.len_utf8();
630            continue;
631        }
632
633        match ch {
634            '\\' => {
635                body.push(ch);
636                escaped = true;
637                pos += ch.len_utf8();
638            }
639            // Skip over string literals to avoid treating delimiter chars inside
640            // "foo/bar" or 'a/b' as the closing delimiter of the replacement.
641            //
642            // Guard: only enter string-skip when lookahead confirms a matching closing
643            // quote exists on the same line AND the content contains the closing delimiter.
644            '"' | '\'' if ch != closing => {
645                let quote = ch;
646                match scan_inner_string(text, pos, quote, closing) {
647                    Some((string_end, true)) => {
648                        // String content contains the closing delimiter → skip the string.
649                        let string_text = &text[pos..string_end];
650                        body.push_str(string_text);
651                        pos = string_end;
652                    }
653                    _ => {
654                        // No closing quote on same line, or content has no delimiter:
655                        // treat the opening quote as a literal character.
656                        body.push(ch);
657                        pos += ch.len_utf8();
658                    }
659                }
660            }
661            c if c == closing => {
662                end_pos = pos + ch.len_utf8();
663                found_closing = true;
664                break;
665            }
666            _ => {
667                body.push(ch);
668                pos += ch.len_utf8();
669            }
670        }
671    }
672
673    (body, &text[end_pos..], found_closing)
674}
675
676fn extract_substitution_pattern_with_replacement_hint(
677    text: &str,
678    open: char,
679    close: char,
680) -> (String, &str, bool) {
681    let mut chars = text.char_indices();
682
683    // Skip opening delimiter
684    if let Some((_, c)) = chars.next() {
685        if c != open {
686            return (String::new(), text, false);
687        }
688    } else {
689        return (String::new(), "", false);
690    }
691
692    let mut body = String::new();
693    let mut depth = 1usize;
694    let mut escaped = false;
695    let mut first_close_pos: Option<usize> = None;
696    let mut first_body_len: usize = 0;
697
698    for (i, ch) in chars {
699        if escaped {
700            body.push(ch);
701            escaped = false;
702            continue;
703        }
704
705        match ch {
706            '\\' => {
707                body.push(ch);
708                escaped = true;
709            }
710            c if c == open => {
711                body.push(ch);
712                depth += 1;
713            }
714            c if c == close => {
715                if depth > 1 {
716                    depth -= 1;
717                    body.push(ch);
718                    continue;
719                }
720
721                let rest = &text[i + ch.len_utf8()..];
722                if first_close_pos.is_none() {
723                    first_close_pos = Some(i + ch.len_utf8());
724                    first_body_len = body.len();
725                }
726
727                if starts_with_paired_delimiter(rest).is_some() {
728                    return (body, rest, true);
729                }
730
731                body.push(ch);
732            }
733            _ => body.push(ch),
734        }
735    }
736
737    if let Some(pos) = first_close_pos {
738        body.truncate(first_body_len);
739        return (body, &text[pos..], true);
740    }
741
742    (body, "", false)
743}
744
745fn split_unclosed_substitution_pattern(pattern: &str) -> Option<(String, String, String)> {
746    let mut escaped = false;
747
748    for (idx, ch) in pattern.char_indices() {
749        if escaped {
750            escaped = false;
751            continue;
752        }
753
754        if ch == '\\' {
755            escaped = true;
756            continue;
757        }
758
759        if is_paired_open(ch) {
760            let closing = get_closing_delimiter(ch);
761            let (replacement, rest, found_closing) =
762                extract_delimited_content_strict(&pattern[idx..], ch, closing);
763            if found_closing {
764                let leading = pattern[..idx].to_string();
765                return Some((leading, replacement, rest.to_string()));
766            }
767        }
768    }
769
770    None
771}
772
773fn split_on_last_paired_delimiter(text: &str) -> Option<(String, String, String)> {
774    let mut escaped = false;
775    let mut candidates = Vec::new();
776
777    for (idx, ch) in text.char_indices() {
778        if escaped {
779            escaped = false;
780            continue;
781        }
782
783        if ch == '\\' {
784            escaped = true;
785            continue;
786        }
787
788        if is_paired_open(ch) {
789            candidates.push((idx, ch));
790        }
791    }
792
793    for (idx, ch) in candidates.into_iter().rev() {
794        let closing = get_closing_delimiter(ch);
795        let (replacement, rest, found_closing) =
796            extract_delimited_content_strict(&text[idx..], ch, closing);
797        if found_closing {
798            let leading = text[..idx].to_string();
799            return Some((leading, replacement, rest.to_string()));
800        }
801    }
802
803    None
804}
805
/// Leniently pick the recognised substitution modifiers out of `text`.
///
/// Only the leading run of ASCII letters is considered; scanning stops at the first
/// character that is not an ASCII letter. Within that run, letters that are not
/// recognised modifiers are dropped rather than reported.
///
/// Recognised modifiers:
/// - Core modifiers: g, i, m, s, x, o, e, r
/// - Charset modifiers (Perl 5.14+): a, d, l, u
/// - Additional modifiers: n (5.22+), p, c
fn extract_substitution_modifiers(text: &str) -> String {
    const RECOGNISED: &str = "gimsxoeradlunpc";

    let mut kept = String::new();
    for flag in text.chars() {
        if !flag.is_ascii_alphabetic() {
            break;
        }
        if RECOGNISED.contains(flag) {
            kept.push(flag);
        }
    }
    kept
}
839
/// Strictly validate the modifier string that follows a substitution operator.
///
/// Accepted modifiers:
/// - Core modifiers: g, i, m, s, x, o, e, r
/// - Charset modifiers (Perl 5.14+): a, d, l, u
/// - Additional modifiers: n (5.22+), p, c
///
/// Scanning stops successfully at the first whitespace character or `;`, which
/// mark the end of the modifier run. Any other character that is not an accepted
/// modifier is an error.
///
/// # Arguments
///
/// * `modifiers_str` - The raw text following the substitution's final delimiter
///
/// # Returns
///
/// * `Ok(String)` - The accepted modifiers, in input order
/// * `Err(char)` - The first character that is neither an accepted modifier nor a
///   terminator
///
/// # Examples
///
/// ```ignore
/// assert!(validate_substitution_modifiers("gi").is_ok());
/// assert!(validate_substitution_modifiers("gia").is_ok());  // 'a' for ASCII mode
/// assert!(validate_substitution_modifiers("giz").is_err()); // 'z' is invalid
/// ```
pub fn validate_substitution_modifiers(modifiers_str: &str) -> Result<String, char> {
    const ACCEPTED: &str = "gimsxoeradlunpc";

    let mut accepted = String::new();
    for flag in modifiers_str.chars() {
        if ACCEPTED.contains(flag) {
            accepted.push(flag);
        } else if flag.is_whitespace() || flag == ';' {
            // End of the modifier run; whatever follows is not our concern.
            break;
        } else {
            // Unknown letter, digit or punctuation in modifier position.
            return Err(flag);
        }
    }

    Ok(accepted)
}