// perl_parser_core/syntax/quote.rs

//! Uniform quote operator parsing for the Perl parser.
//!
//! This module provides consistent parsing for quote-like operators,
//! properly extracting patterns, bodies, and modifiers.

use std::borrow::Cow;

8/// Extract pattern and modifiers from a regex-like token (qr, m, or bare //)
9pub fn extract_regex_parts(text: &str) -> (String, String, String) {
10    // Handle different prefixes
11    let content = if let Some(stripped) = text.strip_prefix("qr") {
12        stripped
13    } else if text.starts_with('m')
14        && text.len() > 1
15        && text.chars().nth(1).is_some_and(|c| !c.is_alphabetic())
16    {
17        &text[1..]
18    } else {
19        text
20    };
21
22    // Get delimiter - content must be non-empty to have a delimiter
23    let delimiter = match content.chars().next() {
24        Some(d) => d,
25        None => return (String::new(), String::new(), String::new()),
26    };
27    let closing = get_closing_delimiter(delimiter);
28
29    // Extract body and modifiers
30    let (body, modifiers) = extract_delimited_content(content, delimiter, closing);
31
32    // Include delimiters in the pattern string for compatibility
33    let pattern = format!("{}{}{}", delimiter, body, closing);
34
35    (pattern, body, modifiers.to_string())
36}
37
/// Error type for substitution operator (`s///`) parsing failures.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubstitutionError {
    /// Invalid modifier character found after the replacement section.
    InvalidModifier(char),
    /// Missing delimiter after `s` (nothing follows the operator).
    MissingDelimiter,
    /// Pattern is missing or empty (just `s/`).
    ///
    /// Not returned by `extract_substitution_parts_strict` as currently written.
    MissingPattern,
    /// Replacement section is missing: nothing follows the closed pattern section
    /// (e.g. `s/pattern/` or `s{pattern}`).
    MissingReplacement,
    /// A closing delimiter is missing, either after the pattern (e.g. `s/pattern`)
    /// or after the replacement (e.g. `s/pattern/replacement` without the final `/`).
    MissingClosingDelimiter,
}

/// Error type for transliteration operator (`tr///` / `y///`) parsing failures.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransliterationError {
    /// Invalid modifier character found after the replacement list.
    InvalidModifier(char),
    /// Invalid (ASCII alphanumeric or whitespace) delimiter after `tr`/`y`,
    /// or at the start of the replacement list following a paired search list.
    InvalidDelimiter(char),
    /// Missing delimiter after `tr`/`y` (nothing follows the operator).
    MissingDelimiter,
    /// Search list section is empty.
    MissingSearch,
    /// Replacement list section is missing (nothing follows the closed search list).
    MissingReplacement,
    /// Closing delimiter is missing after the search or replacement list.
    MissingClosingDelimiter,
}

70/// Extract pattern, replacement, and modifiers from a substitution token with strict validation
71///
72/// This function parses substitution operators like s/pattern/replacement/flags
73/// and handles various delimiter forms including:
74/// - Non-paired delimiters: s/pattern/replacement/ (same delimiter for all parts)
75/// - Paired delimiters: s{pattern}{replacement} (different open/close delimiters)
76///
77/// Unlike `extract_substitution_parts`, this function returns an error if invalid modifiers
78/// are present instead of silently filtering them.
79///
80/// # Errors
81///
82/// Returns `Err(SubstitutionError::InvalidModifier(c))` if an invalid modifier character is found.
83/// Valid modifiers are: g, i, m, s, x, o, e, r
84pub fn extract_substitution_parts_strict(
85    text: &str,
86) -> Result<(String, String, String), SubstitutionError> {
87    // Skip 's' prefix
88    let after_s = text.strip_prefix('s').unwrap_or(text);
89    // Perl allows whitespace between 's' and its delimiter (e.g. `s { pattern } { replacement }g`)
90    let content = after_s.trim_start();
91
92    // Get delimiter - check for missing delimiter (just 's' or 's' followed by nothing)
93    let delimiter = match content.chars().next() {
94        Some(d) => d,
95        None => return Err(SubstitutionError::MissingDelimiter),
96    };
97    let closing = get_closing_delimiter(delimiter);
98    let is_paired = delimiter != closing;
99
100    // Parse first body (pattern) with strict validation
101    let (pattern, rest1, pattern_closed) =
102        extract_delimited_content_strict(content, delimiter, closing);
103
104    // For non-paired delimiters: if pattern wasn't closed, missing closing delimiter
105    if !is_paired && !pattern_closed {
106        return Err(SubstitutionError::MissingClosingDelimiter);
107    }
108
109    // For paired delimiters: if pattern wasn't closed, missing closing delimiter
110    if is_paired && !pattern_closed {
111        return Err(SubstitutionError::MissingClosingDelimiter);
112    }
113
114    // Parse second body (replacement)
115    // For paired delimiters, the replacement may use a different delimiter than the pattern
116    // e.g., s[pattern]{replacement} is valid Perl
117    let (replacement, modifiers_str, replacement_closed) = if !is_paired {
118        // Non-paired delimiters: must have replacement section
119        if rest1.is_empty() {
120            return Err(SubstitutionError::MissingReplacement);
121        }
122
123        // Parse replacement, skipping string literals so that delimiter chars
124        // inside "foo/bar" or 'a/b' don't terminate the replacement early.
125        let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
126        (body, rest, found_closing)
127    } else {
128        // Paired pattern delimiters still allow either paired or non-paired delimiters
129        // for the replacement side (e.g. s{foo}/bar/ and s[foo]{bar}).
130        let trimmed = skip_paired_replacement_gap(rest1);
131        if let Some(rd) = trimmed.chars().next() {
132            let repl_closing = get_closing_delimiter(rd);
133            extract_delimited_content_strict(trimmed, rd, repl_closing)
134        } else {
135            // No more content - missing replacement
136            return Err(SubstitutionError::MissingReplacement);
137        }
138    };
139
140    // For non-paired delimiters, must have found the closing delimiter for replacement
141    if !is_paired && !replacement_closed {
142        return Err(SubstitutionError::MissingClosingDelimiter);
143    }
144
145    // For paired delimiters, must have found the closing delimiter for replacement
146    if is_paired && !replacement_closed {
147        return Err(SubstitutionError::MissingClosingDelimiter);
148    }
149
150    // Validate modifiers strictly - reject if any invalid modifiers present
151    let modifiers = validate_substitution_modifiers(modifiers_str)
152        .map_err(SubstitutionError::InvalidModifier)?;
153
154    Ok((pattern, replacement, modifiers))
155}
156
157fn skip_paired_replacement_gap(mut text: &str) -> &str {
158    let mut comment_eligible = false;
159    loop {
160        let trimmed = text.trim_start_matches(char::is_whitespace);
161        let saw_whitespace = trimmed.len() != text.len();
162        text = trimmed;
163        comment_eligible |= saw_whitespace;
164
165        if comment_eligible && text.starts_with('#') {
166            text = after_line_comment(text);
167            comment_eligible = true;
168            continue;
169        }
170
171        return text;
172    }
173}
174
/// Return the text following the first `\n` or `\r` in `text`.
///
/// Only the single terminator character is consumed, so for a `\r\n` ending the
/// result still starts with `\n`. Returns `""` when `text` has no line terminator.
fn after_line_comment(text: &str) -> &str {
    // Both terminators are one byte long, so `idx + 1` is always a char boundary.
    text.find(['\n', '\r']).map_or("", |idx| &text[idx + 1..])
}

/// Extract content between delimiters with strict tracking of whether closing was found.
///
/// `text` must start with `open`. Backslash escapes are copied into the body verbatim
/// (backslash included) and the escaped character never acts as a delimiter. When
/// `open != close` the delimiters nest, and only the `close` matching the initial
/// `open` ends the section.
///
/// Returns `(content, rest, found_closing)`:
/// - `content`: the text between the delimiters (everything scanned if unterminated),
/// - `rest`: the text after the closing delimiter (`""` if unterminated),
/// - `found_closing`: whether the closing delimiter was seen.
///
/// If `text` does not start with `open`, returns `("", text, false)`; if `text` is
/// empty, returns `("", "", false)`.
fn extract_delimited_content_strict(text: &str, open: char, close: char) -> (String, &str, bool) {
    let mut chars = text.char_indices();
    let is_paired = open != close;

    // Skip opening delimiter.
    match chars.next() {
        Some((_, c)) if c == open => {}
        Some(_) => return (String::new(), text, false),
        None => return (String::new(), "", false),
    }

    let mut body = String::new();
    // Nesting depth; only consulted for paired delimiters.
    let mut depth = 1usize;
    let mut escaped = false;

    for (i, ch) in chars {
        if escaped {
            body.push(ch);
            escaped = false;
            continue;
        }

        match ch {
            '\\' => {
                body.push(ch);
                escaped = true;
            }
            c if is_paired && c == open => {
                body.push(ch);
                depth += 1;
            }
            c if c == close => {
                if is_paired && depth > 1 {
                    // Closes a nested pair: part of the body.
                    depth -= 1;
                    body.push(ch);
                } else {
                    return (body, &text[i + ch.len_utf8()..], true);
                }
            }
            _ => body.push(ch),
        }
    }

    (body, &text[text.len()..], false)
}

243/// Extract pattern, replacement, and modifiers from a substitution token
244///
245/// This function parses substitution operators like s/pattern/replacement/flags
246/// and handles various delimiter forms including:
247/// - Non-paired delimiters: s/pattern/replacement/ (same delimiter for all parts)
248/// - Paired delimiters: s{pattern}{replacement} (different open/close delimiters)
249///
250/// For paired delimiters, properly handles nested delimiters within the pattern
251/// or replacement parts. Returns (pattern, replacement, modifiers) as strings.
252///
253/// Note: This function silently filters invalid modifiers. For strict validation,
254/// use `extract_substitution_parts_strict` instead.
255pub fn extract_substitution_parts(text: &str) -> (String, String, String) {
256    // Skip 's' prefix
257    let content = text.strip_prefix('s').unwrap_or(text);
258
259    // Get delimiter - content must be non-empty to have a delimiter
260    let delimiter = match content.chars().next() {
261        Some(d) => d,
262        None => return (String::new(), String::new(), String::new()),
263    };
264    if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
265        if let Some((pattern, replacement, modifiers_str)) = split_on_last_paired_delimiter(content)
266        {
267            let modifiers = extract_substitution_modifiers(&modifiers_str);
268            return (pattern, replacement, modifiers);
269        }
270
271        return (String::new(), String::new(), String::new());
272    }
273    let closing = get_closing_delimiter(delimiter);
274    let is_paired = delimiter != closing;
275
276    // Parse first body (pattern)
277    let (mut pattern, rest1, pattern_closed) = if is_paired {
278        extract_substitution_pattern_with_replacement_hint(content, delimiter, closing)
279    } else {
280        extract_delimited_content_strict(content, delimiter, closing)
281    };
282
283    // Parse second body (replacement)
284    // For paired delimiters, the replacement may use a different delimiter than the pattern
285    // e.g., s[pattern]{replacement} is valid Perl
286    let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
287        // Non-paired delimiters: manually parse the replacement, skipping string literals
288        // so that delimiter chars inside "foo/bar" or 'a/b' don't end the replacement early.
289        let (body, rest, _found) = extract_unpaired_body_skip_strings(rest1, closing);
290        (body, Cow::Borrowed(rest))
291    } else if !is_paired && !pattern_closed {
292        if let Some((fallback_pattern, fallback_replacement, fallback_modifiers)) =
293            split_unclosed_substitution_pattern(&pattern)
294        {
295            pattern = fallback_pattern;
296            (fallback_replacement, Cow::Owned(fallback_modifiers))
297        } else {
298            (String::new(), Cow::Borrowed(rest1))
299        }
300    } else if is_paired {
301        let trimmed = rest1.trim_start();
302        if let Some(rd) = trimmed.chars().next() {
303            if rd.is_ascii_alphanumeric() || rd.is_whitespace() {
304                (String::new(), Cow::Borrowed(trimmed))
305            } else {
306                let repl_closing = get_closing_delimiter(rd);
307                let (body, rest) = extract_delimited_content(trimmed, rd, repl_closing);
308                (body, Cow::Borrowed(rest))
309            }
310        } else {
311            (String::new(), Cow::Borrowed(trimmed))
312        }
313    } else {
314        (String::new(), Cow::Borrowed(rest1))
315    };
316
317    // Extract and validate only valid substitution modifiers
318    let modifiers = extract_substitution_modifiers(modifiers_str.as_ref());
319
320    (pattern, replacement, modifiers)
321}
322
323/// Extract search, replace, and modifiers from a transliteration token
324pub fn extract_transliteration_parts(text: &str) -> (String, String, String) {
325    // Skip 'tr' or 'y' prefix
326    let after_op = if let Some(stripped) = text.strip_prefix("tr") {
327        stripped
328    } else if let Some(stripped) = text.strip_prefix('y') {
329        stripped
330    } else {
331        text
332    };
333    let content = after_op.trim_start();
334
335    // Get delimiter - content must be non-empty to have a delimiter
336    let delimiter = match content.chars().next() {
337        Some(d) => d,
338        None => return (String::new(), String::new(), String::new()),
339    };
340    if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
341        return (String::new(), String::new(), String::new());
342    }
343    let closing = get_closing_delimiter(delimiter);
344    let is_paired = delimiter != closing;
345
346    // Parse first body (search pattern)
347    let (search, rest1) = extract_delimited_content(content, delimiter, closing);
348
349    // For paired delimiters, skip whitespace and allow any paired opening delimiter for the
350    // replacement list. Perl accepts forms like tr[abc]{xyz} in addition to tr[abc][xyz].
351    let rest2_owned;
352    let rest2 = if is_paired {
353        rest1.trim_start()
354    } else {
355        rest2_owned = format!("{}{}", delimiter, rest1);
356        &rest2_owned
357    };
358
359    // Parse second body (replacement pattern)
360    let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
361        // Manually parse the replacement for non-paired delimiters
362        let chars = rest1.char_indices();
363        let mut body = String::new();
364        let mut escaped = false;
365        let mut end_pos = rest1.len();
366
367        for (i, ch) in chars {
368            if escaped {
369                body.push(ch);
370                escaped = false;
371                continue;
372            }
373
374            match ch {
375                '\\' => {
376                    body.push(ch);
377                    escaped = true;
378                }
379                c if c == closing => {
380                    end_pos = i + ch.len_utf8();
381                    break;
382                }
383                _ => body.push(ch),
384            }
385        }
386
387        (body, &rest1[end_pos..])
388    } else if is_paired {
389        if let Some(repl_delimiter) = starts_with_paired_delimiter(rest2) {
390            let repl_closing = get_closing_delimiter(repl_delimiter);
391            extract_delimited_content(rest2, repl_delimiter, repl_closing)
392        } else if let Some(repl_delimiter) = rest2.chars().next() {
393            if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
394                (String::new(), rest2)
395            } else {
396                extract_delimited_content(rest2, repl_delimiter, repl_delimiter)
397            }
398        } else {
399            (String::new(), rest2)
400        }
401    } else {
402        (String::new(), rest1)
403    };
404
405    // Extract and validate only valid transliteration modifiers
406    // Security fix: Apply consistent validation for all delimiter types
407    let modifiers = modifiers_str
408        .chars()
409        .take_while(|c| c.is_ascii_alphabetic())
410        .filter(|&c| matches!(c, 'c' | 'd' | 's' | 'r'))
411        .collect();
412
413    (search, replacement, modifiers)
414}
415
416/// Extract search, replace, and modifiers from a transliteration token with strict validation.
417///
418/// Supports both `tr///` and `y///` syntax, including optional whitespace between
419/// the operator and delimiter (e.g. `tr /a/b/`).
420///
421/// # Errors
422///
423/// Returns `Err(TransliterationError::InvalidModifier(c))` if an invalid modifier
424/// character is encountered. Valid modifiers are: `c`, `d`, `s`, `r`.
425pub fn extract_transliteration_parts_strict(
426    text: &str,
427) -> Result<(String, String, String), TransliterationError> {
428    // Skip `tr` or `y` prefix, then allow optional whitespace before delimiter.
429    let after_op = if let Some(stripped) = text.strip_prefix("tr") {
430        stripped
431    } else if let Some(stripped) = text.strip_prefix('y') {
432        stripped
433    } else {
434        text
435    };
436    let content = after_op.trim_start();
437
438    // Get delimiter.
439    let delimiter = match content.chars().next() {
440        Some(d) => d,
441        None => return Err(TransliterationError::MissingDelimiter),
442    };
443    if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
444        return Err(TransliterationError::InvalidDelimiter(delimiter));
445    }
446    let closing = get_closing_delimiter(delimiter);
447    let is_paired = delimiter != closing;
448
449    // Parse first body (search).
450    let (search, rest1, search_closed) =
451        extract_delimited_content_strict(content, delimiter, closing);
452    if !search_closed {
453        return Err(TransliterationError::MissingClosingDelimiter);
454    }
455
456    // Parse second body (replacement).
457    let (replacement, modifiers_str, replacement_closed) = if !is_paired {
458        if rest1.is_empty() {
459            return Err(TransliterationError::MissingReplacement);
460        }
461        let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
462        (body, rest, found_closing)
463    } else {
464        let trimmed = skip_paired_replacement_gap(rest1);
465        if let Some(repl_delimiter) = trimmed.chars().next() {
466            // After a paired search delimiter (e.g. `{...}`), the replacement must
467            // also start with a valid non-alphanumeric, non-whitespace delimiter.
468            // An alphanumeric character here (e.g. `tr{abc}xyz`) is an invalid
469            // delimiter, not merely a missing replacement section.
470            if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
471                return Err(TransliterationError::InvalidDelimiter(repl_delimiter));
472            }
473            let repl_closing = get_closing_delimiter(repl_delimiter);
474            let (body, rest, found_closing) =
475                extract_delimited_content_strict(trimmed, repl_delimiter, repl_closing);
476            (body, rest, found_closing)
477        } else {
478            return Err(TransliterationError::MissingReplacement);
479        }
480    };
481
482    if !replacement_closed {
483        return Err(TransliterationError::MissingClosingDelimiter);
484    }
485
486    if search.is_empty() {
487        return Err(TransliterationError::MissingSearch);
488    }
489
490    // Validate transliteration modifiers strictly.
491    let mut modifiers = String::new();
492    for modifier in modifiers_str.chars().take_while(|c: &char| c.is_ascii_alphanumeric()) {
493        if matches!(modifier, 'c' | 'd' | 's' | 'r') {
494            modifiers.push(modifier);
495        } else {
496            return Err(TransliterationError::InvalidModifier(modifier));
497        }
498    }
499
500    Ok((search, replacement, modifiers))
501}
502
/// Get the closing delimiter for a given opening delimiter.
///
/// The bracketing characters `(`, `[`, `{` and `<` map to `)`, `]`, `}` and `>`.
/// Every other character closes itself, so the result equals `open`.
fn get_closing_delimiter(open: char) -> char {
    match open {
        '(' => ')',
        '[' => ']',
        '{' => '}',
        '<' => '>',
        _ => open,
    }
}

/// Whether `ch` is one of the opening bracket delimiters `{`, `[`, `(` or `<`.
fn is_paired_open(ch: char) -> bool {
    matches!(ch, '{' | '[' | '(' | '<')
}

518fn starts_with_paired_delimiter(text: &str) -> Option<char> {
519    let trimmed = text.trim_start();
520    match trimmed.chars().next() {
521        Some(ch) if is_paired_open(ch) => Some(ch),
522        _ => None,
523    }
524}
525
/// Extract content between delimiters and return `(content, rest)`.
///
/// `text` must start with `open`. Backslash escapes are copied into the content
/// verbatim (backslash included) and the escaped character never acts as a delimiter.
/// When `open != close` the delimiters nest, and only the `close` matching the initial
/// `open` ends the section.
///
/// `rest` is the text after the closing delimiter. If the section is never closed,
/// `content` holds everything after `open` and `rest` is empty; the two cases are
/// not distinguishable from the return value (see `extract_delimited_content_strict`
/// when that matters).
///
/// If `text` does not start with `open`, returns `("", text)`; if `text` is empty,
/// returns `("", "")`.
fn extract_delimited_content(text: &str, open: char, close: char) -> (String, &str) {
    let mut chars = text.char_indices();
    let is_paired = open != close;

    // Skip opening delimiter.
    match chars.next() {
        Some((_, c)) if c == open => {}
        Some(_) => return (String::new(), text),
        None => return (String::new(), ""),
    }

    let mut body = String::new();
    // Nesting depth; only consulted for paired delimiters.
    let mut depth = 1usize;
    let mut escaped = false;

    for (i, ch) in chars {
        if escaped {
            body.push(ch);
            escaped = false;
            continue;
        }

        match ch {
            '\\' => {
                body.push(ch);
                escaped = true;
            }
            c if is_paired && c == open => {
                body.push(ch);
                depth += 1;
            }
            c if c == close => {
                if is_paired && depth > 1 {
                    // Closes a nested pair: part of the body.
                    depth -= 1;
                    body.push(ch);
                } else {
                    return (body, &text[i + ch.len_utf8()..]);
                }
            }
            _ => body.push(ch),
        }
    }

    (body, &text[text.len()..])
}

581/// Lookahead helper: determine whether a `quote` char at byte `pos` in `text` is the
582/// opening of a genuine inner string literal that protects `closing` delimiter chars.
583///
584/// Returns `Some((end_pos, true))` when:
585///   - A matching closing `quote` is found on the SAME LINE (no `\n` crossed), AND
586///   - The content between the two `quote` chars contains `closing`.
587///   - `end_pos` is the byte offset just after the closing `quote`.
588///
589/// Returns `None` (or `Some((_, false))`) when:
590///   - A newline or end of `text` is reached before the matching closing `quote`, OR
591///   - The string content does not contain `closing`.
592///
593/// Stopping at newlines prevents cross-statement false positives in multiline source.
594fn scan_inner_string(
595    text: &str,
596    pos: usize,
597    quote: char,
598    delimiter: char,
599) -> Option<(usize, bool)> {
600    if is_word_apostrophe(text, pos, quote) {
601        return None;
602    }
603    // Adjacent quotes are literal replacement text (for example s/"/""/g),
604    // not a string literal to skip while hunting for the replacement delimiter.
605    if text.get(..pos).and_then(|prefix| prefix.chars().next_back()) == Some(quote) {
606        return None;
607    }
608    let start = pos + quote.len_utf8();
609    let rest = text.get(start..)?;
610    if rest.starts_with(quote) {
611        return None;
612    }
613    let mut escaped = false;
614    let mut contains_delim = false;
615    let mut end_of_string = None;
616    let mut local_pos = start;
617    for ch in rest.chars() {
618        if escaped {
619            escaped = false;
620            local_pos += ch.len_utf8();
621            continue;
622        }
623        if ch == '\\' {
624            escaped = true;
625            local_pos += ch.len_utf8();
626            continue;
627        }
628        // Newline terminates the scan: inner string literals don't span lines.
629        if ch == '\n' {
630            return None;
631        }
632        if ch == delimiter {
633            contains_delim = true;
634        }
635        if ch == quote {
636            end_of_string = Some(local_pos + ch.len_utf8());
637            break;
638        }
639        local_pos += ch.len_utf8();
640    }
641    end_of_string.map(|end| (end, contains_delim))
642}
643
/// Whether `quote` is an apostrophe that directly follows a word character.
///
/// Returns `true` only when `quote` is `'` and the character immediately before byte
/// offset `pos` in `text` is ASCII alphanumeric or `_` (as in `don't`). Returns
/// `false` when there is no preceding character or `pos` is not a valid char
/// boundary within `text`.
fn is_word_apostrophe(text: &str, pos: usize, quote: char) -> bool {
    quote == '\''
        && text
            .get(..pos)
            .and_then(|prefix| prefix.chars().next_back())
            .is_some_and(|ch| ch.is_ascii_alphanumeric() || ch == '_')
}

652/// Like `extract_unpaired_body` but skips over string literals (`"..."` / `'...'`)
653/// so that the closing delimiter character inside a string is not mistaken for the
654/// end of the replacement section.  Returns `(body, rest, found_closing)`.
655///
656/// Uses lookahead to determine whether a `'` or `"` is actually an inner string:
657/// only enters string-skip mode when the candidate string (a) has a matching closing
658/// quote on the same line AND (b) contains the closing delimiter in its content.
659/// This prevents lone apostrophes (e.g. the `'` in `s/''/'/g`) from triggering
660/// string-skip, which would cause replacement scanning to cross statement boundaries.
661fn extract_unpaired_body_skip_strings(text: &str, closing: char) -> (String, &str, bool) {
662    let mut body = String::new();
663    let mut end_pos = text.len();
664    let mut found_closing = false;
665    let mut pos = 0usize;
666    let mut escaped = false;
667
668    while let Some(ch) = text.get(pos..).and_then(|s| s.chars().next()) {
669        if escaped {
670            body.push(ch);
671            escaped = false;
672            pos += ch.len_utf8();
673            continue;
674        }
675
676        match ch {
677            '\\' => {
678                body.push(ch);
679                escaped = true;
680                pos += ch.len_utf8();
681            }
682            // Skip over string literals to avoid treating delimiter chars inside
683            // "foo/bar" or 'a/b' as the closing delimiter of the replacement.
684            //
685            // Guard: only enter string-skip when lookahead confirms a matching closing
686            // quote exists on the same line AND the content contains the closing delimiter.
687            '"' | '\'' if ch != closing => {
688                let quote = ch;
689                match scan_inner_string(text, pos, quote, closing) {
690                    Some((string_end, true)) => {
691                        // String content contains the closing delimiter → skip the string.
692                        let string_text = &text[pos..string_end];
693                        body.push_str(string_text);
694                        pos = string_end;
695                    }
696                    _ => {
697                        // No closing quote on same line, or content has no delimiter:
698                        // treat the opening quote as a literal character.
699                        body.push(ch);
700                        pos += ch.len_utf8();
701                    }
702                }
703            }
704            c if c == closing => {
705                end_pos = pos + ch.len_utf8();
706                found_closing = true;
707                break;
708            }
709            _ => {
710                body.push(ch);
711                pos += ch.len_utf8();
712            }
713        }
714    }
715
716    (body, &text[end_pos..], found_closing)
717}
718
719fn extract_substitution_pattern_with_replacement_hint(
720    text: &str,
721    open: char,
722    close: char,
723) -> (String, &str, bool) {
724    let mut chars = text.char_indices();
725
726    // Skip opening delimiter
727    if let Some((_, c)) = chars.next() {
728        if c != open {
729            return (String::new(), text, false);
730        }
731    } else {
732        return (String::new(), "", false);
733    }
734
735    let mut body = String::new();
736    let mut depth = 1usize;
737    let mut escaped = false;
738    let mut first_close_pos: Option<usize> = None;
739    let mut first_body_len: usize = 0;
740
741    for (i, ch) in chars {
742        if escaped {
743            body.push(ch);
744            escaped = false;
745            continue;
746        }
747
748        match ch {
749            '\\' => {
750                body.push(ch);
751                escaped = true;
752            }
753            c if c == open => {
754                body.push(ch);
755                depth += 1;
756            }
757            c if c == close => {
758                if depth > 1 {
759                    depth -= 1;
760                    body.push(ch);
761                    continue;
762                }
763
764                let rest = &text[i + ch.len_utf8()..];
765                if first_close_pos.is_none() {
766                    first_close_pos = Some(i + ch.len_utf8());
767                    first_body_len = body.len();
768                }
769
770                if starts_with_paired_delimiter(rest).is_some() {
771                    return (body, rest, true);
772                }
773
774                body.push(ch);
775            }
776            _ => body.push(ch),
777        }
778    }
779
780    if let Some(pos) = first_close_pos {
781        body.truncate(first_body_len);
782        return (body, &text[pos..], true);
783    }
784
785    (body, "", false)
786}
787
788fn split_unclosed_substitution_pattern(pattern: &str) -> Option<(String, String, String)> {
789    let mut escaped = false;
790
791    for (idx, ch) in pattern.char_indices() {
792        if escaped {
793            escaped = false;
794            continue;
795        }
796
797        if ch == '\\' {
798            escaped = true;
799            continue;
800        }
801
802        if is_paired_open(ch) {
803            let closing = get_closing_delimiter(ch);
804            let (replacement, rest, found_closing) =
805                extract_delimited_content_strict(&pattern[idx..], ch, closing);
806            if found_closing {
807                let leading = pattern[..idx].to_string();
808                return Some((leading, replacement, rest.to_string()));
809            }
810        }
811    }
812
813    None
814}
815
816fn split_on_last_paired_delimiter(text: &str) -> Option<(String, String, String)> {
817    let mut escaped = false;
818    let mut candidates = Vec::new();
819
820    for (idx, ch) in text.char_indices() {
821        if escaped {
822            escaped = false;
823            continue;
824        }
825
826        if ch == '\\' {
827            escaped = true;
828            continue;
829        }
830
831        if is_paired_open(ch) {
832            candidates.push((idx, ch));
833        }
834    }
835
836    for (idx, ch) in candidates.into_iter().rev() {
837        let closing = get_closing_delimiter(ch);
838        let (replacement, rest, found_closing) =
839            extract_delimited_content_strict(&text[idx..], ch, closing);
840        if found_closing {
841            let leading = text[..idx].to_string();
842            return Some((leading, replacement, rest.to_string()));
843        }
844    }
845
846    None
847}
848
/// Extract substitution modifiers, returning only the valid ones.
///
/// Only the leading run of ASCII letters in `text` is considered; scanning stops at
/// the first other character. Within that run, letters that are not valid modifiers
/// are dropped silently rather than reported.
///
/// Valid Perl substitution modifiers include:
/// - Core modifiers: g, i, m, s, x, o, e, r
/// - Charset modifiers (Perl 5.14+): a, d, l, u
/// - Additional modifiers: n (5.22+), p, c
///
/// This function never fails; use `validate_substitution_modifiers` when invalid
/// modifiers must be rejected.
fn extract_substitution_modifiers(text: &str) -> String {
    text.chars()
        .take_while(char::is_ascii_alphabetic)
        .filter(|c| {
            matches!(
                c,
                'g' | 'i' | 'm' | 's' | 'x' | 'o' | 'e' | 'r' | 'a' | 'd' | 'l' | 'u' | 'n' | 'p' | 'c'
            )
        })
        .collect()
}

/// Validate substitution modifiers and return an error if any are invalid.
///
/// Valid Perl substitution modifiers include:
/// - Core modifiers: g, i, m, s, x, o, e, r
/// - Charset modifiers (Perl 5.14+): a, d, l, u
/// - Additional modifiers: n (5.22+), p, c
///
/// Scanning stops successfully at the first whitespace character or `;`; anything
/// after that point is not inspected.
///
/// # Arguments
///
/// * `modifiers_str` - The raw modifier string following the substitution operator
///
/// # Returns
///
/// * `Ok(String)` - The validated modifiers if all are valid
/// * `Err(char)` - The first invalid character encountered in modifier position
///   (an unknown letter, or any other character that is neither whitespace nor `;`)
///
/// # Examples
///
/// ```ignore
/// assert!(validate_substitution_modifiers("gi").is_ok());
/// assert!(validate_substitution_modifiers("gia").is_ok());  // 'a' for ASCII mode
/// assert!(validate_substitution_modifiers("giz").is_err()); // 'z' is invalid
/// ```
pub fn validate_substitution_modifiers(modifiers_str: &str) -> Result<String, char> {
    let mut valid_modifiers = String::new();

    for c in modifiers_str.chars() {
        match c {
            'g' | 'i' | 'm' | 's' | 'x' | 'o' | 'e' | 'r' | 'a' | 'd' | 'l' | 'u' | 'n' | 'p'
            | 'c' => valid_modifiers.push(c),
            // A statement terminator or whitespace ends the modifier list.
            ';' => break,
            _ if c.is_whitespace() => break,
            // Unknown letter, digit, or punctuation in modifier position.
            _ => return Err(c),
        }
    }

    Ok(valid_modifiers)
}