Skip to main content

perl_parser_core/syntax/
quote.rs

1//! Uniform quote operator parsing for the Perl parser.
2//!
3//! This module provides consistent parsing for quote-like operators,
4//! properly extracting patterns, bodies, and modifiers.
5
6use std::borrow::Cow;
7
8/// Extract pattern and modifiers from a regex-like token (qr, m, or bare //)
9pub fn extract_regex_parts(text: &str) -> (String, String, String) {
10    // Handle different prefixes
11    let content = if let Some(stripped) = text.strip_prefix("qr") {
12        stripped
13    } else if let Some(stripped) = strip_match_prefix(text) {
14        stripped
15    } else {
16        text
17    };
18
19    // Get delimiter - content must be non-empty to have a delimiter
20    let delimiter = match content.chars().next() {
21        Some(d) => d,
22        None => return (String::new(), String::new(), String::new()),
23    };
24    let closing = get_closing_delimiter(delimiter);
25
26    // Extract body and modifiers
27    let (body, modifiers) = extract_delimited_content(content, delimiter, closing);
28
29    // Include delimiters in the pattern string for compatibility
30    let pattern = format!("{}{}{}", delimiter, body, closing);
31
32    (pattern, body, modifiers.to_string())
33}
34
/// Strip a leading `m` match operator, returning the text that starts at the delimiter.
///
/// Yields `None` when `text` does not start with `m`, when nothing follows the `m`, or
/// when the character after it is alphabetic (so identifiers such as `my` are not
/// mistaken for a match operator).
fn strip_match_prefix(text: &str) -> Option<&str> {
    match text.strip_prefix('m') {
        Some(rest) if rest.chars().next().is_some_and(|first| !first.is_alphabetic()) => {
            Some(rest)
        }
        _ => None,
    }
}
40
/// Error type for substitution operator parsing failures.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be reported with
/// `{}` and propagated with `?` into `Box<dyn Error>`-style results.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubstitutionError {
    /// Invalid modifier character found
    InvalidModifier(char),
    /// Missing delimiter after 's'
    MissingDelimiter,
    /// Pattern is missing or empty (just `s/`)
    MissingPattern,
    /// Replacement section is missing (e.g., `s/pattern` without replacement part)
    MissingReplacement,
    /// Closing delimiter is missing after replacement (e.g., `s/pattern/replacement` without final `/`)
    MissingClosingDelimiter,
}

impl std::fmt::Display for SubstitutionError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidModifier(modifier) => {
                write!(f, "invalid substitution modifier '{}'", modifier)
            }
            Self::MissingDelimiter => f.write_str("missing delimiter after 's'"),
            Self::MissingPattern => f.write_str("missing pattern in substitution"),
            Self::MissingReplacement => f.write_str("missing replacement in substitution"),
            Self::MissingClosingDelimiter => {
                f.write_str("missing closing delimiter in substitution")
            }
        }
    }
}

impl std::error::Error for SubstitutionError {}
55
/// Error type for transliteration operator parsing failures.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be reported with
/// `{}` and propagated with `?` into `Box<dyn Error>`-style results.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransliterationError {
    /// Invalid modifier character found
    InvalidModifier(char),
    /// Invalid delimiter after `tr`/`y`
    InvalidDelimiter(char),
    /// Missing delimiter after `tr`/`y`
    MissingDelimiter,
    /// Search list section is missing
    MissingSearch,
    /// Replacement list section is missing
    MissingReplacement,
    /// Closing delimiter is missing
    MissingClosingDelimiter,
}

impl std::fmt::Display for TransliterationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidModifier(modifier) => {
                write!(f, "invalid transliteration modifier '{}'", modifier)
            }
            Self::InvalidDelimiter(delimiter) => {
                write!(f, "invalid transliteration delimiter '{}'", delimiter)
            }
            Self::MissingDelimiter => f.write_str("missing delimiter after 'tr'/'y'"),
            Self::MissingSearch => f.write_str("missing search list in transliteration"),
            Self::MissingReplacement => {
                f.write_str("missing replacement list in transliteration")
            }
            Self::MissingClosingDelimiter => {
                f.write_str("missing closing delimiter in transliteration")
            }
        }
    }
}

impl std::error::Error for TransliterationError {}
72
73/// Extract pattern, replacement, and modifiers from a substitution token with strict validation
74///
75/// This function parses substitution operators like s/pattern/replacement/flags
76/// and handles various delimiter forms including:
77/// - Non-paired delimiters: s/pattern/replacement/ (same delimiter for all parts)
78/// - Paired delimiters: s{pattern}{replacement} (different open/close delimiters)
79///
80/// Unlike `extract_substitution_parts`, this function returns an error if invalid modifiers
81/// are present instead of silently filtering them.
82///
83/// # Errors
84///
85/// Returns `Err(SubstitutionError::InvalidModifier(c))` if an invalid modifier character is found.
86/// Valid modifiers are: g, i, m, s, x, o, e, r
87pub fn extract_substitution_parts_strict(
88    text: &str,
89) -> Result<(String, String, String), SubstitutionError> {
90    // Skip 's' prefix
91    let after_s = text.strip_prefix('s').unwrap_or(text);
92    // Perl allows whitespace between 's' and its delimiter (e.g. `s { pattern } { replacement }g`)
93    let content = after_s.trim_start();
94
95    // Get delimiter - check for missing delimiter (just 's' or 's' followed by nothing)
96    let delimiter = match content.chars().next() {
97        Some(d) => d,
98        None => return Err(SubstitutionError::MissingDelimiter),
99    };
100    let closing = get_closing_delimiter(delimiter);
101    let is_paired = delimiter != closing;
102
103    // Parse first body (pattern) with strict validation
104    let (pattern, rest1, pattern_closed) =
105        extract_delimited_content_strict(content, delimiter, closing);
106
107    // For non-paired delimiters: if pattern wasn't closed, missing closing delimiter
108    if !is_paired && !pattern_closed {
109        return Err(SubstitutionError::MissingClosingDelimiter);
110    }
111
112    // For paired delimiters: if pattern wasn't closed, missing closing delimiter
113    if is_paired && !pattern_closed {
114        return Err(SubstitutionError::MissingClosingDelimiter);
115    }
116
117    // Parse second body (replacement)
118    // For paired delimiters, the replacement may use a different delimiter than the pattern
119    // e.g., s[pattern]{replacement} is valid Perl
120    let (replacement, modifiers_str, replacement_closed) = if !is_paired {
121        // Non-paired delimiters: must have replacement section
122        if rest1.is_empty() {
123            return Err(SubstitutionError::MissingReplacement);
124        }
125
126        // Parse replacement, skipping string literals so that delimiter chars
127        // inside "foo/bar" or 'a/b' don't terminate the replacement early.
128        let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
129        (body, rest, found_closing)
130    } else {
131        // Paired pattern delimiters still allow either paired or non-paired delimiters
132        // for the replacement side (e.g. s{foo}/bar/ and s[foo]{bar}).
133        let trimmed = skip_paired_replacement_gap(rest1);
134        if let Some(rd) = trimmed.chars().next() {
135            let repl_closing = get_closing_delimiter(rd);
136            extract_delimited_content_strict(trimmed, rd, repl_closing)
137        } else {
138            // No more content - missing replacement
139            return Err(SubstitutionError::MissingReplacement);
140        }
141    };
142
143    // For non-paired delimiters, must have found the closing delimiter for replacement
144    if !is_paired && !replacement_closed {
145        return Err(SubstitutionError::MissingClosingDelimiter);
146    }
147
148    // For paired delimiters, must have found the closing delimiter for replacement
149    if is_paired && !replacement_closed {
150        return Err(SubstitutionError::MissingClosingDelimiter);
151    }
152
153    // Validate modifiers strictly - reject if any invalid modifiers present
154    let modifiers = validate_substitution_modifiers(modifiers_str)
155        .map_err(SubstitutionError::InvalidModifier)?;
156
157    Ok((pattern, replacement, modifiers))
158}
159
160fn skip_paired_replacement_gap(mut text: &str) -> &str {
161    let mut comment_eligible = false;
162    loop {
163        let trimmed = text.trim_start_matches(char::is_whitespace);
164        let saw_whitespace = trimmed.len() != text.len();
165        text = trimmed;
166        comment_eligible |= saw_whitespace;
167
168        if comment_eligible && text.starts_with('#') {
169            text = after_line_comment(text);
170            comment_eligible = true;
171            continue;
172        }
173
174        return text;
175    }
176}
177
/// Return the text following the first line terminator (`\n` or `\r`) in `text`.
///
/// Only a single terminator character is consumed, so for `\r\n` the result still starts
/// with `\n`. Returns `""` when `text` contains no line terminator.
fn after_line_comment(text: &str) -> &str {
    match text.find(|ch: char| ch == '\n' || ch == '\r') {
        // Both terminators are one byte wide in UTF-8.
        Some(at) => &text[at + 1..],
        None => "",
    }
}
186
/// Extract the content of one delimited section and report whether it was terminated.
///
/// `text` must start with `open`; otherwise `(String::new(), text, false)` is returned.
/// On success the result is `(content, rest, found_closing)`:
/// - `content` is the text between the delimiters, with backslash escapes kept verbatim
///   (an escaped character never opens or closes anything),
/// - `rest` is the input following the closing delimiter (empty when none was found),
/// - `found_closing` tells whether the section was actually closed.
///
/// When `open != close` the delimiters nest, so `{a{b}c}` yields `a{b}c`.
fn extract_delimited_content_strict(text: &str, open: char, close: char) -> (String, &str, bool) {
    let Some(inner) = text.strip_prefix(open) else {
        return (String::new(), text, false);
    };

    let nests = open != close;
    let mut content = String::new();
    // Closers still owed; only grows past 1 when the delimiters nest.
    let mut pending_closers = 1usize;
    let mut take_literally = false;

    for (offset, ch) in inner.char_indices() {
        if take_literally {
            content.push(ch);
            take_literally = false;
        } else if ch == '\\' {
            content.push(ch);
            take_literally = true;
        } else if nests && ch == open {
            content.push(ch);
            pending_closers += 1;
        } else if ch == close {
            pending_closers -= 1;
            if pending_closers == 0 {
                return (content, &inner[offset + ch.len_utf8()..], true);
            }
            // An inner closer belongs to the content.
            content.push(ch);
        } else {
            content.push(ch);
        }
    }

    // Ran off the end without finding the terminator.
    (content, &inner[inner.len()..], false)
}
245
246/// Extract pattern, replacement, and modifiers from a substitution token
247///
248/// This function parses substitution operators like s/pattern/replacement/flags
249/// and handles various delimiter forms including:
250/// - Non-paired delimiters: s/pattern/replacement/ (same delimiter for all parts)
251/// - Paired delimiters: s{pattern}{replacement} (different open/close delimiters)
252///
253/// For paired delimiters, properly handles nested delimiters within the pattern
254/// or replacement parts. Returns (pattern, replacement, modifiers) as strings.
255///
256/// Note: This function silently filters invalid modifiers. For strict validation,
257/// use `extract_substitution_parts_strict` instead.
258pub fn extract_substitution_parts(text: &str) -> (String, String, String) {
259    // Skip 's' prefix
260    let content = text.strip_prefix('s').unwrap_or(text);
261
262    // Get delimiter - content must be non-empty to have a delimiter
263    let delimiter = match content.chars().next() {
264        Some(d) => d,
265        None => return (String::new(), String::new(), String::new()),
266    };
267    if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
268        if let Some((pattern, replacement, modifiers_str)) = split_on_last_paired_delimiter(content)
269        {
270            let modifiers = extract_substitution_modifiers(&modifiers_str);
271            return (pattern, replacement, modifiers);
272        }
273
274        return (String::new(), String::new(), String::new());
275    }
276    let closing = get_closing_delimiter(delimiter);
277    let is_paired = delimiter != closing;
278
279    // Parse first body (pattern)
280    let (mut pattern, rest1, pattern_closed) = if is_paired {
281        extract_substitution_pattern_with_replacement_hint(content, delimiter, closing)
282    } else {
283        extract_delimited_content_strict(content, delimiter, closing)
284    };
285
286    // Parse second body (replacement)
287    // For paired delimiters, the replacement may use a different delimiter than the pattern
288    // e.g., s[pattern]{replacement} is valid Perl
289    let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
290        // Non-paired delimiters: manually parse the replacement, skipping string literals
291        // so that delimiter chars inside "foo/bar" or 'a/b' don't end the replacement early.
292        let (body, rest, _found) = extract_unpaired_body_skip_strings(rest1, closing);
293        (body, Cow::Borrowed(rest))
294    } else if !is_paired && !pattern_closed {
295        if let Some((fallback_pattern, fallback_replacement, fallback_modifiers)) =
296            split_unclosed_substitution_pattern(&pattern)
297        {
298            pattern = fallback_pattern;
299            (fallback_replacement, Cow::Owned(fallback_modifiers))
300        } else {
301            (String::new(), Cow::Borrowed(rest1))
302        }
303    } else if is_paired {
304        let trimmed = rest1.trim_start();
305        if let Some(rd) = trimmed.chars().next() {
306            if rd.is_ascii_alphanumeric() || rd.is_whitespace() {
307                (String::new(), Cow::Borrowed(trimmed))
308            } else {
309                let repl_closing = get_closing_delimiter(rd);
310                let (body, rest) = extract_delimited_content(trimmed, rd, repl_closing);
311                (body, Cow::Borrowed(rest))
312            }
313        } else {
314            (String::new(), Cow::Borrowed(trimmed))
315        }
316    } else {
317        (String::new(), Cow::Borrowed(rest1))
318    };
319
320    // Extract and validate only valid substitution modifiers
321    let modifiers = extract_substitution_modifiers(modifiers_str.as_ref());
322
323    (pattern, replacement, modifiers)
324}
325
326/// Extract search, replace, and modifiers from a transliteration token
327pub fn extract_transliteration_parts(text: &str) -> (String, String, String) {
328    // Skip 'tr' or 'y' prefix
329    let after_op = if let Some(stripped) = text.strip_prefix("tr") {
330        stripped
331    } else if let Some(stripped) = text.strip_prefix('y') {
332        stripped
333    } else {
334        text
335    };
336    let content = after_op.trim_start();
337
338    // Get delimiter - content must be non-empty to have a delimiter
339    let delimiter = match content.chars().next() {
340        Some(d) => d,
341        None => return (String::new(), String::new(), String::new()),
342    };
343    if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
344        return (String::new(), String::new(), String::new());
345    }
346    let closing = get_closing_delimiter(delimiter);
347    let is_paired = delimiter != closing;
348
349    // Parse first body (search pattern)
350    let (search, rest1) = extract_delimited_content(content, delimiter, closing);
351
352    // For paired delimiters, skip whitespace and allow any paired opening delimiter for the
353    // replacement list. Perl accepts forms like tr[abc]{xyz} in addition to tr[abc][xyz].
354    let rest2_owned;
355    let rest2 = if is_paired {
356        rest1.trim_start()
357    } else {
358        rest2_owned = format!("{}{}", delimiter, rest1);
359        &rest2_owned
360    };
361
362    // Parse second body (replacement pattern)
363    let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
364        // Manually parse the replacement for non-paired delimiters
365        let chars = rest1.char_indices();
366        let mut body = String::new();
367        let mut escaped = false;
368        let mut end_pos = rest1.len();
369
370        for (i, ch) in chars {
371            if escaped {
372                body.push(ch);
373                escaped = false;
374                continue;
375            }
376
377            match ch {
378                '\\' => {
379                    body.push(ch);
380                    escaped = true;
381                }
382                c if c == closing => {
383                    end_pos = i + ch.len_utf8();
384                    break;
385                }
386                _ => body.push(ch),
387            }
388        }
389
390        (body, &rest1[end_pos..])
391    } else if is_paired {
392        if let Some(repl_delimiter) = starts_with_paired_delimiter(rest2) {
393            let repl_closing = get_closing_delimiter(repl_delimiter);
394            extract_delimited_content(rest2, repl_delimiter, repl_closing)
395        } else if let Some(repl_delimiter) = rest2.chars().next() {
396            if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
397                (String::new(), rest2)
398            } else {
399                extract_delimited_content(rest2, repl_delimiter, repl_delimiter)
400            }
401        } else {
402            (String::new(), rest2)
403        }
404    } else {
405        (String::new(), rest1)
406    };
407
408    // Extract and validate only valid transliteration modifiers
409    // Security fix: Apply consistent validation for all delimiter types
410    let modifiers = modifiers_str
411        .chars()
412        .take_while(|c| c.is_ascii_alphabetic())
413        .filter(|&c| matches!(c, 'c' | 'd' | 's' | 'r'))
414        .collect();
415
416    (search, replacement, modifiers)
417}
418
419/// Extract search, replace, and modifiers from a transliteration token with strict validation.
420///
421/// Supports both `tr///` and `y///` syntax, including optional whitespace between
422/// the operator and delimiter (e.g. `tr /a/b/`).
423///
424/// # Errors
425///
426/// Returns `Err(TransliterationError::InvalidModifier(c))` if an invalid modifier
427/// character is encountered. Valid modifiers are: `c`, `d`, `s`, `r`.
428pub fn extract_transliteration_parts_strict(
429    text: &str,
430) -> Result<(String, String, String), TransliterationError> {
431    // Skip `tr` or `y` prefix, then allow optional whitespace before delimiter.
432    let after_op = if let Some(stripped) = text.strip_prefix("tr") {
433        stripped
434    } else if let Some(stripped) = text.strip_prefix('y') {
435        stripped
436    } else {
437        text
438    };
439    let content = after_op.trim_start();
440
441    // Get delimiter.
442    let delimiter = match content.chars().next() {
443        Some(d) => d,
444        None => return Err(TransliterationError::MissingDelimiter),
445    };
446    if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
447        return Err(TransliterationError::InvalidDelimiter(delimiter));
448    }
449    let closing = get_closing_delimiter(delimiter);
450    let is_paired = delimiter != closing;
451
452    // Parse first body (search).
453    let (search, rest1, search_closed) =
454        extract_delimited_content_strict(content, delimiter, closing);
455    if !search_closed {
456        return Err(TransliterationError::MissingClosingDelimiter);
457    }
458
459    // Parse second body (replacement).
460    let (replacement, modifiers_str, replacement_closed) = if !is_paired {
461        if rest1.is_empty() {
462            return Err(TransliterationError::MissingReplacement);
463        }
464        let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
465        (body, rest, found_closing)
466    } else {
467        let trimmed = skip_paired_replacement_gap(rest1);
468        if let Some(repl_delimiter) = trimmed.chars().next() {
469            // After a paired search delimiter (e.g. `{...}`), the replacement must
470            // also start with a valid non-alphanumeric, non-whitespace delimiter.
471            // An alphanumeric character here (e.g. `tr{abc}xyz`) is an invalid
472            // delimiter, not merely a missing replacement section.
473            if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
474                return Err(TransliterationError::InvalidDelimiter(repl_delimiter));
475            }
476            let repl_closing = get_closing_delimiter(repl_delimiter);
477            let (body, rest, found_closing) =
478                extract_delimited_content_strict(trimmed, repl_delimiter, repl_closing);
479            (body, rest, found_closing)
480        } else {
481            return Err(TransliterationError::MissingReplacement);
482        }
483    };
484
485    if !replacement_closed {
486        return Err(TransliterationError::MissingClosingDelimiter);
487    }
488
489    // Note: an empty search list is valid Perl — `tr///` counts characters
490    // (the "$count = ($str =~ tr///)" idiom). Do not reject empty search.
491
492    // Validate transliteration modifiers strictly.
493    let mut modifiers = String::new();
494    for modifier in modifiers_str.chars().take_while(|c: &char| c.is_ascii_alphanumeric()) {
495        if matches!(modifier, 'c' | 'd' | 's' | 'r') {
496            modifiers.push(modifier);
497        } else {
498            return Err(TransliterationError::InvalidModifier(modifier));
499        }
500    }
501
502    Ok((search, replacement, modifiers))
503}
504
/// Map an opening delimiter to the character that closes it.
///
/// The four bracket openers `(`, `[`, `{`, `<` map to their mirrored closers; every other
/// character closes itself.
fn get_closing_delimiter(open: char) -> char {
    const BRACKET_PAIRS: [(char, char); 4] = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')];

    BRACKET_PAIRS
        .iter()
        .find(|&&(opener, _)| opener == open)
        .map_or(open, |&(_, closer)| closer)
}
515
/// Whether `ch` is one of the bracket characters that open a paired delimiter.
fn is_paired_open(ch: char) -> bool {
    ['{', '[', '(', '<'].contains(&ch)
}
519
520fn starts_with_paired_delimiter(text: &str) -> Option<char> {
521    let trimmed = text.trim_start();
522    match trimmed.chars().next() {
523        Some(ch) if is_paired_open(ch) => Some(ch),
524        _ => None,
525    }
526}
527
/// Extract the content of one delimited section and return `(content, rest)`.
///
/// `text` must start with `open`; otherwise the result is an empty content string
/// together with `text` unchanged. `content` is the text between the delimiters with
/// backslash escapes kept verbatim, and `rest` is the input after the closing delimiter.
/// When the section is never closed, `content` holds everything after the opener and
/// `rest` is empty.
///
/// When `open != close` the delimiters nest, so `{a{b}c}` yields `a{b}c`.
fn extract_delimited_content(text: &str, open: char, close: char) -> (String, &str) {
    let Some(inner) = text.strip_prefix(open) else {
        return (String::new(), text);
    };

    let nests = open != close;
    let mut content = String::new();
    // Closers still owed; only grows past 1 when the delimiters nest.
    let mut pending_closers = 1usize;
    let mut take_literally = false;

    for (offset, ch) in inner.char_indices() {
        if take_literally {
            content.push(ch);
            take_literally = false;
        } else if ch == '\\' {
            content.push(ch);
            take_literally = true;
        } else if nests && ch == open {
            content.push(ch);
            pending_closers += 1;
        } else if ch == close {
            pending_closers -= 1;
            if pending_closers == 0 {
                return (content, &inner[offset + ch.len_utf8()..]);
            }
            // An inner closer belongs to the content.
            content.push(ch);
        } else {
            content.push(ch);
        }
    }

    // Unterminated section: nothing is left over.
    (content, &inner[inner.len()..])
}
582
583/// Lookahead helper: determine whether a `quote` char at byte `pos` in `text` is the
584/// opening of a genuine inner string literal that protects `closing` delimiter chars.
585///
586/// Returns `Some((end_pos, true))` when:
587///   - A matching closing `quote` is found on the SAME LINE (no `\n` crossed), AND
588///   - The content between the two `quote` chars contains `closing`.
589///   - `end_pos` is the byte offset just after the closing `quote`.
590///
591/// Returns `None` (or `Some((_, false))`) when:
592///   - A newline or end of `text` is reached before the matching closing `quote`, OR
593///   - The string content does not contain `closing`.
594///
595/// Stopping at newlines prevents cross-statement false positives in multiline source.
596fn scan_inner_string(
597    text: &str,
598    pos: usize,
599    quote: char,
600    delimiter: char,
601) -> Option<(usize, bool)> {
602    if is_word_apostrophe(text, pos, quote) {
603        return None;
604    }
605    // Adjacent quotes are literal replacement text (for example s/"/""/g),
606    // not a string literal to skip while hunting for the replacement delimiter.
607    if text.get(..pos).and_then(|prefix| prefix.chars().next_back()) == Some(quote) {
608        return None;
609    }
610    let start = pos + quote.len_utf8();
611    let rest = text.get(start..)?;
612    if rest.starts_with(quote) {
613        return None;
614    }
615    let mut escaped = false;
616    let mut contains_delim = false;
617    let mut end_of_string = None;
618    let mut local_pos = start;
619    for ch in rest.chars() {
620        if escaped {
621            escaped = false;
622            local_pos += ch.len_utf8();
623            continue;
624        }
625        if ch == '\\' {
626            escaped = true;
627            local_pos += ch.len_utf8();
628            continue;
629        }
630        // Newline terminates the scan: inner string literals don't span lines.
631        if ch == '\n' {
632            return None;
633        }
634        if ch == delimiter {
635            contains_delim = true;
636        }
637        if ch == quote {
638            end_of_string = Some(local_pos + ch.len_utf8());
639            break;
640        }
641        local_pos += ch.len_utf8();
642    }
643    end_of_string.map(|end| (end, contains_delim))
644}
645
/// Whether `quote` is an apostrophe that directly follows a word character.
///
/// True only when `quote` is `'` and the character just before byte offset `pos` in
/// `text` is an ASCII letter, ASCII digit, or `_` (as in `don't`). False when there is
/// no preceding character or when `pos` is not a valid slice end in `text`.
fn is_word_apostrophe(text: &str, pos: usize, quote: char) -> bool {
    if quote != '\'' {
        return false;
    }

    match text.get(..pos).and_then(|before| before.chars().next_back()) {
        Some(previous) => previous == '_' || previous.is_ascii_alphanumeric(),
        None => false,
    }
}
653
654/// Like `extract_unpaired_body` but skips over string literals (`"..."` / `'...'`)
655/// so that the closing delimiter character inside a string is not mistaken for the
656/// end of the replacement section.  Returns `(body, rest, found_closing)`.
657///
658/// Uses lookahead to determine whether a `'` or `"` is actually an inner string:
659/// only enters string-skip mode when the candidate string (a) has a matching closing
660/// quote on the same line AND (b) contains the closing delimiter in its content.
661/// This prevents lone apostrophes (e.g. the `'` in `s/''/'/g`) from triggering
662/// string-skip, which would cause replacement scanning to cross statement boundaries.
663fn extract_unpaired_body_skip_strings(text: &str, closing: char) -> (String, &str, bool) {
664    let mut body = String::new();
665    let mut end_pos = text.len();
666    let mut found_closing = false;
667    let mut pos = 0usize;
668    let mut escaped = false;
669
670    while let Some(ch) = text.get(pos..).and_then(|s| s.chars().next()) {
671        if escaped {
672            body.push(ch);
673            escaped = false;
674            pos += ch.len_utf8();
675            continue;
676        }
677
678        match ch {
679            '\\' => {
680                body.push(ch);
681                escaped = true;
682                pos += ch.len_utf8();
683            }
684            // Skip over string literals to avoid treating delimiter chars inside
685            // "foo/bar" or 'a/b' as the closing delimiter of the replacement.
686            //
687            // Guard: only enter string-skip when lookahead confirms a matching closing
688            // quote exists on the same line AND the content contains the closing delimiter.
689            '"' | '\'' if ch != closing => {
690                let quote = ch;
691                match scan_inner_string(text, pos, quote, closing) {
692                    Some((string_end, true)) => {
693                        // String content contains the closing delimiter → skip the string.
694                        let string_text = &text[pos..string_end];
695                        body.push_str(string_text);
696                        pos = string_end;
697                    }
698                    _ => {
699                        // No closing quote on same line, or content has no delimiter:
700                        // treat the opening quote as a literal character.
701                        body.push(ch);
702                        pos += ch.len_utf8();
703                    }
704                }
705            }
706            c if c == closing => {
707                end_pos = pos + ch.len_utf8();
708                found_closing = true;
709                break;
710            }
711            _ => {
712                body.push(ch);
713                pos += ch.len_utf8();
714            }
715        }
716    }
717
718    (body, &text[end_pos..], found_closing)
719}
720
721fn extract_substitution_pattern_with_replacement_hint(
722    text: &str,
723    open: char,
724    close: char,
725) -> (String, &str, bool) {
726    let mut chars = text.char_indices();
727
728    // Skip opening delimiter
729    if let Some((_, c)) = chars.next() {
730        if c != open {
731            return (String::new(), text, false);
732        }
733    } else {
734        return (String::new(), "", false);
735    }
736
737    let mut body = String::new();
738    let mut depth = 1usize;
739    let mut escaped = false;
740    let mut first_close_pos: Option<usize> = None;
741    let mut first_body_len: usize = 0;
742
743    for (i, ch) in chars {
744        if escaped {
745            body.push(ch);
746            escaped = false;
747            continue;
748        }
749
750        match ch {
751            '\\' => {
752                body.push(ch);
753                escaped = true;
754            }
755            c if c == open => {
756                body.push(ch);
757                depth += 1;
758            }
759            c if c == close => {
760                if depth > 1 {
761                    depth -= 1;
762                    body.push(ch);
763                    continue;
764                }
765
766                let rest = &text[i + ch.len_utf8()..];
767                if first_close_pos.is_none() {
768                    first_close_pos = Some(i + ch.len_utf8());
769                    first_body_len = body.len();
770                }
771
772                if starts_with_paired_delimiter(rest).is_some() {
773                    return (body, rest, true);
774                }
775
776                body.push(ch);
777            }
778            _ => body.push(ch),
779        }
780    }
781
782    if let Some(pos) = first_close_pos {
783        body.truncate(first_body_len);
784        return (body, &text[pos..], true);
785    }
786
787    (body, "", false)
788}
789
790fn split_unclosed_substitution_pattern(pattern: &str) -> Option<(String, String, String)> {
791    let mut escaped = false;
792
793    for (idx, ch) in pattern.char_indices() {
794        if escaped {
795            escaped = false;
796            continue;
797        }
798
799        if ch == '\\' {
800            escaped = true;
801            continue;
802        }
803
804        if is_paired_open(ch) {
805            let closing = get_closing_delimiter(ch);
806            let (replacement, rest, found_closing) =
807                extract_delimited_content_strict(&pattern[idx..], ch, closing);
808            if found_closing {
809                let leading = pattern[..idx].to_string();
810                return Some((leading, replacement, rest.to_string()));
811            }
812        }
813    }
814
815    None
816}
817
818fn split_on_last_paired_delimiter(text: &str) -> Option<(String, String, String)> {
819    let mut escaped = false;
820    let mut candidates = Vec::new();
821
822    for (idx, ch) in text.char_indices() {
823        if escaped {
824            escaped = false;
825            continue;
826        }
827
828        if ch == '\\' {
829            escaped = true;
830            continue;
831        }
832
833        if is_paired_open(ch) {
834            candidates.push((idx, ch));
835        }
836    }
837
838    for (idx, ch) in candidates.into_iter().rev() {
839        let closing = get_closing_delimiter(ch);
840        let (replacement, rest, found_closing) =
841            extract_delimited_content_strict(&text[idx..], ch, closing);
842        if found_closing {
843            let leading = text[..idx].to_string();
844            return Some((leading, replacement, rest.to_string()));
845        }
846    }
847
848    None
849}
850
/// Keep only the recognised substitution modifiers from the start of `text`.
///
/// Reads the leading run of ASCII letters and drops every letter that is not one of:
/// - Core modifiers: g, i, m, s, x, o, e, r
/// - Charset modifiers (Perl 5.14+): a, d, l, u
/// - Additional modifiers: n (5.22+), p, c
///
/// Scanning stops at the first character that is not an ASCII letter. Unknown letters are
/// filtered out silently; this function never fails.
fn extract_substitution_modifiers(text: &str) -> String {
    const ACCEPTED: &str = "gimsxoeradlunpc";

    text.chars()
        .take_while(char::is_ascii_alphabetic)
        .filter(|&flag| ACCEPTED.contains(flag))
        .collect()
}
884
/// Validate the modifier run that follows a substitution operator.
///
/// Accepted modifiers:
/// - Core modifiers: g, i, m, s, x, o, e, r
/// - Charset modifiers (Perl 5.14+): a, d, l, u
/// - Additional modifiers: n (5.22+), p, c
///
/// # Arguments
///
/// * `modifiers_str` - The raw text following the final delimiter of the operator
///
/// # Returns
///
/// * `Ok(String)` - The modifiers read before the end of input, whitespace, or `;`
/// * `Err(char)` - The first character that is neither an accepted modifier nor one of
///   those terminators
///
/// # Examples
///
/// ```ignore
/// assert!(validate_substitution_modifiers("gi").is_ok());
/// assert!(validate_substitution_modifiers("gia").is_ok());  // 'a' for ASCII mode
/// assert!(validate_substitution_modifiers("giz").is_err()); // 'z' is invalid
/// ```
pub fn validate_substitution_modifiers(modifiers_str: &str) -> Result<String, char> {
    let mut accepted = String::new();

    for candidate in modifiers_str.chars() {
        match candidate {
            'g' | 'i' | 'm' | 's' | 'x' | 'o' | 'e' | 'r' | 'a' | 'd' | 'l' | 'u' | 'n' | 'p'
            | 'c' => accepted.push(candidate),
            // A statement terminator or any whitespace ends the modifier run.
            ';' => break,
            space if space.is_whitespace() => break,
            // Unknown letters and stray punctuation are both rejected.
            other => return Err(other),
        }
    }

    Ok(accepted)
}