Skip to main content

perl_parser_core/syntax/
quote.rs

1//! Uniform quote operator parsing for the Perl parser.
2//!
3//! This module provides consistent parsing for quote-like operators,
4//! properly extracting patterns, bodies, and modifiers.
5
6use std::borrow::Cow;
7
8/// Extract pattern and modifiers from a regex-like token (qr, m, or bare //)
9pub fn extract_regex_parts(text: &str) -> (String, String, String) {
10    // Handle different prefixes
11    let content = if let Some(stripped) = text.strip_prefix("qr") {
12        stripped
13    } else if let Some(stripped) = strip_match_prefix(text) {
14        stripped
15    } else {
16        text
17    };
18
19    // Get delimiter - content must be non-empty to have a delimiter
20    let delimiter = match content.chars().next() {
21        Some(d) => d,
22        None => return (String::new(), String::new(), String::new()),
23    };
24    let closing = get_closing_delimiter(delimiter);
25
26    // Extract body and modifiers
27    let (body, modifiers) = extract_delimited_content(content, delimiter, closing);
28
29    // Include delimiters in the pattern string for compatibility
30    let pattern = format!("{}{}{}", delimiter, body, closing);
31
32    (pattern, body, modifiers.to_string())
33}
34
/// Strip a leading `m` from a match token.
///
/// Returns the text after the `m` only when at least one character follows and
/// that character is not alphabetic (so identifiers such as `my` are not
/// mistaken for a match operator). Returns `None` otherwise.
fn strip_match_prefix(text: &str) -> Option<&str> {
    text.strip_prefix('m').filter(|rest| {
        rest.chars()
            .next()
            .is_some_and(|first| !first.is_alphabetic())
    })
}
40
/// Error type for substitution operator (`s///`) parsing failures.
///
/// Every payload is a plain `char`, so the type supports total equality (`Eq`)
/// in addition to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubstitutionError {
    /// An invalid modifier character was found after the replacement section;
    /// the offending character is carried in the variant.
    InvalidModifier(char),
    /// No delimiter follows the `s` operator.
    MissingDelimiter,
    /// Pattern is missing or empty (just `s/`).
    ///
    /// NOTE(review): no parser visible in this file currently returns this
    /// variant; confirm whether another module does before relying on it.
    MissingPattern,
    /// Replacement section is missing (e.g., `s/pattern` without replacement part).
    MissingReplacement,
    /// A closing delimiter is missing (e.g., `s/pattern/replacement` without final `/`).
    MissingClosingDelimiter,
}
55
/// Error type for transliteration operator (`tr///` / `y///`) parsing failures.
///
/// Every payload is a plain `char`, so the type supports total equality (`Eq`)
/// in addition to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransliterationError {
    /// An invalid modifier character was found; the character is carried in the variant.
    InvalidModifier(char),
    /// The character in delimiter position after `tr`/`y` (or after a paired
    /// search list) is alphanumeric or whitespace and cannot act as a delimiter.
    InvalidDelimiter(char),
    /// No delimiter follows `tr`/`y`.
    MissingDelimiter,
    /// Search list section is missing (empty).
    MissingSearch,
    /// Replacement list section is missing.
    MissingReplacement,
    /// A closing delimiter is missing for the search or replacement list.
    MissingClosingDelimiter,
}
72
73/// Extract pattern, replacement, and modifiers from a substitution token with strict validation
74///
75/// This function parses substitution operators like s/pattern/replacement/flags
76/// and handles various delimiter forms including:
77/// - Non-paired delimiters: s/pattern/replacement/ (same delimiter for all parts)
78/// - Paired delimiters: s{pattern}{replacement} (different open/close delimiters)
79///
80/// Unlike `extract_substitution_parts`, this function returns an error if invalid modifiers
81/// are present instead of silently filtering them.
82///
83/// # Errors
84///
85/// Returns `Err(SubstitutionError::InvalidModifier(c))` if an invalid modifier character is found.
86/// Valid modifiers are: g, i, m, s, x, o, e, r
87pub fn extract_substitution_parts_strict(
88    text: &str,
89) -> Result<(String, String, String), SubstitutionError> {
90    // Skip 's' prefix
91    let after_s = text.strip_prefix('s').unwrap_or(text);
92    // Perl allows whitespace between 's' and its delimiter (e.g. `s { pattern } { replacement }g`)
93    let content = after_s.trim_start();
94
95    // Get delimiter - check for missing delimiter (just 's' or 's' followed by nothing)
96    let delimiter = match content.chars().next() {
97        Some(d) => d,
98        None => return Err(SubstitutionError::MissingDelimiter),
99    };
100    let closing = get_closing_delimiter(delimiter);
101    let is_paired = delimiter != closing;
102
103    // Parse first body (pattern) with strict validation
104    let (pattern, rest1, pattern_closed) =
105        extract_delimited_content_strict(content, delimiter, closing);
106
107    // For non-paired delimiters: if pattern wasn't closed, missing closing delimiter
108    if !is_paired && !pattern_closed {
109        return Err(SubstitutionError::MissingClosingDelimiter);
110    }
111
112    // For paired delimiters: if pattern wasn't closed, missing closing delimiter
113    if is_paired && !pattern_closed {
114        return Err(SubstitutionError::MissingClosingDelimiter);
115    }
116
117    // Parse second body (replacement)
118    // For paired delimiters, the replacement may use a different delimiter than the pattern
119    // e.g., s[pattern]{replacement} is valid Perl
120    let (replacement, modifiers_str, replacement_closed) = if !is_paired {
121        // Non-paired delimiters: must have replacement section
122        if rest1.is_empty() {
123            return Err(SubstitutionError::MissingReplacement);
124        }
125
126        // Parse replacement, skipping string literals so that delimiter chars
127        // inside "foo/bar" or 'a/b' don't terminate the replacement early.
128        let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
129        (body, rest, found_closing)
130    } else {
131        // Paired pattern delimiters still allow either paired or non-paired delimiters
132        // for the replacement side (e.g. s{foo}/bar/ and s[foo]{bar}).
133        let trimmed = skip_paired_replacement_gap(rest1);
134        if let Some(rd) = trimmed.chars().next() {
135            let repl_closing = get_closing_delimiter(rd);
136            extract_delimited_content_strict(trimmed, rd, repl_closing)
137        } else {
138            // No more content - missing replacement
139            return Err(SubstitutionError::MissingReplacement);
140        }
141    };
142
143    // For non-paired delimiters, must have found the closing delimiter for replacement
144    if !is_paired && !replacement_closed {
145        return Err(SubstitutionError::MissingClosingDelimiter);
146    }
147
148    // For paired delimiters, must have found the closing delimiter for replacement
149    if is_paired && !replacement_closed {
150        return Err(SubstitutionError::MissingClosingDelimiter);
151    }
152
153    // Validate modifiers strictly - reject if any invalid modifiers present
154    let modifiers = validate_substitution_modifiers(modifiers_str)
155        .map_err(SubstitutionError::InvalidModifier)?;
156
157    Ok((pattern, replacement, modifiers))
158}
159
160fn skip_paired_replacement_gap(mut text: &str) -> &str {
161    let mut comment_eligible = false;
162    loop {
163        let trimmed = text.trim_start_matches(char::is_whitespace);
164        let saw_whitespace = trimmed.len() != text.len();
165        text = trimmed;
166        comment_eligible |= saw_whitespace;
167
168        if comment_eligible && text.starts_with('#') {
169            text = after_line_comment(text);
170            comment_eligible = true;
171            continue;
172        }
173
174        return text;
175    }
176}
177
/// Return the text that follows the first `\n` or `\r` in `text`, or `""` when
/// `text` contains neither.
///
/// Only a single terminator character is consumed, so for `\r\n` the returned
/// slice still starts with `\n`.
fn after_line_comment(text: &str) -> &str {
    // Both terminators are one byte wide in UTF-8, hence the `+ 1`.
    text.find(|ch: char| ch == '\n' || ch == '\r')
        .map_or("", |terminator_at| &text[terminator_at + 1..])
}
186
/// Extract the content of one delimited section and report whether it was closed.
///
/// `text` must start with `open`; otherwise `(String::new(), text, false)` is
/// returned. When `open != close` the delimiters nest and the section ends at
/// the matching `close`; otherwise it ends at the first unescaped `close`.
/// A backslash and the character after it are copied into the content verbatim
/// and never act as delimiters.
///
/// Returns `(content, rest, found_closing)`, where `rest` is the text after the
/// closing delimiter (empty when no closing delimiter was found).
fn extract_delimited_content_strict(text: &str, open: char, close: char) -> (String, &str, bool) {
    let Some(inner) = text.strip_prefix(open) else {
        return (String::new(), text, false);
    };

    let nests = open != close;
    let mut content = String::new();
    // Only consulted when the delimiters nest.
    let mut depth = 1usize;
    let mut after_backslash = false;

    for (offset, ch) in inner.char_indices() {
        if after_backslash {
            after_backslash = false;
        } else if ch == '\\' {
            after_backslash = true;
        } else if nests && ch == open {
            depth += 1;
        } else if ch == close {
            if nests {
                depth -= 1;
            }
            if !nests || depth == 0 {
                // The closing delimiter itself is not part of the content.
                return (content, &inner[offset + ch.len_utf8()..], true);
            }
        }
        content.push(ch);
    }

    (content, "", false)
}
245
246/// Extract pattern, replacement, and modifiers from a substitution token
247///
248/// This function parses substitution operators like s/pattern/replacement/flags
249/// and handles various delimiter forms including:
250/// - Non-paired delimiters: s/pattern/replacement/ (same delimiter for all parts)
251/// - Paired delimiters: s{pattern}{replacement} (different open/close delimiters)
252///
253/// For paired delimiters, properly handles nested delimiters within the pattern
254/// or replacement parts. Returns (pattern, replacement, modifiers) as strings.
255///
256/// Note: This function silently filters invalid modifiers. For strict validation,
257/// use `extract_substitution_parts_strict` instead.
258pub fn extract_substitution_parts(text: &str) -> (String, String, String) {
259    // Skip 's' prefix
260    let content = text.strip_prefix('s').unwrap_or(text);
261
262    // Get delimiter - content must be non-empty to have a delimiter
263    let delimiter = match content.chars().next() {
264        Some(d) => d,
265        None => return (String::new(), String::new(), String::new()),
266    };
267    if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
268        if let Some((pattern, replacement, modifiers_str)) = split_on_last_paired_delimiter(content)
269        {
270            let modifiers = extract_substitution_modifiers(&modifiers_str);
271            return (pattern, replacement, modifiers);
272        }
273
274        return (String::new(), String::new(), String::new());
275    }
276    let closing = get_closing_delimiter(delimiter);
277    let is_paired = delimiter != closing;
278
279    // Parse first body (pattern)
280    let (mut pattern, rest1, pattern_closed) = if is_paired {
281        extract_substitution_pattern_with_replacement_hint(content, delimiter, closing)
282    } else {
283        extract_delimited_content_strict(content, delimiter, closing)
284    };
285
286    // Parse second body (replacement)
287    // For paired delimiters, the replacement may use a different delimiter than the pattern
288    // e.g., s[pattern]{replacement} is valid Perl
289    let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
290        // Non-paired delimiters: manually parse the replacement, skipping string literals
291        // so that delimiter chars inside "foo/bar" or 'a/b' don't end the replacement early.
292        let (body, rest, _found) = extract_unpaired_body_skip_strings(rest1, closing);
293        (body, Cow::Borrowed(rest))
294    } else if !is_paired && !pattern_closed {
295        if let Some((fallback_pattern, fallback_replacement, fallback_modifiers)) =
296            split_unclosed_substitution_pattern(&pattern)
297        {
298            pattern = fallback_pattern;
299            (fallback_replacement, Cow::Owned(fallback_modifiers))
300        } else {
301            (String::new(), Cow::Borrowed(rest1))
302        }
303    } else if is_paired {
304        let trimmed = rest1.trim_start();
305        if let Some(rd) = trimmed.chars().next() {
306            if rd.is_ascii_alphanumeric() || rd.is_whitespace() {
307                (String::new(), Cow::Borrowed(trimmed))
308            } else {
309                let repl_closing = get_closing_delimiter(rd);
310                let (body, rest) = extract_delimited_content(trimmed, rd, repl_closing);
311                (body, Cow::Borrowed(rest))
312            }
313        } else {
314            (String::new(), Cow::Borrowed(trimmed))
315        }
316    } else {
317        (String::new(), Cow::Borrowed(rest1))
318    };
319
320    // Extract and validate only valid substitution modifiers
321    let modifiers = extract_substitution_modifiers(modifiers_str.as_ref());
322
323    (pattern, replacement, modifiers)
324}
325
326/// Extract search, replace, and modifiers from a transliteration token
327pub fn extract_transliteration_parts(text: &str) -> (String, String, String) {
328    // Skip 'tr' or 'y' prefix
329    let after_op = if let Some(stripped) = text.strip_prefix("tr") {
330        stripped
331    } else if let Some(stripped) = text.strip_prefix('y') {
332        stripped
333    } else {
334        text
335    };
336    let content = after_op.trim_start();
337
338    // Get delimiter - content must be non-empty to have a delimiter
339    let delimiter = match content.chars().next() {
340        Some(d) => d,
341        None => return (String::new(), String::new(), String::new()),
342    };
343    if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
344        return (String::new(), String::new(), String::new());
345    }
346    let closing = get_closing_delimiter(delimiter);
347    let is_paired = delimiter != closing;
348
349    // Parse first body (search pattern)
350    let (search, rest1) = extract_delimited_content(content, delimiter, closing);
351
352    // For paired delimiters, skip whitespace and allow any paired opening delimiter for the
353    // replacement list. Perl accepts forms like tr[abc]{xyz} in addition to tr[abc][xyz].
354    let rest2_owned;
355    let rest2 = if is_paired {
356        rest1.trim_start()
357    } else {
358        rest2_owned = format!("{}{}", delimiter, rest1);
359        &rest2_owned
360    };
361
362    // Parse second body (replacement pattern)
363    let (replacement, modifiers_str) = if !is_paired && !rest1.is_empty() {
364        // Manually parse the replacement for non-paired delimiters
365        let chars = rest1.char_indices();
366        let mut body = String::new();
367        let mut escaped = false;
368        let mut end_pos = rest1.len();
369
370        for (i, ch) in chars {
371            if escaped {
372                body.push(ch);
373                escaped = false;
374                continue;
375            }
376
377            match ch {
378                '\\' => {
379                    body.push(ch);
380                    escaped = true;
381                }
382                c if c == closing => {
383                    end_pos = i + ch.len_utf8();
384                    break;
385                }
386                _ => body.push(ch),
387            }
388        }
389
390        (body, &rest1[end_pos..])
391    } else if is_paired {
392        if let Some(repl_delimiter) = starts_with_paired_delimiter(rest2) {
393            let repl_closing = get_closing_delimiter(repl_delimiter);
394            extract_delimited_content(rest2, repl_delimiter, repl_closing)
395        } else if let Some(repl_delimiter) = rest2.chars().next() {
396            if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
397                (String::new(), rest2)
398            } else {
399                extract_delimited_content(rest2, repl_delimiter, repl_delimiter)
400            }
401        } else {
402            (String::new(), rest2)
403        }
404    } else {
405        (String::new(), rest1)
406    };
407
408    // Extract and validate only valid transliteration modifiers
409    // Security fix: Apply consistent validation for all delimiter types
410    let modifiers = modifiers_str
411        .chars()
412        .take_while(|c| c.is_ascii_alphabetic())
413        .filter(|&c| matches!(c, 'c' | 'd' | 's' | 'r'))
414        .collect();
415
416    (search, replacement, modifiers)
417}
418
419/// Extract search, replace, and modifiers from a transliteration token with strict validation.
420///
421/// Supports both `tr///` and `y///` syntax, including optional whitespace between
422/// the operator and delimiter (e.g. `tr /a/b/`).
423///
424/// # Errors
425///
426/// Returns `Err(TransliterationError::InvalidModifier(c))` if an invalid modifier
427/// character is encountered. Valid modifiers are: `c`, `d`, `s`, `r`.
428pub fn extract_transliteration_parts_strict(
429    text: &str,
430) -> Result<(String, String, String), TransliterationError> {
431    // Skip `tr` or `y` prefix, then allow optional whitespace before delimiter.
432    let after_op = if let Some(stripped) = text.strip_prefix("tr") {
433        stripped
434    } else if let Some(stripped) = text.strip_prefix('y') {
435        stripped
436    } else {
437        text
438    };
439    let content = after_op.trim_start();
440
441    // Get delimiter.
442    let delimiter = match content.chars().next() {
443        Some(d) => d,
444        None => return Err(TransliterationError::MissingDelimiter),
445    };
446    if delimiter.is_ascii_alphanumeric() || delimiter.is_whitespace() {
447        return Err(TransliterationError::InvalidDelimiter(delimiter));
448    }
449    let closing = get_closing_delimiter(delimiter);
450    let is_paired = delimiter != closing;
451
452    // Parse first body (search).
453    let (search, rest1, search_closed) =
454        extract_delimited_content_strict(content, delimiter, closing);
455    if !search_closed {
456        return Err(TransliterationError::MissingClosingDelimiter);
457    }
458
459    // Parse second body (replacement).
460    let (replacement, modifiers_str, replacement_closed) = if !is_paired {
461        if rest1.is_empty() {
462            return Err(TransliterationError::MissingReplacement);
463        }
464        let (body, rest, found_closing) = extract_unpaired_body_skip_strings(rest1, closing);
465        (body, rest, found_closing)
466    } else {
467        let trimmed = skip_paired_replacement_gap(rest1);
468        if let Some(repl_delimiter) = trimmed.chars().next() {
469            // After a paired search delimiter (e.g. `{...}`), the replacement must
470            // also start with a valid non-alphanumeric, non-whitespace delimiter.
471            // An alphanumeric character here (e.g. `tr{abc}xyz`) is an invalid
472            // delimiter, not merely a missing replacement section.
473            if repl_delimiter.is_ascii_alphanumeric() || repl_delimiter.is_whitespace() {
474                return Err(TransliterationError::InvalidDelimiter(repl_delimiter));
475            }
476            let repl_closing = get_closing_delimiter(repl_delimiter);
477            let (body, rest, found_closing) =
478                extract_delimited_content_strict(trimmed, repl_delimiter, repl_closing);
479            (body, rest, found_closing)
480        } else {
481            return Err(TransliterationError::MissingReplacement);
482        }
483    };
484
485    if !replacement_closed {
486        return Err(TransliterationError::MissingClosingDelimiter);
487    }
488
489    if search.is_empty() {
490        return Err(TransliterationError::MissingSearch);
491    }
492
493    // Validate transliteration modifiers strictly.
494    let mut modifiers = String::new();
495    for modifier in modifiers_str.chars().take_while(|c: &char| c.is_ascii_alphanumeric()) {
496        if matches!(modifier, 'c' | 'd' | 's' | 'r') {
497            modifiers.push(modifier);
498        } else {
499            return Err(TransliterationError::InvalidModifier(modifier));
500        }
501    }
502
503    Ok((search, replacement, modifiers))
504}
505
/// Map an opening delimiter to its closing counterpart.
///
/// The bracket-like openers `(`, `[`, `{`, `<` map to `)`, `]`, `}`, `>`;
/// every other character closes itself.
fn get_closing_delimiter(open: char) -> char {
    const PAIRS: [(char, char); 4] = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')];

    PAIRS
        .iter()
        .find(|&&(opener, _)| opener == open)
        .map_or(open, |&(_, closer)| closer)
}
516
/// Report whether `ch` is one of the bracket-like opening delimiters
/// `{`, `[`, `(`, `<`.
fn is_paired_open(ch: char) -> bool {
    "{[(<".contains(ch)
}
520
521fn starts_with_paired_delimiter(text: &str) -> Option<char> {
522    let trimmed = text.trim_start();
523    match trimmed.chars().next() {
524        Some(ch) if is_paired_open(ch) => Some(ch),
525        _ => None,
526    }
527}
528
/// Extract the content of one delimited section and return `(content, rest)`.
///
/// `text` must start with `open`; otherwise `(String::new(), text)` is returned.
/// When `open != close` the delimiters nest and the section ends at the matching
/// `close`; otherwise it ends at the first unescaped `close`. A backslash and
/// the character after it are copied verbatim and never act as delimiters.
/// `rest` is the text after the closing delimiter, or empty if the section was
/// never closed.
fn extract_delimited_content(text: &str, open: char, close: char) -> (String, &str) {
    let Some(inner) = text.strip_prefix(open) else {
        return (String::new(), text);
    };

    let nests = open != close;
    let mut content = String::new();
    // Only consulted when the delimiters nest.
    let mut depth = 1usize;
    let mut after_backslash = false;

    for (offset, ch) in inner.char_indices() {
        if after_backslash {
            after_backslash = false;
        } else if ch == '\\' {
            after_backslash = true;
        } else if nests && ch == open {
            depth += 1;
        } else if ch == close {
            if nests {
                depth -= 1;
            }
            if !nests || depth == 0 {
                // The closing delimiter itself is not part of the content.
                return (content, &inner[offset + ch.len_utf8()..]);
            }
        }
        content.push(ch);
    }

    (content, "")
}
583
584/// Lookahead helper: determine whether a `quote` char at byte `pos` in `text` is the
585/// opening of a genuine inner string literal that protects `closing` delimiter chars.
586///
587/// Returns `Some((end_pos, true))` when:
588///   - A matching closing `quote` is found on the SAME LINE (no `\n` crossed), AND
589///   - The content between the two `quote` chars contains `closing`.
590///   - `end_pos` is the byte offset just after the closing `quote`.
591///
592/// Returns `None` (or `Some((_, false))`) when:
593///   - A newline or end of `text` is reached before the matching closing `quote`, OR
594///   - The string content does not contain `closing`.
595///
596/// Stopping at newlines prevents cross-statement false positives in multiline source.
597fn scan_inner_string(
598    text: &str,
599    pos: usize,
600    quote: char,
601    delimiter: char,
602) -> Option<(usize, bool)> {
603    if is_word_apostrophe(text, pos, quote) {
604        return None;
605    }
606    // Adjacent quotes are literal replacement text (for example s/"/""/g),
607    // not a string literal to skip while hunting for the replacement delimiter.
608    if text.get(..pos).and_then(|prefix| prefix.chars().next_back()) == Some(quote) {
609        return None;
610    }
611    let start = pos + quote.len_utf8();
612    let rest = text.get(start..)?;
613    if rest.starts_with(quote) {
614        return None;
615    }
616    let mut escaped = false;
617    let mut contains_delim = false;
618    let mut end_of_string = None;
619    let mut local_pos = start;
620    for ch in rest.chars() {
621        if escaped {
622            escaped = false;
623            local_pos += ch.len_utf8();
624            continue;
625        }
626        if ch == '\\' {
627            escaped = true;
628            local_pos += ch.len_utf8();
629            continue;
630        }
631        // Newline terminates the scan: inner string literals don't span lines.
632        if ch == '\n' {
633            return None;
634        }
635        if ch == delimiter {
636            contains_delim = true;
637        }
638        if ch == quote {
639            end_of_string = Some(local_pos + ch.len_utf8());
640            break;
641        }
642        local_pos += ch.len_utf8();
643    }
644    end_of_string.map(|end| (end, contains_delim))
645}
646
/// Report whether `quote` is an apostrophe that directly follows a word
/// character (ASCII letter, ASCII digit, or `_`) ending at byte `pos` of `text`.
///
/// Returns `false` for any other quote character, at the start of `text`, or
/// when `pos` is not a valid slice end for `text`.
fn is_word_apostrophe(text: &str, pos: usize, quote: char) -> bool {
    if quote != '\'' {
        return false;
    }
    match text.get(..pos).and_then(|before| before.chars().next_back()) {
        Some(previous) => previous == '_' || previous.is_ascii_alphanumeric(),
        None => false,
    }
}
654
655/// Like `extract_unpaired_body` but skips over string literals (`"..."` / `'...'`)
656/// so that the closing delimiter character inside a string is not mistaken for the
657/// end of the replacement section.  Returns `(body, rest, found_closing)`.
658///
659/// Uses lookahead to determine whether a `'` or `"` is actually an inner string:
660/// only enters string-skip mode when the candidate string (a) has a matching closing
661/// quote on the same line AND (b) contains the closing delimiter in its content.
662/// This prevents lone apostrophes (e.g. the `'` in `s/''/'/g`) from triggering
663/// string-skip, which would cause replacement scanning to cross statement boundaries.
664fn extract_unpaired_body_skip_strings(text: &str, closing: char) -> (String, &str, bool) {
665    let mut body = String::new();
666    let mut end_pos = text.len();
667    let mut found_closing = false;
668    let mut pos = 0usize;
669    let mut escaped = false;
670
671    while let Some(ch) = text.get(pos..).and_then(|s| s.chars().next()) {
672        if escaped {
673            body.push(ch);
674            escaped = false;
675            pos += ch.len_utf8();
676            continue;
677        }
678
679        match ch {
680            '\\' => {
681                body.push(ch);
682                escaped = true;
683                pos += ch.len_utf8();
684            }
685            // Skip over string literals to avoid treating delimiter chars inside
686            // "foo/bar" or 'a/b' as the closing delimiter of the replacement.
687            //
688            // Guard: only enter string-skip when lookahead confirms a matching closing
689            // quote exists on the same line AND the content contains the closing delimiter.
690            '"' | '\'' if ch != closing => {
691                let quote = ch;
692                match scan_inner_string(text, pos, quote, closing) {
693                    Some((string_end, true)) => {
694                        // String content contains the closing delimiter → skip the string.
695                        let string_text = &text[pos..string_end];
696                        body.push_str(string_text);
697                        pos = string_end;
698                    }
699                    _ => {
700                        // No closing quote on same line, or content has no delimiter:
701                        // treat the opening quote as a literal character.
702                        body.push(ch);
703                        pos += ch.len_utf8();
704                    }
705                }
706            }
707            c if c == closing => {
708                end_pos = pos + ch.len_utf8();
709                found_closing = true;
710                break;
711            }
712            _ => {
713                body.push(ch);
714                pos += ch.len_utf8();
715            }
716        }
717    }
718
719    (body, &text[end_pos..], found_closing)
720}
721
722fn extract_substitution_pattern_with_replacement_hint(
723    text: &str,
724    open: char,
725    close: char,
726) -> (String, &str, bool) {
727    let mut chars = text.char_indices();
728
729    // Skip opening delimiter
730    if let Some((_, c)) = chars.next() {
731        if c != open {
732            return (String::new(), text, false);
733        }
734    } else {
735        return (String::new(), "", false);
736    }
737
738    let mut body = String::new();
739    let mut depth = 1usize;
740    let mut escaped = false;
741    let mut first_close_pos: Option<usize> = None;
742    let mut first_body_len: usize = 0;
743
744    for (i, ch) in chars {
745        if escaped {
746            body.push(ch);
747            escaped = false;
748            continue;
749        }
750
751        match ch {
752            '\\' => {
753                body.push(ch);
754                escaped = true;
755            }
756            c if c == open => {
757                body.push(ch);
758                depth += 1;
759            }
760            c if c == close => {
761                if depth > 1 {
762                    depth -= 1;
763                    body.push(ch);
764                    continue;
765                }
766
767                let rest = &text[i + ch.len_utf8()..];
768                if first_close_pos.is_none() {
769                    first_close_pos = Some(i + ch.len_utf8());
770                    first_body_len = body.len();
771                }
772
773                if starts_with_paired_delimiter(rest).is_some() {
774                    return (body, rest, true);
775                }
776
777                body.push(ch);
778            }
779            _ => body.push(ch),
780        }
781    }
782
783    if let Some(pos) = first_close_pos {
784        body.truncate(first_body_len);
785        return (body, &text[pos..], true);
786    }
787
788    (body, "", false)
789}
790
791fn split_unclosed_substitution_pattern(pattern: &str) -> Option<(String, String, String)> {
792    let mut escaped = false;
793
794    for (idx, ch) in pattern.char_indices() {
795        if escaped {
796            escaped = false;
797            continue;
798        }
799
800        if ch == '\\' {
801            escaped = true;
802            continue;
803        }
804
805        if is_paired_open(ch) {
806            let closing = get_closing_delimiter(ch);
807            let (replacement, rest, found_closing) =
808                extract_delimited_content_strict(&pattern[idx..], ch, closing);
809            if found_closing {
810                let leading = pattern[..idx].to_string();
811                return Some((leading, replacement, rest.to_string()));
812            }
813        }
814    }
815
816    None
817}
818
819fn split_on_last_paired_delimiter(text: &str) -> Option<(String, String, String)> {
820    let mut escaped = false;
821    let mut candidates = Vec::new();
822
823    for (idx, ch) in text.char_indices() {
824        if escaped {
825            escaped = false;
826            continue;
827        }
828
829        if ch == '\\' {
830            escaped = true;
831            continue;
832        }
833
834        if is_paired_open(ch) {
835            candidates.push((idx, ch));
836        }
837    }
838
839    for (idx, ch) in candidates.into_iter().rev() {
840        let closing = get_closing_delimiter(ch);
841        let (replacement, rest, found_closing) =
842            extract_delimited_content_strict(&text[idx..], ch, closing);
843        if found_closing {
844            let leading = text[..idx].to_string();
845            return Some((leading, replacement, rest.to_string()));
846        }
847    }
848
849    None
850}
851
/// Collect the valid substitution modifiers from the start of `text`.
///
/// Scanning stops at the first character that is not an ASCII letter. Within
/// that leading run, letters that are not substitution modifiers are dropped
/// silently. The accepted modifiers are:
/// - Core modifiers: g, i, m, s, x, o, e, r
/// - Charset modifiers (Perl 5.14+): a, d, l, u
/// - Additional modifiers: n (5.22+), p, c
fn extract_substitution_modifiers(text: &str) -> String {
    const ALLOWED: &str = "gimsxoeradlunpc";

    let mut kept = String::new();
    for flag in text.chars() {
        if !flag.is_ascii_alphabetic() {
            break;
        }
        if ALLOWED.contains(flag) {
            kept.push(flag);
        }
    }
    kept
}
885
/// Validate the modifiers that follow a substitution operator.
///
/// Accepted modifiers:
/// - Core modifiers: g, i, m, s, x, o, e, r
/// - Charset modifiers (Perl 5.14+): a, d, l, u
/// - Additional modifiers: n (5.22+), p, c
///
/// Scanning stops successfully at the end of input, at any whitespace
/// character, or at `;`. Any other character is reported as invalid.
///
/// # Arguments
///
/// * `modifiers_str` - The raw text following the closing delimiter
///
/// # Returns
///
/// * `Ok(String)` - The modifiers seen before scanning stopped
/// * `Err(char)` - The first invalid character encountered
///
/// # Examples
///
/// ```ignore
/// assert_eq!(validate_substitution_modifiers("gi"), Ok("gi".to_string()));
/// assert_eq!(validate_substitution_modifiers("g; next"), Ok("g".to_string()));
/// assert_eq!(validate_substitution_modifiers("giz"), Err('z'));
/// ```
pub fn validate_substitution_modifiers(modifiers_str: &str) -> Result<String, char> {
    const ALLOWED: &str = "gimsxoeradlunpc";

    let mut accepted = String::new();
    for flag in modifiers_str.chars() {
        match flag {
            _ if ALLOWED.contains(flag) => accepted.push(flag),
            // A statement terminator or whitespace ends the modifier run.
            ';' => break,
            _ if flag.is_whitespace() => break,
            // Unknown letters and any other symbol are rejected.
            _ => return Err(flag),
        }
    }

    Ok(accepted)
}