shuck_parser/parser/
lexer.rs

//! Lexer for bash scripts.
//!
//! Tokenizes input into a stream of tokens with source position tracking.
4
5use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Compact bit set of lexer-level properties attached to a token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Bit marking a token as carrying cooked text.
    const COOKED_TEXT: u8 = 0b01;
    /// Bit marking a token as synthetic.
    const SYNTHETIC: u8 = 0b10;

    /// Flag set with no bits raised.
    const fn empty() -> Self {
        Self(0)
    }

    /// Flag set with only the cooked-text bit raised.
    const fn cooked_text() -> Self {
        Self(Self::COOKED_TEXT)
    }

    /// Return a copy of these flags with the synthetic bit raised.
    pub(crate) const fn with_synthetic(self) -> Self {
        let Self(bits) = self;
        Self(bits | Self::SYNTHETIC)
    }

    /// Report whether the cooked-text bit is raised.
    pub(crate) const fn has_cooked_text(self) -> bool {
        self.contains(Self::COOKED_TEXT)
    }

    /// Report whether the synthetic bit is raised.
    pub(crate) const fn is_synthetic(self) -> bool {
        self.contains(Self::SYNTHETIC)
    }

    /// Test whether any bit of `mask` is raised in this set.
    const fn contains(self, mask: u8) -> bool {
        (self.0 & mask) != 0
    }
}
40
41#[derive(Debug, Clone, PartialEq, Eq)]
42pub(crate) enum TokenText<'a> {
43    Borrowed(&'a str),
44    Shared {
45        source: Arc<str>,
46        range: Range<usize>,
47    },
48    Owned(String),
49}
50
51impl TokenText<'_> {
52    pub(crate) fn as_str(&self) -> &str {
53        match self {
54            Self::Borrowed(text) => text,
55            Self::Shared { source, range } => &source[range.clone()],
56            Self::Owned(text) => text,
57        }
58    }
59
60    fn into_owned<'a>(self) -> TokenText<'a> {
61        match self {
62            Self::Borrowed(text) => TokenText::Owned(text.to_string()),
63            Self::Shared { source, range } => TokenText::Shared { source, range },
64            Self::Owned(text) => TokenText::Owned(text),
65        }
66    }
67
68    fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
69        match self {
70            Self::Borrowed(text) => span
71                .filter(|span| span.end.offset <= source.len())
72                .map_or_else(
73                    || TokenText::Owned(text.to_string()),
74                    |span| TokenText::Shared {
75                        source: Arc::clone(source),
76                        range: span.start.offset..span.end.offset,
77                    },
78                ),
79            Self::Shared { source, range } => TokenText::Shared { source, range },
80            Self::Owned(text) => TokenText::Owned(text),
81        }
82    }
83}
84
/// Classification of one segment inside a lexed shell word.
///
/// The kind records which quoting form the segment's text came from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LexedWordSegmentKind {
    /// Unquoted or otherwise plain text.
    Plain,
    /// Text from a single-quoted (`'...'`) string.
    SingleQuoted,
    /// Text from a `$'...'` string.
    DollarSingleQuoted,
    /// Text from a double-quoted (`"..."`) string.
    DoubleQuoted,
    /// Text from a `$"..."` string.
    DollarDoubleQuoted,
    /// Text composed from multiple lexical forms.
    Composite,
}
101
102/// One segment of a lexed shell word, optionally backed by source text.
103#[derive(Debug, Clone, PartialEq, Eq)]
104pub(crate) struct LexedWordSegment<'a> {
105    kind: LexedWordSegmentKind,
106    text: TokenText<'a>,
107    span: Option<Span>,
108    wrapper_span: Option<Span>,
109}
110
111impl<'a> LexedWordSegment<'a> {
112    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
113        Self {
114            kind,
115            text: TokenText::Borrowed(text),
116            span,
117            wrapper_span: span,
118        }
119    }
120
121    fn borrowed_with_spans(
122        kind: LexedWordSegmentKind,
123        text: &'a str,
124        span: Option<Span>,
125        wrapper_span: Option<Span>,
126    ) -> Self {
127        Self {
128            kind,
129            text: TokenText::Borrowed(text),
130            span,
131            wrapper_span,
132        }
133    }
134
135    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
136        Self {
137            kind,
138            text: TokenText::Owned(text),
139            span: None,
140            wrapper_span: None,
141        }
142    }
143
144    fn owned_with_spans(
145        kind: LexedWordSegmentKind,
146        text: String,
147        span: Option<Span>,
148        wrapper_span: Option<Span>,
149    ) -> Self {
150        Self {
151            kind,
152            text: TokenText::Owned(text),
153            span,
154            wrapper_span,
155        }
156    }
157
158    /// Borrow this segment's cooked text.
159    pub(crate) fn as_str(&self) -> &str {
160        self.text.as_str()
161    }
162
163    pub(crate) const fn text_is_source_backed(&self) -> bool {
164        matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
165    }
166
167    /// Return the lexical classification of this segment.
168    pub(crate) const fn kind(&self) -> LexedWordSegmentKind {
169        self.kind
170    }
171
172    /// Return the span of the inner text, if it is tracked.
173    pub(crate) const fn span(&self) -> Option<Span> {
174        self.span
175    }
176
177    /// Return the span including surrounding quoting syntax when available.
178    pub(crate) fn wrapper_span(&self) -> Option<Span> {
179        self.wrapper_span.or(self.span)
180    }
181
182    fn rebased(mut self, base: Position) -> Self {
183        self.span = self.span.map(|span| span.rebased(base));
184        self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
185        self
186    }
187
188    fn into_owned<'b>(self) -> LexedWordSegment<'b> {
189        LexedWordSegment {
190            kind: self.kind,
191            text: self.text.into_owned(),
192            span: self.span,
193            wrapper_span: self.wrapper_span,
194        }
195    }
196
197    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
198        LexedWordSegment {
199            kind: self.kind,
200            text: self.text.into_shared(source, self.span),
201            span: self.span,
202            wrapper_span: self.wrapper_span,
203        }
204    }
205}
206
207/// Source-backed representation of a shell word produced by the lexer.
208#[derive(Debug, Clone, PartialEq, Eq)]
209pub(crate) struct LexedWord<'a> {
210    primary_segment: LexedWordSegment<'a>,
211    trailing_segments: Vec<LexedWordSegment<'a>>,
212}
213
214impl<'a> LexedWord<'a> {
215    fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
216        Self {
217            primary_segment,
218            trailing_segments: Vec::new(),
219        }
220    }
221
222    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
223        Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
224    }
225
226    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
227        Self::from_segment(LexedWordSegment::owned(kind, text))
228    }
229
230    fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
231        self.trailing_segments.push(segment);
232    }
233
234    /// Iterate over the segments that make up this word.
235    pub(crate) fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
236        std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
237    }
238
239    /// Return the word text when it is represented by a single segment.
240    pub(crate) fn text(&self) -> Option<&str> {
241        self.single_segment().map(LexedWordSegment::as_str)
242    }
243
244    /// Join all segments into an owned string.
245    pub(crate) fn joined_text(&self) -> String {
246        let mut text = String::new();
247        for segment in self.segments() {
248            text.push_str(segment.as_str());
249        }
250        text
251    }
252
253    /// Return the only segment when this word is not segmented.
254    pub(crate) fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
255        self.trailing_segments
256            .is_empty()
257            .then_some(&self.primary_segment)
258    }
259
260    fn has_cooked_text(&self) -> bool {
261        self.segments()
262            .any(|segment| matches!(segment.text, TokenText::Owned(_)))
263    }
264
265    fn rebased(mut self, base: Position) -> Self {
266        self.primary_segment = self.primary_segment.rebased(base);
267        self.trailing_segments = self
268            .trailing_segments
269            .into_iter()
270            .map(|segment| segment.rebased(base))
271            .collect();
272        self
273    }
274
275    fn into_owned<'b>(self) -> LexedWord<'b> {
276        LexedWord {
277            primary_segment: self.primary_segment.into_owned(),
278            trailing_segments: self
279                .trailing_segments
280                .into_iter()
281                .map(LexedWordSegment::into_owned)
282                .collect(),
283        }
284    }
285
286    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
287        LexedWord {
288            primary_segment: self.primary_segment.into_shared(source),
289            trailing_segments: self
290                .trailing_segments
291                .into_iter()
292                .map(|segment| segment.into_shared(source))
293                .collect(),
294        }
295    }
296}
297
/// Kinds of lexer error payloads attached to `TokenKind::Error`.
///
/// Every variant describes a construct that was opened but never closed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LexerErrorKind {
    /// Unterminated `$()` command substitution.
    CommandSubstitution,
    /// Unterminated backtick command substitution.
    BacktickSubstitution,
    /// Unterminated single-quoted string.
    SingleQuote,
    /// Unterminated double-quoted string.
    DoubleQuote,
}

impl LexerErrorKind {
    /// Human-readable message for this lexer error kind.
    pub(crate) const fn message(self) -> &'static str {
        let description = match self {
            LexerErrorKind::CommandSubstitution => "unterminated command substitution",
            LexerErrorKind::BacktickSubstitution => "unterminated backtick substitution",
            LexerErrorKind::SingleQuote => "unterminated single quote",
            LexerErrorKind::DoubleQuote => "unterminated double quote",
        };
        description
    }
}
322
/// Kind-specific data carried alongside a lexed token.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenPayload<'a> {
    /// The token carries no extra data.
    None,
    /// Structured word text.
    Word(LexedWord<'a>),
    /// A single file-descriptor number.
    Fd(i32),
    /// A pair of file-descriptor numbers.
    FdPair(i32, i32),
    /// The kind of lexer error that occurred.
    Error(LexerErrorKind),
}
331
332/// Token produced by the shell lexer.
333#[derive(Debug, Clone, PartialEq, Eq)]
334pub struct LexedToken<'a> {
335    /// Token kind used by the parser.
336    pub kind: TokenKind,
337    /// Source span covered by the token.
338    pub span: Span,
339    pub(crate) flags: TokenFlags,
340    payload: TokenPayload<'a>,
341}
342
343impl<'a> LexedToken<'a> {
344    fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
345        match kind {
346            TokenKind::Word => LexedWordSegmentKind::Plain,
347            TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
348            TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
349            _ => LexedWordSegmentKind::Composite,
350        }
351    }
352
353    pub(crate) fn punctuation(kind: TokenKind) -> Self {
354        Self {
355            kind,
356            span: Span::new(),
357            flags: TokenFlags::empty(),
358            payload: TokenPayload::None,
359        }
360    }
361
362    fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
363        let flags = if word.has_cooked_text() {
364            TokenFlags::cooked_text()
365        } else {
366            TokenFlags::empty()
367        };
368
369        Self {
370            kind,
371            span: Span::new(),
372            flags,
373            payload: TokenPayload::Word(word),
374        }
375    }
376
377    fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
378        Self::with_word_payload(
379            kind,
380            LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
381        )
382    }
383
384    fn owned_word(kind: TokenKind, text: String) -> Self {
385        Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
386    }
387
388    fn comment() -> Self {
389        Self {
390            kind: TokenKind::Comment,
391            span: Span::new(),
392            flags: TokenFlags::empty(),
393            payload: TokenPayload::None,
394        }
395    }
396
397    fn fd(kind: TokenKind, fd: i32) -> Self {
398        Self {
399            kind,
400            span: Span::new(),
401            flags: TokenFlags::empty(),
402            payload: TokenPayload::Fd(fd),
403        }
404    }
405
406    fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
407        Self {
408            kind,
409            span: Span::new(),
410            flags: TokenFlags::empty(),
411            payload: TokenPayload::FdPair(src_fd, dst_fd),
412        }
413    }
414
415    fn error(kind: LexerErrorKind) -> Self {
416        Self {
417            kind: TokenKind::Error,
418            span: Span::new(),
419            flags: TokenFlags::empty(),
420            payload: TokenPayload::Error(kind),
421        }
422    }
423
424    pub(crate) fn with_span(mut self, span: Span) -> Self {
425        self.span = span;
426        self
427    }
428
429    pub(crate) fn rebased(mut self, base: Position) -> Self {
430        self.span = self.span.rebased(base);
431        self.payload = match self.payload {
432            TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
433            payload => payload,
434        };
435        self
436    }
437
438    pub(crate) fn with_synthetic_flag(mut self) -> Self {
439        self.flags = self.flags.with_synthetic();
440        self
441    }
442
443    pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
444        let payload = match self.payload {
445            TokenPayload::None => TokenPayload::None,
446            TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
447            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
448            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
449            TokenPayload::Error(kind) => TokenPayload::Error(kind),
450        };
451
452        LexedToken {
453            kind: self.kind,
454            span: self.span,
455            flags: self.flags,
456            payload,
457        }
458    }
459
460    pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
461        let payload = match self.payload {
462            TokenPayload::None => TokenPayload::None,
463            TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
464            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
465            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
466            TokenPayload::Error(kind) => TokenPayload::Error(kind),
467        };
468
469        LexedToken {
470            kind: self.kind,
471            span: self.span,
472            flags: self.flags,
473            payload,
474        }
475    }
476
477    /// Borrow the token text when it is a single-segment word token.
478    pub(crate) fn word_text(&self) -> Option<&str> {
479        self.kind
480            .is_word_like()
481            .then_some(())
482            .and_then(|_| match &self.payload {
483                TokenPayload::Word(word) => word.text(),
484                _ => None,
485            })
486    }
487
488    /// Return an owned string containing the token's word text.
489    pub(crate) fn word_string(&self) -> Option<String> {
490        self.kind
491            .is_word_like()
492            .then_some(())
493            .and_then(|_| match &self.payload {
494                TokenPayload::Word(word) => Some(word.joined_text()),
495                _ => None,
496            })
497    }
498
499    /// Borrow the structured word payload for word-like tokens.
500    pub(crate) fn word(&self) -> Option<&LexedWord<'a>> {
501        match &self.payload {
502            TokenPayload::Word(word) => Some(word),
503            _ => None,
504        }
505    }
506
507    /// Borrow the original source slice when the token is source-backed and uncooked.
508    pub(crate) fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
509        if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
510            return None;
511        }
512
513        (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
514            .then(|| &source[self.span.start.offset..self.span.end.offset])
515    }
516
517    /// Return the file-descriptor payload for redirection tokens that carry one.
518    pub(crate) fn fd_value(&self) -> Option<i32> {
519        match self.payload {
520            TokenPayload::Fd(fd) => Some(fd),
521            _ => None,
522        }
523    }
524
525    /// Return the `(source_fd, target_fd)` payload for descriptor-pair redirections.
526    pub(crate) fn fd_pair_value(&self) -> Option<(i32, i32)> {
527        match self.payload {
528            TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
529            _ => None,
530        }
531    }
532
533    /// Return the lexer error payload when this token represents `TokenKind::Error`.
534    pub(crate) fn error_kind(&self) -> Option<LexerErrorKind> {
535        match self.payload {
536            TokenPayload::Error(kind) => Some(kind),
537            _ => None,
538        }
539    }
540}
541
/// Result of reading a heredoc body from the source.
///
/// Pairs the body text with the span it occupies in the source.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct HeredocRead {
    /// Decoded heredoc content.
    pub content: String,
    /// Source span covering the heredoc body content.
    pub content_span: Span,
}
550
/// Maximum nesting depth for command substitution in the lexer.
/// Prevents stack overflow from deeply nested $() patterns.
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
// NOTE(review): presumably bounds how deeply nested parameter expansions are
// scanned; its use is outside this part of the file — confirm.
const MAX_PARAMETER_EXPANSION_SCAN_DEPTH: usize = 4;
555
556#[derive(Clone, Debug)]
557struct Cursor<'a> {
558    rest: &'a str,
559}
560
561impl<'a> Cursor<'a> {
562    fn new(source: &'a str) -> Self {
563        Self { rest: source }
564    }
565
566    fn first(&self) -> Option<char> {
567        self.rest.chars().next()
568    }
569
570    fn second(&self) -> Option<char> {
571        let mut chars = self.rest.chars();
572        chars.next()?;
573        chars.next()
574    }
575
576    fn third(&self) -> Option<char> {
577        let mut chars = self.rest.chars();
578        chars.next()?;
579        chars.next()?;
580        chars.next()
581    }
582
583    fn bump(&mut self) -> Option<char> {
584        let ch = self.first()?;
585        self.rest = &self.rest[ch.len_utf8()..];
586        Some(ch)
587    }
588
589    fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
590        let start = self.rest;
591        let mut end = 0;
592
593        for ch in start.chars() {
594            if !predicate(ch) {
595                break;
596            }
597            end += ch.len_utf8();
598        }
599
600        self.rest = &start[end..];
601        &start[..end]
602    }
603
604    fn rest(&self) -> &'a str {
605        self.rest
606    }
607
608    fn skip_bytes(&mut self, count: usize) {
609        self.rest = &self.rest[count..];
610    }
611
612    fn find_byte(&self, byte: u8) -> Option<usize> {
613        memchr(byte, self.rest.as_bytes())
614    }
615}
616
/// Maps byte offsets in a source string to line/column positions.
#[derive(Clone, Debug)]
struct PositionMap<'a> {
    /// Text the offsets refer to.
    source: &'a str,
    /// Byte offset at which each line starts; the first entry is always 0.
    line_starts: Arc<[usize]>,
    /// Most recently computed position, reused to resolve later offsets incrementally.
    cached: Position,
}
623
/// Counters collected by the lexer when the `benchmarking` feature is enabled.
#[cfg(feature = "benchmarking")]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct LexerBenchmarkCounters {
    /// Number of recorded `current_position` calls.
    pub(crate) current_position_calls: u64,
}
629
630impl<'a> PositionMap<'a> {
631    fn new(source: &'a str) -> Self {
632        let mut line_starts =
633            Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
634        line_starts.push(0);
635        line_starts.extend(
636            source
637                .bytes()
638                .enumerate()
639                .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
640        );
641
642        Self {
643            source,
644            line_starts: line_starts.into(),
645            cached: Position::new(),
646        }
647    }
648
649    fn position(&mut self, offset: usize) -> Position {
650        if offset == self.cached.offset {
651            return self.cached;
652        }
653
654        let position = if offset > self.cached.offset && offset <= self.source.len() {
655            Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
656        } else {
657            self.position_uncached(offset)
658        };
659        self.cached = position;
660        position
661    }
662
663    fn position_uncached(&self, offset: usize) -> Position {
664        let offset = offset.min(self.source.len());
665        let line_index = self
666            .line_starts
667            .partition_point(|start| *start <= offset)
668            .saturating_sub(1);
669        let line_start = self.line_starts[line_index];
670        let line_text = &self.source[line_start..offset];
671        let column = if line_text.is_ascii() {
672            line_text.len() + 1
673        } else {
674            line_text.chars().count() + 1
675        };
676
677        Position {
678            line: line_index + 1,
679            column,
680            offset,
681        }
682    }
683
684    fn advance_from(mut position: Position, text: &str) -> Position {
685        position.offset += text.len();
686        let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
687        if newline_count == 0 {
688            position.column += if text.is_ascii() {
689                text.len()
690            } else {
691                text.chars().count()
692            };
693            return position;
694        }
695
696        position.line += newline_count;
697        let tail_start = memrchr(b'\n', text.as_bytes())
698            .map(|index| index + 1)
699            .unwrap_or_default();
700        let tail = &text[tail_start..];
701        position.column = if tail.is_ascii() {
702            tail.len() + 1
703        } else {
704            tail.chars().count() + 1
705        };
706        position
707    }
708}
709
/// Lexer for bash scripts.
///
/// Characters are read from a replay buffer first and from the source cursor
/// once that buffer is empty.
#[derive(Clone)]
pub struct Lexer<'a> {
    // NOTE(review): `input` is read by methods such as `ensure_capture_from_source`
    // and `current_word_text`, so this `allow(dead_code)` looks stale — confirm.
    #[allow(dead_code)] // Stored for error reporting in future
    input: &'a str,
    /// Current byte offset in the input/reinjected stream.
    offset: usize,
    /// Cursor over the not-yet-consumed part of `input`.
    cursor: Cursor<'a>,
    /// Converts byte offsets into line/column positions.
    position_map: PositionMap<'a>,
    /// Buffer for re-injected characters (e.g., rest-of-line after heredoc delimiter).
    /// Consumed before `cursor`.
    reinject_buf: VecDeque<char>,
    /// Cursor byte offset to restore once a heredoc replay buffer is exhausted.
    reinject_resume_offset: Option<usize>,
    /// Maximum allowed nesting depth for command substitution
    max_subst_depth: usize,
    /// Zsh option state taken from the shell profile at construction, if any.
    initial_zsh_options: Option<ZshOptionState>,
    /// Offset-ordered zsh option changes, when one was supplied.
    zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    /// Number of timeline entries already passed by `offset`.
    zsh_timeline_index: usize,
    /// Optional counters, present only with the `benchmarking` feature.
    #[cfg(feature = "benchmarking")]
    benchmark_counters: Option<LexerBenchmarkCounters>,
}
732
733impl<'a> Lexer<'a> {
734    /// Create a new lexer for the given input.
735    pub fn new(input: &'a str) -> Self {
736        Self::with_max_subst_depth_and_profile(
737            input,
738            DEFAULT_MAX_SUBST_DEPTH,
739            &ShellProfile::native(super::ShellDialect::Bash),
740            None,
741        )
742    }
743
744    /// Create a new lexer with a custom max substitution nesting depth.
745    /// Limits recursion in read_command_subst_into().
746    pub(super) fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
747        Self::with_max_subst_depth_and_profile(
748            input,
749            max_depth,
750            &ShellProfile::native(super::ShellDialect::Bash),
751            None,
752        )
753    }
754
755    /// Create a new lexer using the provided shell profile.
756    #[cfg(test)]
757    fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
758        let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
759            .then(|| ZshOptionTimeline::build(input, shell_profile))
760            .flatten()
761            .map(Arc::new);
762        Self::with_max_subst_depth_and_profile(
763            input,
764            DEFAULT_MAX_SUBST_DEPTH,
765            shell_profile,
766            zsh_timeline,
767        )
768    }
769
770    pub(crate) fn with_max_subst_depth_and_profile(
771        input: &'a str,
772        max_depth: usize,
773        shell_profile: &ShellProfile,
774        zsh_timeline: Option<Arc<ZshOptionTimeline>>,
775    ) -> Self {
776        Self {
777            input,
778            offset: 0,
779            cursor: Cursor::new(input),
780            position_map: PositionMap::new(input),
781            reinject_buf: VecDeque::new(),
782            reinject_resume_offset: None,
783            max_subst_depth: max_depth,
784            initial_zsh_options: shell_profile.zsh_options().cloned(),
785            zsh_timeline,
786            zsh_timeline_index: 0,
787            #[cfg(feature = "benchmarking")]
788            benchmark_counters: None,
789        }
790    }
791
792    pub(super) fn position_at_offset(&self, offset: usize) -> Position {
793        self.position_map.position_uncached(offset)
794    }
795
796    fn current_position(&mut self) -> Position {
797        #[cfg(feature = "benchmarking")]
798        self.maybe_record_current_position_call();
799        self.position_map.position(self.offset)
800    }
801
802    #[cfg(feature = "benchmarking")]
803    pub(crate) fn enable_benchmark_counters(&mut self) {
804        self.benchmark_counters = Some(LexerBenchmarkCounters::default());
805    }
806
807    #[cfg(feature = "benchmarking")]
808    pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
809        self.benchmark_counters.unwrap_or_default()
810    }
811
812    #[cfg(feature = "benchmarking")]
813    fn maybe_record_current_position_call(&mut self) {
814        if let Some(counters) = &mut self.benchmark_counters {
815            counters.current_position_calls += 1;
816        }
817    }
818
819    fn sync_offset_to_cursor(&mut self) {
820        if self.reinject_buf.is_empty()
821            && let Some(offset) = self.reinject_resume_offset.take()
822        {
823            self.offset = offset;
824        }
825    }
826
827    /// Get the next token kind from the input without decoding or materializing
828    /// any payload text.
829    pub fn next_token_kind(&mut self) -> Option<TokenKind> {
830        self.next_lexed_token().map(|token| token.kind)
831    }
832
833    fn peek_char(&mut self) -> Option<char> {
834        self.sync_offset_to_cursor();
835        if let Some(&ch) = self.reinject_buf.front() {
836            Some(ch)
837        } else {
838            self.cursor.first()
839        }
840    }
841
842    fn advance(&mut self) -> Option<char> {
843        self.sync_offset_to_cursor();
844        let ch = if !self.reinject_buf.is_empty() {
845            self.reinject_buf.pop_front()
846        } else {
847            self.cursor.bump()
848        };
849        if let Some(c) = ch {
850            self.offset += c.len_utf8();
851        }
852        ch
853    }
854
855    fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
856        self.reinject_buf
857            .iter()
858            .copied()
859            .chain(self.cursor.rest().chars())
860    }
861
862    fn second_char(&self) -> Option<char> {
863        match self.reinject_buf.len() {
864            0 => self.cursor.second(),
865            1 => self.cursor.first(),
866            _ => self.reinject_buf.get(1).copied(),
867        }
868    }
869
870    fn third_char(&self) -> Option<char> {
871        match self.reinject_buf.len() {
872            0 => self.cursor.third(),
873            1 => self.cursor.second(),
874            2 => self.cursor.first(),
875            _ => self.reinject_buf.get(2).copied(),
876        }
877    }
878
879    fn fourth_char(&self) -> Option<char> {
880        match self.reinject_buf.len() {
881            0 => self.cursor.rest().chars().nth(3),
882            1 => self.cursor.third(),
883            2 => self.cursor.second(),
884            3 => self.cursor.first(),
885            _ => self.reinject_buf.get(3).copied(),
886        }
887    }
888
    /// Consume `byte_len` bytes directly from the source cursor and advance
    /// `offset` by the same amount.
    ///
    /// Debug builds assert that the replay buffer is empty. The count is handed
    /// to `Cursor::skip_bytes`, which slices the remaining input and therefore
    /// panics if it is out of range or not on a char boundary.
    fn consume_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        // Apply any pending resume offset before adding to `offset`.
        self.sync_offset_to_cursor();
        self.offset += byte_len;
        self.cursor.skip_bytes(byte_len);
    }
895
    /// Advance `offset` by `byte_len` without moving the source cursor.
    ///
    /// Debug builds assert that the replay buffer is empty.
    /// NOTE(review): presumably for callers that have already moved the cursor
    /// themselves; unlike `consume_source_bytes`, no pending resume offset is
    /// applied first — confirm against callers.
    fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.offset += byte_len;
    }
900
901    fn consume_ascii_chars(&mut self, count: usize) {
902        if self.reinject_buf.is_empty() {
903            self.consume_source_bytes(count);
904            return;
905        }
906
907        for _ in 0..count {
908            self.advance();
909        }
910    }
911
912    fn source_horizontal_whitespace_len(&self) -> usize {
913        self.cursor
914            .rest()
915            .as_bytes()
916            .iter()
917            .take_while(|byte| matches!(**byte, b' ' | b'\t'))
918            .count()
919    }
920
921    fn source_ascii_plain_word_len(&self) -> usize {
922        self.cursor
923            .rest()
924            .as_bytes()
925            .iter()
926            .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
927            .count()
928    }
929
930    fn find_double_quote_special(source: &str) -> Option<usize> {
931        source
932            .as_bytes()
933            .iter()
934            .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
935    }
936
937    fn ensure_capture_from_source(
938        &self,
939        capture: &mut Option<String>,
940        start: Position,
941        end: Position,
942    ) {
943        if capture.is_none() {
944            *capture = Some(self.input[start.offset..end.offset].to_string());
945        }
946    }
947
948    fn push_capture_char(capture: &mut Option<String>, ch: char) {
949        if let Some(text) = capture.as_mut() {
950            text.push(ch);
951        }
952    }
953
954    fn push_capture_str(capture: &mut Option<String>, text: &str) {
955        if let Some(current) = capture.as_mut() {
956            current.push_str(text);
957        }
958    }
959
960    fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
961        if let Some(timeline) = self.zsh_timeline.as_ref() {
962            while self.zsh_timeline_index < timeline.entries.len()
963                && timeline.entries[self.zsh_timeline_index].offset <= self.offset
964            {
965                self.zsh_timeline_index += 1;
966            }
967            return if self.zsh_timeline_index == 0 {
968                self.initial_zsh_options.as_ref()
969            } else {
970                Some(&timeline.entries[self.zsh_timeline_index - 1].state)
971            };
972        }
973
974        self.initial_zsh_options.as_ref()
975    }
976
977    fn comments_enabled(&mut self) -> bool {
978        !self
979            .current_zsh_options()
980            .is_some_and(|options| options.interactive_comments.is_definitely_off())
981    }
982
983    fn rc_quotes_enabled(&mut self) -> bool {
984        self.current_zsh_options()
985            .is_some_and(|options| options.rc_quotes.is_definitely_on())
986    }
987
988    fn ignore_braces_enabled(&mut self) -> bool {
989        self.current_zsh_options()
990            .is_some_and(|options| options.ignore_braces.is_definitely_on())
991    }
992
993    fn ignore_close_braces_enabled(&mut self) -> bool {
994        self.current_zsh_options().is_some_and(|options| {
995            options.ignore_braces.is_definitely_on()
996                || options.ignore_close_braces.is_definitely_on()
997        })
998    }
999
1000    fn should_treat_hash_as_word_char(&mut self) -> bool {
1001        if !self.comments_enabled() {
1002            return true;
1003        }
1004        self.reinject_buf.is_empty()
1005            && (self
1006                .input
1007                .get(..self.offset)
1008                .and_then(|prefix| prefix.chars().next_back())
1009                .is_some_and(|prev| {
1010                    !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
1011                })
1012                || self.is_inside_unclosed_double_paren_on_line())
1013    }
1014
1015    fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
1016        capture
1017            .as_deref()
1018            .unwrap_or(&self.input[start.offset..self.offset])
1019    }
1020
1021    fn current_word_surface_is_single_char(
1022        &self,
1023        start: Position,
1024        capture: &Option<String>,
1025        target: char,
1026    ) -> bool {
1027        let text = self.current_word_text(start, capture);
1028        if !text.contains('\x00') {
1029            let mut encoded = [0; 4];
1030            return text == target.encode_utf8(&mut encoded);
1031        }
1032
1033        let mut chars = text.chars().filter(|&ch| ch != '\x00');
1034        matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
1035    }
1036
1037    fn current_word_surface_last_char<'b>(
1038        &'b self,
1039        start: Position,
1040        capture: &'b Option<String>,
1041    ) -> Option<char> {
1042        self.current_word_text(start, capture)
1043            .chars()
1044            .rev()
1045            .find(|&ch| ch != '\x00')
1046    }
1047
1048    fn current_word_surface_ends_with_char(
1049        &self,
1050        start: Position,
1051        capture: &Option<String>,
1052        target: char,
1053    ) -> bool {
1054        self.current_word_surface_last_char(start, capture) == Some(target)
1055    }
1056
1057    fn current_word_surface_ends_with_extglob_prefix(
1058        &self,
1059        start: Position,
1060        capture: &Option<String>,
1061    ) -> bool {
1062        self.current_word_surface_last_char(start, capture)
1063            .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
1064    }
1065
1066    /// Get the next source-backed token from the input, skipping line comments.
1067    pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
1068        self.skip_whitespace();
1069        let start = self.current_position();
1070        let token = self.next_lexed_token_inner(false)?;
1071        let end = self.current_position();
1072        Some(token.with_span(Span::from_positions(start, end)))
1073    }
1074
1075    /// Get the next source-backed token from the input, preserving line comments.
1076    pub(super) fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
1077        self.skip_whitespace();
1078        let start = self.current_position();
1079        let token = self.next_lexed_token_inner(true)?;
1080        let end = self.current_position();
1081        Some(token.with_span(Span::from_positions(start, end)))
1082    }
1083
1084    /// Internal: get next token without recording position (called after whitespace skip)
1085    fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
1086        let ch = self.peek_char()?;
1087
1088        match ch {
1089            '\n' => {
1090                self.consume_ascii_chars(1);
1091                Some(LexedToken::punctuation(TokenKind::Newline))
1092            }
1093            ';' => {
1094                if self.second_char() == Some(';') {
1095                    if self.third_char() == Some('&') {
1096                        self.consume_ascii_chars(3);
1097                        Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp)) // ;;&
1098                    } else {
1099                        self.consume_ascii_chars(2);
1100                        Some(LexedToken::punctuation(TokenKind::DoubleSemicolon)) // ;;
1101                    }
1102                } else if self.second_char() == Some('|') {
1103                    self.consume_ascii_chars(2);
1104                    Some(LexedToken::punctuation(TokenKind::SemiPipe)) // ;|
1105                } else if self.second_char() == Some('&') {
1106                    self.consume_ascii_chars(2);
1107                    Some(LexedToken::punctuation(TokenKind::SemiAmp)) // ;&
1108                } else {
1109                    self.consume_ascii_chars(1);
1110                    Some(LexedToken::punctuation(TokenKind::Semicolon))
1111                }
1112            }
1113            '|' => {
1114                if self.second_char() == Some('|') {
1115                    self.consume_ascii_chars(2);
1116                    Some(LexedToken::punctuation(TokenKind::Or))
1117                } else if self.second_char() == Some('&') {
1118                    self.consume_ascii_chars(2);
1119                    Some(LexedToken::punctuation(TokenKind::PipeBoth))
1120                } else {
1121                    self.consume_ascii_chars(1);
1122                    Some(LexedToken::punctuation(TokenKind::Pipe))
1123                }
1124            }
1125            '&' => {
1126                if self.second_char() == Some('&') {
1127                    self.consume_ascii_chars(2);
1128                    Some(LexedToken::punctuation(TokenKind::And))
1129                } else if self.second_char() == Some('>') {
1130                    if self.third_char() == Some('>') {
1131                        self.consume_ascii_chars(3);
1132                        Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
1133                    } else {
1134                        self.consume_ascii_chars(2);
1135                        Some(LexedToken::punctuation(TokenKind::RedirectBoth))
1136                    }
1137                } else if self.second_char() == Some('|') {
1138                    self.consume_ascii_chars(2);
1139                    Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
1140                } else if self.second_char() == Some('!') {
1141                    self.consume_ascii_chars(2);
1142                    Some(LexedToken::punctuation(TokenKind::BackgroundBang))
1143                } else {
1144                    self.consume_ascii_chars(1);
1145                    Some(LexedToken::punctuation(TokenKind::Background))
1146                }
1147            }
1148            '>' => {
1149                if self.second_char() == Some('>') {
1150                    if self.third_char() == Some('|') {
1151                        self.consume_ascii_chars(3);
1152                    } else {
1153                        self.consume_ascii_chars(2);
1154                    }
1155                    Some(LexedToken::punctuation(TokenKind::RedirectAppend))
1156                } else if self.second_char() == Some('|') {
1157                    self.consume_ascii_chars(2);
1158                    Some(LexedToken::punctuation(TokenKind::Clobber))
1159                } else if self.second_char() == Some('(') {
1160                    self.consume_ascii_chars(2);
1161                    Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
1162                } else if self.second_char() == Some('&') {
1163                    self.consume_ascii_chars(2);
1164                    Some(LexedToken::punctuation(TokenKind::DupOutput))
1165                } else {
1166                    self.consume_ascii_chars(1);
1167                    Some(LexedToken::punctuation(TokenKind::RedirectOut))
1168                }
1169            }
1170            '<' => {
1171                if self.second_char() == Some('<') {
1172                    if self.third_char() == Some('<') {
1173                        self.consume_ascii_chars(3);
1174                        Some(LexedToken::punctuation(TokenKind::HereString))
1175                    } else if self.third_char() == Some('-') {
1176                        self.consume_ascii_chars(3);
1177                        Some(LexedToken::punctuation(TokenKind::HereDocStrip))
1178                    } else {
1179                        self.consume_ascii_chars(2);
1180                        Some(LexedToken::punctuation(TokenKind::HereDoc))
1181                    }
1182                } else if self.second_char() == Some('>') {
1183                    self.consume_ascii_chars(2);
1184                    Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
1185                } else if self.second_char() == Some('(') {
1186                    self.consume_ascii_chars(2);
1187                    Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
1188                } else if self.second_char() == Some('&') {
1189                    self.consume_ascii_chars(2);
1190                    Some(LexedToken::punctuation(TokenKind::DupInput))
1191                } else {
1192                    self.consume_ascii_chars(1);
1193                    Some(LexedToken::punctuation(TokenKind::RedirectIn))
1194                }
1195            }
1196            '(' => {
1197                if self.second_char() == Some('(') {
1198                    self.consume_ascii_chars(2);
1199                    Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
1200                } else {
1201                    self.consume_ascii_chars(1);
1202                    Some(LexedToken::punctuation(TokenKind::LeftParen))
1203                }
1204            }
1205            ')' => {
1206                if self.second_char() == Some(')') {
1207                    self.consume_ascii_chars(2);
1208                    Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
1209                } else {
1210                    self.consume_ascii_chars(1);
1211                    Some(LexedToken::punctuation(TokenKind::RightParen))
1212                }
1213            }
1214            '{' => {
1215                let start = self.current_position();
1216                if self.ignore_braces_enabled() {
1217                    self.consume_ascii_chars(1);
1218                    match self.peek_char() {
1219                        Some(' ') | Some('\t') | Some('\n') | None => {
1220                            Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
1221                        }
1222                        _ => self.read_word_starting_with("{", start),
1223                    }
1224                } else if self.looks_like_brace_expansion() {
1225                    // Look ahead to see if this is a brace expansion like {a,b,c} or {1..5}
1226                    // vs a brace group like { cmd; }
1227                    // Note: { must be followed by space/newline to be a brace group
1228                    self.read_brace_expansion_word()
1229                } else if self.is_brace_group_start() {
1230                    self.advance();
1231                    Some(LexedToken::punctuation(TokenKind::LeftBrace))
1232                } else if self.brace_literal_starts_case_pattern_delimiter() {
1233                    self.read_word_starting_with("{", start)
1234                } else {
1235                    self.read_brace_literal_word()
1236                }
1237            }
1238            '}' => {
1239                self.consume_ascii_chars(1);
1240                if self.ignore_close_braces_enabled() {
1241                    Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
1242                } else {
1243                    Some(LexedToken::punctuation(TokenKind::RightBrace))
1244                }
1245            }
1246            '[' => {
1247                let start = self.current_position();
1248                self.consume_ascii_chars(1);
1249                if self.peek_char() == Some('[')
1250                    && matches!(
1251                        self.second_char(),
1252                        Some(' ') | Some('\t') | Some('\n') | None
1253                    )
1254                {
1255                    self.consume_ascii_chars(1);
1256                    Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
1257                } else {
1258                    // `[` can start the test command when followed by whitespace, or it can be
1259                    // ordinary word text such as a glob bracket expression.
1260                    //
1261                    // Read the whole token with the normal word scanner so forms like `[[z]`,
1262                    // `[hello"]"`, and `[+(])` stay attached to one word instead of producing
1263                    // structural tokens mid-word.
1264                    match self.peek_char() {
1265                        Some(' ') | Some('\t') | Some('\n') | None => {
1266                            Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
1267                        }
1268                        _ => self.read_word_starting_with("[", start),
1269                    }
1270                }
1271            }
1272            ']' => {
1273                if self.second_char() == Some(']') {
1274                    self.consume_ascii_chars(2);
1275                    Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
1276                } else {
1277                    self.consume_ascii_chars(1);
1278                    Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
1279                }
1280            }
1281            '\'' => self.read_single_quoted_string(),
1282            '"' => self.read_double_quoted_string(),
1283            '#' => {
1284                if self.should_treat_hash_as_word_char() {
1285                    let start = self.current_position();
1286                    return self.read_word_starting_with("#", start);
1287                }
1288                if preserve_comments {
1289                    self.read_comment();
1290                    Some(LexedToken::comment())
1291                } else {
1292                    self.skip_comment();
1293                    self.next_lexed_token_inner(false)
1294                }
1295            }
1296            // Handle file descriptor redirects like 2> or 2>&1
1297            '0'..='9' => self.read_word_or_fd_redirect(),
1298            _ => self.read_word(),
1299        }
1300    }
1301
1302    fn skip_whitespace(&mut self) {
1303        while let Some(ch) = self.peek_char() {
1304            if self.reinject_buf.is_empty() {
1305                let whitespace_len = self.source_horizontal_whitespace_len();
1306                if whitespace_len > 0 {
1307                    self.consume_source_bytes(whitespace_len);
1308                    continue;
1309                }
1310
1311                if self.cursor.rest().starts_with("\\\n") {
1312                    self.consume_source_bytes(2);
1313                    continue;
1314                }
1315            }
1316
1317            if ch == ' ' || ch == '\t' {
1318                self.consume_ascii_chars(1);
1319            } else if ch == '\\' {
1320                // Check for backslash-newline (line continuation) between tokens
1321                if self.second_char() == Some('\n') {
1322                    self.consume_ascii_chars(2);
1323                } else {
1324                    break;
1325                }
1326            } else {
1327                break;
1328            }
1329        }
1330    }
1331
1332    fn skip_comment(&mut self) {
1333        if self.reinject_buf.is_empty() {
1334            let end = self
1335                .cursor
1336                .find_byte(b'\n')
1337                .unwrap_or(self.cursor.rest().len());
1338            self.consume_source_bytes(end);
1339            return;
1340        }
1341
1342        while let Some(ch) = self.peek_char() {
1343            if ch == '\n' {
1344                break;
1345            }
1346            self.advance();
1347        }
1348    }
1349
1350    fn read_comment(&mut self) {
1351        debug_assert_eq!(self.peek_char(), Some('#'));
1352
1353        if self.reinject_buf.is_empty() {
1354            let rest = self.cursor.rest();
1355            let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1356            self.consume_source_bytes(end);
1357            return;
1358        }
1359
1360        self.advance(); // consume '#'
1361
1362        while let Some(ch) = self.peek_char() {
1363            if ch == '\n' {
1364                break;
1365            }
1366            self.advance();
1367        }
1368    }
1369
1370    fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1371        if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1372            return false;
1373        }
1374
1375        let line_start = self.input[..self.offset]
1376            .rfind('\n')
1377            .map_or(0, |index| index + 1);
1378        let prefix = &self.input[line_start..self.offset];
1379        line_has_unclosed_double_paren(prefix)
1380    }
1381
1382    /// Check if this is a file descriptor redirect (e.g., 2>, 2>>, 2>&1)
1383    /// or just a regular word starting with a digit
1384    fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
1385        if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
1386            let Some(fd) = first_digit.to_digit(10) else {
1387                unreachable!("peeked ASCII digit should convert to a base-10 digit");
1388            };
1389            let fd = fd as i32;
1390
1391            match (self.second_char(), self.third_char()) {
1392                (Some('>'), Some('>')) => {
1393                    if self.fourth_char() == Some('|') {
1394                        self.consume_ascii_chars(4);
1395                    } else {
1396                        self.consume_ascii_chars(3);
1397                    }
1398                    return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
1399                }
1400                (Some('>'), Some('|')) => {
1401                    self.consume_ascii_chars(3);
1402                    return Some(LexedToken::fd(TokenKind::Clobber, fd));
1403                }
1404                (Some('>'), Some('&')) => {
1405                    self.consume_ascii_chars(3);
1406
1407                    let mut target_str = String::with_capacity(4);
1408                    while let Some(c) = self.peek_char() {
1409                        if c.is_ascii_digit() {
1410                            target_str.push(c);
1411                            self.advance();
1412                        } else {
1413                            break;
1414                        }
1415                    }
1416
1417                    if target_str.is_empty() {
1418                        return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1419                    }
1420
1421                    let target_fd: i32 = target_str.parse().unwrap_or(1);
1422                    return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
1423                }
1424                (Some('>'), _) => {
1425                    self.consume_ascii_chars(2);
1426                    return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1427                }
1428                (Some('<'), Some('&')) => {
1429                    self.consume_ascii_chars(3);
1430
1431                    let mut target_str = String::with_capacity(4);
1432                    while let Some(c) = self.peek_char() {
1433                        if c.is_ascii_digit() || c == '-' {
1434                            target_str.push(c);
1435                            self.advance();
1436                            if c == '-' {
1437                                break;
1438                            }
1439                        } else {
1440                            break;
1441                        }
1442                    }
1443
1444                    if target_str == "-" {
1445                        return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
1446                    }
1447                    let target_fd: i32 = target_str.parse().unwrap_or(0);
1448                    return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
1449                }
1450                (Some('<'), Some('>')) => {
1451                    self.consume_ascii_chars(3);
1452                    return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
1453                }
1454                (Some('<'), Some('<')) => {}
1455                (Some('<'), _) => {
1456                    self.consume_ascii_chars(2);
1457                    return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
1458                }
1459                _ => {}
1460            }
1461        }
1462
1463        // Not a fd redirect pattern, read as regular word
1464        self.read_word()
1465    }
1466
1467    fn read_word_starting_with(
1468        &mut self,
1469        _prefix: &str,
1470        start: Position,
1471    ) -> Option<LexedToken<'a>> {
1472        let segment = match self.read_unquoted_segment(start) {
1473            Ok(segment) => segment,
1474            Err(kind) => return Some(LexedToken::error(kind)),
1475        };
1476        if segment.as_str().is_empty() {
1477            return None;
1478        }
1479        let mut lexed_word = LexedWord::from_segment(segment);
1480        if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1481            return Some(LexedToken::error(kind));
1482        }
1483        Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1484    }
1485
1486    fn read_word(&mut self) -> Option<LexedToken<'a>> {
1487        let start = self.current_position();
1488
1489        if self.reinject_buf.is_empty() {
1490            let ascii_len = self.source_ascii_plain_word_len();
1491            let chunk = if ascii_len > 0
1492                && self
1493                    .cursor
1494                    .rest()
1495                    .as_bytes()
1496                    .get(ascii_len)
1497                    .is_none_or(|byte| byte.is_ascii())
1498            {
1499                self.consume_source_bytes(ascii_len);
1500                &self.input[start.offset..self.offset]
1501            } else {
1502                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1503                self.advance_scanned_source_bytes(chunk.len());
1504                chunk
1505            };
1506            if !chunk.is_empty() {
1507                let continues = matches!(
1508                    self.peek_char(),
1509                    Some(next)
1510                        if Self::is_word_char(next)
1511                            || next == '$'
1512                            || matches!(next, '\'' | '"')
1513                            || next == '{'
1514                            || (next == '\\' && self.second_char() == Some('\n'))
1515                            || (next == '('
1516                                && (chunk.ends_with('=')
1517                                    || Self::word_can_take_parenthesized_suffix(chunk)))
1518                );
1519
1520                if !continues {
1521                    let end = self.current_position();
1522                    return Some(LexedToken::borrowed_word(
1523                        TokenKind::Word,
1524                        &self.input[start.offset..self.offset],
1525                        Some(Span::from_positions(start, end)),
1526                    ));
1527                }
1528
1529                if self.peek_char() == Some('(')
1530                    && (chunk.ends_with('=') || Self::word_can_take_parenthesized_suffix(chunk))
1531                {
1532                    return self.read_complex_word(start);
1533                }
1534
1535                let end = self.current_position();
1536                return self.finish_segmented_word(LexedWord::borrowed(
1537                    LexedWordSegmentKind::Plain,
1538                    &self.input[start.offset..self.offset],
1539                    Some(Span::from_positions(start, end)),
1540                ));
1541            }
1542        }
1543
1544        self.read_complex_word(start)
1545    }
1546
1547    fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1548        if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1549            return Some(LexedToken::error(kind));
1550        }
1551
1552        Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1553    }
1554
1555    fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1556        if self.peek_char() == Some('$') {
1557            match self.second_char() {
1558                Some('\'') => return self.read_dollar_single_quoted_string(),
1559                Some('"') => return self.read_dollar_double_quoted_string(),
1560                _ => {}
1561            }
1562        }
1563
1564        let segment = match self.read_unquoted_segment(start) {
1565            Ok(segment) => segment,
1566            Err(kind) => return Some(LexedToken::error(kind)),
1567        };
1568
1569        if segment.as_str().is_empty() {
1570            return None;
1571        }
1572
1573        self.finish_segmented_word(LexedWord::from_segment(segment))
1574    }
1575
1576    fn read_unquoted_segment(
1577        &mut self,
1578        start: Position,
1579    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1580        let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
1581        while let Some(ch) = self.peek_char() {
1582            if ch == '"' || ch == '\'' {
1583                break;
1584            } else if ch == '$' {
1585                if matches!(self.second_char(), Some('\'') | Some('"'))
1586                    && (self.current_position().offset > start.offset
1587                        || word.as_ref().is_some_and(|word| !word.is_empty()))
1588                {
1589                    break;
1590                }
1591
1592                // Handle variable references and command substitution
1593                self.advance();
1594
1595                Self::push_capture_char(&mut word, ch); // push the '$'
1596
1597                // Check for $[ / $( / ${ forms before falling back to variable text.
1598                if self.peek_char() == Some('[') {
1599                    Self::push_capture_char(&mut word, '[');
1600                    self.advance();
1601                    if !self.read_legacy_arithmetic_into(&mut word, start) {
1602                        return Err(LexerErrorKind::CommandSubstitution);
1603                    }
1604                } else if self.peek_char() == Some('(') {
1605                    if self.second_char() == Some('(') {
1606                        if !self.read_arithmetic_expansion_into(&mut word) {
1607                            return Err(LexerErrorKind::CommandSubstitution);
1608                        }
1609                    } else {
1610                        Self::push_capture_char(&mut word, '(');
1611                        self.advance();
1612                        if !self.read_command_subst_into(&mut word) {
1613                            return Err(LexerErrorKind::CommandSubstitution);
1614                        }
1615                    }
1616                } else if self.peek_char() == Some('{') {
1617                    // ${VAR} format — track nested braces so ${a[${#b[@]}]}
1618                    // doesn't stop at the inner }.
1619                    Self::push_capture_char(&mut word, '{');
1620                    self.advance();
1621                    let _ = self.read_param_expansion_into(&mut word, start);
1622                } else {
1623                    // Check for special single-character variables ($?, $#, $@, $*, $!, $$, $-, $0-$9)
1624                    if let Some(c) = self.peek_char() {
1625                        if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
1626                            || c.is_ascii_digit()
1627                        {
1628                            Self::push_capture_char(&mut word, c);
1629                            self.advance();
1630                        } else {
1631                            // Read variable name (alphanumeric + _)
1632                            while let Some(c) = self.peek_char() {
1633                                if c.is_ascii_alphanumeric() || c == '_' {
1634                                    Self::push_capture_char(&mut word, c);
1635                                    self.advance();
1636                                } else {
1637                                    break;
1638                                }
1639                            }
1640                        }
1641                    }
1642                }
1643            } else if ch == '{' {
1644                if self.looks_like_mid_word_brace_segment() {
1645                    // Keep balanced {...} forms attached to the current word so
1646                    // plain literals like foo{bar} and brace expansions stay intact.
1647                    Self::push_capture_char(&mut word, ch);
1648                    self.advance();
1649                    self.consume_mid_word_brace_segment(&mut word);
1650                } else {
1651                    // Unmatched literal braces in regexes like ^{ should not swallow
1652                    // trailing delimiters such as ]] or then.
1653                    Self::push_capture_char(&mut word, ch);
1654                    self.advance();
1655                }
1656            } else if ch == '`' {
1657                // Preserve legacy backticks verbatim so the parser can keep the
1658                // original syntax form.
1659                let capture_end = self.current_position();
1660                self.ensure_capture_from_source(&mut word, start, capture_end);
1661                Self::push_capture_char(&mut word, ch);
1662                self.advance(); // consume opening `
1663                let mut closed = false;
1664                while let Some(c) = self.peek_char() {
1665                    Self::push_capture_char(&mut word, c);
1666                    self.advance();
1667                    if c == '`' {
1668                        closed = true;
1669                        break;
1670                    }
1671                    if c == '\\'
1672                        && let Some(next) = self.peek_char()
1673                    {
1674                        Self::push_capture_char(&mut word, next);
1675                        self.advance();
1676                    }
1677                }
1678                if !closed {
1679                    return Err(LexerErrorKind::BacktickSubstitution);
1680                }
1681            } else if ch == '\\' {
1682                let capture_end = self.current_position();
1683                self.ensure_capture_from_source(&mut word, start, capture_end);
1684                self.advance();
1685                if let Some(next) = self.peek_char() {
1686                    if next == '\n' {
1687                        // Line continuation: skip backslash + newline
1688                        self.advance();
1689                    } else {
1690                        // Escaped character: backslash quotes the next char
1691                        // (quote removal — only the literal char survives).
1692                        // Preserve source/decoded alignment with a sentinel so
1693                        // downstream word decoding keeps later spans anchored.
1694                        Self::push_capture_char(&mut word, '\x00');
1695                        Self::push_capture_char(&mut word, next);
1696                        self.advance();
1697                        if next == '{'
1698                            && self.current_word_surface_is_single_char(start, &word, '{')
1699                            && self.escaped_brace_sequence_looks_like_brace_expansion()
1700                        {
1701                            let mut depth = 1;
1702                            while let Some(c) = self.peek_char() {
1703                                Self::push_capture_char(&mut word, c);
1704                                self.advance();
1705                                match c {
1706                                    '{' => depth += 1,
1707                                    '}' => {
1708                                        depth -= 1;
1709                                        if depth == 0 {
1710                                            break;
1711                                        }
1712                                    }
1713                                    _ => {}
1714                                }
1715                            }
1716                        }
1717                    }
1718                } else {
1719                    Self::push_capture_char(&mut word, '\\');
1720                }
1721            } else if ch == '('
1722                && self.current_word_surface_ends_with_char(start, &word, '=')
1723                && self.looks_like_assoc_assign()
1724            {
1725                // Associative compound assignment: var=([k]="v" ...) — keep entire
1726                // (...) as part of word so declare -A m=([k]="v") stays one token.
1727                Self::push_capture_char(&mut word, ch);
1728                self.advance();
1729                let mut depth = 1;
1730                while let Some(c) = self.peek_char() {
1731                    Self::push_capture_char(&mut word, c);
1732                    self.advance();
1733                    match c {
1734                        '(' => depth += 1,
1735                        ')' => {
1736                            depth -= 1;
1737                            if depth == 0 {
1738                                break;
1739                            }
1740                        }
1741                        '"' => {
1742                            while let Some(qc) = self.peek_char() {
1743                                Self::push_capture_char(&mut word, qc);
1744                                self.advance();
1745                                if qc == '"' {
1746                                    break;
1747                                }
1748                                if qc == '\\'
1749                                    && let Some(esc) = self.peek_char()
1750                                {
1751                                    Self::push_capture_char(&mut word, esc);
1752                                    self.advance();
1753                                }
1754                            }
1755                        }
1756                        '\'' => {
1757                            while let Some(qc) = self.peek_char() {
1758                                Self::push_capture_char(&mut word, qc);
1759                                self.advance();
1760                                if qc == '\'' {
1761                                    break;
1762                                }
1763                            }
1764                        }
1765                        '\\' => {
1766                            if let Some(esc) = self.peek_char() {
1767                                Self::push_capture_char(&mut word, esc);
1768                                self.advance();
1769                            }
1770                        }
1771                        _ => {}
1772                    }
1773                }
1774            } else if ch == '(' && self.current_word_surface_ends_with_extglob_prefix(start, &word)
1775            {
1776                // Extglob: @(...), ?(...), *(...), +(...), !(...)
1777                // Consume through matching ) including nested parens
1778                Self::push_capture_char(&mut word, ch);
1779                self.advance();
1780                let mut depth = 1;
1781                while let Some(c) = self.peek_char() {
1782                    Self::push_capture_char(&mut word, c);
1783                    self.advance();
1784                    match c {
1785                        '(' => depth += 1,
1786                        ')' => {
1787                            depth -= 1;
1788                            if depth == 0 {
1789                                break;
1790                            }
1791                        }
1792                        '\\' => {
1793                            if let Some(esc) = self.peek_char() {
1794                                Self::push_capture_char(&mut word, esc);
1795                                self.advance();
1796                            }
1797                        }
1798                        _ => {}
1799                    }
1800                }
1801            } else if Self::is_plain_word_char(ch) {
1802                if self.reinject_buf.is_empty() {
1803                    let ascii_len = self.source_ascii_plain_word_len();
1804                    let chunk = if ascii_len > 0
1805                        && self
1806                            .cursor
1807                            .rest()
1808                            .as_bytes()
1809                            .get(ascii_len)
1810                            .is_none_or(|byte| byte.is_ascii())
1811                    {
1812                        self.consume_source_bytes(ascii_len);
1813                        &self.input[self.offset - ascii_len..self.offset]
1814                    } else {
1815                        let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1816                        self.advance_scanned_source_bytes(chunk.len());
1817                        chunk
1818                    };
1819                    Self::push_capture_str(&mut word, chunk);
1820                } else {
1821                    Self::push_capture_char(&mut word, ch);
1822                    self.advance();
1823                }
1824            } else {
1825                break;
1826            }
1827        }
1828
1829        if let Some(word) = word {
1830            let span = Some(Span::from_positions(start, self.current_position()));
1831            Ok(LexedWordSegment::owned_with_spans(
1832                LexedWordSegmentKind::Plain,
1833                word,
1834                span,
1835                span,
1836            ))
1837        } else {
1838            let end = self.current_position();
1839            Ok(LexedWordSegment::borrowed(
1840                LexedWordSegmentKind::Plain,
1841                &self.input[start.offset..self.offset],
1842                Some(Span::from_positions(start, end)),
1843            ))
1844        }
1845    }
1846
1847    fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1848        let segment = match self.read_single_quoted_segment() {
1849            Ok(segment) => segment,
1850            Err(kind) => return Some(LexedToken::error(kind)),
1851        };
1852        let mut word = LexedWord::from_segment(segment);
1853        if let Err(kind) = self.append_segmented_continuation(&mut word) {
1854            return Some(LexedToken::error(kind));
1855        }
1856
1857        Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1858    }
1859
1860    fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1861        debug_assert_eq!(self.peek_char(), Some('\''));
1862
1863        let wrapper_start = self.current_position();
1864        self.consume_ascii_chars(1); // consume opening '
1865        let content_start = self.current_position();
1866        let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
1867        let mut content_end = content_start;
1868        let mut content = String::with_capacity(16);
1869        let mut closed = false;
1870
1871        if can_borrow {
1872            let rest = self.cursor.rest();
1873            if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
1874                self.consume_source_bytes(quote_index);
1875                content_end = self.current_position();
1876                self.consume_ascii_chars(1); // consume closing '
1877                closed = true;
1878            } else {
1879                self.consume_source_bytes(rest.len());
1880            }
1881        }
1882
1883        while let Some(ch) = self.peek_char() {
1884            if closed {
1885                break;
1886            }
1887            if ch == '\'' {
1888                if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
1889                    if !can_borrow {
1890                        content.push('\'');
1891                    }
1892                    self.advance();
1893                    self.advance();
1894                    continue;
1895                }
1896                content_end = self.current_position();
1897                self.consume_ascii_chars(1); // consume closing '
1898                closed = true;
1899                break;
1900            }
1901            if !can_borrow {
1902                content.push(ch);
1903            }
1904            self.advance();
1905        }
1906
1907        if !closed {
1908            return Err(LexerErrorKind::SingleQuote);
1909        }
1910
1911        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
1912        let content_span = Some(Span::from_positions(content_start, content_end));
1913
1914        if can_borrow {
1915            Ok(LexedWordSegment::borrowed_with_spans(
1916                LexedWordSegmentKind::SingleQuoted,
1917                &self.input[content_start.offset..content_end.offset],
1918                content_span,
1919                wrapper_span,
1920            ))
1921        } else {
1922            Ok(LexedWordSegment::owned_with_spans(
1923                LexedWordSegmentKind::SingleQuoted,
1924                content,
1925                content_span,
1926                wrapper_span,
1927            ))
1928        }
1929    }
1930
1931    fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1932        let segment = match self.read_dollar_single_quoted_segment() {
1933            Ok(segment) => segment,
1934            Err(kind) => return Some(LexedToken::error(kind)),
1935        };
1936        let mut word = LexedWord::from_segment(segment);
1937        if let Err(kind) = self.append_segmented_continuation(&mut word) {
1938            return Some(LexedToken::error(kind));
1939        }
1940
1941        let kind = if word.single_segment().is_some() {
1942            TokenKind::LiteralWord
1943        } else {
1944            TokenKind::Word
1945        };
1946
1947        Some(LexedToken::with_word_payload(kind, word))
1948    }
1949
1950    fn read_dollar_single_quoted_segment(
1951        &mut self,
1952    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1953        debug_assert_eq!(self.peek_char(), Some('$'));
1954        debug_assert_eq!(self.second_char(), Some('\''));
1955
1956        let wrapper_start = self.current_position();
1957        self.consume_ascii_chars(2); // consume $'
1958        let content_start = self.current_position();
1959        let mut out = String::with_capacity(16);
1960
1961        while let Some(ch) = self.peek_char() {
1962            if ch == '\'' {
1963                let content_end = self.current_position();
1964                self.advance();
1965                let wrapper_span =
1966                    Some(Span::from_positions(wrapper_start, self.current_position()));
1967                let content_span = Some(Span::from_positions(content_start, content_end));
1968                return Ok(LexedWordSegment::owned_with_spans(
1969                    LexedWordSegmentKind::DollarSingleQuoted,
1970                    out,
1971                    content_span,
1972                    wrapper_span,
1973                ));
1974            }
1975
1976            if ch == '\\' {
1977                self.advance();
1978                if let Some(esc) = self.peek_char() {
1979                    self.advance();
1980                    match esc {
1981                        'n' => out.push('\n'),
1982                        't' => out.push('\t'),
1983                        'r' => out.push('\r'),
1984                        'a' => out.push('\x07'),
1985                        'b' => out.push('\x08'),
1986                        'f' => out.push('\x0C'),
1987                        'v' => out.push('\x0B'),
1988                        'e' | 'E' => out.push('\x1B'),
1989                        '\\' => out.push('\\'),
1990                        '\'' => out.push('\''),
1991                        '"' => out.push('"'),
1992                        '?' => out.push('?'),
1993                        'c' => {
1994                            if let Some(control) = self.peek_char() {
1995                                self.advance();
1996                                out.push(((control as u32 & 0x1F) as u8) as char);
1997                            } else {
1998                                out.push('\\');
1999                                out.push('c');
2000                            }
2001                        }
2002                        'x' => {
2003                            let mut hex = String::new();
2004                            for _ in 0..2 {
2005                                if let Some(h) = self.peek_char() {
2006                                    if h.is_ascii_hexdigit() {
2007                                        hex.push(h);
2008                                        self.advance();
2009                                    } else {
2010                                        break;
2011                                    }
2012                                }
2013                            }
2014                            if let Ok(val) = u8::from_str_radix(&hex, 16) {
2015                                out.push(val as char);
2016                            }
2017                        }
2018                        'u' => {
2019                            let mut hex = String::new();
2020                            for _ in 0..4 {
2021                                if let Some(h) = self.peek_char() {
2022                                    if h.is_ascii_hexdigit() {
2023                                        hex.push(h);
2024                                        self.advance();
2025                                    } else {
2026                                        break;
2027                                    }
2028                                }
2029                            }
2030                            if let Ok(val) = u32::from_str_radix(&hex, 16)
2031                                && let Some(c) = char::from_u32(val)
2032                            {
2033                                out.push(c);
2034                            }
2035                        }
2036                        'U' => {
2037                            let mut hex = String::new();
2038                            for _ in 0..8 {
2039                                if let Some(h) = self.peek_char() {
2040                                    if h.is_ascii_hexdigit() {
2041                                        hex.push(h);
2042                                        self.advance();
2043                                    } else {
2044                                        break;
2045                                    }
2046                                }
2047                            }
2048                            if let Ok(val) = u32::from_str_radix(&hex, 16)
2049                                && let Some(c) = char::from_u32(val)
2050                            {
2051                                out.push(c);
2052                            }
2053                        }
2054                        '0'..='7' => {
2055                            let mut oct = String::new();
2056                            oct.push(esc);
2057                            for _ in 0..2 {
2058                                if let Some(o) = self.peek_char() {
2059                                    if o.is_ascii_digit() && o < '8' {
2060                                        oct.push(o);
2061                                        self.advance();
2062                                    } else {
2063                                        break;
2064                                    }
2065                                }
2066                            }
2067                            if let Ok(val) = u8::from_str_radix(&oct, 8) {
2068                                out.push(val as char);
2069                            }
2070                        }
2071                        _ => {
2072                            out.push('\\');
2073                            out.push(esc);
2074                        }
2075                    }
2076                } else {
2077                    out.push('\\');
2078                }
2079                continue;
2080            }
2081
2082            out.push(ch);
2083            self.advance();
2084        }
2085
2086        Err(LexerErrorKind::SingleQuote)
2087    }
2088
2089    fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2090        let start = self.current_position();
2091
2092        if self.reinject_buf.is_empty() {
2093            let ascii_len = self.source_ascii_plain_word_len();
2094            let chunk = if ascii_len > 0
2095                && self
2096                    .cursor
2097                    .rest()
2098                    .as_bytes()
2099                    .get(ascii_len)
2100                    .is_none_or(|byte| byte.is_ascii())
2101            {
2102                self.consume_source_bytes(ascii_len);
2103                &self.input[start.offset..self.offset]
2104            } else {
2105                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
2106                self.advance_scanned_source_bytes(chunk.len());
2107                chunk
2108            };
2109            if chunk.is_empty() {
2110                return None;
2111            }
2112
2113            let end = self.current_position();
2114            return Some(LexedWordSegment::borrowed(
2115                LexedWordSegmentKind::Plain,
2116                &self.input[start.offset..self.offset],
2117                Some(Span::from_positions(start, end)),
2118            ));
2119        }
2120
2121        let ch = self.peek_char()?;
2122        if !Self::is_plain_word_char(ch) {
2123            return None;
2124        }
2125
2126        let mut text = String::with_capacity(16);
2127        while let Some(ch) = self.peek_char() {
2128            if !Self::is_plain_word_char(ch) {
2129                break;
2130            }
2131            text.push(ch);
2132            self.advance();
2133        }
2134
2135        Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
2136    }
2137
2138    /// After a closing quote, read any adjacent quoted or unquoted word chars
2139    /// into `word`. Handles concatenation like `'foo'"bar"baz`.
2140    fn append_segmented_continuation(
2141        &mut self,
2142        word: &mut LexedWord<'a>,
2143    ) -> Result<(), LexerErrorKind> {
2144        loop {
2145            match self.peek_char() {
2146                Some('\\') if self.second_char() == Some('\n') => {
2147                    self.advance();
2148                    self.advance();
2149                    continue;
2150                }
2151                Some('\'') => {
2152                    word.push_segment(self.read_single_quoted_segment()?);
2153                }
2154                Some('"') => {
2155                    word.push_segment(self.read_double_quoted_segment()?);
2156                }
2157                Some('$') if self.second_char() == Some('\'') => {
2158                    word.push_segment(self.read_dollar_single_quoted_segment()?);
2159                }
2160                Some('$') if self.second_char() == Some('"') => {
2161                    word.push_segment(self.read_dollar_double_quoted_segment()?);
2162                }
2163                Some('(') if Self::lexed_word_can_take_parenthesized_suffix(word) => {
2164                    let Some(segment) = self.read_parenthesized_word_suffix_segment() else {
2165                        unreachable!("peeked '(' should produce a suffix segment");
2166                    };
2167                    word.push_segment(segment);
2168                }
2169                _ => {
2170                    if let Some(segment) = self.read_plain_continuation_segment() {
2171                        word.push_segment(segment);
2172                        continue;
2173                    }
2174
2175                    let start = self.current_position();
2176                    let plain = self.read_unquoted_segment(start)?;
2177                    if plain.as_str().is_empty() {
2178                        break;
2179                    }
2180                    word.push_segment(plain);
2181                }
2182            }
2183        }
2184
2185        Ok(())
2186    }
2187
2188    fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2189        debug_assert_eq!(self.peek_char(), Some('('));
2190
2191        let start = self.current_position();
2192        let mut depth = 0usize;
2193        let mut escaped = false;
2194        let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2195
2196        while let Some(ch) = self.peek_char() {
2197            if let Some(text) = text.as_mut() {
2198                text.push(ch);
2199            }
2200            self.advance();
2201
2202            if escaped {
2203                escaped = false;
2204                continue;
2205            }
2206
2207            match ch {
2208                '\\' => escaped = true,
2209                '(' => depth += 1,
2210                ')' => {
2211                    depth = depth.saturating_sub(1);
2212                    if depth == 0 {
2213                        break;
2214                    }
2215                }
2216                _ => {}
2217            }
2218        }
2219
2220        let end = self.current_position();
2221        let span = Some(Span::from_positions(start, end));
2222        if let Some(text) = text {
2223            Some(LexedWordSegment::owned_with_spans(
2224                LexedWordSegmentKind::Plain,
2225                text,
2226                span,
2227                span,
2228            ))
2229        } else {
2230            Some(LexedWordSegment::borrowed_with_spans(
2231                LexedWordSegmentKind::Plain,
2232                &self.input[start.offset..end.offset],
2233                span,
2234                span,
2235            ))
2236        }
2237    }
2238
2239    fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2240        self.read_double_quoted_word(false)
2241    }
2242
2243    fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2244        self.read_double_quoted_word(true)
2245    }
2246
2247    fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2248        let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2249            Ok(segment) => segment,
2250            Err(kind) => return Some(LexedToken::error(kind)),
2251        };
2252        let mut word = LexedWord::from_segment(segment);
2253        if let Err(kind) = self.append_segmented_continuation(&mut word) {
2254            return Some(LexedToken::error(kind));
2255        }
2256
2257        let kind = if word.single_segment().is_some() {
2258            TokenKind::QuotedWord
2259        } else {
2260            TokenKind::Word
2261        };
2262
2263        Some(LexedToken::with_word_payload(kind, word))
2264    }
2265
2266    fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2267        self.read_double_quoted_segment_with_dollar(false)
2268    }
2269
2270    fn read_dollar_double_quoted_segment(
2271        &mut self,
2272    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2273        self.read_double_quoted_segment_with_dollar(true)
2274    }
2275
2276    fn read_double_quoted_segment_with_dollar(
2277        &mut self,
2278        dollar: bool,
2279    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2280        if dollar {
2281            debug_assert_eq!(self.peek_char(), Some('$'));
2282            debug_assert_eq!(self.second_char(), Some('"'));
2283        } else {
2284            debug_assert_eq!(self.peek_char(), Some('"'));
2285        }
2286
2287        let wrapper_start = self.current_position();
2288        if dollar {
2289            self.consume_ascii_chars(2); // consume $"
2290        } else {
2291            self.consume_ascii_chars(1); // consume opening "
2292        }
2293        let content_start = self.current_position();
2294        let mut content_end = content_start;
2295        let mut simple = self.reinject_buf.is_empty();
2296        let mut borrowable = self.reinject_buf.is_empty();
2297        let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2298        let mut closed = false;
2299
2300        while let Some(ch) = self.peek_char() {
2301            if simple {
2302                if self.reinject_buf.is_empty() {
2303                    let rest = self.cursor.rest();
2304                    match Self::find_double_quote_special(rest) {
2305                        Some(index) if index > 0 => {
2306                            self.consume_source_bytes(index);
2307                            continue;
2308                        }
2309                        None => {
2310                            self.consume_source_bytes(rest.len());
2311                            return Err(LexerErrorKind::DoubleQuote);
2312                        }
2313                        _ => {}
2314                    }
2315                }
2316
2317                match ch {
2318                    '"' => {
2319                        content_end = self.current_position();
2320                        self.consume_ascii_chars(1); // consume closing "
2321                        closed = true;
2322                        break;
2323                    }
2324                    '\\' | '$' | '`' => {
2325                        simple = false;
2326                        if ch == '`' {
2327                            borrowable = false;
2328                            let capture_end = self.current_position();
2329                            self.ensure_capture_from_source(
2330                                &mut content,
2331                                content_start,
2332                                capture_end,
2333                            );
2334                        }
2335                    }
2336                    _ => {
2337                        self.advance();
2338                    }
2339                }
2340                if simple {
2341                    continue;
2342                }
2343            }
2344
2345            match ch {
2346                '"' => {
2347                    if borrowable {
2348                        content_end = self.current_position();
2349                    }
2350                    self.consume_ascii_chars(1); // consume closing "
2351                    closed = true;
2352                    break;
2353                }
2354                '\\' => {
2355                    let escape_start = self.current_position();
2356                    self.advance();
2357                    if let Some(next) = self.peek_char() {
2358                        match next {
2359                            '\n' => {
2360                                borrowable = false;
2361                                self.ensure_capture_from_source(
2362                                    &mut content,
2363                                    content_start,
2364                                    escape_start,
2365                                );
2366                                self.advance();
2367                            }
2368                            '$' => {
2369                                borrowable = false;
2370                                self.ensure_capture_from_source(
2371                                    &mut content,
2372                                    content_start,
2373                                    escape_start,
2374                                );
2375                                Self::push_capture_char(&mut content, '\x00');
2376                                Self::push_capture_char(&mut content, '$');
2377                                self.advance();
2378                            }
2379                            '"' | '\\' | '`' => {
2380                                borrowable = false;
2381                                self.ensure_capture_from_source(
2382                                    &mut content,
2383                                    content_start,
2384                                    escape_start,
2385                                );
2386                                if next == '\\' {
2387                                    Self::push_capture_char(&mut content, '\x00');
2388                                }
2389                                if next == '`' {
2390                                    Self::push_capture_char(&mut content, '\x00');
2391                                }
2392                                Self::push_capture_char(&mut content, next);
2393                                self.advance();
2394                                content_end = self.current_position();
2395                            }
2396                            _ => {
2397                                Self::push_capture_char(&mut content, '\\');
2398                                Self::push_capture_char(&mut content, next);
2399                                self.advance();
2400                                content_end = self.current_position();
2401                            }
2402                        }
2403                    }
2404                }
2405                '$' => {
2406                    Self::push_capture_char(&mut content, '$');
2407                    self.advance();
2408                    if self.peek_char() == Some('(') {
2409                        if self.second_char() == Some('(') {
2410                            self.read_arithmetic_expansion_into(&mut content);
2411                        } else {
2412                            Self::push_capture_char(&mut content, '(');
2413                            self.advance();
2414                            self.read_command_subst_into(&mut content);
2415                        }
2416                    } else if self.peek_char() == Some('{') {
2417                        Self::push_capture_char(&mut content, '{');
2418                        self.advance();
2419                        borrowable &= self.read_param_expansion_into(&mut content, content_start);
2420                    }
2421                    content_end = self.current_position();
2422                }
2423                '`' => {
2424                    borrowable = false;
2425                    let capture_end = self.current_position();
2426                    self.ensure_capture_from_source(&mut content, content_start, capture_end);
2427                    Self::push_capture_char(&mut content, '`');
2428                    self.advance(); // consume opening `
2429                    while let Some(c) = self.peek_char() {
2430                        Self::push_capture_char(&mut content, c);
2431                        self.advance();
2432                        if c == '`' {
2433                            break;
2434                        }
2435                        if c == '\\'
2436                            && let Some(next) = self.peek_char()
2437                        {
2438                            Self::push_capture_char(&mut content, next);
2439                            self.advance();
2440                        }
2441                    }
2442                    content_end = self.current_position();
2443                }
2444                _ => {
2445                    Self::push_capture_char(&mut content, ch);
2446                    self.advance();
2447                    content_end = self.current_position();
2448                }
2449            }
2450        }
2451
2452        if !closed {
2453            return Err(LexerErrorKind::DoubleQuote);
2454        }
2455
2456        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
2457        let content_span = Some(Span::from_positions(content_start, content_end));
2458
2459        if borrowable {
2460            Ok(LexedWordSegment::borrowed_with_spans(
2461                if dollar {
2462                    LexedWordSegmentKind::DollarDoubleQuoted
2463                } else {
2464                    LexedWordSegmentKind::DoubleQuoted
2465                },
2466                &self.input[content_start.offset..content_end.offset],
2467                content_span,
2468                wrapper_span,
2469            ))
2470        } else {
2471            Ok(LexedWordSegment::owned_with_spans(
2472                if dollar {
2473                    LexedWordSegmentKind::DollarDoubleQuoted
2474                } else {
2475                    LexedWordSegmentKind::DoubleQuoted
2476                },
2477                content.unwrap_or_default(),
2478                content_span,
2479                wrapper_span,
2480            ))
2481        }
2482    }
2483
2484    fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
2485        debug_assert_eq!(self.peek_char(), Some('('));
2486        debug_assert_eq!(self.second_char(), Some('('));
2487
2488        Self::push_capture_char(content, '(');
2489        self.advance();
2490        Self::push_capture_char(content, '(');
2491        self.advance();
2492
2493        let mut depth = 2;
2494        while let Some(c) = self.peek_char() {
2495            match c {
2496                '\\' => {
2497                    Self::push_capture_char(content, c);
2498                    self.advance();
2499                    if let Some(next) = self.peek_char() {
2500                        Self::push_capture_char(content, next);
2501                        self.advance();
2502                    }
2503                }
2504                '\'' => {
2505                    Self::push_capture_char(content, c);
2506                    self.advance();
2507                    while let Some(quoted) = self.peek_char() {
2508                        Self::push_capture_char(content, quoted);
2509                        self.advance();
2510                        if quoted == '\'' {
2511                            break;
2512                        }
2513                    }
2514                }
2515                '"' => {
2516                    let mut escaped = false;
2517                    Self::push_capture_char(content, c);
2518                    self.advance();
2519                    while let Some(quoted) = self.peek_char() {
2520                        Self::push_capture_char(content, quoted);
2521                        self.advance();
2522                        if escaped {
2523                            escaped = false;
2524                            continue;
2525                        }
2526                        match quoted {
2527                            '\\' => escaped = true,
2528                            '"' => break,
2529                            _ => {}
2530                        }
2531                    }
2532                }
2533                '`' => {
2534                    let mut escaped = false;
2535                    Self::push_capture_char(content, c);
2536                    self.advance();
2537                    while let Some(quoted) = self.peek_char() {
2538                        Self::push_capture_char(content, quoted);
2539                        self.advance();
2540                        if escaped {
2541                            escaped = false;
2542                            continue;
2543                        }
2544                        match quoted {
2545                            '\\' => escaped = true,
2546                            '`' => break,
2547                            _ => {}
2548                        }
2549                    }
2550                }
2551                '(' => {
2552                    Self::push_capture_char(content, c);
2553                    self.advance();
2554                    depth += 1;
2555                }
2556                ')' => {
2557                    Self::push_capture_char(content, c);
2558                    self.advance();
2559                    depth -= 1;
2560                    if depth == 0 {
2561                        return true;
2562                    }
2563                }
2564                _ => {
2565                    Self::push_capture_char(content, c);
2566                    self.advance();
2567                }
2568            }
2569        }
2570
2571        false
2572    }
2573
2574    fn read_legacy_arithmetic_into(
2575        &mut self,
2576        content: &mut Option<String>,
2577        segment_start: Position,
2578    ) -> bool {
2579        let mut bracket_depth = 1;
2580
2581        while let Some(c) = self.peek_char() {
2582            match c {
2583                '\\' => {
2584                    Self::push_capture_char(content, c);
2585                    self.advance();
2586                    if let Some(next) = self.peek_char() {
2587                        Self::push_capture_char(content, next);
2588                        self.advance();
2589                    }
2590                }
2591                '\'' => {
2592                    Self::push_capture_char(content, c);
2593                    self.advance();
2594                    while let Some(quoted) = self.peek_char() {
2595                        Self::push_capture_char(content, quoted);
2596                        self.advance();
2597                        if quoted == '\'' {
2598                            break;
2599                        }
2600                    }
2601                }
2602                '"' => {
2603                    let mut escaped = false;
2604                    Self::push_capture_char(content, c);
2605                    self.advance();
2606                    while let Some(quoted) = self.peek_char() {
2607                        Self::push_capture_char(content, quoted);
2608                        self.advance();
2609                        if escaped {
2610                            escaped = false;
2611                            continue;
2612                        }
2613                        match quoted {
2614                            '\\' => escaped = true,
2615                            '"' => break,
2616                            _ => {}
2617                        }
2618                    }
2619                }
2620                '`' => {
2621                    let mut escaped = false;
2622                    Self::push_capture_char(content, c);
2623                    self.advance();
2624                    while let Some(quoted) = self.peek_char() {
2625                        Self::push_capture_char(content, quoted);
2626                        self.advance();
2627                        if escaped {
2628                            escaped = false;
2629                            continue;
2630                        }
2631                        match quoted {
2632                            '\\' => escaped = true,
2633                            '`' => break,
2634                            _ => {}
2635                        }
2636                    }
2637                }
2638                '[' => {
2639                    Self::push_capture_char(content, c);
2640                    self.advance();
2641                    bracket_depth += 1;
2642                }
2643                ']' => {
2644                    Self::push_capture_char(content, c);
2645                    self.advance();
2646                    bracket_depth -= 1;
2647                    if bracket_depth == 0 {
2648                        return true;
2649                    }
2650                }
2651                '$' => {
2652                    Self::push_capture_char(content, c);
2653                    self.advance();
2654                    if self.peek_char() == Some('(') {
2655                        if self.second_char() == Some('(') {
2656                            if !self.read_arithmetic_expansion_into(content) {
2657                                return false;
2658                            }
2659                        } else {
2660                            Self::push_capture_char(content, '(');
2661                            self.advance();
2662                            if !self.read_command_subst_into(content) {
2663                                return false;
2664                            }
2665                        }
2666                    } else if self.peek_char() == Some('{') {
2667                        Self::push_capture_char(content, '{');
2668                        self.advance();
2669                        if !self.read_param_expansion_into(content, segment_start) {
2670                            return false;
2671                        }
2672                    } else if self.peek_char() == Some('[') {
2673                        Self::push_capture_char(content, '[');
2674                        self.advance();
2675                        if !self.read_legacy_arithmetic_into(content, segment_start) {
2676                            return false;
2677                        }
2678                    }
2679                }
2680                _ => {
2681                    Self::push_capture_char(content, c);
2682                    self.advance();
2683                }
2684            }
2685        }
2686
2687        false
2688    }
2689
2690    /// Read command substitution content after `$(`, handling nested parens and quotes.
2691    /// Appends chars to `content` and adds the closing `)`.
2692    /// `subst_depth` tracks nesting to prevent stack overflow.
2693    fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
2694        self.read_command_subst_into_depth(content, 0)
2695    }
2696
2697    fn flush_command_subst_keyword(
2698        current_word: &mut String,
2699        pending_case_headers: &mut usize,
2700        case_clause_depths: &mut SmallVec<[usize; 4]>,
2701        depth: usize,
2702        word_started_at_command_start: &mut bool,
2703    ) {
2704        if current_word.is_empty() {
2705            *word_started_at_command_start = false;
2706            return;
2707        }
2708
2709        match current_word.as_str() {
2710            "case" if *word_started_at_command_start => *pending_case_headers += 1,
2711            "in" if *pending_case_headers > 0 => {
2712                *pending_case_headers -= 1;
2713                case_clause_depths.push(depth);
2714            }
2715            "esac" if *word_started_at_command_start => {
2716                case_clause_depths.pop();
2717            }
2718            _ => {}
2719        }
2720
2721        current_word.clear();
2722        *word_started_at_command_start = false;
2723    }
2724
2725    fn read_command_subst_heredoc_delimiter_into(
2726        &mut self,
2727        content: &mut Option<String>,
2728    ) -> Option<String> {
2729        while let Some(ch) = self.peek_char() {
2730            if !matches!(ch, ' ' | '\t') {
2731                break;
2732            }
2733            Self::push_capture_char(content, ch);
2734            self.advance();
2735        }
2736
2737        let mut cooked = String::new();
2738        let mut in_single = false;
2739        let mut in_double = false;
2740        let mut escaped = false;
2741        let mut saw_any = false;
2742
2743        while let Some(ch) = self.peek_char() {
2744            if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
2745                break;
2746            }
2747
2748            saw_any = true;
2749            Self::push_capture_char(content, ch);
2750            self.advance();
2751
2752            if escaped {
2753                cooked.push(ch);
2754                escaped = false;
2755                continue;
2756            }
2757
2758            match ch {
2759                '\\' if !in_single => escaped = true,
2760                '\'' if !in_double => in_single = !in_single,
2761                '"' if !in_single => in_double = !in_double,
2762                _ => cooked.push(ch),
2763            }
2764        }
2765
2766        saw_any.then_some(cooked)
2767    }
2768
2769    fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2770        Self::push_capture_char(content, '`');
2771        self.advance();
2772        while let Some(ch) = self.peek_char() {
2773            Self::push_capture_char(content, ch);
2774            self.advance();
2775            if ch == '\\' {
2776                if let Some(esc) = self.peek_char() {
2777                    Self::push_capture_char(content, esc);
2778                    self.advance();
2779                }
2780                continue;
2781            }
2782            if ch == '`' {
2783                break;
2784            }
2785        }
2786    }
2787
2788    fn read_command_subst_pending_heredoc_into(
2789        &mut self,
2790        content: &mut Option<String>,
2791        delimiter: &str,
2792        strip_tabs: bool,
2793    ) -> bool {
2794        loop {
2795            let mut line = String::new();
2796            let mut saw_newline = false;
2797
2798            while let Some(ch) = self.peek_char() {
2799                self.advance();
2800                if ch == '\n' {
2801                    saw_newline = true;
2802                    break;
2803                }
2804                line.push(ch);
2805            }
2806
2807            Self::push_capture_str(content, &line);
2808            if saw_newline {
2809                Self::push_capture_char(content, '\n');
2810            }
2811
2812            if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2813                return true;
2814            }
2815        }
2816    }
2817
2818    fn read_command_subst_into_depth(
2819        &mut self,
2820        content: &mut Option<String>,
2821        subst_depth: usize,
2822    ) -> bool {
2823        if subst_depth >= self.max_subst_depth {
2824            // Depth limit exceeded — consume until matching ')' and emit error token
2825            let mut depth = 1;
2826            while let Some(c) = self.peek_char() {
2827                self.advance();
2828                match c {
2829                    '(' => depth += 1,
2830                    ')' => {
2831                        depth -= 1;
2832                        if depth == 0 {
2833                            Self::push_capture_char(content, ')');
2834                            return true;
2835                        }
2836                    }
2837                    _ => {}
2838                }
2839            }
2840            return false;
2841        }
2842
2843        let mut depth = 1;
2844        let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
2845        let mut pending_case_headers = 0usize;
2846        let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
2847        let mut current_word = String::with_capacity(16);
2848        let mut at_command_start = true;
2849        let mut expecting_redirection_target = false;
2850        let mut current_word_started_at_command_start = false;
2851        while let Some(c) = self.peek_char() {
2852            match c {
2853                '#' if !self.should_treat_hash_as_word_char() => {
2854                    let had_word = !current_word.is_empty();
2855                    Self::flush_command_subst_keyword(
2856                        &mut current_word,
2857                        &mut pending_case_headers,
2858                        &mut case_clause_depths,
2859                        depth,
2860                        &mut current_word_started_at_command_start,
2861                    );
2862                    if had_word && expecting_redirection_target {
2863                        expecting_redirection_target = false;
2864                    }
2865                    Self::push_capture_char(content, '#');
2866                    self.advance();
2867                    while let Some(comment_ch) = self.peek_char() {
2868                        Self::push_capture_char(content, comment_ch);
2869                        self.advance();
2870                        if comment_ch == '\n' {
2871                            for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
2872                                if !self.read_command_subst_pending_heredoc_into(
2873                                    content, &delimiter, strip_tabs,
2874                                ) {
2875                                    return false;
2876                                }
2877                            }
2878                            at_command_start = true;
2879                            expecting_redirection_target = false;
2880                            break;
2881                        }
2882                    }
2883                }
2884                '(' => {
2885                    Self::flush_command_subst_keyword(
2886                        &mut current_word,
2887                        &mut pending_case_headers,
2888                        &mut case_clause_depths,
2889                        depth,
2890                        &mut current_word_started_at_command_start,
2891                    );
2892                    depth += 1;
2893                    Self::push_capture_char(content, c);
2894                    self.advance();
2895                    at_command_start = true;
2896                    expecting_redirection_target = false;
2897                }
2898                ')' => {
2899                    Self::flush_command_subst_keyword(
2900                        &mut current_word,
2901                        &mut pending_case_headers,
2902                        &mut case_clause_depths,
2903                        depth,
2904                        &mut current_word_started_at_command_start,
2905                    );
2906                    if case_clause_depths
2907                        .last()
2908                        .is_some_and(|case_depth| *case_depth == depth)
2909                    {
2910                        Self::push_capture_char(content, ')');
2911                        self.advance();
2912                        at_command_start = true;
2913                        expecting_redirection_target = false;
2914                        continue;
2915                    }
2916                    depth -= 1;
2917                    self.advance();
2918                    if depth == 0 {
2919                        Self::push_capture_char(content, ')');
2920                        return true;
2921                    }
2922                    Self::push_capture_char(content, c);
2923                    at_command_start = false;
2924                    expecting_redirection_target = false;
2925                }
2926                '"' => {
2927                    let had_word = !current_word.is_empty();
2928                    Self::flush_command_subst_keyword(
2929                        &mut current_word,
2930                        &mut pending_case_headers,
2931                        &mut case_clause_depths,
2932                        depth,
2933                        &mut current_word_started_at_command_start,
2934                    );
2935                    if had_word && expecting_redirection_target {
2936                        expecting_redirection_target = false;
2937                    }
2938                    // Nested double-quoted string inside $()
2939                    Self::push_capture_char(content, '"');
2940                    self.advance();
2941                    while let Some(qc) = self.peek_char() {
2942                        match qc {
2943                            '"' => {
2944                                Self::push_capture_char(content, '"');
2945                                self.advance();
2946                                break;
2947                            }
2948                            '\\' => {
2949                                Self::push_capture_char(content, '\\');
2950                                self.advance();
2951                                if let Some(esc) = self.peek_char() {
2952                                    Self::push_capture_char(content, esc);
2953                                    self.advance();
2954                                }
2955                            }
2956                            '$' => {
2957                                Self::push_capture_char(content, '$');
2958                                self.advance();
2959                                if self.peek_char() == Some('(') {
2960                                    if self.second_char() == Some('(') {
2961                                        if !self.read_arithmetic_expansion_into(content) {
2962                                            return false;
2963                                        }
2964                                    } else {
2965                                        Self::push_capture_char(content, '(');
2966                                        self.advance();
2967                                        if !self
2968                                            .read_command_subst_into_depth(content, subst_depth + 1)
2969                                        {
2970                                            return false;
2971                                        }
2972                                    }
2973                                }
2974                            }
2975                            _ => {
2976                                Self::push_capture_char(content, qc);
2977                                self.advance();
2978                            }
2979                        }
2980                    }
2981                    if expecting_redirection_target {
2982                        expecting_redirection_target = false;
2983                    } else {
2984                        at_command_start = false;
2985                    }
2986                }
2987                '\'' => {
2988                    let had_word = !current_word.is_empty();
2989                    Self::flush_command_subst_keyword(
2990                        &mut current_word,
2991                        &mut pending_case_headers,
2992                        &mut case_clause_depths,
2993                        depth,
2994                        &mut current_word_started_at_command_start,
2995                    );
2996                    if had_word && expecting_redirection_target {
2997                        expecting_redirection_target = false;
2998                    }
2999                    // Single-quoted string inside $()
3000                    Self::push_capture_char(content, '\'');
3001                    self.advance();
3002                    while let Some(qc) = self.peek_char() {
3003                        Self::push_capture_char(content, qc);
3004                        self.advance();
3005                        if qc == '\'' {
3006                            break;
3007                        }
3008                    }
3009                    if expecting_redirection_target {
3010                        expecting_redirection_target = false;
3011                    } else {
3012                        at_command_start = false;
3013                    }
3014                }
3015                '`' => {
3016                    let had_word = !current_word.is_empty();
3017                    Self::flush_command_subst_keyword(
3018                        &mut current_word,
3019                        &mut pending_case_headers,
3020                        &mut case_clause_depths,
3021                        depth,
3022                        &mut current_word_started_at_command_start,
3023                    );
3024                    if had_word && expecting_redirection_target {
3025                        expecting_redirection_target = false;
3026                    }
3027                    self.read_command_subst_backtick_segment_into(content);
3028                    if expecting_redirection_target {
3029                        expecting_redirection_target = false;
3030                    } else {
3031                        at_command_start = false;
3032                    }
3033                }
3034                '$' if self.second_char() == Some('\'') => {
3035                    let had_word = !current_word.is_empty();
3036                    Self::flush_command_subst_keyword(
3037                        &mut current_word,
3038                        &mut pending_case_headers,
3039                        &mut case_clause_depths,
3040                        depth,
3041                        &mut current_word_started_at_command_start,
3042                    );
3043                    if had_word && expecting_redirection_target {
3044                        expecting_redirection_target = false;
3045                    }
3046                    Self::push_capture_char(content, '$');
3047                    self.advance();
3048                    Self::push_capture_char(content, '\'');
3049                    self.advance();
3050                    while let Some(qc) = self.peek_char() {
3051                        Self::push_capture_char(content, qc);
3052                        self.advance();
3053                        if qc == '\\' {
3054                            if let Some(esc) = self.peek_char() {
3055                                Self::push_capture_char(content, esc);
3056                                self.advance();
3057                            }
3058                            continue;
3059                        }
3060                        if qc == '\'' {
3061                            break;
3062                        }
3063                    }
3064                    if expecting_redirection_target {
3065                        expecting_redirection_target = false;
3066                    } else {
3067                        at_command_start = false;
3068                    }
3069                }
3070                '\\' => {
3071                    let had_word = !current_word.is_empty();
3072                    Self::flush_command_subst_keyword(
3073                        &mut current_word,
3074                        &mut pending_case_headers,
3075                        &mut case_clause_depths,
3076                        depth,
3077                        &mut current_word_started_at_command_start,
3078                    );
3079                    if had_word && expecting_redirection_target {
3080                        expecting_redirection_target = false;
3081                    }
3082                    Self::push_capture_char(content, '\\');
3083                    self.advance();
3084                    if let Some(esc) = self.peek_char() {
3085                        Self::push_capture_char(content, esc);
3086                        self.advance();
3087                    }
3088                    if expecting_redirection_target {
3089                        expecting_redirection_target = false;
3090                    } else {
3091                        at_command_start = false;
3092                    }
3093                }
3094                '<' if self.second_char() == Some('<') => {
3095                    let word_was_redirection_fd = current_word_started_at_command_start
3096                        && !current_word.is_empty()
3097                        && current_word.chars().all(|current| current.is_ascii_digit());
3098                    Self::flush_command_subst_keyword(
3099                        &mut current_word,
3100                        &mut pending_case_headers,
3101                        &mut case_clause_depths,
3102                        depth,
3103                        &mut current_word_started_at_command_start,
3104                    );
3105                    if word_was_redirection_fd {
3106                        at_command_start = true;
3107                    }
3108
3109                    Self::push_capture_char(content, '<');
3110                    self.advance();
3111                    Self::push_capture_char(content, '<');
3112                    self.advance();
3113
3114                    if self.peek_char() == Some('<') {
3115                        Self::push_capture_char(content, '<');
3116                        self.advance();
3117                        expecting_redirection_target = true;
3118                        continue;
3119                    }
3120
3121                    let strip_tabs = if self.peek_char() == Some('-') {
3122                        Self::push_capture_char(content, '-');
3123                        self.advance();
3124                        true
3125                    } else {
3126                        false
3127                    };
3128
3129                    if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
3130                    {
3131                        pending_heredocs.push((delimiter, strip_tabs));
3132                        expecting_redirection_target = false;
3133                    } else {
3134                        expecting_redirection_target = true;
3135                    }
3136                }
3137                '>' | '<' => {
3138                    let word_was_redirection_fd = current_word_started_at_command_start
3139                        && !current_word.is_empty()
3140                        && current_word.chars().all(|current| current.is_ascii_digit());
3141                    Self::flush_command_subst_keyword(
3142                        &mut current_word,
3143                        &mut pending_case_headers,
3144                        &mut case_clause_depths,
3145                        depth,
3146                        &mut current_word_started_at_command_start,
3147                    );
3148                    if word_was_redirection_fd {
3149                        at_command_start = true;
3150                    }
3151                    Self::push_capture_char(content, c);
3152                    self.advance();
3153                    expecting_redirection_target = true;
3154                }
3155                '\n' => {
3156                    Self::flush_command_subst_keyword(
3157                        &mut current_word,
3158                        &mut pending_case_headers,
3159                        &mut case_clause_depths,
3160                        depth,
3161                        &mut current_word_started_at_command_start,
3162                    );
3163                    Self::push_capture_char(content, '\n');
3164                    self.advance();
3165                    for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
3166                        if !self.read_command_subst_pending_heredoc_into(
3167                            content, &delimiter, strip_tabs,
3168                        ) {
3169                            return false;
3170                        }
3171                    }
3172                    at_command_start = true;
3173                    expecting_redirection_target = false;
3174                }
3175                _ => {
3176                    if c.is_ascii_alphanumeric() || c == '_' {
3177                        if current_word.is_empty()
3178                            && !expecting_redirection_target
3179                            && at_command_start
3180                        {
3181                            current_word_started_at_command_start = true;
3182                            at_command_start = false;
3183                        }
3184                        current_word.push(c);
3185                    } else {
3186                        let had_word = !current_word.is_empty();
3187                        Self::flush_command_subst_keyword(
3188                            &mut current_word,
3189                            &mut pending_case_headers,
3190                            &mut case_clause_depths,
3191                            depth,
3192                            &mut current_word_started_at_command_start,
3193                        );
3194                        if had_word && expecting_redirection_target {
3195                            expecting_redirection_target = false;
3196                        }
3197                        match c {
3198                            ' ' | '\t' => {}
3199                            ';' | '|' | '&' => {
3200                                at_command_start = true;
3201                                expecting_redirection_target = false;
3202                            }
3203                            _ => {
3204                                if !expecting_redirection_target {
3205                                    at_command_start = false;
3206                                }
3207                            }
3208                        }
3209                    }
3210                    Self::push_capture_char(content, c);
3211                    self.advance();
3212                }
3213            }
3214        }
3215
3216        false
3217    }
3218
3219    /// Read parameter expansion content after `${`, handling nested braces and quotes.
3220    /// In bash, quotes inside `${...}` (e.g. `${arr["key"]}`) don't terminate the
3221    /// outer double-quoted string. Appends chars including closing `}` to `content`.
3222    fn read_param_expansion_into(
3223        &mut self,
3224        content: &mut Option<String>,
3225        segment_start: Position,
3226    ) -> bool {
3227        let mut borrowable = true;
3228        let mut depth = 1;
3229        let mut literal_brace_depth = 0usize;
3230        let mut in_single = false;
3231        let mut in_double = false;
3232        let mut double_quote_depth = 0usize;
3233        while let Some(c) = self.peek_char() {
3234            if in_single {
3235                match c {
3236                    '\\' => {
3237                        let escape_start = self.current_position();
3238                        if self.second_char() == Some('"') {
3239                            self.advance();
3240                            borrowable = false;
3241                            self.ensure_capture_from_source(content, segment_start, escape_start);
3242                            Self::push_capture_char(content, '"');
3243                            self.advance();
3244                        } else {
3245                            Self::push_capture_char(content, '\\');
3246                            self.advance();
3247                        }
3248                    }
3249                    '\'' => {
3250                        Self::push_capture_char(content, c);
3251                        self.advance();
3252                        in_single = false;
3253                    }
3254                    _ => {
3255                        Self::push_capture_char(content, c);
3256                        self.advance();
3257                    }
3258                }
3259                continue;
3260            }
3261
3262            match c {
3263                '}' if !in_single && (!in_double || depth > double_quote_depth) => {
3264                    self.advance();
3265                    Self::push_capture_char(content, '}');
3266                    if depth == 1
3267                        && literal_brace_depth > 0
3268                        && self.has_later_top_level_param_expansion_closer(depth)
3269                    {
3270                        literal_brace_depth -= 1;
3271                        continue;
3272                    }
3273                    depth -= 1;
3274                    if depth == 0 {
3275                        break;
3276                    }
3277                }
3278                '{' if !in_single && !in_double => {
3279                    literal_brace_depth += 1;
3280                    Self::push_capture_char(content, '{');
3281                    self.advance();
3282                }
3283                '"' => {
3284                    // Quotes inside ${...} are part of the expansion, not string delimiters
3285                    Self::push_capture_char(content, '"');
3286                    self.advance();
3287                    in_double = !in_double;
3288                    double_quote_depth = if in_double { depth } else { 0 };
3289                }
3290                '\'' => {
3291                    Self::push_capture_char(content, '\'');
3292                    self.advance();
3293                    if !in_double {
3294                        in_single = true;
3295                    }
3296                }
3297                '\\' => {
3298                    // Inside ${...} within double quotes, same escape rules apply:
3299                    // \", \\, \$, \` produce the escaped char; others keep backslash
3300                    let escape_start = self.current_position();
3301                    self.advance();
3302                    if let Some(esc) = self.peek_char() {
3303                        match esc {
3304                            '$' => {
3305                                borrowable = false;
3306                                self.ensure_capture_from_source(
3307                                    content,
3308                                    segment_start,
3309                                    escape_start,
3310                                );
3311                                Self::push_capture_char(content, '\x00');
3312                                Self::push_capture_char(content, '$');
3313                                self.advance();
3314                            }
3315                            '"' | '\\' | '`' => {
3316                                borrowable = false;
3317                                self.ensure_capture_from_source(
3318                                    content,
3319                                    segment_start,
3320                                    escape_start,
3321                                );
3322                                Self::push_capture_char(content, esc);
3323                                self.advance();
3324                            }
3325                            '}' => {
3326                                // \} should be a literal } without closing the expansion
3327                                Self::push_capture_char(content, '\\');
3328                                Self::push_capture_char(content, '}');
3329                                self.advance();
3330                                literal_brace_depth = literal_brace_depth.saturating_sub(1);
3331                            }
3332                            _ => {
3333                                Self::push_capture_char(content, '\\');
3334                                Self::push_capture_char(content, esc);
3335                                self.advance();
3336                            }
3337                        }
3338                    } else {
3339                        Self::push_capture_char(content, '\\');
3340                    }
3341                }
3342                '$' => {
3343                    Self::push_capture_char(content, '$');
3344                    self.advance();
3345                    if self.peek_char() == Some('(') {
3346                        if self.second_char() == Some('(') {
3347                            if !self.read_arithmetic_expansion_into(content) {
3348                                borrowable = false;
3349                            }
3350                        } else {
3351                            Self::push_capture_char(content, '(');
3352                            self.advance();
3353                            self.read_command_subst_into(content);
3354                        }
3355                    } else if self.peek_char() == Some('{') {
3356                        Self::push_capture_char(content, '{');
3357                        self.advance();
3358                        borrowable &= self.read_param_expansion_into(content, segment_start);
3359                    }
3360                }
3361                _ => {
3362                    Self::push_capture_char(content, c);
3363                    self.advance();
3364                }
3365            }
3366        }
3367        borrowable
3368    }
3369
3370    fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
3371        let mut chars = self.lookahead_chars().peekable();
3372        let mut depth = target_depth;
3373        let mut in_single = false;
3374        let mut in_double = false;
3375        let mut double_quote_depth = 0usize;
3376
3377        while let Some(ch) = chars.next() {
3378            if in_single {
3379                match ch {
3380                    '\'' => in_single = false,
3381                    '\\' if chars.peek() == Some(&'"') => {
3382                        chars.next();
3383                    }
3384                    '\\' => {}
3385                    _ => {}
3386                }
3387                continue;
3388            }
3389
3390            if in_double {
3391                match ch {
3392                    '"' => {
3393                        in_double = false;
3394                        double_quote_depth = 0;
3395                    }
3396                    '\\' => {
3397                        chars.next();
3398                    }
3399                    '$' if chars.peek() == Some(&'{') => {
3400                        chars.next();
3401                        depth += 1;
3402                    }
3403                    '}' if depth > double_quote_depth => {
3404                        depth -= 1;
3405                    }
3406                    _ => {}
3407                }
3408                continue;
3409            }
3410
3411            match ch {
3412                '\n' if depth == target_depth => return false,
3413                '\'' => in_single = true,
3414                '"' => {
3415                    in_double = true;
3416                    double_quote_depth = depth;
3417                }
3418                '\\' => {
3419                    chars.next();
3420                }
3421                '$' if chars.peek() == Some(&'{') => {
3422                    chars.next();
3423                    depth += 1;
3424                }
3425                '}' => {
3426                    if depth == target_depth {
3427                        return true;
3428                    }
3429                    depth -= 1;
3430                }
3431                _ => {}
3432            }
3433        }
3434
3435        false
3436    }
3437
3438    /// Check if the content starting with { looks like a brace expansion
3439    /// Brace expansion: {a,b,c} or {1..5} (contains , or ..)
3440    /// Brace group: { cmd; } (contains spaces, semicolons, newlines)
3441    /// Caps lookahead to prevent O(n^2) scanning when input
3442    /// contains many unmatched `{` characters (issue #997).
3443    fn looks_like_brace_expansion(&self) -> bool {
3444        const MAX_LOOKAHEAD: usize = 10_000;
3445
3446        let mut chars = self.lookahead_chars();
3447
3448        // Skip the opening {
3449        if chars.next() != Some('{') {
3450            return false;
3451        }
3452
3453        let mut depth = 1;
3454        let mut paren_depth = 0usize;
3455        let mut has_comma = false;
3456        let mut has_dot_dot = false;
3457        let mut escaped = false;
3458        let mut in_single = false;
3459        let mut in_double = false;
3460        let mut in_backtick = false;
3461        let mut prev_char = None;
3462        let mut scanned = 0usize;
3463
3464        for ch in chars {
3465            scanned += 1;
3466            if scanned > MAX_LOOKAHEAD {
3467                return false;
3468            }
3469
3470            let brace_surface_active = !in_single && !in_double && !in_backtick;
3471            let at_top_level = depth == 1 && paren_depth == 0 && brace_surface_active;
3472
3473            match ch {
3474                _ if escaped => {
3475                    escaped = false;
3476                }
3477                '\\' if !in_single => escaped = true,
3478                '\'' if !in_double && !in_backtick => in_single = !in_single,
3479                '"' if !in_single && !in_backtick => in_double = !in_double,
3480                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3481                '(' if brace_surface_active && (paren_depth > 0 || prev_char == Some('$')) => {
3482                    paren_depth += 1
3483                }
3484                ')' if brace_surface_active && paren_depth > 0 => paren_depth -= 1,
3485                '{' if !in_single && !in_double && !in_backtick => depth += 1,
3486                '}' if !in_single && !in_double && !in_backtick => {
3487                    depth -= 1;
3488                    if depth == 0 {
3489                        // Found matching }, check if we have brace expansion markers
3490                        return has_comma || has_dot_dot;
3491                    }
3492                }
3493                ',' if at_top_level => has_comma = true,
3494                '.' if at_top_level && prev_char == Some('.') => has_dot_dot = true,
3495                // Brace groups have whitespace/newlines/semicolons at depth 1
3496                ' ' | '\t' | '\n' | ';' if at_top_level => return false,
3497                _ => {}
3498            }
3499            prev_char = Some(ch);
3500        }
3501
3502        false
3503    }
3504
3505    fn consume_mid_word_brace_segment(&mut self, word: &mut Option<String>) {
3506        let mut brace_depth = 1usize;
3507        let mut paren_depth = 0usize;
3508        let mut escaped = false;
3509        let mut in_single = false;
3510        let mut in_double = false;
3511        let mut in_backtick = false;
3512        let mut prev_char = None;
3513
3514        while let Some(ch) = self.peek_char() {
3515            Self::push_capture_char(word, ch);
3516            self.advance();
3517
3518            if escaped {
3519                escaped = false;
3520                prev_char = Some(ch);
3521                continue;
3522            }
3523
3524            match ch {
3525                '\\' if !in_single => escaped = true,
3526                '\'' if !in_double && !in_backtick => in_single = !in_single,
3527                '"' if !in_single && !in_backtick => in_double = !in_double,
3528                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3529                '(' if !in_single
3530                    && !in_double
3531                    && !in_backtick
3532                    && (paren_depth > 0 || prev_char == Some('$')) =>
3533                {
3534                    paren_depth += 1
3535                }
3536                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3537                    paren_depth -= 1
3538                }
3539                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3540                '}' if !in_single && !in_double && !in_backtick => {
3541                    brace_depth -= 1;
3542                    if brace_depth == 0 {
3543                        break;
3544                    }
3545                }
3546                _ => {}
3547            }
3548
3549            prev_char = Some(ch);
3550        }
3551    }
3552
3553    fn consume_brace_word_body(&mut self, word: &mut String) {
3554        let mut brace_depth = 1usize;
3555        let mut paren_depth = 0usize;
3556        let mut escaped = false;
3557        let mut in_single = false;
3558        let mut in_double = false;
3559        let mut in_backtick = false;
3560        let mut prev_char = None;
3561
3562        while let Some(ch) = self.peek_char() {
3563            word.push(ch);
3564            self.advance();
3565
3566            if escaped {
3567                escaped = false;
3568                prev_char = Some(ch);
3569                continue;
3570            }
3571
3572            match ch {
3573                '\\' if !in_single => escaped = true,
3574                '\'' if !in_double && !in_backtick => in_single = !in_single,
3575                '"' if !in_single && !in_backtick => in_double = !in_double,
3576                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3577                '(' if !in_single
3578                    && !in_double
3579                    && !in_backtick
3580                    && (paren_depth > 0 || prev_char == Some('$')) =>
3581                {
3582                    paren_depth += 1
3583                }
3584                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3585                    paren_depth -= 1
3586                }
3587                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3588                '}' if !in_single && !in_double && !in_backtick => {
3589                    brace_depth -= 1;
3590                    if brace_depth == 0 {
3591                        break;
3592                    }
3593                }
3594                _ => {}
3595            }
3596
3597            prev_char = Some(ch);
3598        }
3599    }
3600
3601    /// Check whether a mid-word `{...}` segment can stay attached to the current
3602    /// word without crossing a top-level word boundary.
3603    fn looks_like_mid_word_brace_segment(&self) -> bool {
3604        const MAX_LOOKAHEAD: usize = 10_000;
3605
3606        let mut chars = self.lookahead_chars();
3607        if chars.next() != Some('{') {
3608            return false;
3609        }
3610
3611        let mut brace_depth = 1;
3612        let mut paren_depth = 0usize;
3613        let mut escaped = false;
3614        let mut in_single = false;
3615        let mut in_double = false;
3616        let mut in_backtick = false;
3617        let mut prev_char = None;
3618        let mut scanned = 0usize;
3619
3620        for ch in chars {
3621            scanned += 1;
3622            if scanned > MAX_LOOKAHEAD {
3623                return false;
3624            }
3625
3626            if !in_single
3627                && !in_double
3628                && !in_backtick
3629                && !escaped
3630                && brace_depth == 1
3631                && paren_depth == 0
3632                && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
3633            {
3634                return false;
3635            }
3636
3637            if escaped {
3638                escaped = false;
3639                prev_char = Some(ch);
3640                continue;
3641            }
3642
3643            match ch {
3644                '\\' => escaped = true,
3645                '\'' if !in_double && !in_backtick => in_single = !in_single,
3646                '"' if !in_single && !in_backtick => in_double = !in_double,
3647                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3648                '(' if !in_single
3649                    && !in_double
3650                    && !in_backtick
3651                    && (paren_depth > 0 || prev_char == Some('$')) =>
3652                {
3653                    paren_depth += 1
3654                }
3655                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3656                    paren_depth -= 1
3657                }
3658                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3659                '}' if !in_single && !in_double && !in_backtick => {
3660                    brace_depth -= 1;
3661                    if brace_depth == 0 {
3662                        return true;
3663                    }
3664                }
3665                _ => {}
3666            }
3667
3668            prev_char = Some(ch);
3669        }
3670
3671        false
3672    }
3673
3674    /// Check if { is followed by whitespace (brace group start)
3675    fn is_brace_group_start(&self) -> bool {
3676        let mut chars = self.lookahead_chars();
3677        // Skip the opening {
3678        if chars.next() != Some('{') {
3679            return false;
3680        }
3681        // If next char is whitespace or newline, it's a brace group
3682        matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3683    }
3684
3685    /// Check whether the text after an escaped `{` looks like a brace-expansion
3686    /// surface that should stay attached to the current word, e.g. `\{a,b}`.
3687    fn escaped_brace_sequence_looks_like_brace_expansion(&self) -> bool {
3688        const MAX_LOOKAHEAD: usize = 10_000;
3689
3690        let mut chars = self.lookahead_chars();
3691        let mut depth = 1;
3692        let mut has_comma = false;
3693        let mut has_dot_dot = false;
3694        let mut prev_char = None;
3695        let mut scanned = 0usize;
3696
3697        for ch in chars.by_ref() {
3698            scanned += 1;
3699            if scanned > MAX_LOOKAHEAD {
3700                return false;
3701            }
3702            match ch {
3703                '{' => depth += 1,
3704                '}' => {
3705                    depth -= 1;
3706                    if depth == 0 {
3707                        return has_comma || has_dot_dot;
3708                    }
3709                }
3710                ',' if depth == 1 => has_comma = true,
3711                '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3712                ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3713                _ => {}
3714            }
3715            prev_char = Some(ch);
3716        }
3717
3718        false
3719    }
3720
3721    fn brace_literal_starts_case_pattern_delimiter(&self) -> bool {
3722        let mut chars = self.lookahead_chars();
3723        if chars.next() != Some('{') {
3724            return false;
3725        }
3726        chars.next() == Some(')')
3727    }
3728
3729    /// Read a {literal} pattern without comma/dot-dot as a word
3730    fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
3731        let mut word = String::with_capacity(16);
3732
3733        if let Some('{') = self.peek_char() {
3734            word.push('{');
3735            self.advance();
3736        } else {
3737            return None;
3738        }
3739
3740        self.consume_brace_word_body(&mut word);
3741
3742        while let Some(ch) = self.peek_char() {
3743            if Self::is_word_char(ch) {
3744                if self.reinject_buf.is_empty() {
3745                    let chunk = self.cursor.eat_while(Self::is_word_char);
3746                    word.push_str(chunk);
3747                    self.advance_scanned_source_bytes(chunk.len());
3748                } else {
3749                    word.push(ch);
3750                    self.advance();
3751                }
3752            } else {
3753                break;
3754            }
3755        }
3756
3757        Some(LexedToken::owned_word(TokenKind::Word, word))
3758    }
3759
3760    /// Read a brace expansion pattern as a word
3761    fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3762        let mut word = String::with_capacity(16);
3763
3764        // Read the opening {
3765        if let Some('{') = self.peek_char() {
3766            word.push('{');
3767            self.advance();
3768        } else {
3769            return None;
3770        }
3771
3772        // Read until matching }
3773        self.consume_brace_word_body(&mut word);
3774
3775        // Continue reading any suffix after the brace pattern
3776        while let Some(ch) = self.peek_char() {
3777            if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3778                if ch == '{' {
3779                    // Another brace pattern - include it
3780                    word.push(ch);
3781                    self.advance();
3782                    self.consume_brace_word_body(&mut word);
3783                } else {
3784                    word.push(ch);
3785                    self.advance();
3786                }
3787            } else {
3788                break;
3789            }
3790        }
3791
3792        Some(LexedToken::owned_word(TokenKind::Word, word))
3793    }
3794
3795    /// Peek ahead (without consuming) to see if `=(` starts an associative
3796    /// compound assignment like `([key]=val ...)`.  Returns true when the
3797    /// first non-whitespace char after `(` is `[`.
3798    fn looks_like_assoc_assign(&self) -> bool {
3799        let mut chars = self.lookahead_chars();
3800        // Skip the `(` we haven't consumed yet
3801        if chars.next() != Some('(') {
3802            return false;
3803        }
3804        // Skip optional whitespace
3805        for ch in chars {
3806            match ch {
3807                ' ' | '\t' => continue,
3808                '[' => return true,
3809                _ => return false,
3810            }
3811        }
3812        false
3813    }
3814
3815    fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3816        text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3817    }
3818
3819    fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3820        word.segments().any(|segment| {
3821            matches!(
3822                segment.kind(),
3823                LexedWordSegmentKind::SingleQuoted
3824                    | LexedWordSegmentKind::DollarSingleQuoted
3825                    | LexedWordSegmentKind::DoubleQuoted
3826                    | LexedWordSegmentKind::DollarDoubleQuoted
3827            )
3828        }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3829    }
3830
3831    fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3832        text.contains(['*', '?'])
3833            || text.ends_with('}') && text.contains("${")
3834            || text.ends_with(']')
3835                && text
3836                    .rfind('[')
3837                    .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3838    }
3839
3840    fn is_word_char(ch: char) -> bool {
3841        !matches!(
3842            ch,
3843            ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3844        )
3845    }
3846
3847    const fn is_ascii_word_byte(byte: u8) -> bool {
3848        !matches!(
3849            byte,
3850            b' ' | b'\t'
3851                | b'\n'
3852                | b';'
3853                | b'|'
3854                | b'&'
3855                | b'>'
3856                | b'<'
3857                | b'('
3858                | b')'
3859                | b'{'
3860                | b'}'
3861                | b'\''
3862                | b'"'
3863        )
3864    }
3865
3866    const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3867        Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3868    }
3869
3870    fn is_plain_word_char(ch: char) -> bool {
3871        Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3872    }
3873
3874    /// Read here document content until the delimiter line is found
3875    pub(super) fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
3876        let mut content = String::with_capacity(64);
3877        let mut current_line = String::with_capacity(64);
3878
3879        // Save rest of current line (after the delimiter token on the command line).
3880        // For `cat <<EOF | sort`, this captures ` | sort` so the parser can
3881        // tokenize the pipe and subsequent command after the heredoc body.
3882        //
3883        // Quoted strings may span multiple lines (e.g., `cat <<EOF; echo "two\nthree"`),
3884        // so we track quoting state and continue across newlines until quotes close.
3885        let mut rest_of_line = String::with_capacity(32);
3886        let rest_of_line_start = self.current_position();
3887        let mut in_double_quote = false;
3888        let mut in_single_quote = false;
3889        let mut in_comment = false;
3890        let mut saw_non_whitespace_tail = false;
3891        let mut consecutive_backslashes = 0usize;
3892        let mut previous_tail_char = None;
3893        while let Some(ch) = self.peek_char() {
3894            self.advance();
3895            if in_comment {
3896                if ch == '\n' {
3897                    break;
3898                }
3899                rest_of_line.push(ch);
3900                previous_tail_char = Some(ch);
3901                continue;
3902            }
3903            if ch == '#'
3904                && !in_single_quote
3905                && !in_double_quote
3906                && self.comments_enabled()
3907                && heredoc_tail_hash_starts_comment(previous_tail_char)
3908            {
3909                in_comment = true;
3910                rest_of_line.push(ch);
3911                previous_tail_char = Some(ch);
3912                consecutive_backslashes = 0;
3913                continue;
3914            }
3915            let backslash_continues_line = ch == '\\'
3916                && !in_single_quote
3917                && self.peek_char() == Some('\n')
3918                && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
3919                && consecutive_backslashes.is_multiple_of(2);
3920            if backslash_continues_line {
3921                rest_of_line.push(ch);
3922                rest_of_line.push('\n');
3923                self.advance();
3924                consecutive_backslashes = 0;
3925                continue;
3926            }
3927            if ch == '\n' && !in_double_quote && !in_single_quote {
3928                break;
3929            }
3930            if ch == '"' && !in_single_quote {
3931                in_double_quote = !in_double_quote;
3932            } else if ch == '\'' && !in_double_quote {
3933                in_single_quote = !in_single_quote;
3934            } else if ch == '\\' && in_double_quote {
3935                // Escaped char inside double quotes — skip the next char too
3936                rest_of_line.push(ch);
3937                if let Some(next) = self.peek_char() {
3938                    rest_of_line.push(next);
3939                    self.advance();
3940                }
3941                continue;
3942            }
3943            rest_of_line.push(ch);
3944            if !ch.is_whitespace() {
3945                saw_non_whitespace_tail = true;
3946            }
3947            if ch == '\\' && !in_single_quote {
3948                consecutive_backslashes += 1;
3949            } else {
3950                consecutive_backslashes = 0;
3951            }
3952            previous_tail_char = Some(ch);
3953        }
3954
3955        // If we just drained a heredoc replay buffer (for example when multiple
3956        // heredocs share one command line), resume tracking from the true cursor
3957        // position before we measure the body span.
3958        self.sync_offset_to_cursor();
3959        let content_start = self.current_position();
3960        let mut current_line_start = content_start;
3961        let content_end;
3962
3963        // Read lines until we find the delimiter
3964        loop {
3965            if self.reinject_buf.is_empty() {
3966                // When the body reading drains a reinject buffer (from a
3967                // previous heredoc on the same command line), the virtual
3968                // offset drifts away from the cursor. Snap it back before
3969                // any source-based work so spans and `post_heredoc_offset`
3970                // stay within bounds.
3971                self.sync_offset_to_cursor();
3972                let rest = self.cursor.rest();
3973                if rest.is_empty() {
3974                    content_end = self.current_position();
3975                    break;
3976                }
3977
3978                let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
3979                let line = &rest[..line_len];
3980                let has_newline = line_len < rest.len();
3981
3982                if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
3983                    content_end = current_line_start;
3984                    self.consume_source_bytes(line_len);
3985                    if has_newline {
3986                        self.consume_ascii_chars(1);
3987                    }
3988                    break;
3989                }
3990
3991                content.push_str(line);
3992                self.consume_source_bytes(line_len);
3993
3994                if has_newline {
3995                    self.consume_ascii_chars(1);
3996                    content.push('\n');
3997                    current_line_start = self.current_position();
3998                    continue;
3999                }
4000
4001                content_end = self.current_position();
4002                break;
4003            }
4004
4005            match self.peek_char() {
4006                Some('\n') => {
4007                    self.advance();
4008                    // Check if current line matches delimiter
4009                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
4010                        content_end = current_line_start;
4011                        break;
4012                    }
4013                    content.push_str(&current_line);
4014                    content.push('\n');
4015                    current_line.clear();
4016                    current_line_start = self.current_position();
4017                }
4018                Some(ch) => {
4019                    current_line.push(ch);
4020                    self.advance();
4021                }
4022                None => {
4023                    // End of input - check last line
4024                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
4025                        content_end = current_line_start;
4026                        break;
4027                    }
4028                    if !current_line.is_empty() {
4029                        content.push_str(&current_line);
4030                    }
4031                    content_end = self.current_position();
4032                    break;
4033                }
4034            }
4035        }
4036
4037        // Re-inject the command-line tail so subsequent same-line tokens (pipes,
4038        // redirects, command words, additional heredocs) stay visible to the
4039        // parser. Always replay a terminating newline so parsing stops before
4040        // tokens that originally lived on later source lines, like `}` or `do`.
4041        let post_heredoc_offset = self.offset;
4042        self.offset = rest_of_line_start.offset;
4043        for ch in rest_of_line.chars() {
4044            self.reinject_buf.push_back(ch);
4045        }
4046        self.reinject_buf.push_back('\n');
4047        self.reinject_resume_offset = Some(post_heredoc_offset);
4048
4049        HeredocRead {
4050            content,
4051            content_span: Span::from_positions(content_start, content_end),
4052        }
4053    }
4054
4055    fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
4056        let mut chars = self.cursor.rest().chars();
4057        if chars.next() != Some('\n') {
4058            return false;
4059        }
4060
4061        for ch in chars {
4062            if matches!(ch, ' ' | '\t') {
4063                continue;
4064            }
4065            if ch == '\n' {
4066                return false;
4067            }
4068            return matches!(ch, '|' | '&' | ';' | '<' | '>')
4069                || (ch == '#' && self.comments_enabled());
4070        }
4071
4072        false
4073    }
4074}
4075
/// Reports whether `line` terminates a heredoc whose delimiter is `delimiter`.
///
/// With `strip_tabs` set, leading tab characters are ignored first. The line
/// matches when it starts with the delimiter and anything after it consists
/// solely of spaces and tabs (which includes the exact-match case).
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    };

    // An exact match leaves an empty remainder, which trivially passes `all`.
    candidate
        .strip_prefix(delimiter)
        .is_some_and(|remainder| remainder.bytes().all(|byte| byte == b' ' || byte == b'\t'))
}
4093
/// Decides whether a `#` in a heredoc command-line tail begins a comment,
/// based on the character that precedes it.
///
/// A `#` starts a comment when nothing precedes it, or when the previous
/// character is whitespace or one of `;`, `|`, `&`, `<`, `>`, `)`.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(';' | '|' | '&' | '<' | '>' | ')') => true,
        Some(other) => other.is_whitespace(),
    }
}
4099
/// Decodes the character starting at byte `index` of `input`.
///
/// Returns the character together with the byte index just past it, or `None`
/// when `index` is at/after the end of `input` or not on a character boundary.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    input
        .get(index..)
        .and_then(|tail| tail.chars().next())
        .map(|ch| (ch, index + ch.len_utf8()))
}
4104
4105fn line_has_unclosed_double_paren(prefix: &str) -> bool {
4106    let mut index = 0usize;
4107    let mut depth = 0usize;
4108    let mut in_single = false;
4109    let mut in_double = false;
4110    let mut in_backtick = false;
4111    let mut escaped = false;
4112
4113    while let Some((ch, next_index)) = next_char_boundary(prefix, index) {
4114        let was_escaped = escaped;
4115        if ch == '\\' && !in_single {
4116            escaped = !escaped;
4117            index = next_index;
4118            continue;
4119        }
4120        escaped = false;
4121
4122        match ch {
4123            '\'' if !in_double && !in_backtick && !was_escaped => in_single = !in_single,
4124            '"' if !in_single && !in_backtick && !was_escaped => in_double = !in_double,
4125            '`' if !in_single && !in_double && !was_escaped => in_backtick = !in_backtick,
4126            '(' if !in_single
4127                && !in_double
4128                && !in_backtick
4129                && !was_escaped
4130                && prefix[next_index..].starts_with('(') =>
4131            {
4132                depth += 1;
4133                index = next_index + '('.len_utf8();
4134                continue;
4135            }
4136            ')' if !in_single
4137                && !in_double
4138                && !in_backtick
4139                && !was_escaped
4140                && prefix[next_index..].starts_with(')') =>
4141            {
4142                depth = depth.saturating_sub(1);
4143                index = next_index + ')'.len_utf8();
4144                continue;
4145            }
4146            _ => {}
4147        }
4148
4149        index = next_index;
4150    }
4151
4152    depth > 0
4153}
4154
4155fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4156    let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4157    let prefix = &input[line_start..index];
4158    line_has_unclosed_double_paren(prefix)
4159}
4160
4161fn hash_starts_comment(input: &str, index: usize) -> bool {
4162    if inside_unclosed_double_paren_on_line(input, index) {
4163        return false;
4164    }
4165
4166    let next = &input[index + '#'.len_utf8()..];
4167    input[..index]
4168        .chars()
4169        .next_back()
4170        .is_none_or(|prev| match prev {
4171            '(' => {
4172                let whitespace_index = next.find(char::is_whitespace);
4173                let close_index = next.find(')');
4174
4175                match (whitespace_index, close_index) {
4176                    (Some(whitespace), Some(close)) => whitespace < close,
4177                    (Some(_), None) | (None, None) => true,
4178                    (None, Some(_)) => false,
4179                }
4180            }
4181            _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4182        })
4183}
4184
/// Reports whether `ch` ends a heredoc delimiter word.
///
/// Nothing terminates the word while inside single or double quotes or right
/// after a backslash; otherwise whitespace and the characters
/// `|`, `&`, `;`, `<`, `>`, `(`, `)` do.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }

    match ch {
        '|' | '&' | ';' | '<' | '>' | '(' | ')' => true,
        other => other.is_whitespace(),
    }
}
4196
4197fn scan_double_quoted_command_substitution_segment(
4198    input: &str,
4199    mut index: usize,
4200    subst_depth: usize,
4201) -> Option<usize> {
4202    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4203        match ch {
4204            '"' => return Some(next_index),
4205            '\\' => {
4206                index = next_index;
4207                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4208                    index = escaped_next;
4209                }
4210            }
4211            '$' if input[next_index..].starts_with('{') => {
4212                let consumed = scan_command_subst_parameter_expansion_len(
4213                    &input[next_index + '{'.len_utf8()..],
4214                    subst_depth,
4215                    0,
4216                )?;
4217                index = next_index + '{'.len_utf8() + consumed;
4218            }
4219            '$' if input[next_index..].starts_with('(')
4220                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4221            {
4222                let consumed = scan_command_substitution_body_len_inner(
4223                    &input[next_index + '('.len_utf8()..],
4224                    subst_depth + 1,
4225                )?;
4226                index = next_index + '('.len_utf8() + consumed;
4227            }
4228            _ => index = next_index,
4229        }
4230    }
4231
4232    None
4233}
4234
/// Measures the remainder of a `${...}` parameter expansion found while
/// scanning a command substitution.
///
/// Callers in this file pass `input` starting immediately after the opening
/// `${`. The return value is the byte length consumed up to and including the
/// first `}` that is not quoted, backticked, or backslash-escaped; `None`
/// means `input` ended before such a brace was found.
///
/// `subst_depth` is forwarded, incremented by one, to
/// `scan_command_substitution_body_len_inner` for nested `$(...)`, `<(...)`,
/// and `>(...)`. `parameter_depth` counts nested `${` recursion; once it
/// reaches `MAX_PARAMETER_EXPANSION_SCAN_DEPTH` the scan is delegated to
/// `scan_command_subst_parameter_expansion_len_balanced`.
fn scan_command_subst_parameter_expansion_len(
    input: &str,
    subst_depth: usize,
    parameter_depth: usize,
) -> Option<usize> {
    if parameter_depth >= MAX_PARAMETER_EXPANSION_SCAN_DEPTH {
        return scan_command_subst_parameter_expansion_len_balanced(input, subst_depth);
    }

    let mut index = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // True when the previous character was a `$` that could introduce `$'...'`.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        // Outside plain single quotes a backslash toggles the escape flag, so
        // a doubled backslash cancels itself out.
        if ch == '\\' && !in_single {
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        // Nested `${...}` and `$(...)` are skipped as a unit. There is no
        // `!in_double` guard, so this also applies inside double quotes. When
        // the nested scan yields `None`, the `$` falls through and is handled
        // like any other character below.
        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            if input[next_index..].starts_with('{')
                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                    parameter_depth + 1,
                )
            {
                index = next_index + '{'.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }

            // `$((` is deliberately excluded from this branch.
            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // `<(` and `>(` are skipped as a unit only outside every kind of
        // quoting, double quotes included.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            // A `'` closes an open `$'...'`, opens one when it directly
            // follows an eligible `$`, and otherwise toggles single quoting.
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            // The first brace outside all quoting ends the expansion.
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                return Some(next_index);
            }
            _ => {}
        }

        // Remember an unquoted, unescaped `$` so that a directly following
        // `'` is treated as the start of `$'...'`.
        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4343
/// Variant of the `${...}` scanner that tracks nested `${` with a counter
/// instead of recursing into itself.
///
/// `input` is scanned from its start with one brace already considered open.
/// Each `${` met outside single quotes, `$'...'`, and backticks raises the
/// counter; each `}` outside all quoting lowers it. The byte length consumed
/// up to and including the `}` that brings the counter to zero is returned,
/// or `None` when `input` ends first.
///
/// `subst_depth` is forwarded, incremented by one, to
/// `scan_command_substitution_body_len_inner` for nested `$(...)`, `<(...)`,
/// and `>(...)`.
fn scan_command_subst_parameter_expansion_len_balanced(
    input: &str,
    subst_depth: usize,
) -> Option<usize> {
    let mut index = 0usize;
    // Starts at one for the brace that is already open at the start of `input`.
    let mut brace_depth = 1usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // True when the previous character was a `$` that could introduce `$'...'`.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        // Outside plain single quotes a backslash toggles the escape flag, so
        // a doubled backslash cancels itself out.
        if ch == '\\' && !in_single {
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            // A nested `${` only bumps the counter; its body is scanned by
            // this same loop. There is no `!in_double` guard here.
            if input[next_index..].starts_with('{') {
                brace_depth = brace_depth.saturating_add(1);
                index = next_index + '{'.len_utf8();
                ansi_c_quote_pending = false;
                continue;
            }

            // `$(...)` (but not `$((`) is skipped as a unit; if that scan
            // yields `None`, the `$` is handled like any other character.
            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // `<(` and `>(` are skipped as a unit only outside every kind of
        // quoting, double quotes included.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            // A `'` closes an open `$'...'`, opens one when it directly
            // follows an eligible `$`, and otherwise toggles single quoting.
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            // Only a brace outside all quoting closes a level; the scan ends
            // when the outermost level closes.
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                brace_depth = brace_depth.saturating_sub(1);
                if brace_depth == 0 {
                    return Some(next_index);
                }
            }
            _ => {}
        }

        // Remember an unquoted, unescaped `$` so that a directly following
        // `'` is treated as the start of `$'...'`.
        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4446
4447fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
4448    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4449        if !matches!(ch, ' ' | '\t') {
4450            break;
4451        }
4452        index = next_index;
4453    }
4454
4455    let start = index;
4456    let mut cooked = String::new();
4457    let mut in_single = false;
4458    let mut in_double = false;
4459    let mut escaped = false;
4460
4461    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4462        if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
4463            break;
4464        }
4465
4466        index = next_index;
4467        if escaped {
4468            cooked.push(ch);
4469            escaped = false;
4470            continue;
4471        }
4472
4473        match ch {
4474            '\\' if !in_single => escaped = true,
4475            '\'' if !in_double => in_single = !in_single,
4476            '"' if !in_single => in_double = !in_double,
4477            _ => cooked.push(ch),
4478        }
4479    }
4480
4481    (index > start).then_some((index, cooked))
4482}
4483
4484fn skip_command_subst_pending_heredoc(
4485    input: &str,
4486    mut index: usize,
4487    delimiter: &str,
4488    strip_tabs: bool,
4489) -> usize {
4490    while index <= input.len() {
4491        let rest = &input[index..];
4492        let line_len = rest.find('\n').unwrap_or(rest.len());
4493        let line = &rest[..line_len];
4494        let has_newline = line_len < rest.len();
4495
4496        index += line_len;
4497        if has_newline {
4498            index += '\n'.len_utf8();
4499        }
4500
4501        if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4502            return index;
4503        }
4504    }
4505
4506    index
4507}
4508
4509fn scan_command_subst_ansi_c_single_quoted_segment(
4510    input: &str,
4511    quote_index: usize,
4512) -> Option<usize> {
4513    let mut index = quote_index + '\''.len_utf8();
4514
4515    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4516        index = next_index;
4517        if ch == '\\' {
4518            if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4519                index = escaped_next;
4520            }
4521            continue;
4522        }
4523
4524        if ch == '\'' {
4525            return Some(index);
4526        }
4527    }
4528
4529    None
4530}
4531
4532fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
4533    let mut index = start;
4534
4535    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4536        index = next_index;
4537        if ch == '\\' {
4538            if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4539                index = escaped_next;
4540            }
4541            continue;
4542        }
4543
4544        if ch == '`' {
4545            return Some(index);
4546        }
4547    }
4548
4549    None
4550}
4551
4552fn flush_scanned_command_subst_keyword(
4553    current_word: &mut String,
4554    pending_case_headers: &mut usize,
4555    case_clause_depths: &mut SmallVec<[usize; 4]>,
4556    depth: usize,
4557    word_started_at_command_start: &mut bool,
4558) {
4559    if current_word.is_empty() {
4560        *word_started_at_command_start = false;
4561        return;
4562    }
4563
4564    match current_word.as_str() {
4565        "case" if *word_started_at_command_start => *pending_case_headers += 1,
4566        "in" if *pending_case_headers > 0 => {
4567            *pending_case_headers -= 1;
4568            case_clause_depths.push(depth);
4569        }
4570        "esac" if *word_started_at_command_start => {
4571            case_clause_depths.pop();
4572        }
4573        _ => {}
4574    }
4575
4576    current_word.clear();
4577    *word_started_at_command_start = false;
4578}
4579
4580pub(super) fn scan_command_substitution_body_len_inner(
4581    input: &str,
4582    subst_depth: usize,
4583) -> Option<usize> {
4584    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
4585        return None;
4586    }
4587
4588    let mut index = 0usize;
4589    let mut depth = 1;
4590    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
4591    let mut pending_case_headers = 0usize;
4592    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
4593    let mut current_word = String::with_capacity(16);
4594    let mut at_command_start = true;
4595    let mut expecting_redirection_target = false;
4596    let mut current_word_started_at_command_start = false;
4597
4598    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4599        match ch {
4600            '#' if hash_starts_comment(input, index) => {
4601                let had_word = !current_word.is_empty();
4602                flush_scanned_command_subst_keyword(
4603                    &mut current_word,
4604                    &mut pending_case_headers,
4605                    &mut case_clause_depths,
4606                    depth,
4607                    &mut current_word_started_at_command_start,
4608                );
4609                if had_word && expecting_redirection_target {
4610                    expecting_redirection_target = false;
4611                }
4612                index = next_index;
4613                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
4614                    index = comment_next;
4615                    if comment_ch == '\n' {
4616                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
4617                            index = skip_command_subst_pending_heredoc(
4618                                input, index, &delimiter, strip_tabs,
4619                            );
4620                        }
4621                        at_command_start = true;
4622                        expecting_redirection_target = false;
4623                        break;
4624                    }
4625                }
4626            }
4627            '(' => {
4628                flush_scanned_command_subst_keyword(
4629                    &mut current_word,
4630                    &mut pending_case_headers,
4631                    &mut case_clause_depths,
4632                    depth,
4633                    &mut current_word_started_at_command_start,
4634                );
4635                depth += 1;
4636                index = next_index;
4637                at_command_start = true;
4638                expecting_redirection_target = false;
4639            }
4640            ')' => {
4641                flush_scanned_command_subst_keyword(
4642                    &mut current_word,
4643                    &mut pending_case_headers,
4644                    &mut case_clause_depths,
4645                    depth,
4646                    &mut current_word_started_at_command_start,
4647                );
4648                if case_clause_depths
4649                    .last()
4650                    .is_some_and(|case_depth| *case_depth == depth)
4651                {
4652                    index = next_index;
4653                    at_command_start = true;
4654                    expecting_redirection_target = false;
4655                    continue;
4656                }
4657                depth -= 1;
4658                index = next_index;
4659                if depth == 0 {
4660                    return Some(index);
4661                }
4662                at_command_start = false;
4663                expecting_redirection_target = false;
4664            }
4665            '"' => {
4666                let had_word = !current_word.is_empty();
4667                flush_scanned_command_subst_keyword(
4668                    &mut current_word,
4669                    &mut pending_case_headers,
4670                    &mut case_clause_depths,
4671                    depth,
4672                    &mut current_word_started_at_command_start,
4673                );
4674                if had_word && expecting_redirection_target {
4675                    expecting_redirection_target = false;
4676                }
4677                index = scan_double_quoted_command_substitution_segment(
4678                    input,
4679                    next_index,
4680                    subst_depth,
4681                )?;
4682                if expecting_redirection_target {
4683                    expecting_redirection_target = false;
4684                } else {
4685                    at_command_start = false;
4686                }
4687            }
4688            '\'' => {
4689                let had_word = !current_word.is_empty();
4690                flush_scanned_command_subst_keyword(
4691                    &mut current_word,
4692                    &mut pending_case_headers,
4693                    &mut case_clause_depths,
4694                    depth,
4695                    &mut current_word_started_at_command_start,
4696                );
4697                if had_word && expecting_redirection_target {
4698                    expecting_redirection_target = false;
4699                }
4700                index = next_index;
4701                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
4702                    index = quoted_next;
4703                    if quoted_ch == '\'' {
4704                        break;
4705                    }
4706                }
4707                if expecting_redirection_target {
4708                    expecting_redirection_target = false;
4709                } else {
4710                    at_command_start = false;
4711                }
4712            }
4713            '`' => {
4714                let had_word = !current_word.is_empty();
4715                flush_scanned_command_subst_keyword(
4716                    &mut current_word,
4717                    &mut pending_case_headers,
4718                    &mut case_clause_depths,
4719                    depth,
4720                    &mut current_word_started_at_command_start,
4721                );
4722                if had_word && expecting_redirection_target {
4723                    expecting_redirection_target = false;
4724                }
4725                index = scan_command_subst_backtick_segment(input, next_index)?;
4726                if expecting_redirection_target {
4727                    expecting_redirection_target = false;
4728                } else {
4729                    at_command_start = false;
4730                }
4731            }
4732            '$' if input[next_index..].starts_with('\'') => {
4733                let had_word = !current_word.is_empty();
4734                flush_scanned_command_subst_keyword(
4735                    &mut current_word,
4736                    &mut pending_case_headers,
4737                    &mut case_clause_depths,
4738                    depth,
4739                    &mut current_word_started_at_command_start,
4740                );
4741                if had_word && expecting_redirection_target {
4742                    expecting_redirection_target = false;
4743                }
4744                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
4745                if expecting_redirection_target {
4746                    expecting_redirection_target = false;
4747                } else {
4748                    at_command_start = false;
4749                }
4750            }
4751            '\\' => {
4752                let had_word = !current_word.is_empty();
4753                flush_scanned_command_subst_keyword(
4754                    &mut current_word,
4755                    &mut pending_case_headers,
4756                    &mut case_clause_depths,
4757                    depth,
4758                    &mut current_word_started_at_command_start,
4759                );
4760                if had_word && expecting_redirection_target {
4761                    expecting_redirection_target = false;
4762                }
4763                index = next_index;
4764                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4765                    index = escaped_next;
4766                }
4767                if expecting_redirection_target {
4768                    expecting_redirection_target = false;
4769                } else {
4770                    at_command_start = false;
4771                }
4772            }
4773            '>' => {
4774                let word_was_redirection_fd = current_word_started_at_command_start
4775                    && !current_word.is_empty()
4776                    && current_word.chars().all(|current| current.is_ascii_digit());
4777                flush_scanned_command_subst_keyword(
4778                    &mut current_word,
4779                    &mut pending_case_headers,
4780                    &mut case_clause_depths,
4781                    depth,
4782                    &mut current_word_started_at_command_start,
4783                );
4784                if word_was_redirection_fd {
4785                    at_command_start = true;
4786                }
4787                index = next_index;
4788                expecting_redirection_target = true;
4789            }
4790            '<' if input[next_index..].starts_with('<') => {
4791                let word_was_redirection_fd = current_word_started_at_command_start
4792                    && !current_word.is_empty()
4793                    && current_word.chars().all(|current| current.is_ascii_digit());
4794                let had_word = !current_word.is_empty();
4795                flush_scanned_command_subst_keyword(
4796                    &mut current_word,
4797                    &mut pending_case_headers,
4798                    &mut case_clause_depths,
4799                    depth,
4800                    &mut current_word_started_at_command_start,
4801                );
4802                if had_word && expecting_redirection_target {
4803                    expecting_redirection_target = false;
4804                }
4805                if word_was_redirection_fd {
4806                    at_command_start = true;
4807                }
4808                if inside_unclosed_double_paren_on_line(input, index) {
4809                    index = next_index + '<'.len_utf8();
4810                    continue;
4811                }
4812
4813                if input[next_index + '<'.len_utf8()..].starts_with('<') {
4814                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
4815                    expecting_redirection_target = true;
4816                    continue;
4817                }
4818
4819                let strip_tabs = input[next_index..].starts_with("<-");
4820                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
4821                if let Some((delimiter_index, delimiter)) =
4822                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
4823                {
4824                    pending_heredocs.push((delimiter, strip_tabs));
4825                    index = delimiter_index;
4826                    expecting_redirection_target = false;
4827                } else {
4828                    index = next_index;
4829                    expecting_redirection_target = true;
4830                }
4831            }
4832            '\n' => {
4833                flush_scanned_command_subst_keyword(
4834                    &mut current_word,
4835                    &mut pending_case_headers,
4836                    &mut case_clause_depths,
4837                    depth,
4838                    &mut current_word_started_at_command_start,
4839                );
4840                index = next_index;
4841                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
4842                    index =
4843                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
4844                }
4845                at_command_start = true;
4846                expecting_redirection_target = false;
4847            }
4848            '$' if input[next_index..].starts_with('{') => {
4849                let had_word = !current_word.is_empty();
4850                flush_scanned_command_subst_keyword(
4851                    &mut current_word,
4852                    &mut pending_case_headers,
4853                    &mut case_clause_depths,
4854                    depth,
4855                    &mut current_word_started_at_command_start,
4856                );
4857                if had_word && expecting_redirection_target {
4858                    expecting_redirection_target = false;
4859                }
4860                let consumed = scan_command_subst_parameter_expansion_len(
4861                    &input[next_index + '{'.len_utf8()..],
4862                    subst_depth,
4863                    0,
4864                )?;
4865                index = next_index + '{'.len_utf8() + consumed;
4866                if expecting_redirection_target {
4867                    expecting_redirection_target = false;
4868                } else {
4869                    at_command_start = false;
4870                }
4871            }
4872            '$' if input[next_index..].starts_with('(')
4873                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4874            {
4875                let had_word = !current_word.is_empty();
4876                flush_scanned_command_subst_keyword(
4877                    &mut current_word,
4878                    &mut pending_case_headers,
4879                    &mut case_clause_depths,
4880                    depth,
4881                    &mut current_word_started_at_command_start,
4882                );
4883                if had_word && expecting_redirection_target {
4884                    expecting_redirection_target = false;
4885                }
4886                let consumed = scan_command_substitution_body_len_inner(
4887                    &input[next_index + '('.len_utf8()..],
4888                    subst_depth + 1,
4889                )?;
4890                index = next_index + '('.len_utf8() + consumed;
4891                if expecting_redirection_target {
4892                    expecting_redirection_target = false;
4893                } else {
4894                    at_command_start = false;
4895                }
4896            }
4897            _ => {
4898                if ch.is_ascii_alphanumeric() || ch == '_' {
4899                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
4900                    {
4901                        current_word_started_at_command_start = true;
4902                        at_command_start = false;
4903                    }
4904                    current_word.push(ch);
4905                } else {
4906                    let had_word = !current_word.is_empty();
4907                    flush_scanned_command_subst_keyword(
4908                        &mut current_word,
4909                        &mut pending_case_headers,
4910                        &mut case_clause_depths,
4911                        depth,
4912                        &mut current_word_started_at_command_start,
4913                    );
4914                    if had_word && expecting_redirection_target {
4915                        expecting_redirection_target = false;
4916                    }
4917                    match ch {
4918                        ' ' | '\t' => {}
4919                        ';' | '|' | '&' => {
4920                            at_command_start = true;
4921                            expecting_redirection_target = false;
4922                        }
4923                        _ => {
4924                            if !expecting_redirection_target {
4925                                at_command_start = false;
4926                            }
4927                        }
4928                    }
4929                }
4930                index = next_index;
4931            }
4932        }
4933    }
4934
4935    None
4936}
4937
4938pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
4939    scan_command_substitution_body_len_inner(input, 0)
4940}
4941
4942#[cfg(test)]
4943mod tests {
4944    use super::*;
4945
4946    fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
4947        match token.kind {
4948            kind if kind.is_word_like() => token.word_string(),
4949            TokenKind::Comment => token
4950                .span
4951                .slice(source)
4952                .strip_prefix('#')
4953                .map(str::to_string),
4954            TokenKind::Error => token
4955                .error_kind()
4956                .map(LexerErrorKind::message)
4957                .map(str::to_string),
4958            _ => None,
4959        }
4960    }
4961
4962    fn assert_next_token(
4963        lexer: &mut Lexer<'_>,
4964        expected_kind: TokenKind,
4965        expected_text: Option<&str>,
4966    ) {
4967        let token = lexer.next_lexed_token().unwrap();
4968        assert_eq!(token.kind, expected_kind);
4969        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4970    }
4971
4972    fn assert_next_token_with_comments(
4973        lexer: &mut Lexer<'_>,
4974        expected_kind: TokenKind,
4975        expected_text: Option<&str>,
4976    ) {
4977        let token = lexer.next_lexed_token_with_comments().unwrap();
4978        assert_eq!(token.kind, expected_kind);
4979        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4980    }
4981
4982    fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
4983        let mut lexer = Lexer::new(input);
4984
4985        while let Some(token) = lexer.next_lexed_token() {
4986            if token.kind == TokenKind::Newline {
4987                continue;
4988            }
4989
4990            assert_eq!(
4991                token.span.start.line, token.span.end.line,
4992                "token should stay on one line: {:?}",
4993                token
4994            );
4995        }
4996    }
4997
4998    #[test]
4999    fn test_simple_words() {
5000        let mut lexer = Lexer::new("echo hello world");
5001
5002        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5003        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5004        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5005        assert!(lexer.next_lexed_token().is_none());
5006    }
5007
5008    #[test]
5009    fn test_single_quoted_string() {
5010        let mut lexer = Lexer::new("echo 'hello world'");
5011
5012        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5013        // Single-quoted strings return LiteralWord (no variable expansion)
5014        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("hello world"));
5015        assert!(lexer.next_lexed_token().is_none());
5016    }
5017
5018    #[test]
5019    fn test_double_quoted_string() {
5020        let mut lexer = Lexer::new("echo \"hello world\"");
5021
5022        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5023        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("hello world"));
5024        assert!(lexer.next_lexed_token().is_none());
5025    }
5026
5027    #[test]
5028    fn test_brace_expansion_token_ignores_quoted_closers() {
5029        let mut lexer = Lexer::new("echo {\"}\",a}\n");
5030
5031        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5032        assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{"}",a}"#));
5033        assert_next_token(&mut lexer, TokenKind::Newline, None);
5034        assert!(lexer.next_lexed_token().is_none());
5035    }
5036
5037    #[test]
5038    fn test_brace_expansion_token_preserves_single_quoted_backslash_member_boundary() {
5039        let mut lexer = Lexer::new("echo {'a\\',b} next\n");
5040
5041        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5042        assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{'a\',b}"#));
5043        assert_next_token(&mut lexer, TokenKind::Word, Some("next"));
5044        assert_next_token(&mut lexer, TokenKind::Newline, None);
5045        assert!(lexer.next_lexed_token().is_none());
5046    }
5047
5048    #[test]
5049    fn test_double_quoted_expansion_token_keeps_source_backing() {
5050        let source = r#""$bar""#;
5051        let mut lexer = Lexer::new(source);
5052
5053        let token = lexer.next_lexed_token().unwrap();
5054        assert_eq!(token.kind, TokenKind::QuotedWord);
5055        assert_eq!(token.word_text(), Some("$bar"));
5056
5057        let word = token.word().unwrap();
5058        let segment = word.single_segment().unwrap();
5059        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
5060        assert_eq!(segment.span().unwrap().slice(source), "$bar");
5061    }
5062
5063    #[test]
5064    fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
5065        let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
5066        let mut lexer = Lexer::new(source);
5067
5068        let token = lexer.next_lexed_token().unwrap();
5069        assert_eq!(token.kind, TokenKind::QuotedWord);
5070        assert_eq!(
5071            token.word_text(),
5072            Some(r#"$(echo "$line" | cut -d' ' -f2-)"#)
5073        );
5074    }
5075
5076    #[test]
5077    fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
5078        let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
5079        let mut lexer = Lexer::new(source);
5080
5081        let token = lexer.next_lexed_token().unwrap();
5082        assert_eq!(token.kind, TokenKind::QuotedWord);
5083        assert_eq!(
5084            token.word_text(),
5085            Some(r#"$(echo "${@}" | tr -d '[:space:]')"#)
5086        );
5087    }
5088
5089    #[test]
5090    fn test_deep_command_substitution_preserves_simple_parameter_expansion() {
5091        let source = r#""$(echo "$(echo "$(echo "$(echo "${name}")")")")""#;
5092        let mut lexer = Lexer::new(source);
5093
5094        let token = lexer.next_lexed_token().unwrap();
5095        assert_eq!(token.kind, TokenKind::QuotedWord);
5096        assert_eq!(
5097            token.word_text(),
5098            Some(r#"$(echo "$(echo "$(echo "$(echo "${name}")")")")"#)
5099        );
5100    }
5101
5102    #[test]
5103    fn test_command_substitution_preserves_deep_parameter_operand_paren() {
5104        let source = r#""$(echo "${a:-${b:-${c:-${d:-${e:-x})}}}}")""#;
5105        let mut lexer = Lexer::new(source);
5106
5107        let token = lexer.next_lexed_token().unwrap();
5108        assert_eq!(token.kind, TokenKind::QuotedWord);
5109        assert_eq!(
5110            token.word_text(),
5111            Some(r#"$(echo "${a:-${b:-${c:-${d:-${e:-x})}}}}")"#)
5112        );
5113    }
5114
5115    #[test]
5116    fn test_mixed_word_keeps_segment_kinds() {
5117        let source = r#"foo"bar"'baz'"#;
5118        let mut lexer = Lexer::new(source);
5119
5120        let token = lexer.next_lexed_token().unwrap();
5121        assert_eq!(token.kind, TokenKind::Word);
5122
5123        let word = token.word().unwrap();
5124        let segments: Vec<_> = word
5125            .segments()
5126            .map(|segment| (segment.kind(), segment.as_str().to_string()))
5127            .collect();
5128
5129        assert_eq!(
5130            segments,
5131            vec![
5132                (LexedWordSegmentKind::Plain, "foo".to_string()),
5133                (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
5134                (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
5135            ]
5136        );
5137        assert_eq!(word.joined_text(), "foobarbaz");
5138        assert_eq!(
5139            word.segments()
5140                .next()
5141                .and_then(LexedWordSegment::span)
5142                .unwrap()
5143                .slice(source),
5144            "foo"
5145        );
5146    }
5147
5148    #[test]
5149    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
5150        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";
5151
5152        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5153        let body = &source[..consumed];
5154
5155        assert!(body.contains("field, direction"));
5156        assert!(body.ends_with(')'));
5157    }
5158
5159    #[test]
5160    fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
5161        let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";
5162
5163        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5164        let body = &source[..consumed];
5165
5166        assert!(body.contains("printf '%s' y"));
5167        assert!(body.ends_with(')'));
5168    }
5169
5170    #[test]
5171    fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
5172        let source = " (# comment with )\nprintf %s 1,2\n) )\"";
5173
5174        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5175        let body = &source[..consumed];
5176
5177        assert!(body.contains("printf %s 1,2"));
5178        assert!(body.ends_with(')'));
5179    }
5180
5181    #[test]
5182    fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
5183        let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";
5184
5185        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5186        let body = &source[..consumed];
5187
5188        assert!(body.contains("field, direction"));
5189        assert!(body.ends_with(')'));
5190    }
5191
5192    #[test]
5193    fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
5194        let source = "printf %s ${x//foo/)},1)\"";
5195
5196        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5197        let body = &source[..consumed];
5198
5199        assert!(body.contains("${x//foo/)},1"));
5200        assert!(body.ends_with(')'));
5201    }
5202
5203    #[test]
5204    fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
5205        let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";
5206
5207        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5208        let body = &source[..consumed];
5209
5210        assert!(body.contains("printf %s 1,2"));
5211        assert!(body.ends_with(')'));
5212    }
5213
5214    #[test]
5215    fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
5216        let source = "[[ \"$buf\" == (#b)(*) ]]";
5217        let index = source.find('#').expect("expected hash");
5218
5219        assert!(!hash_starts_comment(source, index));
5220    }
5221
5222    #[test]
5223    fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
5224        let source = "(#comment with )";
5225        let index = source.find('#').expect("expected hash");
5226
5227        assert!(hash_starts_comment(source, index));
5228    }
5229
5230    #[test]
5231    fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
5232        let source = "(( #c < 256 ))";
5233        let index = source.find('#').expect("expected hash");
5234
5235        assert!(!hash_starts_comment(source, index));
5236    }
5237
5238    #[test]
5239    fn test_hash_starts_comment_respects_quoted_double_parens() {
5240        let source = "printf '((' # comment";
5241        let index = source.find('#').expect("expected hash");
5242
5243        assert!(hash_starts_comment(source, index));
5244    }
5245
5246    #[test]
5247    fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
5248        let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";
5249
5250        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5251        let body = &source[..consumed];
5252
5253        assert!(body.contains("printf %s 1,2"));
5254        assert!(body.ends_with(')'));
5255    }
5256
5257    #[test]
5258    fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
5259        let source = " (#comment with )\nprintf %s 1,2\n) )\"";
5260
5261        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5262        let body = &source[..consumed];
5263
5264        assert!(body.contains("printf %s 1,2"));
5265        assert!(body.ends_with(')'));
5266    }
5267
5268    #[test]
5269    fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
5270        let source = "((x<<2))\nprintf %s 1,2\n)\"";
5271
5272        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5273        let body = &source[..consumed];
5274
5275        assert!(body.contains("printf %s 1,2"));
5276        assert!(body.ends_with(')'));
5277    }
5278
5279    #[test]
5280    fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
5281        let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";
5282
5283        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5284        let body = &source[..consumed];
5285
5286        assert!(body.contains("printf %s 1,2"));
5287        assert!(body.ends_with("))"));
5288    }
5289
5290    #[test]
5291    fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
5292        let source = "printf %s 1,2; echo case in)\"";
5293
5294        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5295        let body = &source[..consumed];
5296
5297        assert!(body.contains("echo case in"));
5298        assert!(body.ends_with(')'));
5299    }
5300
5301    #[test]
5302    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
5303        let source = "printf %s $'a\\'b'; printf %s 1,2)\"";
5304
5305        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5306        let body = &source[..consumed];
5307
5308        assert!(body.contains("$'a\\'b'"));
5309        assert!(body.contains("printf %s 1,2"));
5310        assert!(body.ends_with(')'));
5311    }
5312
5313    #[test]
5314    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
5315        let source = "printf %s `echo foo)`; printf %s ok)\"";
5316
5317        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5318        let body = &source[..consumed];
5319
5320        assert!(body.contains("`echo foo)`"));
5321        assert!(body.contains("printf %s ok"));
5322        assert!(body.ends_with(')'));
5323    }
5324
5325    #[test]
5326    fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
5327        let source = "printf %s ${x/`echo }`/foo)},1)\"";
5328
5329        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5330        let body = &source[..consumed];
5331
5332        assert!(body.contains("${x/`echo }`/foo)},1"));
5333        assert!(body.ends_with(')'));
5334    }
5335
5336    #[test]
5337    fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
5338     {
5339        let source = "printf %s ${x/<(echo })/foo)},1)\"";
5340
5341        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5342        let body = &source[..consumed];
5343
5344        assert!(body.contains("${x/<(echo })/foo)},1"));
5345        assert!(body.ends_with(')'));
5346    }
5347
5348    #[test]
5349    fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
5350        let source = "printf %s 1,2; echo case in)";
5351
5352        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5353        let body = &source[..consumed];
5354
5355        assert_eq!(body, source);
5356    }
5357
5358    #[test]
5359    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
5360        let source = "printf %s $'a\\'b'; printf %s 1,2)";
5361
5362        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5363        let body = &source[..consumed];
5364
5365        assert_eq!(body, source);
5366    }
5367
5368    #[test]
5369    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
5370        let source = "printf %s `echo foo)`; printf %s ok)";
5371
5372        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5373        let body = &source[..consumed];
5374
5375        assert_eq!(body, source);
5376    }
5377
5378    #[test]
5379    fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
5380        let source = "echo \"$line\" | cut -d' ' -f2-)";
5381
5382        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5383        let body = &source[..consumed];
5384
5385        assert_eq!(body, source);
5386    }
5387
5388    #[test]
5389    fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
5390        let source = "echo \"${@}\" | tr -d '[:space:]')";
5391
5392        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5393        let body = &source[..consumed];
5394
5395        assert_eq!(body, source);
5396    }
5397
5398    #[test]
5399    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
5400        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";
5401
5402        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5403        let body = &source[..consumed];
5404
5405        assert_eq!(body, source);
5406    }
5407
5408    #[test]
5409    fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
5410        let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";
5411
5412        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5413        let body = &source[..consumed];
5414
5415        assert_eq!(body, source);
5416    }
5417
5418    #[test]
5419    fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
5420        let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
5421        let mut lexer = Lexer::new(source);
5422
5423        let first = lexer.next_lexed_token().expect("expected first token");
5424        assert!(first.kind.is_word_like(), "{:?}", first.kind);
5425        assert_eq!(first.word_string().as_deref(), Some("echo"));
5426
5427        let second = lexer.next_lexed_token().expect("expected second token");
5428        assert!(second.kind.is_word_like(), "{:?}", second.kind);
5429        assert_eq!(
5430            second.word_string().as_deref(),
5431            Some("$(echo \"$(( $(printf ')') + 1 ))\")")
5432        );
5433    }
5434
5435    #[test]
5436    fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
5437        let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
5438        let start = source.find("$(").expect("expected command substitution") + 2;
5439        let consumed =
5440            scan_command_substitution_body_len(&source[start..]).expect("expected match");
5441        assert_eq!(&source[start..start + consumed], "echo $var | tr A-Z a-z)");
5442    }
5443
5444    #[test]
5445    fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
5446        let source = "echo $(echo $(basename $filename .fuzz))";
5447        let start = source.find("$(").expect("expected command substitution") + 2;
5448        let consumed =
5449            scan_command_substitution_body_len(&source[start..]).expect("expected match");
5450        assert_eq!(
5451            &source[start..start + consumed],
5452            "echo $(basename $filename .fuzz))"
5453        );
5454    }
5455
5456    #[test]
5457    fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
5458        let source = "\n       [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n    )";
5459        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5460        assert_eq!(consumed, source.len());
5461    }
5462
5463    #[test]
5464    fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
5465        let source = "'foo'bar";
5466        let mut lexer = Lexer::new(source);
5467
5468        let token = lexer.next_lexed_token().unwrap();
5469        assert_eq!(token.kind, TokenKind::LiteralWord);
5470
5471        let word = token.word().unwrap();
5472        let segments: Vec<_> = word
5473            .segments()
5474            .map(|segment| (segment.kind(), segment.as_str().to_string()))
5475            .collect();
5476
5477        assert_eq!(
5478            segments,
5479            vec![
5480                (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
5481                (LexedWordSegmentKind::Plain, "bar".to_string()),
5482            ]
5483        );
5484        assert_eq!(word.joined_text(), "foobar");
5485        assert_eq!(
5486            word.segments()
5487                .nth(1)
5488                .and_then(LexedWordSegment::span)
5489                .unwrap()
5490                .slice(source),
5491            "bar"
5492        );
5493    }
5494
5495    #[test]
5496    fn test_unquoted_command_substitution_word_keeps_source_backing() {
5497        let source = "$(printf hi)";
5498        let mut lexer = Lexer::new(source);
5499
5500        let token = lexer.next_lexed_token().unwrap();
5501        assert_eq!(token.kind, TokenKind::Word);
5502
5503        let word = token.word().unwrap();
5504        let segment = word.single_segment().unwrap();
5505        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5506        assert_eq!(segment.as_str(), source);
5507        assert_eq!(segment.span().unwrap().slice(source), source);
5508    }
5509
5510    #[test]
5511    fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
5512        let source = "${arr[$RANDOM % ${#arr[@]}]}";
5513        let mut lexer = Lexer::new(source);
5514
5515        let token = lexer.next_lexed_token().unwrap();
5516        assert_eq!(token.kind, TokenKind::Word);
5517
5518        let word = token.word().unwrap();
5519        let segment = word.single_segment().unwrap();
5520        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5521        assert_eq!(segment.as_str(), source);
5522        assert_eq!(segment.span().unwrap().slice(source), source);
5523    }
5524
5525    #[test]
5526    fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
5527        let source = "\"foo\"$(printf hi)";
5528        let mut lexer = Lexer::new(source);
5529
5530        let token = lexer.next_lexed_token().unwrap();
5531        assert_eq!(token.kind, TokenKind::Word);
5532
5533        let word = token.word().unwrap();
5534        let continuation = word.segments().nth(1).unwrap();
5535        assert_eq!(continuation.kind(), LexedWordSegmentKind::Plain);
5536        assert_eq!(continuation.as_str(), "$(printf hi)");
5537        assert_eq!(continuation.span().unwrap().slice(source), "$(printf hi)");
5538    }
5539
5540    #[test]
5541    fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
5542        let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
5543        let mut lexer = Lexer::new(source);
5544
5545        let token = lexer.next_lexed_token().unwrap();
5546        assert_eq!(token.kind, TokenKind::QuotedWord);
5547
5548        let word = token.word().unwrap();
5549        let segment = word.single_segment().unwrap();
5550        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
5551        assert_eq!(segment.as_str(), "${arr[$RANDOM % ${#arr[@]}]}");
5552        assert_eq!(
5553            segment.span().unwrap().slice(source),
5554            "${arr[$RANDOM % ${#arr[@]}]}"
5555        );
5556    }
5557
5558    #[test]
5559    fn test_ansi_c_control_escape_can_consume_quote() {
5560        let mut lexer = Lexer::new("echo $'\\c''");
5561
5562        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5563        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("\x07"));
5564        assert!(lexer.next_lexed_token().is_none());
5565    }
5566
5567    #[test]
5568    fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
5569        let source = r#"out_line="${out_line//'"'/'\"'}"
5570"#;
5571        let mut lexer = Lexer::new(source);
5572
5573        assert_next_token(
5574            &mut lexer,
5575            TokenKind::Word,
5576            Some(r#"out_line=${out_line//'"'/'"'}"#),
5577        );
5578        assert_next_token(&mut lexer, TokenKind::Newline, None);
5579        assert!(lexer.next_lexed_token().is_none());
5580    }
5581
5582    #[test]
5583    fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
5584        let source = r#"out_line="${out_line//'"'/'\"'}"
5585echo "Error: Missing python3!"
5586cat << 'EOF' > "${pywrapper}"
5587import os
5588EOF
5589"#;
5590        let mut lexer = Lexer::new(source);
5591
5592        assert_next_token(
5593            &mut lexer,
5594            TokenKind::Word,
5595            Some(r#"out_line=${out_line//'"'/'"'}"#),
5596        );
5597        assert_next_token(&mut lexer, TokenKind::Newline, None);
5598        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5599        assert_next_token(
5600            &mut lexer,
5601            TokenKind::QuotedWord,
5602            Some("Error: Missing python3!"),
5603        );
5604        assert_next_token(&mut lexer, TokenKind::Newline, None);
5605        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5606        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5607        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("EOF"));
5608        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5609        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("${pywrapper}"));
5610    }
5611
5612    #[test]
5613    fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
5614        let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
5615        let mut lexer = Lexer::new(source);
5616
5617        let token = lexer.next_lexed_token().unwrap();
5618        assert_eq!(token.kind, TokenKind::Word);
5619        assert_eq!(token.span.slice(source), "crypt=${crypt//\\\\/\\\\\\\\}");
5620        assert!(token.source_slice(source).is_none());
5621        assert_eq!(
5622            token.word_string().as_deref(),
5623            Some("crypt=${crypt//\\/\\\\}")
5624        );
5625        assert_next_token(&mut lexer, TokenKind::Newline, None);
5626        assert!(lexer.next_lexed_token().is_none());
5627    }
5628
5629    #[test]
5630    fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
5631        let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n  if true; then\n    txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n  fi\n}\n";
5632        let mut lexer = Lexer::new(source);
5633
5634        assert_next_token(
5635            &mut lexer,
5636            TokenKind::Word,
5637            Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
5638        );
5639        assert_next_token(&mut lexer, TokenKind::Newline, None);
5640        assert_next_token(&mut lexer, TokenKind::Newline, None);
5641        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5642        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5643        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5644        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
5645        assert_next_token(&mut lexer, TokenKind::Newline, None);
5646        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5647        assert_next_token(&mut lexer, TokenKind::Word, Some("true"));
5648        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5649        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5650        assert_next_token(&mut lexer, TokenKind::Newline, None);
5651        assert_next_token(
5652            &mut lexer,
5653            TokenKind::Word,
5654            Some(
5655                "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
5656            ),
5657        );
5658        assert_next_token(&mut lexer, TokenKind::Newline, None);
5659        assert_next_token(&mut lexer, TokenKind::Word, Some("fi"));
5660        assert_next_token(&mut lexer, TokenKind::Newline, None);
5661        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
5662        assert_next_token(&mut lexer, TokenKind::Newline, None);
5663        assert!(lexer.next_lexed_token().is_none());
5664    }
5665
5666    #[test]
5667    fn test_case_pattern_literal_left_brace_does_not_swallow_following_arms() {
5668        let source = "case \"$word\" in\n  {) : ;;\n  :) : ;;\nesac\n";
5669        let mut lexer = Lexer::new(source);
5670
5671        assert_next_token(&mut lexer, TokenKind::Word, Some("case"));
5672        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$word"));
5673        assert_next_token(&mut lexer, TokenKind::Word, Some("in"));
5674        assert_next_token(&mut lexer, TokenKind::Newline, None);
5675        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
5676        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5677        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5678        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
5679        assert_next_token(&mut lexer, TokenKind::Newline, None);
5680        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5681        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5682        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5683        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
5684        assert_next_token(&mut lexer, TokenKind::Newline, None);
5685        assert_next_token(&mut lexer, TokenKind::Word, Some("esac"));
5686        assert_next_token(&mut lexer, TokenKind::Newline, None);
5687        assert!(lexer.next_lexed_token().is_none());
5688    }
5689
5690    #[test]
5691    fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
5692        let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
5693        let mut lexer = Lexer::new(source);
5694
5695        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5696        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5697        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5698        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5699        assert_next_token(&mut lexer, TokenKind::And, None);
5700        assert_next_token(&mut lexer, TokenKind::Word, Some("!"));
5701        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5702        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5703        assert_next_token(&mut lexer, TokenKind::Word, Some("=~"));
5704        assert_next_token(&mut lexer, TokenKind::Word, Some("^{"));
5705        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5706        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5707        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5708        assert_next_token(&mut lexer, TokenKind::Newline, None);
5709        assert!(lexer.next_lexed_token().is_none());
5710    }
5711
5712    #[test]
5713    fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
5714        let source = "echo -{$(echo a),b}-\n";
5715        let mut lexer = Lexer::new(source);
5716
5717        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5718        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$(echo a),b}-"));
5719        assert_next_token(&mut lexer, TokenKind::Newline, None);
5720        assert!(lexer.next_lexed_token().is_none());
5721    }
5722
5723    #[test]
5724    fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
5725        let source = "echo -{$((1 + 2)),b}-\n";
5726        let mut lexer = Lexer::new(source);
5727
5728        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5729        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$((1 + 2)),b}-"));
5730        assert_next_token(&mut lexer, TokenKind::Newline, None);
5731        assert!(lexer.next_lexed_token().is_none());
5732    }
5733
5734    #[test]
5735    fn test_operators() {
5736        let mut lexer = Lexer::new("a |& b | c && d || e; f &");
5737
5738        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5739        assert_next_token(&mut lexer, TokenKind::PipeBoth, None);
5740        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5741        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5742        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5743        assert_next_token(&mut lexer, TokenKind::And, None);
5744        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5745        assert_next_token(&mut lexer, TokenKind::Or, None);
5746        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5747        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5748        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5749        assert_next_token(&mut lexer, TokenKind::Background, None);
5750        assert!(lexer.next_lexed_token().is_none());
5751    }
5752
5753    #[test]
5754    fn test_double_left_bracket_requires_separator() {
5755        let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");
5756
5757        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5758        assert_next_token(&mut lexer, TokenKind::Word, Some("foo"));
5759        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5760        assert_next_token(&mut lexer, TokenKind::Newline, None);
5761        assert_next_token(&mut lexer, TokenKind::Word, Some("[[z]"));
5762        assert_next_token(&mut lexer, TokenKind::Newline, None);
5763        assert!(lexer.next_lexed_token().is_none());
5764    }
5765
5766    #[test]
5767    fn test_redirects() {
5768        let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");
5769
5770        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5771        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5772        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5773        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5774        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5775        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5776        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5777        assert_next_token(&mut lexer, TokenKind::RedirectFdAppend, None);
5778        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5779        let token = lexer.next_lexed_token().unwrap();
5780        assert_eq!(token.kind, TokenKind::Clobber);
5781        assert_eq!(token.fd_value(), Some(2));
5782        assert_eq!(token_text(&token, lexer.input), None);
5783        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5784        assert_next_token(&mut lexer, TokenKind::RedirectIn, None);
5785        assert_next_token(&mut lexer, TokenKind::Word, Some("g"));
5786        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5787        assert_next_token(&mut lexer, TokenKind::Word, Some("h"));
5788        assert_next_token(&mut lexer, TokenKind::HereString, None);
5789        assert_next_token(&mut lexer, TokenKind::Word, Some("i"));
5790        assert_next_token(&mut lexer, TokenKind::RedirectBothAppend, None);
5791        assert_next_token(&mut lexer, TokenKind::Word, Some("j"));
5792        assert_next_token(&mut lexer, TokenKind::RedirectReadWrite, None);
5793        assert_next_token(&mut lexer, TokenKind::Word, Some("k"));
5794    }
5795
5796    #[test]
5797    fn test_comment() {
5798        let mut lexer = Lexer::new("echo hello # this is a comment\necho world");
5799
5800        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5801        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5802        assert_next_token(&mut lexer, TokenKind::Newline, None);
5803        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5804        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5805    }
5806
5807    #[test]
5808    fn test_comment_token_with_span() {
5809        let mut lexer = Lexer::new("# lead\necho hi # tail");
5810
5811        let comment = lexer.next_lexed_token_with_comments().unwrap();
5812        assert_eq!(comment.kind, TokenKind::Comment);
5813        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
5814        assert_eq!(comment.span.start.line, 1);
5815        assert_eq!(comment.span.start.column, 1);
5816        assert_eq!(comment.span.end.line, 1);
5817        assert_eq!(comment.span.end.column, 7);
5818
5819        assert_next_token(&mut lexer, TokenKind::Newline, None);
5820        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5821        assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));
5822
5823        let inline = lexer.next_lexed_token_with_comments().unwrap();
5824        assert_eq!(inline.kind, TokenKind::Comment);
5825        assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
5826        assert_eq!(inline.span.start.line, 2);
5827        assert_eq!(inline.span.start.column, 9);
5828    }
5829
5830    #[test]
5831    fn test_comment_token_preserves_hash_boundaries() {
5832        let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");
5833
5834        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5835        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
5836        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
5837        assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
5838        assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
5839        assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
5840        assert!(lexer.next_lexed_token_with_comments().is_none());
5841    }
5842
5843    #[test]
5844    fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5845        let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5846
5847        let mut saw_comment = false;
5848        while let Some(token) = lexer.next_lexed_token_with_comments() {
5849            if token.kind == TokenKind::Comment {
5850                saw_comment = true;
5851                break;
5852            }
5853        }
5854
5855        assert!(
5856            !saw_comment,
5857            "zsh inline glob controls inside [[ ]] should not lex as comments"
5858        );
5859    }
5860
5861    #[test]
5862    fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5863        let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5864
5865        let mut saw_comment = false;
5866        while let Some(token) = lexer.next_lexed_token_with_comments() {
5867            if token.kind == TokenKind::Comment {
5868                saw_comment = true;
5869                break;
5870            }
5871        }
5872
5873        assert!(
5874            !saw_comment,
5875            "zsh arithmetic char literals inside (( )) should not lex as comments"
5876        );
5877    }
5878
5879    #[test]
5880    fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
5881        let mut lexer = Lexer::new(
5882            "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
5883        );
5884
5885        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5886        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5887        assert_next_token(
5888            &mut lexer,
5889            TokenKind::LiteralWord,
5890            Some("\\e]133;C;cmdline_url=%s\\a"),
5891        );
5892        assert_next_token(
5893            &mut lexer,
5894            TokenKind::QuotedWord,
5895            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
5896        );
5897        assert_next_token(&mut lexer, TokenKind::Newline, None);
5898    }
5899
5900    #[test]
5901    fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
5902        let mut lexer = Lexer::new(
5903            "() {\n  builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
5904        );
5905
5906        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5907        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5908        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
5909        assert_next_token(&mut lexer, TokenKind::Newline, None);
5910        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5911        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5912        assert_next_token(
5913            &mut lexer,
5914            TokenKind::LiteralWord,
5915            Some("\\e]133;C;cmdline_url=%s\\a"),
5916        );
5917        assert_next_token(
5918            &mut lexer,
5919            TokenKind::QuotedWord,
5920            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
5921        );
5922        assert_next_token(&mut lexer, TokenKind::Newline, None);
5923        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
5924        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
5925        assert_next_token(&mut lexer, TokenKind::Newline, None);
5926    }
5927
5928    #[test]
5929    fn test_variable_words() {
5930        let mut lexer = Lexer::new("echo $HOME $USER");
5931
5932        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5933        assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
5934        assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
5935        assert!(lexer.next_lexed_token().is_none());
5936    }
5937
5938    #[test]
5939    fn test_pipeline_tokens() {
5940        let mut lexer = Lexer::new("echo hello | cat");
5941
5942        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5943        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5944        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5945        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5946        assert!(lexer.next_lexed_token().is_none());
5947    }
5948
5949    #[test]
5950    fn test_read_heredoc() {
5951        // Simulate state after reading "cat <<EOF" - positioned at newline before content
5952        let mut lexer = Lexer::new("\nhello\nworld\nEOF");
5953        let content = lexer.read_heredoc("EOF", false);
5954        assert_eq!(content.content, "hello\nworld\n");
5955    }
5956
5957    #[test]
5958    fn test_read_heredoc_single_line() {
5959        let mut lexer = Lexer::new("\ntest\nEOF");
5960        let content = lexer.read_heredoc("EOF", false);
5961        assert_eq!(content.content, "test\n");
5962    }
5963
5964    #[test]
5965    fn test_read_heredoc_full_scenario() {
5966        // Full scenario: "cat <<EOF\nhello\nworld\nEOF"
5967        let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
5968
5969        // Parser would read these tokens
5970        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5971        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5972        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5973
5974        // Now read heredoc content
5975        let content = lexer.read_heredoc("EOF", false);
5976        assert_eq!(content.content, "hello\nworld\n");
5977    }
5978
5979    #[test]
5980    fn test_read_heredoc_with_redirect() {
5981        // Rest-of-line (> file.txt) is re-injected into the lexer buffer
5982        let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
5983        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5984        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5985        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5986        let content = lexer.read_heredoc("EOF", false);
5987        assert_eq!(content.content, "hello\n");
5988        // The redirect tokens are now available from the lexer
5989        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5990        assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
5991    }
5992
5993    #[test]
5994    fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
5995        let source = "cat <<EOF | grep hello \\\n  | sort \\\n  > out.txt\nhello\nEOF\n";
5996        let mut lexer = Lexer::new(source);
5997
5998        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5999        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6000        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6001
6002        let heredoc = lexer.read_heredoc("EOF", false);
6003        assert_eq!(heredoc.content, "hello\n");
6004
6005        assert_next_token(&mut lexer, TokenKind::Pipe, None);
6006        assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
6007        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
6008        assert_next_token(&mut lexer, TokenKind::Pipe, None);
6009        assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
6010        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
6011        assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
6012    }
6013
6014    #[test]
6015    fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
6016        let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
6017        let mut lexer = Lexer::new(source);
6018
6019        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6020        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6021        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6022
6023        let heredoc = lexer.read_heredoc("EOF", false);
6024        assert_eq!(heredoc.content, "1\n2\n3\n");
6025    }
6026
6027    #[test]
6028    fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
6029        let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
6030        let mut lexer = Lexer::new(source);
6031
6032        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6033        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6034        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6035
6036        let heredoc = lexer.read_heredoc("EOF", false);
6037        assert_eq!(heredoc.content, "body\n");
6038    }
6039
6040    #[test]
6041    fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
6042        let source = "cat <<EOF # note \\\nbody\nEOF\n";
6043        let mut lexer = Lexer::new(source);
6044
6045        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6046        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6047        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6048
6049        let heredoc = lexer.read_heredoc("EOF", false);
6050        assert_eq!(heredoc.content, "body\n");
6051    }
6052
6053    #[test]
6054    fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
6055        let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
6056        let mut lexer = Lexer::new(source);
6057
6058        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6059        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6060        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6061        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6062
6063        let heredoc = lexer.read_heredoc("EOF", false);
6064        assert_eq!(heredoc.content, "body\n");
6065
6066        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6067    }
6068
6069    #[test]
6070    fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
6071        let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
6072        let mut lexer = Lexer::new(source);
6073
6074        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6075        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6076        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6077
6078        let heredoc = lexer.read_heredoc("EOF", false);
6079        assert_eq!(heredoc.content, "1\n");
6080
6081        assert_next_token(&mut lexer, TokenKind::Pipe, None);
6082        assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
6083    }
6084
6085    #[test]
6086    fn test_read_heredoc_with_redirect_preserves_following_spans() {
6087        let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
6088        let mut lexer = Lexer::new(source);
6089
6090        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6091        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6092        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6093
6094        let heredoc = lexer.read_heredoc("EOF", false);
6095        assert_eq!(heredoc.content, "hello\n");
6096
6097        let redirect = lexer.next_lexed_token_with_comments().unwrap();
6098        assert_eq!(redirect.kind, TokenKind::RedirectOut);
6099        assert_eq!(redirect.span.slice(source), ">");
6100
6101        let target = lexer.next_lexed_token_with_comments().unwrap();
6102        assert_eq!(target.kind, TokenKind::Word);
6103        assert_eq!(
6104            token_text(&target, lexer.input).as_deref(),
6105            Some("file.txt")
6106        );
6107        assert_eq!(target.span.slice(source), "file.txt");
6108
6109        let newline = lexer.next_lexed_token_with_comments().unwrap();
6110        assert_eq!(newline.kind, TokenKind::Newline);
6111        assert_eq!(newline.span.slice(source), "\n");
6112
6113        let comment = lexer.next_lexed_token_with_comments().unwrap();
6114        assert_eq!(comment.kind, TokenKind::Comment);
6115        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
6116        assert_eq!(comment.span.slice(source), "# done");
6117    }
6118
6119    #[test]
6120    fn test_comment_with_unicode() {
6121        // Comment containing multi-byte UTF-8 characters
6122        let source = "# café résumé\necho ok";
6123        let mut lexer = Lexer::new(source);
6124
6125        let comment = lexer.next_lexed_token_with_comments().unwrap();
6126        assert_eq!(comment.kind, TokenKind::Comment);
6127        assert_eq!(
6128            token_text(&comment, lexer.input).as_deref(),
6129            Some(" café résumé")
6130        );
6131        // Span should cover exactly the comment bytes (including #)
6132        let start = comment.span.start.offset;
6133        let end = comment.span.end.offset;
6134        assert_eq!(start, 0);
6135        assert_eq!(&source[start..end], "# café résumé");
6136        assert!(source.is_char_boundary(start));
6137        assert!(source.is_char_boundary(end));
6138
6139        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6140        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
6141    }
6142
6143    #[test]
6144    fn test_comment_with_cjk_characters() {
6145        // CJK characters are 3-byte UTF-8; offsets must land on char boundaries
6146        let source = "# 你好世界\necho ok";
6147        let mut lexer = Lexer::new(source);
6148
6149        let comment = lexer.next_lexed_token_with_comments().unwrap();
6150        assert_eq!(comment.kind, TokenKind::Comment);
6151        assert_eq!(
6152            token_text(&comment, lexer.input).as_deref(),
6153            Some(" 你好世界")
6154        );
6155        let start = comment.span.start.offset;
6156        let end = comment.span.end.offset;
6157        assert_eq!(&source[start..end], "# 你好世界");
6158        assert!(source.is_char_boundary(start));
6159        assert!(source.is_char_boundary(end));
6160    }
6161
6162    #[test]
6163    fn test_heredoc_with_comments_inside() {
6164        // Comments inside heredoc body should NOT appear as comment tokens
6165        let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
6166        let mut lexer = Lexer::new(source);
6167
6168        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6169        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6170        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6171
6172        let heredoc = lexer.read_heredoc("EOF", false);
6173        assert_eq!(heredoc.content, "# not a comment\nreal line\n");
6174
6175        // After heredoc, replayed line termination should appear before
6176        // tokens from following source lines.
6177        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6178        let comment = lexer.next_lexed_token_with_comments().unwrap();
6179        assert_eq!(comment.kind, TokenKind::Comment);
6180        assert_eq!(
6181            token_text(&comment, lexer.input).as_deref(),
6182            Some(" real comment")
6183        );
6184    }
6185
6186    #[test]
6187    fn test_heredoc_with_hash_in_variable() {
6188        // ${var#pattern} inside heredoc should not produce comment tokens
6189        let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
6190        let mut lexer = Lexer::new(source);
6191
6192        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6193        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6194        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6195
6196        let heredoc = lexer.read_heredoc("EOF", false);
6197        assert_eq!(heredoc.content, "val=${x#prefix}\n");
6198    }
6199
6200    #[test]
6201    fn test_heredoc_span_does_not_leak() {
6202        // Heredoc content span must be within source bounds and must not
6203        // overlap with content before or after.
6204        let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
6205        let mut lexer = Lexer::new(source);
6206
6207        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6208        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6209        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6210
6211        let heredoc = lexer.read_heredoc("EOF", false);
6212        let start = heredoc.content_span.start.offset;
6213        let end = heredoc.content_span.end.offset;
6214        assert!(
6215            end <= source.len(),
6216            "heredoc span end ({end}) exceeds source length ({})",
6217            source.len()
6218        );
6219        assert_eq!(&source[start..end], "hello\nworld\n");
6220
6221        // Tokens after heredoc should still parse correctly
6222        assert_next_token(&mut lexer, TokenKind::Newline, None);
6223        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6224        assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
6225    }
6226
6227    #[test]
6228    fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
6229        let source = "\
6230cat <<\\_ACEOF
6231Use these variables to override the choices made by `configure' or to help
6232it to find libraries and programs with nonstandard names/locations.
6233_ACEOF
6234ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
6235ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
6236";
6237        let mut lexer = Lexer::new(source);
6238
6239        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6240        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6241        let delimiter = lexer.next_lexed_token_with_comments().unwrap();
6242        assert_eq!(delimiter.kind, TokenKind::Word);
6243        assert_eq!(delimiter.span.slice(source), "\\_ACEOF");
6244
6245        let heredoc = lexer.read_heredoc("_ACEOF", false);
6246        assert_eq!(
6247            heredoc.content,
6248            "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
6249        );
6250
6251        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6252
6253        let first = lexer.next_lexed_token_with_comments().unwrap();
6254        assert_eq!(first.kind, TokenKind::Word);
6255        assert_eq!(
6256            first.span.slice(source),
6257            "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
6258        );
6259        let first_segments = first
6260            .word()
6261            .unwrap()
6262            .segments()
6263            .map(|segment| {
6264                (
6265                    segment.kind(),
6266                    segment.as_str().to_string(),
6267                    segment.span().map(|span| span.slice(source).to_string()),
6268                )
6269            })
6270            .collect::<Vec<_>>();
6271        assert_eq!(
6272            first_segments,
6273            vec![
6274                (
6275                    LexedWordSegmentKind::Plain,
6276                    "ac_dir_suffix=/".to_string(),
6277                    Some("ac_dir_suffix=/".to_string()),
6278                ),
6279                (
6280                    LexedWordSegmentKind::Plain,
6281                    "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
6282                    Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
6283                ),
6284            ]
6285        );
6286
6287        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6288
6289        let second = lexer.next_lexed_token_with_comments().unwrap();
6290        assert_eq!(second.kind, TokenKind::Word);
6291        assert_eq!(
6292            second.span.slice(source),
6293            "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6294        );
6295        let second_segments = second
6296            .word()
6297            .unwrap()
6298            .segments()
6299            .map(|segment| {
6300                (
6301                    segment.kind(),
6302                    segment.as_str().to_string(),
6303                    segment.span().map(|span| span.slice(source).to_string()),
6304                )
6305            })
6306            .collect::<Vec<_>>();
6307        assert_eq!(
6308            second_segments,
6309            vec![
6310                (
6311                    LexedWordSegmentKind::Plain,
6312                    "ac_top_builddir_sub=".to_string(),
6313                    Some("ac_top_builddir_sub=".to_string()),
6314                ),
6315                (
6316                    LexedWordSegmentKind::Plain,
6317                    "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
6318                    Some(
6319                        "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6320                            .to_string(),
6321                    ),
6322                ),
6323            ]
6324        );
6325    }
6326
6327    #[test]
6328    fn test_heredoc_with_unicode_content() {
6329        // Heredoc containing multi-byte characters; spans must be on char boundaries
6330        let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
6331        let mut lexer = Lexer::new(source);
6332
6333        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6334        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6335        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6336
6337        let heredoc = lexer.read_heredoc("EOF", false);
6338        assert_eq!(heredoc.content, "# 你好\ncafé\n");
6339        let start = heredoc.content_span.start.offset;
6340        let end = heredoc.content_span.end.offset;
6341        assert!(
6342            source.is_char_boundary(start),
6343            "heredoc span start ({start}) not on char boundary"
6344        );
6345        assert!(
6346            source.is_char_boundary(end),
6347            "heredoc span end ({end}) not on char boundary"
6348        );
6349        assert_eq!(&source[start..end], "# 你好\ncafé\n");
6350    }
6351
6352    #[test]
6353    fn test_assoc_compound_assignment() {
6354        // declare -A m=([foo]="bar" [baz]="qux") should keep the compound
6355        // assignment as a single Word token
6356        let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6357        assert_next_token(
6358            &mut lexer,
6359            TokenKind::Word,
6360            Some(r#"m=([foo]="bar" [baz]="qux")"#),
6361        );
6362        assert!(lexer.next_lexed_token().is_none());
6363    }
6364
6365    #[test]
6366    fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6367        let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6368        let mut lexer = Lexer::new(source);
6369
6370        let token = lexer.next_lexed_token().unwrap();
6371        assert_eq!(token.kind, TokenKind::Word);
6372        assert_eq!(token.span.slice(source), source);
6373        assert!(lexer.next_lexed_token().is_none());
6374    }
6375
6376    #[test]
6377    fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6378        let source = r#"foo\_bar@(baz|qux)"#;
6379        let mut lexer = Lexer::new(source);
6380
6381        let token = lexer.next_lexed_token().unwrap();
6382        assert_eq!(token.kind, TokenKind::Word);
6383        assert_eq!(token.span.slice(source), source);
6384        assert!(lexer.next_lexed_token().is_none());
6385    }
6386
6387    #[test]
6388    fn test_indexed_array_not_collapsed() {
6389        // arr=("hello world") should NOT be collapsed — parser handles
6390        // quoted elements token-by-token via the LeftParen path
6391        let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6392        assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6393        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6394    }
6395
6396    #[test]
6397    fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
6398        let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
6399        let mut lexer = Lexer::new(source);
6400
6401        assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
6402        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6403
6404        let token = lexer.next_lexed_token().unwrap();
6405        assert_eq!(token.kind, TokenKind::Word);
6406        assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);
6407
6408        let word = token.word().unwrap();
6409        let segments: Vec<_> = word
6410            .segments()
6411            .map(|segment| (segment.kind(), segment.as_str().to_string()))
6412            .collect();
6413        assert_eq!(
6414            segments,
6415            vec![
6416                (
6417                    LexedWordSegmentKind::DoubleQuoted,
6418                    "$plugin_dir".to_string()
6419                ),
6420                (LexedWordSegmentKind::Plain, "/*".to_string()),
6421                (LexedWordSegmentKind::Plain, "(:t)".to_string()),
6422            ]
6423        );
6424
6425        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6426        assert!(lexer.next_lexed_token().is_none());
6427    }
6428
6429    #[test]
6430    fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
6431        let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
6432        let mut lexer = Lexer::new(source);
6433
6434        assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
6435        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6436
6437        let token = lexer.next_lexed_token().unwrap();
6438        assert_eq!(token.kind, TokenKind::Word);
6439        assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);
6440
6441        let word = token.word().unwrap();
6442        let segments: Vec<_> = word
6443            .segments()
6444            .map(|segment| (segment.kind(), segment.as_str().to_string()))
6445            .collect();
6446        assert_eq!(
6447            segments,
6448            vec![
6449                (
6450                    LexedWordSegmentKind::DoubleQuoted,
6451                    "$__GREP_CACHE_FILE".to_string()
6452                ),
6453                (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
6454            ]
6455        );
6456
6457        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6458        assert!(lexer.next_lexed_token().is_none());
6459    }
6460
6461    #[test]
6462    fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6463        let source = r#"$dir/${~pats}(N)"#;
6464        let mut lexer = Lexer::new(source);
6465
6466        let token = lexer.next_lexed_token().unwrap();
6467        assert_eq!(token.kind, TokenKind::Word);
6468        assert_eq!(token.span.slice(source), source);
6469        assert!(lexer.next_lexed_token().is_none());
6470    }
6471
6472    #[test]
6473    fn test_dollar_word_does_not_absorb_function_parens() {
6474        let mut lexer = Lexer::new(r#"foo$x()"#);
6475
6476        assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6477        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6478        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6479        assert!(lexer.next_lexed_token().is_none());
6480    }
6481
6482    #[test]
6483    fn test_command_substitution_word_does_not_absorb_function_parens() {
6484        let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6485
6486        assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6487        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6488        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6489        assert!(lexer.next_lexed_token().is_none());
6490    }
6491
6492    /// Regression test for fuzz crash: single digit at EOF should not panic
6493    /// (crash-13c5f6f887a11b2296d67f9857975d63b205ac4b)
6494    #[test]
6495    fn test_digit_at_eof_no_panic() {
6496        // A lone digit with no following redirect operator must not panic
6497        let mut lexer = Lexer::new("2");
6498        let token = lexer.next_lexed_token();
6499        assert!(token.is_some());
6500    }
6501
6502    /// Issue #599: Nested ${...} inside unquoted ${...} must be a single token.
6503    #[test]
6504    fn test_nested_brace_expansion_single_token() {
6505        // ${arr[${#arr[@]} - 1]} should be ONE word token, not split at inner }
6506        let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6507        assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6508        // No more tokens — everything was consumed
6509        assert!(lexer.next_lexed_token().is_none());
6510    }
6511
6512    /// Simple ${var} still works after brace depth change.
6513    #[test]
6514    fn test_simple_brace_expansion_unchanged() {
6515        let mut lexer = Lexer::new("${foo}");
6516        assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6517        assert!(lexer.next_lexed_token().is_none());
6518    }
6519
6520    #[test]
6521    fn test_nvm_fixture_lexes_without_stalling() {
6522        let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6523        let mut lexer = Lexer::new(input);
6524        let mut tokens = 0usize;
6525
6526        while lexer.next_lexed_token().is_some() {
6527            tokens += 1;
6528            assert!(
6529                tokens < 100_000,
6530                "lexer should continue making progress on the nvm fixture"
6531            );
6532        }
6533
6534        assert!(tokens > 0, "nvm fixture should produce at least one token");
6535    }
6536
6537    #[test]
6538    fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
6539        let input = concat!(
6540            "case \"${_input_type:-}\" in\n",
6541            "  html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
6542            "  org)  _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
6543            "esac\n",
6544        );
6545
6546        assert_non_newline_tokens_stay_on_one_line(input);
6547
6548        let mut lexer = Lexer::new(input);
6549        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6550            .map(|token| (token.kind, token_text(&token, input)))
6551            .collect::<Vec<_>>();
6552        assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
6553        assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
6554    }
6555
6556    #[test]
6557    fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6558        let input = concat!(
6559            "case $2 in\n",
6560            "  cygwin*) bin='cygwin32/bin' ;|\n",
6561            "esac\n",
6562        );
6563
6564        let mut lexer = Lexer::new(input);
6565        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6566            .map(|token| (token.kind, token_text(&token, input)))
6567            .collect::<Vec<_>>();
6568
6569        assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6570        assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6571        assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6572    }
6573
6574    #[test]
6575    fn test_inline_if_with_array_append_stays_line_local() {
6576        let input = concat!(
6577            "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
6578            "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
6579        );
6580
6581        assert_non_newline_tokens_stay_on_one_line(input);
6582    }
6583
6584    #[test]
6585    fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6586        let source = "unsetopt interactive_comments\n#literal\n";
6587        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6588        let mut lexer = Lexer::with_profile(source, &profile);
6589
6590        assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6591        assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6592        assert_next_token(&mut lexer, TokenKind::Newline, None);
6593        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6594    }
6595
6596    #[test]
6597    fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
6598        let source = "setopt rc_quotes\nprint 'a''b'\n";
6599        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6600        let mut lexer = Lexer::with_profile(source, &profile);
6601
6602        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6603        assert_next_token(&mut lexer, TokenKind::Word, Some("rc_quotes"));
6604        assert_next_token(&mut lexer, TokenKind::Newline, None);
6605        assert_next_token(&mut lexer, TokenKind::Word, Some("print"));
6606        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("a'b"));
6607    }
6608
6609    #[test]
6610    fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
6611        let source = "setopt ignore_braces\n{ echo }\n";
6612        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6613        let mut lexer = Lexer::with_profile(source, &profile);
6614
6615        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6616        assert_next_token(&mut lexer, TokenKind::Word, Some("ignore_braces"));
6617        assert_next_token(&mut lexer, TokenKind::Newline, None);
6618        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
6619        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6620        assert_next_token(&mut lexer, TokenKind::Word, Some("}"));
6621    }
6622
6623    #[test]
6624    fn test_heredoc_in_arithmetic_fuzz_crash() {
6625        // Regression test: the fuzzer found that heredoc re-injection inside
6626        // arithmetic context can push self.offset past self.input.len(),
6627        // causing a panic in read_unquoted_segment's borrowed-slice path.
6628        let data: &[u8] = &[
6629            35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
6630            40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
6631            33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
6632            119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
6633            0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
6634            119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
6635            122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
6636            122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
6637            122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
6638            49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
6639            119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
6640            122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
6641            61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
6642            32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
6643            122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
6644            39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
6645            122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
6646            32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
6647            0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
6648        ];
6649        let input = std::str::from_utf8(data).unwrap();
6650        let script = format!("echo $(({input}))\n");
6651        // Must not panic.
6652        let _ = crate::parser::Parser::new(&script).parse();
6653    }
6654}
shuck_parser/parser/lexer.rs

shuck_parser/parser/
lexer.rs