// shuck_parser/parser/lexer.rs
//! Lexer for bash scripts
//!
//! Tokenizes input into a stream of tokens with source position tracking.
5use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Compact bit-flag set attached to each lexed token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Bit set when some segment's text had to be decoded ("cooked") rather
    /// than borrowed straight from the source.
    const COOKED_TEXT: u8 = 1 << 0;
    /// Bit set on tokens the lexer injected rather than read from the input.
    const SYNTHETIC: u8 = 1 << 1;

    /// Flag set with no bits enabled.
    const fn empty() -> Self {
        Self(0)
    }

    /// Flag set with only the cooked-text bit enabled.
    const fn cooked_text() -> Self {
        Self(Self::COOKED_TEXT)
    }

    /// Copy of `self` with the synthetic bit additionally enabled.
    pub(crate) const fn with_synthetic(self) -> Self {
        Self(self.0 | Self::SYNTHETIC)
    }

    /// Whether the cooked-text bit is enabled.
    pub(crate) const fn has_cooked_text(self) -> bool {
        (self.0 & Self::COOKED_TEXT) != 0
    }

    /// Whether the synthetic bit is enabled.
    pub(crate) const fn is_synthetic(self) -> bool {
        (self.0 & Self::SYNTHETIC) != 0
    }
}
40
41#[derive(Debug, Clone, PartialEq, Eq)]
42pub(crate) enum TokenText<'a> {
43    Borrowed(&'a str),
44    Shared {
45        source: Arc<str>,
46        range: Range<usize>,
47    },
48    Owned(String),
49}
50
51impl TokenText<'_> {
52    pub(crate) fn as_str(&self) -> &str {
53        match self {
54            Self::Borrowed(text) => text,
55            Self::Shared { source, range } => &source[range.clone()],
56            Self::Owned(text) => text,
57        }
58    }
59
60    fn into_owned<'a>(self) -> TokenText<'a> {
61        match self {
62            Self::Borrowed(text) => TokenText::Owned(text.to_string()),
63            Self::Shared { source, range } => TokenText::Shared { source, range },
64            Self::Owned(text) => TokenText::Owned(text),
65        }
66    }
67
68    fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
69        match self {
70            Self::Borrowed(text) => span
71                .filter(|span| span.end.offset <= source.len())
72                .map_or_else(
73                    || TokenText::Owned(text.to_string()),
74                    |span| TokenText::Shared {
75                        source: Arc::clone(source),
76                        range: span.start.offset..span.end.offset,
77                    },
78                ),
79            Self::Shared { source, range } => TokenText::Shared { source, range },
80            Self::Owned(text) => TokenText::Owned(text),
81        }
82    }
83}
84
/// Classification of one segment inside a lexed shell word.
///
/// Stored on each `LexedWordSegment` to record which quoting form produced
/// the segment's text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexedWordSegmentKind {
    /// Unquoted or otherwise plain text.
    Plain,
    /// Text from a single-quoted string.
    SingleQuoted,
    /// Text from a `$'...'` string.
    DollarSingleQuoted,
    /// Text from a double-quoted string.
    DoubleQuoted,
    /// Text from a `$"..."` string.
    DollarDoubleQuoted,
    /// Text composed from multiple lexical forms.
    Composite,
}
101
102/// One segment of a lexed shell word, optionally backed by source text.
103#[derive(Debug, Clone, PartialEq, Eq)]
104pub struct LexedWordSegment<'a> {
105    kind: LexedWordSegmentKind,
106    text: TokenText<'a>,
107    span: Option<Span>,
108    wrapper_span: Option<Span>,
109}
110
111impl<'a> LexedWordSegment<'a> {
112    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
113        Self {
114            kind,
115            text: TokenText::Borrowed(text),
116            span,
117            wrapper_span: span,
118        }
119    }
120
121    fn borrowed_with_spans(
122        kind: LexedWordSegmentKind,
123        text: &'a str,
124        span: Option<Span>,
125        wrapper_span: Option<Span>,
126    ) -> Self {
127        Self {
128            kind,
129            text: TokenText::Borrowed(text),
130            span,
131            wrapper_span,
132        }
133    }
134
135    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
136        Self {
137            kind,
138            text: TokenText::Owned(text),
139            span: None,
140            wrapper_span: None,
141        }
142    }
143
144    fn owned_with_spans(
145        kind: LexedWordSegmentKind,
146        text: String,
147        span: Option<Span>,
148        wrapper_span: Option<Span>,
149    ) -> Self {
150        Self {
151            kind,
152            text: TokenText::Owned(text),
153            span,
154            wrapper_span,
155        }
156    }
157
158    /// Borrow this segment's cooked text.
159    pub fn as_str(&self) -> &str {
160        self.text.as_str()
161    }
162
163    pub(crate) const fn text_is_source_backed(&self) -> bool {
164        matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
165    }
166
167    /// Return the lexical classification of this segment.
168    pub const fn kind(&self) -> LexedWordSegmentKind {
169        self.kind
170    }
171
172    /// Return the span of the inner text, if it is tracked.
173    pub const fn span(&self) -> Option<Span> {
174        self.span
175    }
176
177    /// Return the span including surrounding quoting syntax when available.
178    pub fn wrapper_span(&self) -> Option<Span> {
179        self.wrapper_span.or(self.span)
180    }
181
182    fn rebased(mut self, base: Position) -> Self {
183        self.span = self.span.map(|span| span.rebased(base));
184        self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
185        self
186    }
187
188    fn into_owned<'b>(self) -> LexedWordSegment<'b> {
189        LexedWordSegment {
190            kind: self.kind,
191            text: self.text.into_owned(),
192            span: self.span,
193            wrapper_span: self.wrapper_span,
194        }
195    }
196
197    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
198        LexedWordSegment {
199            kind: self.kind,
200            text: self.text.into_shared(source, self.span),
201            span: self.span,
202            wrapper_span: self.wrapper_span,
203        }
204    }
205}
206
207/// Source-backed representation of a shell word produced by the lexer.
208#[derive(Debug, Clone, PartialEq, Eq)]
209pub struct LexedWord<'a> {
210    primary_segment: LexedWordSegment<'a>,
211    trailing_segments: Vec<LexedWordSegment<'a>>,
212}
213
214impl<'a> LexedWord<'a> {
215    fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
216        Self {
217            primary_segment,
218            trailing_segments: Vec::new(),
219        }
220    }
221
222    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
223        Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
224    }
225
226    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
227        Self::from_segment(LexedWordSegment::owned(kind, text))
228    }
229
230    fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
231        self.trailing_segments.push(segment);
232    }
233
234    /// Iterate over the segments that make up this word.
235    pub fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
236        std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
237    }
238
239    /// Return the word text when it is represented by a single segment.
240    pub fn text(&self) -> Option<&str> {
241        self.single_segment().map(LexedWordSegment::as_str)
242    }
243
244    /// Join all segments into an owned string.
245    pub fn joined_text(&self) -> String {
246        let mut text = String::new();
247        for segment in self.segments() {
248            text.push_str(segment.as_str());
249        }
250        text
251    }
252
253    /// Return the only segment when this word is not segmented.
254    pub fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
255        self.trailing_segments
256            .is_empty()
257            .then_some(&self.primary_segment)
258    }
259
260    fn has_cooked_text(&self) -> bool {
261        self.segments()
262            .any(|segment| matches!(segment.text, TokenText::Owned(_)))
263    }
264
265    fn rebased(mut self, base: Position) -> Self {
266        self.primary_segment = self.primary_segment.rebased(base);
267        self.trailing_segments = self
268            .trailing_segments
269            .into_iter()
270            .map(|segment| segment.rebased(base))
271            .collect();
272        self
273    }
274
275    fn into_owned<'b>(self) -> LexedWord<'b> {
276        LexedWord {
277            primary_segment: self.primary_segment.into_owned(),
278            trailing_segments: self
279                .trailing_segments
280                .into_iter()
281                .map(LexedWordSegment::into_owned)
282                .collect(),
283        }
284    }
285
286    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
287        LexedWord {
288            primary_segment: self.primary_segment.into_shared(source),
289            trailing_segments: self
290                .trailing_segments
291                .into_iter()
292                .map(|segment| segment.into_shared(source))
293                .collect(),
294        }
295    }
296}
297
/// Kinds of lexer error payloads attached to `TokenKind::Error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexerErrorKind {
    /// Unterminated `$()` command substitution.
    CommandSubstitution,
    /// Unterminated backtick command substitution.
    BacktickSubstitution,
    /// Unterminated single-quoted string.
    SingleQuote,
    /// Unterminated double-quoted string.
    DoubleQuote,
}

impl LexerErrorKind {
    /// Human-readable message for this lexer error kind.
    pub const fn message(self) -> &'static str {
        match self {
            LexerErrorKind::CommandSubstitution => "unterminated command substitution",
            LexerErrorKind::BacktickSubstitution => "unterminated backtick substitution",
            LexerErrorKind::SingleQuote => "unterminated single quote",
            LexerErrorKind::DoubleQuote => "unterminated double quote",
        }
    }
}
322
/// Kind-specific data carried by a `LexedToken`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenPayload<'a> {
    /// No payload (operators, separators, comments).
    None,
    /// Word text, possibly split into quoting-classified segments.
    Word(LexedWord<'a>),
    /// Single file descriptor for a redirection.
    Fd(i32),
    /// `(source_fd, target_fd)` pair for descriptor redirections.
    FdPair(i32, i32),
    /// Lexer error detail attached to `TokenKind::Error`.
    Error(LexerErrorKind),
}
331
332/// Token produced by the shell lexer.
333#[derive(Debug, Clone, PartialEq, Eq)]
334pub struct LexedToken<'a> {
335    /// Token kind used by the parser.
336    pub kind: TokenKind,
337    /// Source span covered by the token.
338    pub span: Span,
339    pub(crate) flags: TokenFlags,
340    payload: TokenPayload<'a>,
341}
342
343impl<'a> LexedToken<'a> {
344    fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
345        match kind {
346            TokenKind::Word => LexedWordSegmentKind::Plain,
347            TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
348            TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
349            _ => LexedWordSegmentKind::Composite,
350        }
351    }
352
353    pub(crate) fn punctuation(kind: TokenKind) -> Self {
354        Self {
355            kind,
356            span: Span::new(),
357            flags: TokenFlags::empty(),
358            payload: TokenPayload::None,
359        }
360    }
361
362    fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
363        let flags = if word.has_cooked_text() {
364            TokenFlags::cooked_text()
365        } else {
366            TokenFlags::empty()
367        };
368
369        Self {
370            kind,
371            span: Span::new(),
372            flags,
373            payload: TokenPayload::Word(word),
374        }
375    }
376
377    fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
378        Self::with_word_payload(
379            kind,
380            LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
381        )
382    }
383
384    fn owned_word(kind: TokenKind, text: String) -> Self {
385        Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
386    }
387
388    fn comment() -> Self {
389        Self {
390            kind: TokenKind::Comment,
391            span: Span::new(),
392            flags: TokenFlags::empty(),
393            payload: TokenPayload::None,
394        }
395    }
396
397    fn fd(kind: TokenKind, fd: i32) -> Self {
398        Self {
399            kind,
400            span: Span::new(),
401            flags: TokenFlags::empty(),
402            payload: TokenPayload::Fd(fd),
403        }
404    }
405
406    fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
407        Self {
408            kind,
409            span: Span::new(),
410            flags: TokenFlags::empty(),
411            payload: TokenPayload::FdPair(src_fd, dst_fd),
412        }
413    }
414
415    fn error(kind: LexerErrorKind) -> Self {
416        Self {
417            kind: TokenKind::Error,
418            span: Span::new(),
419            flags: TokenFlags::empty(),
420            payload: TokenPayload::Error(kind),
421        }
422    }
423
424    pub(crate) fn with_span(mut self, span: Span) -> Self {
425        self.span = span;
426        self
427    }
428
429    pub(crate) fn rebased(mut self, base: Position) -> Self {
430        self.span = self.span.rebased(base);
431        self.payload = match self.payload {
432            TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
433            payload => payload,
434        };
435        self
436    }
437
438    pub(crate) fn with_synthetic_flag(mut self) -> Self {
439        self.flags = self.flags.with_synthetic();
440        self
441    }
442
443    pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
444        let payload = match self.payload {
445            TokenPayload::None => TokenPayload::None,
446            TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
447            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
448            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
449            TokenPayload::Error(kind) => TokenPayload::Error(kind),
450        };
451
452        LexedToken {
453            kind: self.kind,
454            span: self.span,
455            flags: self.flags,
456            payload,
457        }
458    }
459
460    pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
461        let payload = match self.payload {
462            TokenPayload::None => TokenPayload::None,
463            TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
464            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
465            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
466            TokenPayload::Error(kind) => TokenPayload::Error(kind),
467        };
468
469        LexedToken {
470            kind: self.kind,
471            span: self.span,
472            flags: self.flags,
473            payload,
474        }
475    }
476
477    /// Borrow the token text when it is a single-segment word token.
478    pub fn word_text(&self) -> Option<&str> {
479        self.kind
480            .is_word_like()
481            .then_some(())
482            .and_then(|_| match &self.payload {
483                TokenPayload::Word(word) => word.text(),
484                _ => None,
485            })
486    }
487
488    /// Return an owned string containing the token's word text.
489    pub fn word_string(&self) -> Option<String> {
490        self.kind
491            .is_word_like()
492            .then_some(())
493            .and_then(|_| match &self.payload {
494                TokenPayload::Word(word) => Some(word.joined_text()),
495                _ => None,
496            })
497    }
498
499    /// Borrow the structured word payload for word-like tokens.
500    pub fn word(&self) -> Option<&LexedWord<'a>> {
501        match &self.payload {
502            TokenPayload::Word(word) => Some(word),
503            _ => None,
504        }
505    }
506
507    /// Borrow the original source slice when the token is source-backed and uncooked.
508    pub fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
509        if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
510            return None;
511        }
512
513        (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
514            .then(|| &source[self.span.start.offset..self.span.end.offset])
515    }
516
517    /// Return the file-descriptor payload for redirection tokens that carry one.
518    pub fn fd_value(&self) -> Option<i32> {
519        match self.payload {
520            TokenPayload::Fd(fd) => Some(fd),
521            _ => None,
522        }
523    }
524
525    /// Return the `(source_fd, target_fd)` payload for descriptor-pair redirections.
526    pub fn fd_pair_value(&self) -> Option<(i32, i32)> {
527        match self.payload {
528            TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
529            _ => None,
530        }
531    }
532
533    /// Return the lexer error payload when this token represents `TokenKind::Error`.
534    pub fn error_kind(&self) -> Option<LexerErrorKind> {
535        match self.payload {
536            TokenPayload::Error(kind) => Some(kind),
537            _ => None,
538        }
539    }
540}
541
/// Result of reading a heredoc body from the source.
#[derive(Debug, Clone, PartialEq)]
pub struct HeredocRead {
    /// Decoded heredoc content.
    // NOTE(review): decoding rules depend on the heredoc reader (not visible
    // here) — confirm before documenting them.
    pub content: String,
    /// Source span covering the heredoc body content.
    pub content_span: Span,
}
550
/// Maximum nesting depth for command substitution in the lexer.
/// Prevents stack overflow from deeply nested $() patterns.
/// Used as the default by `Lexer::new` and `Lexer::with_profile`.
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
554
555#[derive(Clone, Debug)]
556struct Cursor<'a> {
557    rest: &'a str,
558}
559
560impl<'a> Cursor<'a> {
561    fn new(source: &'a str) -> Self {
562        Self { rest: source }
563    }
564
565    fn first(&self) -> Option<char> {
566        self.rest.chars().next()
567    }
568
569    fn second(&self) -> Option<char> {
570        let mut chars = self.rest.chars();
571        chars.next()?;
572        chars.next()
573    }
574
575    fn third(&self) -> Option<char> {
576        let mut chars = self.rest.chars();
577        chars.next()?;
578        chars.next()?;
579        chars.next()
580    }
581
582    fn bump(&mut self) -> Option<char> {
583        let ch = self.first()?;
584        self.rest = &self.rest[ch.len_utf8()..];
585        Some(ch)
586    }
587
588    fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
589        let start = self.rest;
590        let mut end = 0;
591
592        for ch in start.chars() {
593            if !predicate(ch) {
594                break;
595            }
596            end += ch.len_utf8();
597        }
598
599        self.rest = &start[end..];
600        &start[..end]
601    }
602
603    fn rest(&self) -> &'a str {
604        self.rest
605    }
606
607    fn skip_bytes(&mut self, count: usize) {
608        self.rest = &self.rest[count..];
609    }
610
611    fn find_byte(&self, byte: u8) -> Option<usize> {
612        memchr(byte, self.rest.as_bytes())
613    }
614}
615
/// Translates byte offsets into line/column `Position`s.
#[derive(Clone, Debug)]
struct PositionMap<'a> {
    /// Full input text the offsets index into.
    source: &'a str,
    /// Byte offset of the start of each line (always contains 0).
    line_starts: Vec<usize>,
    /// Most recently computed position, reused for cheap forward scans.
    cached: Position,
}
622
/// Instrumentation counters collected when the `benchmarking` feature is on.
#[cfg(feature = "benchmarking")]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct LexerBenchmarkCounters {
    /// Number of calls to `Lexer::current_position`.
    pub(crate) current_position_calls: u64,
}
628
629impl<'a> PositionMap<'a> {
630    fn new(source: &'a str) -> Self {
631        let mut line_starts =
632            Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
633        line_starts.push(0);
634        line_starts.extend(
635            source
636                .bytes()
637                .enumerate()
638                .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
639        );
640
641        Self {
642            source,
643            line_starts,
644            cached: Position::new(),
645        }
646    }
647
648    fn position(&mut self, offset: usize) -> Position {
649        if offset == self.cached.offset {
650            return self.cached;
651        }
652
653        let position = if offset > self.cached.offset && offset <= self.source.len() {
654            Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
655        } else {
656            self.position_uncached(offset)
657        };
658        self.cached = position;
659        position
660    }
661
662    fn position_uncached(&self, offset: usize) -> Position {
663        let offset = offset.min(self.source.len());
664        let line_index = self
665            .line_starts
666            .partition_point(|start| *start <= offset)
667            .saturating_sub(1);
668        let line_start = self.line_starts[line_index];
669        let line_text = &self.source[line_start..offset];
670        let column = if line_text.is_ascii() {
671            line_text.len() + 1
672        } else {
673            line_text.chars().count() + 1
674        };
675
676        Position {
677            line: line_index + 1,
678            column,
679            offset,
680        }
681    }
682
683    fn advance_from(mut position: Position, text: &str) -> Position {
684        position.offset += text.len();
685        let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
686        if newline_count == 0 {
687            position.column += if text.is_ascii() {
688                text.len()
689            } else {
690                text.chars().count()
691            };
692            return position;
693        }
694
695        position.line += newline_count;
696        let tail_start = memrchr(b'\n', text.as_bytes())
697            .map(|index| index + 1)
698            .unwrap_or_default();
699        let tail = &text[tail_start..];
700        position.column = if tail.is_ascii() {
701            tail.len() + 1
702        } else {
703            tail.chars().count() + 1
704        };
705        position
706    }
707}
708
/// Lexer for bash scripts.
#[derive(Clone)]
pub struct Lexer<'a> {
    #[allow(dead_code)] // Stored for error reporting in future
    input: &'a str,
    /// Current byte offset in the input/reinjected stream.
    offset: usize,
    /// Cursor over the not-yet-consumed source text.
    cursor: Cursor<'a>,
    /// Offset -> line/column translator with a forward-scan cache.
    position_map: PositionMap<'a>,
    /// Buffer for re-injected characters (e.g., rest-of-line after heredoc delimiter).
    /// Consumed before `cursor`.
    reinject_buf: VecDeque<char>,
    /// Cursor byte offset to restore once a heredoc replay buffer is exhausted.
    reinject_resume_offset: Option<usize>,
    /// Maximum allowed nesting depth for command substitution
    max_subst_depth: usize,
    /// Zsh option state in effect before any timeline entry applies.
    initial_zsh_options: Option<ZshOptionState>,
    /// Precomputed zsh option changes keyed by source offset (zsh dialect only).
    zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    /// Index of the next not-yet-applied timeline entry.
    zsh_timeline_index: usize,
    #[cfg(feature = "benchmarking")]
    benchmark_counters: Option<LexerBenchmarkCounters>,
}
731
732impl<'a> Lexer<'a> {
    /// Create a new lexer for the given input.
    ///
    /// Uses the default substitution-depth limit and a native Bash profile.
    pub fn new(input: &'a str) -> Self {
        Self::with_max_subst_depth_and_profile(
            input,
            DEFAULT_MAX_SUBST_DEPTH,
            &ShellProfile::native(super::ShellDialect::Bash),
            None,
        )
    }

    /// Create a new lexer with a custom max substitution nesting depth.
    /// Limits recursion in read_command_subst_into().
    pub fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
        Self::with_max_subst_depth_and_profile(
            input,
            max_depth,
            &ShellProfile::native(super::ShellDialect::Bash),
            None,
        )
    }
753
    /// Create a new lexer using the provided shell profile.
    pub fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
        // An option timeline is only built for the zsh dialect; other
        // dialects lex with `None` and fall back to the profile's options.
        let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
            .then(|| ZshOptionTimeline::build(input, shell_profile))
            .flatten()
            .map(Arc::new);
        Self::with_max_subst_depth_and_profile(
            input,
            DEFAULT_MAX_SUBST_DEPTH,
            shell_profile,
            zsh_timeline,
        )
    }
767
    /// Create a lexer with an explicit depth limit, profile, and optional
    /// precomputed zsh option timeline. All other constructors funnel here.
    pub(crate) fn with_max_subst_depth_and_profile(
        input: &'a str,
        max_depth: usize,
        shell_profile: &ShellProfile,
        zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    ) -> Self {
        Self {
            input,
            offset: 0,
            cursor: Cursor::new(input),
            position_map: PositionMap::new(input),
            reinject_buf: VecDeque::new(),
            reinject_resume_offset: None,
            max_subst_depth: max_depth,
            initial_zsh_options: shell_profile.zsh_options().cloned(),
            zsh_timeline,
            zsh_timeline_index: 0,
            #[cfg(feature = "benchmarking")]
            benchmark_counters: None,
        }
    }
789
    /// Get the current position in the input.
    ///
    /// Takes `&self`, so it bypasses the mutable position cache; see
    /// `current_position` for the cached variant used on the hot path.
    pub fn position(&self) -> Position {
        self.position_map.position_uncached(self.offset)
    }

    /// Cached position lookup for the current offset (hot lexing path).
    fn current_position(&mut self) -> Position {
        #[cfg(feature = "benchmarking")]
        self.maybe_record_current_position_call();
        self.position_map.position(self.offset)
    }
800
    /// Turn on call counting for benchmark instrumentation.
    #[cfg(feature = "benchmarking")]
    pub(crate) fn enable_benchmark_counters(&mut self) {
        self.benchmark_counters = Some(LexerBenchmarkCounters::default());
    }

    /// Snapshot of the counters; zeroed if instrumentation was never enabled.
    #[cfg(feature = "benchmarking")]
    pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
        self.benchmark_counters.unwrap_or_default()
    }

    /// Count one `current_position` call when instrumentation is enabled.
    #[cfg(feature = "benchmarking")]
    fn maybe_record_current_position_call(&mut self) {
        if let Some(counters) = &mut self.benchmark_counters {
            counters.current_position_calls += 1;
        }
    }
817
    /// Once the reinjection buffer drains, restore `offset` to the saved
    /// source offset so lexing resumes at the right place in the input.
    fn sync_offset_to_cursor(&mut self) {
        if self.reinject_buf.is_empty()
            && let Some(offset) = self.reinject_resume_offset.take()
        {
            self.offset = offset;
        }
    }
825
    /// Get the next token kind from the input without decoding or materializing
    /// any payload text.
    ///
    /// Returns `None` once `next_lexed_token` produces no further tokens.
    pub fn next_token_kind(&mut self) -> Option<TokenKind> {
        self.next_lexed_token().map(|token| token.kind)
    }
831
832    fn peek_char(&mut self) -> Option<char> {
833        self.sync_offset_to_cursor();
834        if let Some(&ch) = self.reinject_buf.front() {
835            Some(ch)
836        } else {
837            self.cursor.first()
838        }
839    }
840
841    fn advance(&mut self) -> Option<char> {
842        self.sync_offset_to_cursor();
843        let ch = if !self.reinject_buf.is_empty() {
844            self.reinject_buf.pop_front()
845        } else {
846            self.cursor.bump()
847        };
848        if let Some(c) = ch {
849            self.offset += c.len_utf8();
850        }
851        ch
852    }
853
    /// All upcoming characters: reinjected ones first, then the raw source.
    fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
        self.reinject_buf
            .iter()
            .copied()
            .chain(self.cursor.rest().chars())
    }
860
    /// Second character of the combined (reinjected + source) stream.
    fn second_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            // No reinjected chars: second char of the source.
            0 => self.cursor.second(),
            // One reinjected char: the source's first char is second overall.
            1 => self.cursor.first(),
            // Two or more: still inside the reinjection buffer.
            _ => self.reinject_buf.get(1).copied(),
        }
    }

    /// Third character of the combined (reinjected + source) stream.
    fn third_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.third(),
            1 => self.cursor.second(),
            2 => self.cursor.first(),
            _ => self.reinject_buf.get(2).copied(),
        }
    }

    /// Fourth character of the combined (reinjected + source) stream.
    fn fourth_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.rest().chars().nth(3),
            1 => self.cursor.third(),
            2 => self.cursor.second(),
            3 => self.cursor.first(),
            _ => self.reinject_buf.get(3).copied(),
        }
    }
887
    /// Consume `byte_len` bytes directly from the source cursor.
    /// Must not be called while reinjected characters are pending.
    fn consume_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.sync_offset_to_cursor();
        self.offset += byte_len;
        self.cursor.skip_bytes(byte_len);
    }

    /// Advance only `offset` for bytes the caller already scanned past.
    /// Must not be called while reinjected characters are pending.
    fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.offset += byte_len;
    }

    /// Consume `count` ASCII characters, falling back to per-char advance
    /// while reinjected characters are pending.
    fn consume_ascii_chars(&mut self, count: usize) {
        if self.reinject_buf.is_empty() {
            // Fast path: each ASCII char is exactly one byte.
            self.consume_source_bytes(count);
            return;
        }

        for _ in 0..count {
            self.advance();
        }
    }
910
    /// Length in bytes of the run of spaces/tabs at the cursor.
    fn source_horizontal_whitespace_len(&self) -> usize {
        self.cursor
            .rest()
            .as_bytes()
            .iter()
            .take_while(|byte| matches!(**byte, b' ' | b'\t'))
            .count()
    }

    /// Length in bytes of the run of plain ASCII word bytes at the cursor.
    fn source_ascii_plain_word_len(&self) -> usize {
        self.cursor
            .rest()
            .as_bytes()
            .iter()
            .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
            .count()
    }
928
929    fn find_double_quote_special(source: &str) -> Option<usize> {
930        source
931            .as_bytes()
932            .iter()
933            .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
934    }
935
    /// Lazily start capturing: if no capture buffer exists yet, seed it with
    /// the raw source text between `start` and `end`.
    fn ensure_capture_from_source(
        &self,
        capture: &mut Option<String>,
        start: Position,
        end: Position,
    ) {
        if capture.is_none() {
            *capture = Some(self.input[start.offset..end.offset].to_string());
        }
    }
946
947    fn push_capture_char(capture: &mut Option<String>, ch: char) {
948        if let Some(text) = capture.as_mut() {
949            text.push(ch);
950        }
951    }
952
953    fn push_capture_str(capture: &mut Option<String>, text: &str) {
954        if let Some(current) = capture.as_mut() {
955            current.push_str(text);
956        }
957    }
958
    /// Zsh option state in effect at the current offset.
    ///
    /// Walks the precomputed option timeline forward (the index only ever
    /// advances, so each entry is visited at most once per lex) and returns
    /// the last state whose offset is <= the current offset; before the
    /// first entry, the profile's initial options apply.
    fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
        if let Some(timeline) = self.zsh_timeline.as_ref() {
            while self.zsh_timeline_index < timeline.entries.len()
                && timeline.entries[self.zsh_timeline_index].offset <= self.offset
            {
                self.zsh_timeline_index += 1;
            }
            return if self.zsh_timeline_index == 0 {
                self.initial_zsh_options.as_ref()
            } else {
                Some(&timeline.entries[self.zsh_timeline_index - 1].state)
            };
        }

        self.initial_zsh_options.as_ref()
    }
975
    /// Whether `#` comments are recognized at the current offset.
    /// Comments are only disabled when zsh options definitely turn them off;
    /// non-zsh profiles (no option state) always allow them.
    fn comments_enabled(&mut self) -> bool {
        !self
            .current_zsh_options()
            .is_some_and(|options| options.interactive_comments.is_definitely_off())
    }

    /// Whether the zsh `rc_quotes` option is definitely on at this offset.
    fn rc_quotes_enabled(&mut self) -> bool {
        self.current_zsh_options()
            .is_some_and(|options| options.rc_quotes.is_definitely_on())
    }

    /// Whether the zsh `ignore_braces` option is definitely on at this offset.
    fn ignore_braces_enabled(&mut self) -> bool {
        self.current_zsh_options()
            .is_some_and(|options| options.ignore_braces.is_definitely_on())
    }

    /// Whether close braces are ignored at this offset
    /// (`ignore_braces` implies `ignore_close_braces`).
    fn ignore_close_braces_enabled(&mut self) -> bool {
        self.current_zsh_options().is_some_and(|options| {
            options.ignore_braces.is_definitely_on()
                || options.ignore_close_braces.is_definitely_on()
        })
    }
998
999    fn should_treat_hash_as_word_char(&mut self) -> bool {
1000        if !self.comments_enabled() {
1001            return true;
1002        }
1003        self.reinject_buf.is_empty()
1004            && (self
1005                .input
1006                .get(..self.offset)
1007                .and_then(|prefix| prefix.chars().next_back())
1008                .is_some_and(|prev| {
1009                    !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
1010                })
1011                || self.is_inside_unclosed_double_paren_on_line())
1012    }
1013
1014    fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
1015        capture
1016            .as_deref()
1017            .unwrap_or(&self.input[start.offset..self.offset])
1018    }
1019
1020    fn current_word_surface_is_single_char(
1021        &self,
1022        start: Position,
1023        capture: &Option<String>,
1024        target: char,
1025    ) -> bool {
1026        let text = self.current_word_text(start, capture);
1027        if !text.contains('\x00') {
1028            let mut encoded = [0; 4];
1029            return text == target.encode_utf8(&mut encoded);
1030        }
1031
1032        let mut chars = text.chars().filter(|&ch| ch != '\x00');
1033        matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
1034    }
1035
1036    fn current_word_surface_last_char<'b>(
1037        &'b self,
1038        start: Position,
1039        capture: &'b Option<String>,
1040    ) -> Option<char> {
1041        self.current_word_text(start, capture)
1042            .chars()
1043            .rev()
1044            .find(|&ch| ch != '\x00')
1045    }
1046
1047    fn current_word_surface_ends_with_char(
1048        &self,
1049        start: Position,
1050        capture: &Option<String>,
1051        target: char,
1052    ) -> bool {
1053        self.current_word_surface_last_char(start, capture) == Some(target)
1054    }
1055
1056    fn current_word_surface_ends_with_extglob_prefix(
1057        &self,
1058        start: Position,
1059        capture: &Option<String>,
1060    ) -> bool {
1061        self.current_word_surface_last_char(start, capture)
1062            .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
1063    }
1064
1065    /// Get the next source-backed token from the input, skipping line comments.
1066    pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
1067        self.skip_whitespace();
1068        let start = self.current_position();
1069        let token = self.next_lexed_token_inner(false)?;
1070        let end = self.current_position();
1071        Some(token.with_span(Span::from_positions(start, end)))
1072    }
1073
1074    /// Get the next source-backed token from the input, preserving line comments.
1075    pub fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
1076        self.skip_whitespace();
1077        let start = self.current_position();
1078        let token = self.next_lexed_token_inner(true)?;
1079        let end = self.current_position();
1080        Some(token.with_span(Span::from_positions(start, end)))
1081    }
1082
    /// Internal: get next token without recording position (called after whitespace skip)
    ///
    /// Single-character dispatch on the lookahead char. Multi-character
    /// operators are resolved longest-first via `second_char`/`third_char`
    /// lookahead before anything is consumed, so e.g. `;;&` wins over `;;`,
    /// which wins over `;`.
    fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
        let ch = self.peek_char()?;

        match ch {
            '\n' => {
                self.consume_ascii_chars(1);
                Some(LexedToken::punctuation(TokenKind::Newline))
            }
            // Command separators and case-clause terminators: ;;& ;; ;| ;& ;
            ';' => {
                if self.second_char() == Some(';') {
                    if self.third_char() == Some('&') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp)) // ;;&
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemicolon)) // ;;
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiPipe)) // ;|
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiAmp)) // ;&
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Semicolon))
                }
            }
            // Pipeline operators: || |& |
            '|' => {
                if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Or))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::PipeBoth))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Pipe))
                }
            }
            // Logical-and, combined redirects, and background forms:
            // && &>> &> &| &! &
            '&' => {
                if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::And))
                } else if self.second_char() == Some('>') {
                    if self.third_char() == Some('>') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::RedirectBoth))
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
                } else if self.second_char() == Some('!') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundBang))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Background))
                }
            }
            // Output redirects: >>| >> >| >( >& >
            '>' => {
                if self.second_char() == Some('>') {
                    // `>>|` consumes the trailing `|` too but is still
                    // reported as a plain append token.
                    if self.third_char() == Some('|') {
                        self.consume_ascii_chars(3);
                    } else {
                        self.consume_ascii_chars(2);
                    }
                    Some(LexedToken::punctuation(TokenKind::RedirectAppend))
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Clobber))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupOutput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectOut))
                }
            }
            // Input redirects and heredocs: <<< <<- << <> <( <& <
            '<' => {
                if self.second_char() == Some('<') {
                    if self.third_char() == Some('<') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereString))
                    } else if self.third_char() == Some('-') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereDocStrip))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::HereDoc))
                    }
                } else if self.second_char() == Some('>') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupInput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectIn))
                }
            }
            // (( for arithmetic vs ( for subshell/grouping.
            '(' => {
                if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::LeftParen))
                }
            }
            ')' => {
                if self.second_char() == Some(')') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RightParen))
                }
            }
            // `{` is ambiguous: zsh ignore_braces word text, brace expansion,
            // brace group opener, or plain literal word text.
            '{' => {
                let start = self.current_position();
                if self.ignore_braces_enabled() {
                    // ignore_braces: `{` is never structural; lone `{` is a
                    // one-char word, otherwise it starts a normal word.
                    self.consume_ascii_chars(1);
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
                        }
                        _ => self.read_word_starting_with("{", start),
                    }
                } else if self.looks_like_brace_expansion() {
                    // Look ahead to see if this is a brace expansion like {a,b,c} or {1..5}
                    // vs a brace group like { cmd; }
                    // Note: { must be followed by space/newline to be a brace group
                    self.read_brace_expansion_word()
                } else if self.is_brace_group_start() {
                    self.advance();
                    Some(LexedToken::punctuation(TokenKind::LeftBrace))
                } else if self.brace_literal_starts_case_pattern_delimiter() {
                    self.read_word_starting_with("{", start)
                } else {
                    self.read_brace_literal_word()
                }
            }
            '}' => {
                self.consume_ascii_chars(1);
                if self.ignore_close_braces_enabled() {
                    // zsh ignore_braces/ignore_close_braces: `}` is word text.
                    Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
                } else {
                    Some(LexedToken::punctuation(TokenKind::RightBrace))
                }
            }
            // `[[` (followed by whitespace/EOF) opens a conditional; any other
            // `[` is word text (test command name or glob bracket).
            '[' => {
                let start = self.current_position();
                self.consume_ascii_chars(1);
                if self.peek_char() == Some('[')
                    && matches!(
                        self.second_char(),
                        Some(' ') | Some('\t') | Some('\n') | None
                    )
                {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
                } else {
                    // `[` can start the test command when followed by whitespace, or it can be
                    // ordinary word text such as a glob bracket expression.
                    //
                    // Read the whole token with the normal word scanner so forms like `[[z]`,
                    // `[hello"]"`, and `[+(])` stay attached to one word instead of producing
                    // structural tokens mid-word.
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
                        }
                        _ => self.read_word_starting_with("[", start),
                    }
                }
            }
            // `]]` closes a conditional; a lone `]` is ordinary word text.
            ']' => {
                if self.second_char() == Some(']') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
                }
            }
            '\'' => self.read_single_quoted_string(),
            '"' => self.read_double_quoted_string(),
            '#' => {
                // `#` glued to a word (or under zsh no-comments) is word text;
                // note the '#' is NOT consumed here — the word scanner picks
                // it up starting from `start`.
                if self.should_treat_hash_as_word_char() {
                    let start = self.current_position();
                    return self.read_word_starting_with("#", start);
                }
                if preserve_comments {
                    self.read_comment();
                    Some(LexedToken::comment())
                } else {
                    // Skip the comment and lex whatever follows it instead.
                    self.skip_comment();
                    self.next_lexed_token_inner(false)
                }
            }
            // Handle file descriptor redirects like 2> or 2>&1
            '0'..='9' => self.read_word_or_fd_redirect(),
            _ => self.read_word(),
        }
    }
1300
    /// Skip horizontal whitespace (spaces, tabs) and backslash-newline line
    /// continuations between tokens. Real newlines are NOT skipped — they are
    /// significant and lexed as tokens.
    fn skip_whitespace(&mut self) {
        while let Some(ch) = self.peek_char() {
            if self.reinject_buf.is_empty() {
                // Fast path: reading straight from the source buffer, so whole
                // byte runs can be consumed at once instead of char-by-char.
                let whitespace_len = self.source_horizontal_whitespace_len();
                if whitespace_len > 0 {
                    self.consume_source_bytes(whitespace_len);
                    continue;
                }

                // A backslash-newline pair is also inter-token whitespace.
                if self.cursor.rest().starts_with("\\\n") {
                    self.consume_source_bytes(2);
                    continue;
                }
            }

            // Slow path (reinjected text, or fast path made no progress):
            // step one character at a time.
            if ch == ' ' || ch == '\t' {
                self.consume_ascii_chars(1);
            } else if ch == '\\' {
                // Check for backslash-newline (line continuation) between tokens
                if self.second_char() == Some('\n') {
                    self.consume_ascii_chars(2);
                } else {
                    break;
                }
            } else {
                break;
            }
        }
    }
1330
1331    fn skip_comment(&mut self) {
1332        if self.reinject_buf.is_empty() {
1333            let end = self
1334                .cursor
1335                .find_byte(b'\n')
1336                .unwrap_or(self.cursor.rest().len());
1337            self.consume_source_bytes(end);
1338            return;
1339        }
1340
1341        while let Some(ch) = self.peek_char() {
1342            if ch == '\n' {
1343                break;
1344            }
1345            self.advance();
1346        }
1347    }
1348
1349    fn read_comment(&mut self) {
1350        debug_assert_eq!(self.peek_char(), Some('#'));
1351
1352        if self.reinject_buf.is_empty() {
1353            let rest = self.cursor.rest();
1354            let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1355            self.consume_source_bytes(end);
1356            return;
1357        }
1358
1359        self.advance(); // consume '#'
1360
1361        while let Some(ch) = self.peek_char() {
1362            if ch == '\n' {
1363                break;
1364            }
1365            self.advance();
1366        }
1367    }
1368
1369    fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1370        if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1371            return false;
1372        }
1373
1374        let line_start = self.input[..self.offset]
1375            .rfind('\n')
1376            .map_or(0, |index| index + 1);
1377        let prefix = &self.input[line_start..self.offset];
1378        line_has_unclosed_double_paren(prefix)
1379    }
1380
    /// Check if this is a file descriptor redirect (e.g., 2>, 2>>, 2>&1)
    /// or just a regular word starting with a digit
    ///
    /// Recognized forms: `N>>` / `N>>|`, `N>|`, `N>&M`, `N>`, `N<&M`, `N<&-`,
    /// `N<>`, `N<`. `N<<` is deliberately left alone (heredocs are handled by
    /// the normal token path), as is any digit not followed by a redirect
    /// operator — both fall through to `read_word`.
    fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
        if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
            let Some(fd) = first_digit.to_digit(10) else {
                unreachable!("peeked ASCII digit should convert to a base-10 digit");
            };
            let fd = fd as i32;

            // Decide on the (operator, operator-suffix) pair after the digit.
            match (self.second_char(), self.third_char()) {
                (Some('>'), Some('>')) => {
                    // N>> append; N>>| also swallows the '|' but is reported
                    // as the same append token.
                    if self.fourth_char() == Some('|') {
                        self.consume_ascii_chars(4);
                    } else {
                        self.consume_ascii_chars(3);
                    }
                    return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
                }
                (Some('>'), Some('|')) => {
                    // N>| clobber (override noclobber).
                    self.consume_ascii_chars(3);
                    return Some(LexedToken::fd(TokenKind::Clobber, fd));
                }
                (Some('>'), Some('&')) => {
                    // N>&M duplicate output fd; the target digits follow.
                    self.consume_ascii_chars(3);

                    let mut target_str = String::with_capacity(4);
                    while let Some(c) = self.peek_char() {
                        if c.is_ascii_digit() {
                            target_str.push(c);
                            self.advance();
                        } else {
                            break;
                        }
                    }

                    if target_str.is_empty() {
                        // Bare `N>&` with no target: treat as a plain fd
                        // redirect; the target word follows as its own token.
                        return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
                    }

                    // Digits-only string, so parse can only fail on i32
                    // overflow; fall back to stdout (1) in that case.
                    let target_fd: i32 = target_str.parse().unwrap_or(1);
                    return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
                }
                (Some('>'), _) => {
                    // Plain N> output redirect.
                    self.consume_ascii_chars(2);
                    return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
                }
                (Some('<'), Some('&')) => {
                    // N<&M duplicate input fd, or N<&- to close the fd.
                    self.consume_ascii_chars(3);

                    let mut target_str = String::with_capacity(4);
                    while let Some(c) = self.peek_char() {
                        if c.is_ascii_digit() || c == '-' {
                            target_str.push(c);
                            self.advance();
                            if c == '-' {
                                // '-' terminates the target (close request).
                                break;
                            }
                        } else {
                            break;
                        }
                    }

                    if target_str == "-" {
                        return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
                    }
                    // Digits (possibly with a trailing '-'); non-numeric or
                    // overflowing targets fall back to stdin (0).
                    let target_fd: i32 = target_str.parse().unwrap_or(0);
                    return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
                }
                (Some('<'), Some('>')) => {
                    // N<> open for reading and writing.
                    self.consume_ascii_chars(3);
                    return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
                }
                // N<< heredoc: deliberately not consumed here — fall through
                // so the digit is lexed as a word and the heredoc operator
                // is tokenized separately.
                (Some('<'), Some('<')) => {}
                (Some('<'), _) => {
                    // Plain N< input redirect.
                    self.consume_ascii_chars(2);
                    return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
                }
                _ => {}
            }
        }

        // Not a fd redirect pattern, read as regular word
        self.read_word()
    }
1465
1466    fn read_word_starting_with(
1467        &mut self,
1468        _prefix: &str,
1469        start: Position,
1470    ) -> Option<LexedToken<'a>> {
1471        let segment = match self.read_unquoted_segment(start) {
1472            Ok(segment) => segment,
1473            Err(kind) => return Some(LexedToken::error(kind)),
1474        };
1475        if segment.as_str().is_empty() {
1476            return None;
1477        }
1478        let mut lexed_word = LexedWord::from_segment(segment);
1479        if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1480            return Some(LexedToken::error(kind));
1481        }
1482        Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1483    }
1484
    /// Lex a word token starting at the current position.
    ///
    /// Tries a borrowed fast path first: a run of plain ASCII word bytes taken
    /// directly from the source with no capture buffer. Falls back to the
    /// full segmented scanner (`read_complex_word`) for reinjected text,
    /// non-ASCII, quotes, expansions, and other complex forms.
    fn read_word(&mut self) -> Option<LexedToken<'a>> {
        let start = self.current_position();

        if self.reinject_buf.is_empty() {
            let ascii_len = self.source_ascii_plain_word_len();
            let chunk = if ascii_len > 0
                && self
                    .cursor
                    .rest()
                    .as_bytes()
                    .get(ascii_len)
                    .is_none_or(|byte| byte.is_ascii())
            {
                // Byte-run fast path: the ASCII run is followed by another
                // ASCII byte (or EOF), so the word cannot continue with a
                // non-ASCII word character past the run.
                self.consume_source_bytes(ascii_len);
                &self.input[start.offset..self.offset]
            } else {
                // Char-wise scan: handles non-ASCII plain word characters.
                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                self.advance_scanned_source_bytes(chunk.len());
                chunk
            };
            if !chunk.is_empty() {
                // Does the word continue past the plain chunk? It does for a
                // further word char, an expansion ($), a quote, a brace, or a
                // '(' that can attach (array assignment `name=(...)` or a
                // parenthesized suffix form).
                let continues = matches!(
                    self.peek_char(),
                    Some(next)
                        if Self::is_word_char(next)
                            || next == '$'
                            || matches!(next, '\'' | '"')
                            || next == '{'
                            || (next == '('
                                && (chunk.ends_with('=')
                                    || Self::word_can_take_parenthesized_suffix(chunk)))
                );

                if !continues {
                    // Entire word was plain ASCII source text: borrow it.
                    let end = self.current_position();
                    return Some(LexedToken::borrowed_word(
                        TokenKind::Word,
                        &self.input[start.offset..self.offset],
                        Some(Span::from_positions(start, end)),
                    ));
                }

                // '(' attachments need the complex scanner from the start of
                // the word so the whole form stays one token.
                if self.peek_char() == Some('(')
                    && (chunk.ends_with('=') || Self::word_can_take_parenthesized_suffix(chunk))
                {
                    return self.read_complex_word(start);
                }

                // Word continues: seed a segmented word with the borrowed
                // plain chunk and let the continuation scanner extend it.
                let end = self.current_position();
                return self.finish_segmented_word(LexedWord::borrowed(
                    LexedWordSegmentKind::Plain,
                    &self.input[start.offset..self.offset],
                    Some(Span::from_positions(start, end)),
                ));
            }
        }

        // Reinjected text, or no plain prefix at all: full scanner.
        self.read_complex_word(start)
    }
1544
1545    fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1546        if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1547            return Some(LexedToken::error(kind));
1548        }
1549
1550        Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1551    }
1552
1553    fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1554        if self.peek_char() == Some('$') {
1555            match self.second_char() {
1556                Some('\'') => return self.read_dollar_single_quoted_string(),
1557                Some('"') => return self.read_dollar_double_quoted_string(),
1558                _ => {}
1559            }
1560        }
1561
1562        let segment = match self.read_unquoted_segment(start) {
1563            Ok(segment) => segment,
1564            Err(kind) => return Some(LexedToken::error(kind)),
1565        };
1566
1567        if segment.as_str().is_empty() {
1568            return None;
1569        }
1570
1571        self.finish_segmented_word(LexedWord::from_segment(segment))
1572    }
1573
1574    fn read_unquoted_segment(
1575        &mut self,
1576        start: Position,
1577    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1578        let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
1579        while let Some(ch) = self.peek_char() {
1580            if ch == '"' || ch == '\'' {
1581                break;
1582            } else if ch == '$' {
1583                if matches!(self.second_char(), Some('\'') | Some('"'))
1584                    && (self.current_position().offset > start.offset
1585                        || word.as_ref().is_some_and(|word| !word.is_empty()))
1586                {
1587                    break;
1588                }
1589
1590                // Handle variable references and command substitution
1591                self.advance();
1592
1593                Self::push_capture_char(&mut word, ch); // push the '$'
1594
1595                // Check for $[ / $( / ${ forms before falling back to variable text.
1596                if self.peek_char() == Some('[') {
1597                    Self::push_capture_char(&mut word, '[');
1598                    self.advance();
1599                    if !self.read_legacy_arithmetic_into(&mut word, start) {
1600                        return Err(LexerErrorKind::CommandSubstitution);
1601                    }
1602                } else if self.peek_char() == Some('(') {
1603                    if self.second_char() == Some('(') {
1604                        if !self.read_arithmetic_expansion_into(&mut word) {
1605                            return Err(LexerErrorKind::CommandSubstitution);
1606                        }
1607                    } else {
1608                        Self::push_capture_char(&mut word, '(');
1609                        self.advance();
1610                        if !self.read_command_subst_into(&mut word) {
1611                            return Err(LexerErrorKind::CommandSubstitution);
1612                        }
1613                    }
1614                } else if self.peek_char() == Some('{') {
1615                    // ${VAR} format — track nested braces so ${a[${#b[@]}]}
1616                    // doesn't stop at the inner }.
1617                    Self::push_capture_char(&mut word, '{');
1618                    self.advance();
1619                    let _ = self.read_param_expansion_into(&mut word, start);
1620                } else {
1621                    // Check for special single-character variables ($?, $#, $@, $*, $!, $$, $-, $0-$9)
1622                    if let Some(c) = self.peek_char() {
1623                        if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
1624                            || c.is_ascii_digit()
1625                        {
1626                            Self::push_capture_char(&mut word, c);
1627                            self.advance();
1628                        } else {
1629                            // Read variable name (alphanumeric + _)
1630                            while let Some(c) = self.peek_char() {
1631                                if c.is_ascii_alphanumeric() || c == '_' {
1632                                    Self::push_capture_char(&mut word, c);
1633                                    self.advance();
1634                                } else {
1635                                    break;
1636                                }
1637                            }
1638                        }
1639                    }
1640                }
1641            } else if ch == '{' {
1642                if self.looks_like_mid_word_brace_segment() {
1643                    // Keep balanced {...} forms attached to the current word so
1644                    // plain literals like foo{bar} and brace expansions stay intact.
1645                    Self::push_capture_char(&mut word, ch);
1646                    self.advance();
1647                    self.consume_mid_word_brace_segment(&mut word);
1648                } else {
1649                    // Unmatched literal braces in regexes like ^{ should not swallow
1650                    // trailing delimiters such as ]] or then.
1651                    Self::push_capture_char(&mut word, ch);
1652                    self.advance();
1653                }
1654            } else if ch == '`' {
1655                // Preserve legacy backticks verbatim so the parser can keep the
1656                // original syntax form.
1657                let capture_end = self.current_position();
1658                self.ensure_capture_from_source(&mut word, start, capture_end);
1659                Self::push_capture_char(&mut word, ch);
1660                self.advance(); // consume opening `
1661                let mut closed = false;
1662                while let Some(c) = self.peek_char() {
1663                    Self::push_capture_char(&mut word, c);
1664                    self.advance();
1665                    if c == '`' {
1666                        closed = true;
1667                        break;
1668                    }
1669                    if c == '\\'
1670                        && let Some(next) = self.peek_char()
1671                    {
1672                        Self::push_capture_char(&mut word, next);
1673                        self.advance();
1674                    }
1675                }
1676                if !closed {
1677                    return Err(LexerErrorKind::BacktickSubstitution);
1678                }
1679            } else if ch == '\\' {
1680                let capture_end = self.current_position();
1681                self.ensure_capture_from_source(&mut word, start, capture_end);
1682                self.advance();
1683                if let Some(next) = self.peek_char() {
1684                    if next == '\n' {
1685                        // Line continuation: skip backslash + newline
1686                        self.advance();
1687                    } else {
1688                        // Escaped character: backslash quotes the next char
1689                        // (quote removal — only the literal char survives).
1690                        // Preserve source/decoded alignment with a sentinel so
1691                        // downstream word decoding keeps later spans anchored.
1692                        Self::push_capture_char(&mut word, '\x00');
1693                        Self::push_capture_char(&mut word, next);
1694                        self.advance();
1695                        if next == '{'
1696                            && self.current_word_surface_is_single_char(start, &word, '{')
1697                            && self.escaped_brace_sequence_looks_like_brace_expansion()
1698                        {
1699                            let mut depth = 1;
1700                            while let Some(c) = self.peek_char() {
1701                                Self::push_capture_char(&mut word, c);
1702                                self.advance();
1703                                match c {
1704                                    '{' => depth += 1,
1705                                    '}' => {
1706                                        depth -= 1;
1707                                        if depth == 0 {
1708                                            break;
1709                                        }
1710                                    }
1711                                    _ => {}
1712                                }
1713                            }
1714                        }
1715                    }
1716                } else {
1717                    Self::push_capture_char(&mut word, '\\');
1718                }
1719            } else if ch == '('
1720                && self.current_word_surface_ends_with_char(start, &word, '=')
1721                && self.looks_like_assoc_assign()
1722            {
1723                // Associative compound assignment: var=([k]="v" ...) — keep entire
1724                // (...) as part of word so declare -A m=([k]="v") stays one token.
1725                Self::push_capture_char(&mut word, ch);
1726                self.advance();
1727                let mut depth = 1;
1728                while let Some(c) = self.peek_char() {
1729                    Self::push_capture_char(&mut word, c);
1730                    self.advance();
1731                    match c {
1732                        '(' => depth += 1,
1733                        ')' => {
1734                            depth -= 1;
1735                            if depth == 0 {
1736                                break;
1737                            }
1738                        }
1739                        '"' => {
1740                            while let Some(qc) = self.peek_char() {
1741                                Self::push_capture_char(&mut word, qc);
1742                                self.advance();
1743                                if qc == '"' {
1744                                    break;
1745                                }
1746                                if qc == '\\'
1747                                    && let Some(esc) = self.peek_char()
1748                                {
1749                                    Self::push_capture_char(&mut word, esc);
1750                                    self.advance();
1751                                }
1752                            }
1753                        }
1754                        '\'' => {
1755                            while let Some(qc) = self.peek_char() {
1756                                Self::push_capture_char(&mut word, qc);
1757                                self.advance();
1758                                if qc == '\'' {
1759                                    break;
1760                                }
1761                            }
1762                        }
1763                        '\\' => {
1764                            if let Some(esc) = self.peek_char() {
1765                                Self::push_capture_char(&mut word, esc);
1766                                self.advance();
1767                            }
1768                        }
1769                        _ => {}
1770                    }
1771                }
1772            } else if ch == '(' && self.current_word_surface_ends_with_extglob_prefix(start, &word)
1773            {
1774                // Extglob: @(...), ?(...), *(...), +(...), !(...)
1775                // Consume through matching ) including nested parens
1776                Self::push_capture_char(&mut word, ch);
1777                self.advance();
1778                let mut depth = 1;
1779                while let Some(c) = self.peek_char() {
1780                    Self::push_capture_char(&mut word, c);
1781                    self.advance();
1782                    match c {
1783                        '(' => depth += 1,
1784                        ')' => {
1785                            depth -= 1;
1786                            if depth == 0 {
1787                                break;
1788                            }
1789                        }
1790                        '\\' => {
1791                            if let Some(esc) = self.peek_char() {
1792                                Self::push_capture_char(&mut word, esc);
1793                                self.advance();
1794                            }
1795                        }
1796                        _ => {}
1797                    }
1798                }
1799            } else if Self::is_plain_word_char(ch) {
1800                if self.reinject_buf.is_empty() {
1801                    let ascii_len = self.source_ascii_plain_word_len();
1802                    let chunk = if ascii_len > 0
1803                        && self
1804                            .cursor
1805                            .rest()
1806                            .as_bytes()
1807                            .get(ascii_len)
1808                            .is_none_or(|byte| byte.is_ascii())
1809                    {
1810                        self.consume_source_bytes(ascii_len);
1811                        &self.input[self.offset - ascii_len..self.offset]
1812                    } else {
1813                        let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1814                        self.advance_scanned_source_bytes(chunk.len());
1815                        chunk
1816                    };
1817                    Self::push_capture_str(&mut word, chunk);
1818                } else {
1819                    Self::push_capture_char(&mut word, ch);
1820                    self.advance();
1821                }
1822            } else {
1823                break;
1824            }
1825        }
1826
1827        if let Some(word) = word {
1828            let span = Some(Span::from_positions(start, self.current_position()));
1829            Ok(LexedWordSegment::owned_with_spans(
1830                LexedWordSegmentKind::Plain,
1831                word,
1832                span,
1833                span,
1834            ))
1835        } else {
1836            let end = self.current_position();
1837            Ok(LexedWordSegment::borrowed(
1838                LexedWordSegmentKind::Plain,
1839                &self.input[start.offset..self.offset],
1840                Some(Span::from_positions(start, end)),
1841            ))
1842        }
1843    }
1844
1845    fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1846        let segment = match self.read_single_quoted_segment() {
1847            Ok(segment) => segment,
1848            Err(kind) => return Some(LexedToken::error(kind)),
1849        };
1850        let mut word = LexedWord::from_segment(segment);
1851        if let Err(kind) = self.append_segmented_continuation(&mut word) {
1852            return Some(LexedToken::error(kind));
1853        }
1854
1855        Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1856    }
1857
    /// Read one `'…'` segment starting at the opening quote.
    ///
    /// Single quotes have no escapes: everything up to the next `'` is
    /// literal. When the lexer is reading straight from the source (no
    /// reinjected characters pending) and rc-quotes is disabled, the content
    /// is located with `memchr` and borrowed zero-copy from the input;
    /// otherwise characters are copied into an owned buffer. With rc-quotes
    /// enabled, a doubled `''` inside the string decodes to a literal `'`.
    ///
    /// # Errors
    ///
    /// Returns `LexerErrorKind::SingleQuote` when input ends before the
    /// closing quote.
    fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        debug_assert_eq!(self.peek_char(), Some('\''));

        let wrapper_start = self.current_position();
        self.consume_ascii_chars(1); // consume opening '
        let content_start = self.current_position();
        // Zero-copy is only sound when every char comes from the source text
        // and no rc-quotes rewriting can alter the decoded content.
        let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
        let mut content_end = content_start;
        let mut content = String::with_capacity(16);
        let mut closed = false;

        if can_borrow {
            // Fast path: jump straight to the closing quote with memchr.
            let rest = self.cursor.rest();
            if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
                self.consume_source_bytes(quote_index);
                content_end = self.current_position();
                self.consume_ascii_chars(1); // consume closing '
                closed = true;
            } else {
                // Unterminated: consume the remainder so the reported
                // position ends up at end of input.
                self.consume_source_bytes(rest.len());
            }
        }

        // Slow path (reinjected input or rc-quotes): copy char by char.
        // When the fast path already closed the quote, this loop exits
        // immediately.
        while let Some(ch) = self.peek_char() {
            if closed {
                break;
            }
            if ch == '\'' {
                // rc-quotes: '' inside single quotes is a literal quote.
                if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
                    if !can_borrow {
                        content.push('\'');
                    }
                    self.advance();
                    self.advance();
                    continue;
                }
                content_end = self.current_position();
                self.consume_ascii_chars(1); // consume closing '
                closed = true;
                break;
            }
            if !can_borrow {
                content.push(ch);
            }
            self.advance();
        }

        if !closed {
            return Err(LexerErrorKind::SingleQuote);
        }

        // Wrapper span covers the quotes; content span covers only the text
        // between them.
        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
        let content_span = Some(Span::from_positions(content_start, content_end));

        if can_borrow {
            Ok(LexedWordSegment::borrowed_with_spans(
                LexedWordSegmentKind::SingleQuoted,
                &self.input[content_start.offset..content_end.offset],
                content_span,
                wrapper_span,
            ))
        } else {
            Ok(LexedWordSegment::owned_with_spans(
                LexedWordSegmentKind::SingleQuoted,
                content,
                content_span,
                wrapper_span,
            ))
        }
    }
1928
1929    fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1930        let segment = match self.read_dollar_single_quoted_segment() {
1931            Ok(segment) => segment,
1932            Err(kind) => return Some(LexedToken::error(kind)),
1933        };
1934        let mut word = LexedWord::from_segment(segment);
1935        if let Err(kind) = self.append_segmented_continuation(&mut word) {
1936            return Some(LexedToken::error(kind));
1937        }
1938
1939        let kind = if word.single_segment().is_some() {
1940            TokenKind::LiteralWord
1941        } else {
1942            TokenKind::Word
1943        };
1944
1945        Some(LexedToken::with_word_payload(kind, word))
1946    }
1947
1948    fn read_dollar_single_quoted_segment(
1949        &mut self,
1950    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1951        debug_assert_eq!(self.peek_char(), Some('$'));
1952        debug_assert_eq!(self.second_char(), Some('\''));
1953
1954        let wrapper_start = self.current_position();
1955        self.consume_ascii_chars(2); // consume $'
1956        let content_start = self.current_position();
1957        let mut out = String::with_capacity(16);
1958
1959        while let Some(ch) = self.peek_char() {
1960            if ch == '\'' {
1961                let content_end = self.current_position();
1962                self.advance();
1963                let wrapper_span =
1964                    Some(Span::from_positions(wrapper_start, self.current_position()));
1965                let content_span = Some(Span::from_positions(content_start, content_end));
1966                return Ok(LexedWordSegment::owned_with_spans(
1967                    LexedWordSegmentKind::DollarSingleQuoted,
1968                    out,
1969                    content_span,
1970                    wrapper_span,
1971                ));
1972            }
1973
1974            if ch == '\\' {
1975                self.advance();
1976                if let Some(esc) = self.peek_char() {
1977                    self.advance();
1978                    match esc {
1979                        'n' => out.push('\n'),
1980                        't' => out.push('\t'),
1981                        'r' => out.push('\r'),
1982                        'a' => out.push('\x07'),
1983                        'b' => out.push('\x08'),
1984                        'f' => out.push('\x0C'),
1985                        'v' => out.push('\x0B'),
1986                        'e' | 'E' => out.push('\x1B'),
1987                        '\\' => out.push('\\'),
1988                        '\'' => out.push('\''),
1989                        '"' => out.push('"'),
1990                        '?' => out.push('?'),
1991                        'c' => {
1992                            if let Some(control) = self.peek_char() {
1993                                self.advance();
1994                                out.push(((control as u32 & 0x1F) as u8) as char);
1995                            } else {
1996                                out.push('\\');
1997                                out.push('c');
1998                            }
1999                        }
2000                        'x' => {
2001                            let mut hex = String::new();
2002                            for _ in 0..2 {
2003                                if let Some(h) = self.peek_char() {
2004                                    if h.is_ascii_hexdigit() {
2005                                        hex.push(h);
2006                                        self.advance();
2007                                    } else {
2008                                        break;
2009                                    }
2010                                }
2011                            }
2012                            if let Ok(val) = u8::from_str_radix(&hex, 16) {
2013                                out.push(val as char);
2014                            }
2015                        }
2016                        'u' => {
2017                            let mut hex = String::new();
2018                            for _ in 0..4 {
2019                                if let Some(h) = self.peek_char() {
2020                                    if h.is_ascii_hexdigit() {
2021                                        hex.push(h);
2022                                        self.advance();
2023                                    } else {
2024                                        break;
2025                                    }
2026                                }
2027                            }
2028                            if let Ok(val) = u32::from_str_radix(&hex, 16)
2029                                && let Some(c) = char::from_u32(val)
2030                            {
2031                                out.push(c);
2032                            }
2033                        }
2034                        'U' => {
2035                            let mut hex = String::new();
2036                            for _ in 0..8 {
2037                                if let Some(h) = self.peek_char() {
2038                                    if h.is_ascii_hexdigit() {
2039                                        hex.push(h);
2040                                        self.advance();
2041                                    } else {
2042                                        break;
2043                                    }
2044                                }
2045                            }
2046                            if let Ok(val) = u32::from_str_radix(&hex, 16)
2047                                && let Some(c) = char::from_u32(val)
2048                            {
2049                                out.push(c);
2050                            }
2051                        }
2052                        '0'..='7' => {
2053                            let mut oct = String::new();
2054                            oct.push(esc);
2055                            for _ in 0..2 {
2056                                if let Some(o) = self.peek_char() {
2057                                    if o.is_ascii_digit() && o < '8' {
2058                                        oct.push(o);
2059                                        self.advance();
2060                                    } else {
2061                                        break;
2062                                    }
2063                                }
2064                            }
2065                            if let Ok(val) = u8::from_str_radix(&oct, 8) {
2066                                out.push(val as char);
2067                            }
2068                        }
2069                        _ => {
2070                            out.push('\\');
2071                            out.push(esc);
2072                        }
2073                    }
2074                } else {
2075                    out.push('\\');
2076                }
2077                continue;
2078            }
2079
2080            out.push(ch);
2081            self.advance();
2082        }
2083
2084        Err(LexerErrorKind::SingleQuote)
2085    }
2086
2087    fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2088        let start = self.current_position();
2089
2090        if self.reinject_buf.is_empty() {
2091            let ascii_len = self.source_ascii_plain_word_len();
2092            let chunk = if ascii_len > 0
2093                && self
2094                    .cursor
2095                    .rest()
2096                    .as_bytes()
2097                    .get(ascii_len)
2098                    .is_none_or(|byte| byte.is_ascii())
2099            {
2100                self.consume_source_bytes(ascii_len);
2101                &self.input[start.offset..self.offset]
2102            } else {
2103                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
2104                self.advance_scanned_source_bytes(chunk.len());
2105                chunk
2106            };
2107            if chunk.is_empty() {
2108                return None;
2109            }
2110
2111            let end = self.current_position();
2112            return Some(LexedWordSegment::borrowed(
2113                LexedWordSegmentKind::Plain,
2114                &self.input[start.offset..self.offset],
2115                Some(Span::from_positions(start, end)),
2116            ));
2117        }
2118
2119        let ch = self.peek_char()?;
2120        if !Self::is_plain_word_char(ch) {
2121            return None;
2122        }
2123
2124        let mut text = String::with_capacity(16);
2125        while let Some(ch) = self.peek_char() {
2126            if !Self::is_plain_word_char(ch) {
2127                break;
2128            }
2129            text.push(ch);
2130            self.advance();
2131        }
2132
2133        Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
2134    }
2135
2136    /// After a closing quote, read any adjacent quoted or unquoted word chars
2137    /// into `word`. Handles concatenation like `'foo'"bar"baz`.
2138    fn append_segmented_continuation(
2139        &mut self,
2140        word: &mut LexedWord<'a>,
2141    ) -> Result<(), LexerErrorKind> {
2142        loop {
2143            match self.peek_char() {
2144                Some('\'') => {
2145                    word.push_segment(self.read_single_quoted_segment()?);
2146                }
2147                Some('"') => {
2148                    word.push_segment(self.read_double_quoted_segment()?);
2149                }
2150                Some('$') if self.second_char() == Some('\'') => {
2151                    word.push_segment(self.read_dollar_single_quoted_segment()?);
2152                }
2153                Some('$') if self.second_char() == Some('"') => {
2154                    word.push_segment(self.read_dollar_double_quoted_segment()?);
2155                }
2156                Some('(') if Self::lexed_word_can_take_parenthesized_suffix(word) => {
2157                    let Some(segment) = self.read_parenthesized_word_suffix_segment() else {
2158                        unreachable!("peeked '(' should produce a suffix segment");
2159                    };
2160                    word.push_segment(segment);
2161                }
2162                _ => {
2163                    if let Some(segment) = self.read_plain_continuation_segment() {
2164                        word.push_segment(segment);
2165                        continue;
2166                    }
2167
2168                    let start = self.current_position();
2169                    let plain = self.read_unquoted_segment(start)?;
2170                    if plain.as_str().is_empty() {
2171                        break;
2172                    }
2173                    word.push_segment(plain);
2174                }
2175            }
2176        }
2177
2178        Ok(())
2179    }
2180
2181    fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2182        debug_assert_eq!(self.peek_char(), Some('('));
2183
2184        let start = self.current_position();
2185        let mut depth = 0usize;
2186        let mut escaped = false;
2187        let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2188
2189        while let Some(ch) = self.peek_char() {
2190            if let Some(text) = text.as_mut() {
2191                text.push(ch);
2192            }
2193            self.advance();
2194
2195            if escaped {
2196                escaped = false;
2197                continue;
2198            }
2199
2200            match ch {
2201                '\\' => escaped = true,
2202                '(' => depth += 1,
2203                ')' => {
2204                    depth = depth.saturating_sub(1);
2205                    if depth == 0 {
2206                        break;
2207                    }
2208                }
2209                _ => {}
2210            }
2211        }
2212
2213        let end = self.current_position();
2214        let span = Some(Span::from_positions(start, end));
2215        if let Some(text) = text {
2216            Some(LexedWordSegment::owned_with_spans(
2217                LexedWordSegmentKind::Plain,
2218                text,
2219                span,
2220                span,
2221            ))
2222        } else {
2223            Some(LexedWordSegment::borrowed_with_spans(
2224                LexedWordSegmentKind::Plain,
2225                &self.input[start.offset..end.offset],
2226                span,
2227                span,
2228            ))
2229        }
2230    }
2231
    /// Lex a `"…"` word starting at the opening double quote.
    fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(false)
    }
2235
    /// Lex a `$"…"` word starting at the `$`.
    fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(true)
    }
2239
2240    fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2241        let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2242            Ok(segment) => segment,
2243            Err(kind) => return Some(LexedToken::error(kind)),
2244        };
2245        let mut word = LexedWord::from_segment(segment);
2246        if let Err(kind) = self.append_segmented_continuation(&mut word) {
2247            return Some(LexedToken::error(kind));
2248        }
2249
2250        let kind = if word.single_segment().is_some() {
2251            TokenKind::QuotedWord
2252        } else {
2253            TokenKind::Word
2254        };
2255
2256        Some(LexedToken::with_word_payload(kind, word))
2257    }
2258
    /// Read one `"…"` segment (no `$` prefix).
    fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(false)
    }
2262
    /// Read one `$"…"` segment starting at the `$`.
    fn read_dollar_double_quoted_segment(
        &mut self,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(true)
    }
2268
2269    fn read_double_quoted_segment_with_dollar(
2270        &mut self,
2271        dollar: bool,
2272    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2273        if dollar {
2274            debug_assert_eq!(self.peek_char(), Some('$'));
2275            debug_assert_eq!(self.second_char(), Some('"'));
2276        } else {
2277            debug_assert_eq!(self.peek_char(), Some('"'));
2278        }
2279
2280        let wrapper_start = self.current_position();
2281        if dollar {
2282            self.consume_ascii_chars(2); // consume $"
2283        } else {
2284            self.consume_ascii_chars(1); // consume opening "
2285        }
2286        let content_start = self.current_position();
2287        let mut content_end = content_start;
2288        let mut simple = self.reinject_buf.is_empty();
2289        let mut borrowable = self.reinject_buf.is_empty();
2290        let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2291        let mut closed = false;
2292
2293        while let Some(ch) = self.peek_char() {
2294            if simple {
2295                if self.reinject_buf.is_empty() {
2296                    let rest = self.cursor.rest();
2297                    match Self::find_double_quote_special(rest) {
2298                        Some(index) if index > 0 => {
2299                            self.consume_source_bytes(index);
2300                            continue;
2301                        }
2302                        None => {
2303                            self.consume_source_bytes(rest.len());
2304                            return Err(LexerErrorKind::DoubleQuote);
2305                        }
2306                        _ => {}
2307                    }
2308                }
2309
2310                match ch {
2311                    '"' => {
2312                        content_end = self.current_position();
2313                        self.consume_ascii_chars(1); // consume closing "
2314                        closed = true;
2315                        break;
2316                    }
2317                    '\\' | '$' | '`' => {
2318                        simple = false;
2319                        if ch == '`' {
2320                            borrowable = false;
2321                            let capture_end = self.current_position();
2322                            self.ensure_capture_from_source(
2323                                &mut content,
2324                                content_start,
2325                                capture_end,
2326                            );
2327                        }
2328                    }
2329                    _ => {
2330                        self.advance();
2331                    }
2332                }
2333                if simple {
2334                    continue;
2335                }
2336            }
2337
2338            match ch {
2339                '"' => {
2340                    if borrowable {
2341                        content_end = self.current_position();
2342                    }
2343                    self.consume_ascii_chars(1); // consume closing "
2344                    closed = true;
2345                    break;
2346                }
2347                '\\' => {
2348                    let escape_start = self.current_position();
2349                    self.advance();
2350                    if let Some(next) = self.peek_char() {
2351                        match next {
2352                            '\n' => {
2353                                borrowable = false;
2354                                self.ensure_capture_from_source(
2355                                    &mut content,
2356                                    content_start,
2357                                    escape_start,
2358                                );
2359                                self.advance();
2360                            }
2361                            '$' => {
2362                                borrowable = false;
2363                                self.ensure_capture_from_source(
2364                                    &mut content,
2365                                    content_start,
2366                                    escape_start,
2367                                );
2368                                Self::push_capture_char(&mut content, '\x00');
2369                                Self::push_capture_char(&mut content, '$');
2370                                self.advance();
2371                            }
2372                            '"' | '\\' | '`' => {
2373                                borrowable = false;
2374                                self.ensure_capture_from_source(
2375                                    &mut content,
2376                                    content_start,
2377                                    escape_start,
2378                                );
2379                                if next == '\\' {
2380                                    Self::push_capture_char(&mut content, '\x00');
2381                                }
2382                                if next == '`' {
2383                                    Self::push_capture_char(&mut content, '\x00');
2384                                }
2385                                Self::push_capture_char(&mut content, next);
2386                                self.advance();
2387                                content_end = self.current_position();
2388                            }
2389                            _ => {
2390                                Self::push_capture_char(&mut content, '\\');
2391                                Self::push_capture_char(&mut content, next);
2392                                self.advance();
2393                                content_end = self.current_position();
2394                            }
2395                        }
2396                    }
2397                }
2398                '$' => {
2399                    Self::push_capture_char(&mut content, '$');
2400                    self.advance();
2401                    if self.peek_char() == Some('(') {
2402                        if self.second_char() == Some('(') {
2403                            self.read_arithmetic_expansion_into(&mut content);
2404                        } else {
2405                            Self::push_capture_char(&mut content, '(');
2406                            self.advance();
2407                            self.read_command_subst_into(&mut content);
2408                        }
2409                    } else if self.peek_char() == Some('{') {
2410                        Self::push_capture_char(&mut content, '{');
2411                        self.advance();
2412                        borrowable &= self.read_param_expansion_into(&mut content, content_start);
2413                    }
2414                    content_end = self.current_position();
2415                }
2416                '`' => {
2417                    borrowable = false;
2418                    let capture_end = self.current_position();
2419                    self.ensure_capture_from_source(&mut content, content_start, capture_end);
2420                    Self::push_capture_char(&mut content, '`');
2421                    self.advance(); // consume opening `
2422                    while let Some(c) = self.peek_char() {
2423                        Self::push_capture_char(&mut content, c);
2424                        self.advance();
2425                        if c == '`' {
2426                            break;
2427                        }
2428                        if c == '\\'
2429                            && let Some(next) = self.peek_char()
2430                        {
2431                            Self::push_capture_char(&mut content, next);
2432                            self.advance();
2433                        }
2434                    }
2435                    content_end = self.current_position();
2436                }
2437                _ => {
2438                    Self::push_capture_char(&mut content, ch);
2439                    self.advance();
2440                    content_end = self.current_position();
2441                }
2442            }
2443        }
2444
2445        if !closed {
2446            return Err(LexerErrorKind::DoubleQuote);
2447        }
2448
2449        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
2450        let content_span = Some(Span::from_positions(content_start, content_end));
2451
2452        if borrowable {
2453            Ok(LexedWordSegment::borrowed_with_spans(
2454                if dollar {
2455                    LexedWordSegmentKind::DollarDoubleQuoted
2456                } else {
2457                    LexedWordSegmentKind::DoubleQuoted
2458                },
2459                &self.input[content_start.offset..content_end.offset],
2460                content_span,
2461                wrapper_span,
2462            ))
2463        } else {
2464            Ok(LexedWordSegment::owned_with_spans(
2465                if dollar {
2466                    LexedWordSegmentKind::DollarDoubleQuoted
2467                } else {
2468                    LexedWordSegmentKind::DoubleQuoted
2469                },
2470                content.unwrap_or_default(),
2471                content_span,
2472                wrapper_span,
2473            ))
2474        }
2475    }
2476
2477    fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
2478        debug_assert_eq!(self.peek_char(), Some('('));
2479        debug_assert_eq!(self.second_char(), Some('('));
2480
2481        Self::push_capture_char(content, '(');
2482        self.advance();
2483        Self::push_capture_char(content, '(');
2484        self.advance();
2485
2486        let mut depth = 2;
2487        while let Some(c) = self.peek_char() {
2488            match c {
2489                '\\' => {
2490                    Self::push_capture_char(content, c);
2491                    self.advance();
2492                    if let Some(next) = self.peek_char() {
2493                        Self::push_capture_char(content, next);
2494                        self.advance();
2495                    }
2496                }
2497                '\'' => {
2498                    Self::push_capture_char(content, c);
2499                    self.advance();
2500                    while let Some(quoted) = self.peek_char() {
2501                        Self::push_capture_char(content, quoted);
2502                        self.advance();
2503                        if quoted == '\'' {
2504                            break;
2505                        }
2506                    }
2507                }
2508                '"' => {
2509                    let mut escaped = false;
2510                    Self::push_capture_char(content, c);
2511                    self.advance();
2512                    while let Some(quoted) = self.peek_char() {
2513                        Self::push_capture_char(content, quoted);
2514                        self.advance();
2515                        if escaped {
2516                            escaped = false;
2517                            continue;
2518                        }
2519                        match quoted {
2520                            '\\' => escaped = true,
2521                            '"' => break,
2522                            _ => {}
2523                        }
2524                    }
2525                }
2526                '`' => {
2527                    let mut escaped = false;
2528                    Self::push_capture_char(content, c);
2529                    self.advance();
2530                    while let Some(quoted) = self.peek_char() {
2531                        Self::push_capture_char(content, quoted);
2532                        self.advance();
2533                        if escaped {
2534                            escaped = false;
2535                            continue;
2536                        }
2537                        match quoted {
2538                            '\\' => escaped = true,
2539                            '`' => break,
2540                            _ => {}
2541                        }
2542                    }
2543                }
2544                '(' => {
2545                    Self::push_capture_char(content, c);
2546                    self.advance();
2547                    depth += 1;
2548                }
2549                ')' => {
2550                    Self::push_capture_char(content, c);
2551                    self.advance();
2552                    depth -= 1;
2553                    if depth == 0 {
2554                        return true;
2555                    }
2556                }
2557                _ => {
2558                    Self::push_capture_char(content, c);
2559                    self.advance();
2560                }
2561            }
2562        }
2563
2564        false
2565    }
2566
2567    fn read_legacy_arithmetic_into(
2568        &mut self,
2569        content: &mut Option<String>,
2570        segment_start: Position,
2571    ) -> bool {
2572        let mut bracket_depth = 1;
2573
2574        while let Some(c) = self.peek_char() {
2575            match c {
2576                '\\' => {
2577                    Self::push_capture_char(content, c);
2578                    self.advance();
2579                    if let Some(next) = self.peek_char() {
2580                        Self::push_capture_char(content, next);
2581                        self.advance();
2582                    }
2583                }
2584                '\'' => {
2585                    Self::push_capture_char(content, c);
2586                    self.advance();
2587                    while let Some(quoted) = self.peek_char() {
2588                        Self::push_capture_char(content, quoted);
2589                        self.advance();
2590                        if quoted == '\'' {
2591                            break;
2592                        }
2593                    }
2594                }
2595                '"' => {
2596                    let mut escaped = false;
2597                    Self::push_capture_char(content, c);
2598                    self.advance();
2599                    while let Some(quoted) = self.peek_char() {
2600                        Self::push_capture_char(content, quoted);
2601                        self.advance();
2602                        if escaped {
2603                            escaped = false;
2604                            continue;
2605                        }
2606                        match quoted {
2607                            '\\' => escaped = true,
2608                            '"' => break,
2609                            _ => {}
2610                        }
2611                    }
2612                }
2613                '`' => {
2614                    let mut escaped = false;
2615                    Self::push_capture_char(content, c);
2616                    self.advance();
2617                    while let Some(quoted) = self.peek_char() {
2618                        Self::push_capture_char(content, quoted);
2619                        self.advance();
2620                        if escaped {
2621                            escaped = false;
2622                            continue;
2623                        }
2624                        match quoted {
2625                            '\\' => escaped = true,
2626                            '`' => break,
2627                            _ => {}
2628                        }
2629                    }
2630                }
2631                '[' => {
2632                    Self::push_capture_char(content, c);
2633                    self.advance();
2634                    bracket_depth += 1;
2635                }
2636                ']' => {
2637                    Self::push_capture_char(content, c);
2638                    self.advance();
2639                    bracket_depth -= 1;
2640                    if bracket_depth == 0 {
2641                        return true;
2642                    }
2643                }
2644                '$' => {
2645                    Self::push_capture_char(content, c);
2646                    self.advance();
2647                    if self.peek_char() == Some('(') {
2648                        if self.second_char() == Some('(') {
2649                            if !self.read_arithmetic_expansion_into(content) {
2650                                return false;
2651                            }
2652                        } else {
2653                            Self::push_capture_char(content, '(');
2654                            self.advance();
2655                            if !self.read_command_subst_into(content) {
2656                                return false;
2657                            }
2658                        }
2659                    } else if self.peek_char() == Some('{') {
2660                        Self::push_capture_char(content, '{');
2661                        self.advance();
2662                        if !self.read_param_expansion_into(content, segment_start) {
2663                            return false;
2664                        }
2665                    } else if self.peek_char() == Some('[') {
2666                        Self::push_capture_char(content, '[');
2667                        self.advance();
2668                        if !self.read_legacy_arithmetic_into(content, segment_start) {
2669                            return false;
2670                        }
2671                    }
2672                }
2673                _ => {
2674                    Self::push_capture_char(content, c);
2675                    self.advance();
2676                }
2677            }
2678        }
2679
2680        false
2681    }
2682
    /// Read command substitution content after `$(`, handling nested parens and quotes.
    /// Appends chars to `content` and adds the closing `)`.
    ///
    /// Thin wrapper over [`Self::read_command_subst_into_depth`] starting at
    /// nesting depth 0; the depth counter (not a parameter here) guards
    /// recursively nested `$( … $( … ) … )` against stack overflow by capping
    /// at `self.max_subst_depth`.
    ///
    /// Returns `true` when the matching `)` was consumed, `false` if input
    /// ended before the substitution closed.
    fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
        self.read_command_subst_into_depth(content, 0)
    }
2689
2690    fn flush_command_subst_keyword(
2691        current_word: &mut String,
2692        pending_case_headers: &mut usize,
2693        case_clause_depths: &mut SmallVec<[usize; 4]>,
2694        depth: usize,
2695        word_started_at_command_start: &mut bool,
2696    ) {
2697        if current_word.is_empty() {
2698            *word_started_at_command_start = false;
2699            return;
2700        }
2701
2702        match current_word.as_str() {
2703            "case" if *word_started_at_command_start => *pending_case_headers += 1,
2704            "in" if *pending_case_headers > 0 => {
2705                *pending_case_headers -= 1;
2706                case_clause_depths.push(depth);
2707            }
2708            "esac" if *word_started_at_command_start => {
2709                case_clause_depths.pop();
2710            }
2711            _ => {}
2712        }
2713
2714        current_word.clear();
2715        *word_started_at_command_start = false;
2716    }
2717
2718    fn read_command_subst_heredoc_delimiter_into(
2719        &mut self,
2720        content: &mut Option<String>,
2721    ) -> Option<String> {
2722        while let Some(ch) = self.peek_char() {
2723            if !matches!(ch, ' ' | '\t') {
2724                break;
2725            }
2726            Self::push_capture_char(content, ch);
2727            self.advance();
2728        }
2729
2730        let mut cooked = String::new();
2731        let mut in_single = false;
2732        let mut in_double = false;
2733        let mut escaped = false;
2734        let mut saw_any = false;
2735
2736        while let Some(ch) = self.peek_char() {
2737            if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
2738                break;
2739            }
2740
2741            saw_any = true;
2742            Self::push_capture_char(content, ch);
2743            self.advance();
2744
2745            if escaped {
2746                cooked.push(ch);
2747                escaped = false;
2748                continue;
2749            }
2750
2751            match ch {
2752                '\\' if !in_single => escaped = true,
2753                '\'' if !in_double => in_single = !in_single,
2754                '"' if !in_single => in_double = !in_double,
2755                _ => cooked.push(ch),
2756            }
2757        }
2758
2759        saw_any.then_some(cooked)
2760    }
2761
2762    fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2763        Self::push_capture_char(content, '`');
2764        self.advance();
2765        while let Some(ch) = self.peek_char() {
2766            Self::push_capture_char(content, ch);
2767            self.advance();
2768            if ch == '\\' {
2769                if let Some(esc) = self.peek_char() {
2770                    Self::push_capture_char(content, esc);
2771                    self.advance();
2772                }
2773                continue;
2774            }
2775            if ch == '`' {
2776                break;
2777            }
2778        }
2779    }
2780
2781    fn read_command_subst_pending_heredoc_into(
2782        &mut self,
2783        content: &mut Option<String>,
2784        delimiter: &str,
2785        strip_tabs: bool,
2786    ) -> bool {
2787        loop {
2788            let mut line = String::new();
2789            let mut saw_newline = false;
2790
2791            while let Some(ch) = self.peek_char() {
2792                self.advance();
2793                if ch == '\n' {
2794                    saw_newline = true;
2795                    break;
2796                }
2797                line.push(ch);
2798            }
2799
2800            Self::push_capture_str(content, &line);
2801            if saw_newline {
2802                Self::push_capture_char(content, '\n');
2803            }
2804
2805            if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2806                return true;
2807            }
2808        }
2809    }
2810
2811    fn read_command_subst_into_depth(
2812        &mut self,
2813        content: &mut Option<String>,
2814        subst_depth: usize,
2815    ) -> bool {
2816        if subst_depth >= self.max_subst_depth {
2817            // Depth limit exceeded — consume until matching ')' and emit error token
2818            let mut depth = 1;
2819            while let Some(c) = self.peek_char() {
2820                self.advance();
2821                match c {
2822                    '(' => depth += 1,
2823                    ')' => {
2824                        depth -= 1;
2825                        if depth == 0 {
2826                            Self::push_capture_char(content, ')');
2827                            return true;
2828                        }
2829                    }
2830                    _ => {}
2831                }
2832            }
2833            return false;
2834        }
2835
2836        let mut depth = 1;
2837        let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
2838        let mut pending_case_headers = 0usize;
2839        let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
2840        let mut current_word = String::with_capacity(16);
2841        let mut at_command_start = true;
2842        let mut expecting_redirection_target = false;
2843        let mut current_word_started_at_command_start = false;
2844        while let Some(c) = self.peek_char() {
2845            match c {
2846                '#' if !self.should_treat_hash_as_word_char() => {
2847                    let had_word = !current_word.is_empty();
2848                    Self::flush_command_subst_keyword(
2849                        &mut current_word,
2850                        &mut pending_case_headers,
2851                        &mut case_clause_depths,
2852                        depth,
2853                        &mut current_word_started_at_command_start,
2854                    );
2855                    if had_word && expecting_redirection_target {
2856                        expecting_redirection_target = false;
2857                    }
2858                    Self::push_capture_char(content, '#');
2859                    self.advance();
2860                    while let Some(comment_ch) = self.peek_char() {
2861                        Self::push_capture_char(content, comment_ch);
2862                        self.advance();
2863                        if comment_ch == '\n' {
2864                            for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
2865                                if !self.read_command_subst_pending_heredoc_into(
2866                                    content, &delimiter, strip_tabs,
2867                                ) {
2868                                    return false;
2869                                }
2870                            }
2871                            at_command_start = true;
2872                            expecting_redirection_target = false;
2873                            break;
2874                        }
2875                    }
2876                }
2877                '(' => {
2878                    Self::flush_command_subst_keyword(
2879                        &mut current_word,
2880                        &mut pending_case_headers,
2881                        &mut case_clause_depths,
2882                        depth,
2883                        &mut current_word_started_at_command_start,
2884                    );
2885                    depth += 1;
2886                    Self::push_capture_char(content, c);
2887                    self.advance();
2888                    at_command_start = true;
2889                    expecting_redirection_target = false;
2890                }
2891                ')' => {
2892                    Self::flush_command_subst_keyword(
2893                        &mut current_word,
2894                        &mut pending_case_headers,
2895                        &mut case_clause_depths,
2896                        depth,
2897                        &mut current_word_started_at_command_start,
2898                    );
2899                    if case_clause_depths
2900                        .last()
2901                        .is_some_and(|case_depth| *case_depth == depth)
2902                    {
2903                        Self::push_capture_char(content, ')');
2904                        self.advance();
2905                        at_command_start = true;
2906                        expecting_redirection_target = false;
2907                        continue;
2908                    }
2909                    depth -= 1;
2910                    self.advance();
2911                    if depth == 0 {
2912                        Self::push_capture_char(content, ')');
2913                        return true;
2914                    }
2915                    Self::push_capture_char(content, c);
2916                    at_command_start = false;
2917                    expecting_redirection_target = false;
2918                }
2919                '"' => {
2920                    let had_word = !current_word.is_empty();
2921                    Self::flush_command_subst_keyword(
2922                        &mut current_word,
2923                        &mut pending_case_headers,
2924                        &mut case_clause_depths,
2925                        depth,
2926                        &mut current_word_started_at_command_start,
2927                    );
2928                    if had_word && expecting_redirection_target {
2929                        expecting_redirection_target = false;
2930                    }
2931                    // Nested double-quoted string inside $()
2932                    Self::push_capture_char(content, '"');
2933                    self.advance();
2934                    while let Some(qc) = self.peek_char() {
2935                        match qc {
2936                            '"' => {
2937                                Self::push_capture_char(content, '"');
2938                                self.advance();
2939                                break;
2940                            }
2941                            '\\' => {
2942                                Self::push_capture_char(content, '\\');
2943                                self.advance();
2944                                if let Some(esc) = self.peek_char() {
2945                                    Self::push_capture_char(content, esc);
2946                                    self.advance();
2947                                }
2948                            }
2949                            '$' => {
2950                                Self::push_capture_char(content, '$');
2951                                self.advance();
2952                                if self.peek_char() == Some('(') {
2953                                    if self.second_char() == Some('(') {
2954                                        if !self.read_arithmetic_expansion_into(content) {
2955                                            return false;
2956                                        }
2957                                    } else {
2958                                        Self::push_capture_char(content, '(');
2959                                        self.advance();
2960                                        if !self
2961                                            .read_command_subst_into_depth(content, subst_depth + 1)
2962                                        {
2963                                            return false;
2964                                        }
2965                                    }
2966                                }
2967                            }
2968                            _ => {
2969                                Self::push_capture_char(content, qc);
2970                                self.advance();
2971                            }
2972                        }
2973                    }
2974                    if expecting_redirection_target {
2975                        expecting_redirection_target = false;
2976                    } else {
2977                        at_command_start = false;
2978                    }
2979                }
2980                '\'' => {
2981                    let had_word = !current_word.is_empty();
2982                    Self::flush_command_subst_keyword(
2983                        &mut current_word,
2984                        &mut pending_case_headers,
2985                        &mut case_clause_depths,
2986                        depth,
2987                        &mut current_word_started_at_command_start,
2988                    );
2989                    if had_word && expecting_redirection_target {
2990                        expecting_redirection_target = false;
2991                    }
2992                    // Single-quoted string inside $()
2993                    Self::push_capture_char(content, '\'');
2994                    self.advance();
2995                    while let Some(qc) = self.peek_char() {
2996                        Self::push_capture_char(content, qc);
2997                        self.advance();
2998                        if qc == '\'' {
2999                            break;
3000                        }
3001                    }
3002                    if expecting_redirection_target {
3003                        expecting_redirection_target = false;
3004                    } else {
3005                        at_command_start = false;
3006                    }
3007                }
3008                '`' => {
3009                    let had_word = !current_word.is_empty();
3010                    Self::flush_command_subst_keyword(
3011                        &mut current_word,
3012                        &mut pending_case_headers,
3013                        &mut case_clause_depths,
3014                        depth,
3015                        &mut current_word_started_at_command_start,
3016                    );
3017                    if had_word && expecting_redirection_target {
3018                        expecting_redirection_target = false;
3019                    }
3020                    self.read_command_subst_backtick_segment_into(content);
3021                    if expecting_redirection_target {
3022                        expecting_redirection_target = false;
3023                    } else {
3024                        at_command_start = false;
3025                    }
3026                }
3027                '$' if self.second_char() == Some('\'') => {
3028                    let had_word = !current_word.is_empty();
3029                    Self::flush_command_subst_keyword(
3030                        &mut current_word,
3031                        &mut pending_case_headers,
3032                        &mut case_clause_depths,
3033                        depth,
3034                        &mut current_word_started_at_command_start,
3035                    );
3036                    if had_word && expecting_redirection_target {
3037                        expecting_redirection_target = false;
3038                    }
3039                    Self::push_capture_char(content, '$');
3040                    self.advance();
3041                    Self::push_capture_char(content, '\'');
3042                    self.advance();
3043                    while let Some(qc) = self.peek_char() {
3044                        Self::push_capture_char(content, qc);
3045                        self.advance();
3046                        if qc == '\\' {
3047                            if let Some(esc) = self.peek_char() {
3048                                Self::push_capture_char(content, esc);
3049                                self.advance();
3050                            }
3051                            continue;
3052                        }
3053                        if qc == '\'' {
3054                            break;
3055                        }
3056                    }
3057                    if expecting_redirection_target {
3058                        expecting_redirection_target = false;
3059                    } else {
3060                        at_command_start = false;
3061                    }
3062                }
3063                '\\' => {
3064                    let had_word = !current_word.is_empty();
3065                    Self::flush_command_subst_keyword(
3066                        &mut current_word,
3067                        &mut pending_case_headers,
3068                        &mut case_clause_depths,
3069                        depth,
3070                        &mut current_word_started_at_command_start,
3071                    );
3072                    if had_word && expecting_redirection_target {
3073                        expecting_redirection_target = false;
3074                    }
3075                    Self::push_capture_char(content, '\\');
3076                    self.advance();
3077                    if let Some(esc) = self.peek_char() {
3078                        Self::push_capture_char(content, esc);
3079                        self.advance();
3080                    }
3081                    if expecting_redirection_target {
3082                        expecting_redirection_target = false;
3083                    } else {
3084                        at_command_start = false;
3085                    }
3086                }
3087                '<' if self.second_char() == Some('<') => {
3088                    let word_was_redirection_fd = current_word_started_at_command_start
3089                        && !current_word.is_empty()
3090                        && current_word.chars().all(|current| current.is_ascii_digit());
3091                    Self::flush_command_subst_keyword(
3092                        &mut current_word,
3093                        &mut pending_case_headers,
3094                        &mut case_clause_depths,
3095                        depth,
3096                        &mut current_word_started_at_command_start,
3097                    );
3098                    if word_was_redirection_fd {
3099                        at_command_start = true;
3100                    }
3101
3102                    Self::push_capture_char(content, '<');
3103                    self.advance();
3104                    Self::push_capture_char(content, '<');
3105                    self.advance();
3106
3107                    if self.peek_char() == Some('<') {
3108                        Self::push_capture_char(content, '<');
3109                        self.advance();
3110                        expecting_redirection_target = true;
3111                        continue;
3112                    }
3113
3114                    let strip_tabs = if self.peek_char() == Some('-') {
3115                        Self::push_capture_char(content, '-');
3116                        self.advance();
3117                        true
3118                    } else {
3119                        false
3120                    };
3121
3122                    if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
3123                    {
3124                        pending_heredocs.push((delimiter, strip_tabs));
3125                        expecting_redirection_target = false;
3126                    } else {
3127                        expecting_redirection_target = true;
3128                    }
3129                }
3130                '>' | '<' => {
3131                    let word_was_redirection_fd = current_word_started_at_command_start
3132                        && !current_word.is_empty()
3133                        && current_word.chars().all(|current| current.is_ascii_digit());
3134                    Self::flush_command_subst_keyword(
3135                        &mut current_word,
3136                        &mut pending_case_headers,
3137                        &mut case_clause_depths,
3138                        depth,
3139                        &mut current_word_started_at_command_start,
3140                    );
3141                    if word_was_redirection_fd {
3142                        at_command_start = true;
3143                    }
3144                    Self::push_capture_char(content, c);
3145                    self.advance();
3146                    expecting_redirection_target = true;
3147                }
3148                '\n' => {
3149                    Self::flush_command_subst_keyword(
3150                        &mut current_word,
3151                        &mut pending_case_headers,
3152                        &mut case_clause_depths,
3153                        depth,
3154                        &mut current_word_started_at_command_start,
3155                    );
3156                    Self::push_capture_char(content, '\n');
3157                    self.advance();
3158                    for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
3159                        if !self.read_command_subst_pending_heredoc_into(
3160                            content, &delimiter, strip_tabs,
3161                        ) {
3162                            return false;
3163                        }
3164                    }
3165                    at_command_start = true;
3166                    expecting_redirection_target = false;
3167                }
3168                _ => {
3169                    if c.is_ascii_alphanumeric() || c == '_' {
3170                        if current_word.is_empty()
3171                            && !expecting_redirection_target
3172                            && at_command_start
3173                        {
3174                            current_word_started_at_command_start = true;
3175                            at_command_start = false;
3176                        }
3177                        current_word.push(c);
3178                    } else {
3179                        let had_word = !current_word.is_empty();
3180                        Self::flush_command_subst_keyword(
3181                            &mut current_word,
3182                            &mut pending_case_headers,
3183                            &mut case_clause_depths,
3184                            depth,
3185                            &mut current_word_started_at_command_start,
3186                        );
3187                        if had_word && expecting_redirection_target {
3188                            expecting_redirection_target = false;
3189                        }
3190                        match c {
3191                            ' ' | '\t' => {}
3192                            ';' | '|' | '&' => {
3193                                at_command_start = true;
3194                                expecting_redirection_target = false;
3195                            }
3196                            _ => {
3197                                if !expecting_redirection_target {
3198                                    at_command_start = false;
3199                                }
3200                            }
3201                        }
3202                    }
3203                    Self::push_capture_char(content, c);
3204                    self.advance();
3205                }
3206            }
3207        }
3208
3209        false
3210    }
3211
    /// Read parameter expansion content after `${`, handling nested braces and quotes.
    /// In bash, quotes inside `${...}` (e.g. `${arr["key"]}`) don't terminate the
    /// outer double-quoted string. Appends chars including closing `}` to `content`.
    ///
    /// Returns `true` while the captured text is still byte-identical to the
    /// source (so the caller may keep borrowing); any escape rewrite switches
    /// `content` to an owned buffer via `ensure_capture_from_source` and flips
    /// the result to `false`.
    fn read_param_expansion_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        // True until we emit text that differs from the raw source bytes.
        let mut borrowable = true;
        // Nesting depth of `${ ... }` pairs; we are already one level in.
        let mut depth = 1;
        // Count of plain (non-`${`) `{` seen outside quotes, used to decide
        // whether a `}` closes the expansion or merely matches a literal `{`.
        let mut literal_brace_depth = 0usize;
        let mut in_single = false;
        let mut in_double = false;
        // Expansion depth at which the current `"` was opened; a `}` belonging
        // to a deeper `${` may still close while double-quoted.
        let mut double_quote_depth = 0usize;
        while let Some(c) = self.peek_char() {
            if in_single {
                match c {
                    '\\' => {
                        // NOTE(review): `\"` inside single quotes is rewritten to a
                        // bare `"` here — presumably to keep the outer double-quote
                        // tracking consistent; confirm against callers.
                        let escape_start = self.current_position();
                        if self.second_char() == Some('"') {
                            self.advance();
                            borrowable = false;
                            self.ensure_capture_from_source(content, segment_start, escape_start);
                            Self::push_capture_char(content, '"');
                            self.advance();
                        } else {
                            Self::push_capture_char(content, '\\');
                            self.advance();
                        }
                    }
                    '\'' => {
                        Self::push_capture_char(content, c);
                        self.advance();
                        in_single = false;
                    }
                    _ => {
                        Self::push_capture_char(content, c);
                        self.advance();
                    }
                }
                continue;
            }

            match c {
                // A `}` counts unless it is shielded by a double quote opened
                // at this depth or matched against a pending literal `{`.
                '}' if !in_single && (!in_double || depth > double_quote_depth) => {
                    self.advance();
                    Self::push_capture_char(content, '}');
                    if depth == 1
                        && literal_brace_depth > 0
                        && self.has_later_top_level_param_expansion_closer(depth)
                    {
                        // A real top-level closer still exists later on this
                        // line, so treat this `}` as closing a literal `{`.
                        literal_brace_depth -= 1;
                        continue;
                    }
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                '{' if !in_single && !in_double => {
                    literal_brace_depth += 1;
                    Self::push_capture_char(content, '{');
                    self.advance();
                }
                '"' => {
                    // Quotes inside ${...} are part of the expansion, not string delimiters
                    Self::push_capture_char(content, '"');
                    self.advance();
                    in_double = !in_double;
                    double_quote_depth = if in_double { depth } else { 0 };
                }
                '\'' => {
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    // Single quotes only start a quoted run outside double quotes.
                    if !in_double {
                        in_single = true;
                    }
                }
                '\\' => {
                    // Inside ${...} within double quotes, same escape rules apply:
                    // \", \\, \$, \` produce the escaped char; others keep backslash
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        match esc {
                            '$' => {
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                // NOTE(review): the NUL byte appears to be an internal
                                // marker meaning "this `$` is escaped, don't expand" —
                                // confirm against the consumer of this buffer.
                                Self::push_capture_char(content, '\x00');
                                Self::push_capture_char(content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            '}' => {
                                // \} should be a literal } without closing the expansion
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, '}');
                                self.advance();
                                literal_brace_depth = literal_brace_depth.saturating_sub(1);
                            }
                            _ => {
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                        }
                    } else {
                        // Trailing backslash at end of input: keep it verbatim.
                        Self::push_capture_char(content, '\\');
                    }
                }
                '$' => {
                    Self::push_capture_char(content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            // `$((...))` arithmetic expansion.
                            if !self.read_arithmetic_expansion_into(content) {
                                borrowable = false;
                            }
                        } else {
                            // `$(...)` command substitution.
                            Self::push_capture_char(content, '(');
                            self.advance();
                            self.read_command_subst_into(content);
                        }
                    } else if self.peek_char() == Some('{') {
                        // Nested `${...}`: recurse; it can also lose borrowability.
                        Self::push_capture_char(content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(content, segment_start);
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }
        borrowable
    }
3362
    /// Non-consuming lookahead: does a `}` that would close the expansion at
    /// `target_depth` still appear later in the input?
    ///
    /// Used by `read_param_expansion_into` to decide whether the `}` under the
    /// cursor can be treated as closing a literal `{` instead of terminating
    /// the expansion. A newline seen at the starting depth ends the search.
    fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
        let mut chars = self.lookahead_chars().peekable();
        let mut depth = target_depth;
        let mut in_single = false;
        let mut in_double = false;
        // Depth recorded when the double quote opened; `}` only counts while
        // we are strictly deeper than that.
        let mut double_quote_depth = 0usize;

        while let Some(ch) = chars.next() {
            if in_single {
                match ch {
                    '\'' => in_single = false,
                    // Mirrors the `\"`-inside-single-quotes rewrite done by
                    // `read_param_expansion_into`: skip the quote so both
                    // scanners stay in agreement.
                    '\\' if chars.peek() == Some(&'"') => {
                        chars.next();
                    }
                    '\\' => {}
                    _ => {}
                }
                continue;
            }

            if in_double {
                match ch {
                    '"' => {
                        in_double = false;
                        double_quote_depth = 0;
                    }
                    '\\' => {
                        // Escaped char inside double quotes: consume it blindly.
                        chars.next();
                    }
                    '$' if chars.peek() == Some(&'{') => {
                        chars.next();
                        depth += 1;
                    }
                    '}' if depth > double_quote_depth => {
                        depth -= 1;
                    }
                    _ => {}
                }
                continue;
            }

            match ch {
                // Reached end of line without getting back to the starting
                // depth: no later closer on this line.
                '\n' if depth == target_depth => return false,
                '\'' => in_single = true,
                '"' => {
                    in_double = true;
                    double_quote_depth = depth;
                }
                '\\' => {
                    chars.next();
                }
                '$' if chars.peek() == Some(&'{') => {
                    chars.next();
                    depth += 1;
                }
                '}' => {
                    if depth == target_depth {
                        return true;
                    }
                    depth -= 1;
                }
                _ => {}
            }
        }

        false
    }
3430
    /// Check if the content starting with { looks like a brace expansion
    /// Brace expansion: {a,b,c} or {1..5} (contains , or ..)
    /// Brace group: { cmd; } (contains spaces, semicolons, newlines)
    /// Caps lookahead to prevent O(n^2) scanning when input
    /// contains many unmatched `{` characters (issue #997).
    fn looks_like_brace_expansion(&self) -> bool {
        const MAX_LOOKAHEAD: usize = 10_000;

        let mut chars = self.lookahead_chars();

        // Skip the opening {
        if chars.next() != Some('{') {
            return false;
        }

        let mut depth = 1;
        // Depth of `$(`-style parens; separators inside them don't count.
        let mut paren_depth = 0usize;
        let mut has_comma = false;
        let mut has_dot_dot = false;
        let mut escaped = false;
        let mut in_single = false;
        let mut in_double = false;
        let mut in_backtick = false;
        // Previous char, used to recognize `$(` and `..`.
        let mut prev_char = None;
        let mut scanned = 0usize;

        for ch in chars {
            scanned += 1;
            if scanned > MAX_LOOKAHEAD {
                return false;
            }

            // Brace syntax only counts outside quotes/backticks...
            let brace_surface_active = !in_single && !in_double && !in_backtick;
            // ...and the , / .. / delimiter checks only at the outermost level.
            let at_top_level = depth == 1 && paren_depth == 0 && brace_surface_active;

            match ch {
                // Must stay the first arm: the char after a backslash is
                // swallowed before any other rule can see it.
                _ if escaped => {
                    escaped = false;
                }
                '\\' if !in_single => escaped = true,
                '\'' if !in_double && !in_backtick => in_single = !in_single,
                '"' if !in_single && !in_backtick => in_double = !in_double,
                '`' if !in_single && !in_double => in_backtick = !in_backtick,
                // Only `$(`-opened parens (or parens already inside one) count.
                '(' if brace_surface_active && (paren_depth > 0 || prev_char == Some('$')) => {
                    paren_depth += 1
                }
                ')' if brace_surface_active && paren_depth > 0 => paren_depth -= 1,
                '{' if !in_single && !in_double && !in_backtick => depth += 1,
                '}' if !in_single && !in_double && !in_backtick => {
                    depth -= 1;
                    if depth == 0 {
                        // Found matching }, check if we have brace expansion markers
                        return has_comma || has_dot_dot;
                    }
                }
                ',' if at_top_level => has_comma = true,
                '.' if at_top_level && prev_char == Some('.') => has_dot_dot = true,
                // Brace groups have whitespace/newlines/semicolons at depth 1
                ' ' | '\t' | '\n' | ';' if at_top_level => return false,
                _ => {}
            }
            prev_char = Some(ch);
        }

        false
    }
3497
    /// Consume a mid-word `{...}` segment, appending every character — up to
    /// and including the matching `}` — to `word` via `push_capture_char`.
    ///
    /// Assumes the opening `{` has already been consumed (`brace_depth` starts
    /// at 1). Tracks escapes, quotes, backticks, and `$(`-style parens so that
    /// braces inside those constructs don't terminate the segment early.
    fn consume_mid_word_brace_segment(&mut self, word: &mut Option<String>) {
        let mut brace_depth = 1usize;
        // Only parens opened by `$(` (or nested inside one) are tracked.
        let mut paren_depth = 0usize;
        let mut escaped = false;
        let mut in_single = false;
        let mut in_double = false;
        let mut in_backtick = false;
        let mut prev_char = None;

        while let Some(ch) = self.peek_char() {
            // Capture first; the state updates below decide when to stop.
            Self::push_capture_char(word, ch);
            self.advance();

            if escaped {
                // The char right after a backslash is taken literally.
                escaped = false;
                prev_char = Some(ch);
                continue;
            }

            match ch {
                '\\' if !in_single => escaped = true,
                '\'' if !in_double && !in_backtick => in_single = !in_single,
                '"' if !in_single && !in_backtick => in_double = !in_double,
                '`' if !in_single && !in_double => in_backtick = !in_backtick,
                '(' if !in_single
                    && !in_double
                    && !in_backtick
                    && (paren_depth > 0 || prev_char == Some('$')) =>
                {
                    paren_depth += 1
                }
                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
                    paren_depth -= 1
                }
                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
                '}' if !in_single && !in_double && !in_backtick => {
                    brace_depth -= 1;
                    if brace_depth == 0 {
                        // Matching close brace was already captured above.
                        break;
                    }
                }
                _ => {}
            }

            prev_char = Some(ch);
        }
    }
3545
    /// Consume a `{...}` body through its matching `}`, appending directly to
    /// an owned `String` (same scan as `consume_mid_word_brace_segment`, which
    /// captures via `push_capture_char` instead).
    ///
    /// Assumes the opening `{` has already been consumed (`brace_depth` starts
    /// at 1); escapes, quotes, backticks, and `$(`-style parens shield inner
    /// braces from ending the body early.
    fn consume_brace_word_body(&mut self, word: &mut String) {
        let mut brace_depth = 1usize;
        // Only parens opened by `$(` (or nested inside one) are tracked.
        let mut paren_depth = 0usize;
        let mut escaped = false;
        let mut in_single = false;
        let mut in_double = false;
        let mut in_backtick = false;
        let mut prev_char = None;

        while let Some(ch) = self.peek_char() {
            // Capture first; the state updates below decide when to stop.
            word.push(ch);
            self.advance();

            if escaped {
                // The char right after a backslash is taken literally.
                escaped = false;
                prev_char = Some(ch);
                continue;
            }

            match ch {
                '\\' if !in_single => escaped = true,
                '\'' if !in_double && !in_backtick => in_single = !in_single,
                '"' if !in_single && !in_backtick => in_double = !in_double,
                '`' if !in_single && !in_double => in_backtick = !in_backtick,
                '(' if !in_single
                    && !in_double
                    && !in_backtick
                    && (paren_depth > 0 || prev_char == Some('$')) =>
                {
                    paren_depth += 1
                }
                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
                    paren_depth -= 1
                }
                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
                '}' if !in_single && !in_double && !in_backtick => {
                    brace_depth -= 1;
                    if brace_depth == 0 {
                        // Matching close brace was already pushed above.
                        break;
                    }
                }
                _ => {}
            }

            prev_char = Some(ch);
        }
    }
3593
3594    /// Check whether a mid-word `{...}` segment can stay attached to the current
3595    /// word without crossing a top-level word boundary.
3596    fn looks_like_mid_word_brace_segment(&self) -> bool {
3597        const MAX_LOOKAHEAD: usize = 10_000;
3598
3599        let mut chars = self.lookahead_chars();
3600        if chars.next() != Some('{') {
3601            return false;
3602        }
3603
3604        let mut brace_depth = 1;
3605        let mut paren_depth = 0usize;
3606        let mut escaped = false;
3607        let mut in_single = false;
3608        let mut in_double = false;
3609        let mut in_backtick = false;
3610        let mut prev_char = None;
3611        let mut scanned = 0usize;
3612
3613        for ch in chars {
3614            scanned += 1;
3615            if scanned > MAX_LOOKAHEAD {
3616                return false;
3617            }
3618
3619            if !in_single
3620                && !in_double
3621                && !in_backtick
3622                && !escaped
3623                && brace_depth == 1
3624                && paren_depth == 0
3625                && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
3626            {
3627                return false;
3628            }
3629
3630            if escaped {
3631                escaped = false;
3632                prev_char = Some(ch);
3633                continue;
3634            }
3635
3636            match ch {
3637                '\\' => escaped = true,
3638                '\'' if !in_double && !in_backtick => in_single = !in_single,
3639                '"' if !in_single && !in_backtick => in_double = !in_double,
3640                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3641                '(' if !in_single
3642                    && !in_double
3643                    && !in_backtick
3644                    && (paren_depth > 0 || prev_char == Some('$')) =>
3645                {
3646                    paren_depth += 1
3647                }
3648                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3649                    paren_depth -= 1
3650                }
3651                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3652                '}' if !in_single && !in_double && !in_backtick => {
3653                    brace_depth -= 1;
3654                    if brace_depth == 0 {
3655                        return true;
3656                    }
3657                }
3658                _ => {}
3659            }
3660
3661            prev_char = Some(ch);
3662        }
3663
3664        false
3665    }
3666
3667    /// Check if { is followed by whitespace (brace group start)
3668    fn is_brace_group_start(&self) -> bool {
3669        let mut chars = self.lookahead_chars();
3670        // Skip the opening {
3671        if chars.next() != Some('{') {
3672            return false;
3673        }
3674        // If next char is whitespace or newline, it's a brace group
3675        matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3676    }
3677
3678    /// Check whether the text after an escaped `{` looks like a brace-expansion
3679    /// surface that should stay attached to the current word, e.g. `\{a,b}`.
3680    fn escaped_brace_sequence_looks_like_brace_expansion(&self) -> bool {
3681        const MAX_LOOKAHEAD: usize = 10_000;
3682
3683        let mut chars = self.lookahead_chars();
3684        let mut depth = 1;
3685        let mut has_comma = false;
3686        let mut has_dot_dot = false;
3687        let mut prev_char = None;
3688        let mut scanned = 0usize;
3689
3690        for ch in chars.by_ref() {
3691            scanned += 1;
3692            if scanned > MAX_LOOKAHEAD {
3693                return false;
3694            }
3695            match ch {
3696                '{' => depth += 1,
3697                '}' => {
3698                    depth -= 1;
3699                    if depth == 0 {
3700                        return has_comma || has_dot_dot;
3701                    }
3702                }
3703                ',' if depth == 1 => has_comma = true,
3704                '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3705                ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3706                _ => {}
3707            }
3708            prev_char = Some(ch);
3709        }
3710
3711        false
3712    }
3713
3714    fn brace_literal_starts_case_pattern_delimiter(&self) -> bool {
3715        let mut chars = self.lookahead_chars();
3716        if chars.next() != Some('{') {
3717            return false;
3718        }
3719        chars.next() == Some(')')
3720    }
3721
    /// Read a {literal} pattern without comma/dot-dot as a word
    ///
    /// Consumes the opening `{`, the body through its matching `}`, and any
    /// trailing word characters (e.g. `{foo}bar`), returning everything as a
    /// single owned `Word` token. Returns `None` if the cursor is not on `{`.
    fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
        let mut word = String::with_capacity(16);

        if let Some('{') = self.peek_char() {
            word.push('{');
            self.advance();
        } else {
            return None;
        }

        self.consume_brace_word_body(&mut word);

        // Append any suffix glued to the closing brace.
        while let Some(ch) = self.peek_char() {
            if Self::is_word_char(ch) {
                if self.reinject_buf.is_empty() {
                    // Fast path: nothing is pending for replay, so slurp a whole
                    // run of word chars directly from the cursor.
                    let chunk = self.cursor.eat_while(Self::is_word_char);
                    word.push_str(chunk);
                    self.advance_scanned_source_bytes(chunk.len());
                } else {
                    // Replay buffer is non-empty: go through `advance` one char
                    // at a time so the buffered input is honored.
                    word.push(ch);
                    self.advance();
                }
            } else {
                break;
            }
        }

        Some(LexedToken::owned_word(TokenKind::Word, word))
    }
3752
3753    /// Read a brace expansion pattern as a word
3754    fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3755        let mut word = String::with_capacity(16);
3756
3757        // Read the opening {
3758        if let Some('{') = self.peek_char() {
3759            word.push('{');
3760            self.advance();
3761        } else {
3762            return None;
3763        }
3764
3765        // Read until matching }
3766        self.consume_brace_word_body(&mut word);
3767
3768        // Continue reading any suffix after the brace pattern
3769        while let Some(ch) = self.peek_char() {
3770            if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3771                if ch == '{' {
3772                    // Another brace pattern - include it
3773                    word.push(ch);
3774                    self.advance();
3775                    self.consume_brace_word_body(&mut word);
3776                } else {
3777                    word.push(ch);
3778                    self.advance();
3779                }
3780            } else {
3781                break;
3782            }
3783        }
3784
3785        Some(LexedToken::owned_word(TokenKind::Word, word))
3786    }
3787
3788    /// Peek ahead (without consuming) to see if `=(` starts an associative
3789    /// compound assignment like `([key]=val ...)`.  Returns true when the
3790    /// first non-whitespace char after `(` is `[`.
3791    fn looks_like_assoc_assign(&self) -> bool {
3792        let mut chars = self.lookahead_chars();
3793        // Skip the `(` we haven't consumed yet
3794        if chars.next() != Some('(') {
3795            return false;
3796        }
3797        // Skip optional whitespace
3798        for ch in chars {
3799            match ch {
3800                ' ' | '\t' => continue,
3801                '[' => return true,
3802                _ => return false,
3803            }
3804        }
3805        false
3806    }
3807
3808    fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3809        text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3810    }
3811
3812    fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3813        word.segments().any(|segment| {
3814            matches!(
3815                segment.kind(),
3816                LexedWordSegmentKind::SingleQuoted
3817                    | LexedWordSegmentKind::DollarSingleQuoted
3818                    | LexedWordSegmentKind::DoubleQuoted
3819                    | LexedWordSegmentKind::DollarDoubleQuoted
3820            )
3821        }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3822    }
3823
3824    fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3825        text.contains(['*', '?'])
3826            || text.ends_with('}') && text.contains("${")
3827            || text.ends_with(']')
3828                && text
3829                    .rfind('[')
3830                    .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3831    }
3832
3833    fn is_word_char(ch: char) -> bool {
3834        !matches!(
3835            ch,
3836            ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3837        )
3838    }
3839
3840    const fn is_ascii_word_byte(byte: u8) -> bool {
3841        !matches!(
3842            byte,
3843            b' ' | b'\t'
3844                | b'\n'
3845                | b';'
3846                | b'|'
3847                | b'&'
3848                | b'>'
3849                | b'<'
3850                | b'('
3851                | b')'
3852                | b'{'
3853                | b'}'
3854                | b'\''
3855                | b'"'
3856        )
3857    }
3858
3859    const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3860        Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3861    }
3862
3863    fn is_plain_word_char(ch: char) -> bool {
3864        Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3865    }
3866
3867    /// Read here document content until the delimiter line is found
3868    pub fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
3869        let mut content = String::with_capacity(64);
3870        let mut current_line = String::with_capacity(64);
3871
3872        // Save rest of current line (after the delimiter token on the command line).
3873        // For `cat <<EOF | sort`, this captures ` | sort` so the parser can
3874        // tokenize the pipe and subsequent command after the heredoc body.
3875        //
3876        // Quoted strings may span multiple lines (e.g., `cat <<EOF; echo "two\nthree"`),
3877        // so we track quoting state and continue across newlines until quotes close.
3878        let mut rest_of_line = String::with_capacity(32);
3879        let rest_of_line_start = self.current_position();
3880        let mut in_double_quote = false;
3881        let mut in_single_quote = false;
3882        let mut in_comment = false;
3883        let mut saw_non_whitespace_tail = false;
3884        let mut consecutive_backslashes = 0usize;
3885        let mut previous_tail_char = None;
3886        while let Some(ch) = self.peek_char() {
3887            self.advance();
3888            if in_comment {
3889                if ch == '\n' {
3890                    break;
3891                }
3892                rest_of_line.push(ch);
3893                previous_tail_char = Some(ch);
3894                continue;
3895            }
3896            if ch == '#'
3897                && !in_single_quote
3898                && !in_double_quote
3899                && self.comments_enabled()
3900                && heredoc_tail_hash_starts_comment(previous_tail_char)
3901            {
3902                in_comment = true;
3903                rest_of_line.push(ch);
3904                previous_tail_char = Some(ch);
3905                consecutive_backslashes = 0;
3906                continue;
3907            }
3908            let backslash_continues_line = ch == '\\'
3909                && !in_single_quote
3910                && self.peek_char() == Some('\n')
3911                && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
3912                && consecutive_backslashes.is_multiple_of(2);
3913            if backslash_continues_line {
3914                rest_of_line.push(ch);
3915                rest_of_line.push('\n');
3916                self.advance();
3917                consecutive_backslashes = 0;
3918                continue;
3919            }
3920            if ch == '\n' && !in_double_quote && !in_single_quote {
3921                break;
3922            }
3923            if ch == '"' && !in_single_quote {
3924                in_double_quote = !in_double_quote;
3925            } else if ch == '\'' && !in_double_quote {
3926                in_single_quote = !in_single_quote;
3927            } else if ch == '\\' && in_double_quote {
3928                // Escaped char inside double quotes — skip the next char too
3929                rest_of_line.push(ch);
3930                if let Some(next) = self.peek_char() {
3931                    rest_of_line.push(next);
3932                    self.advance();
3933                }
3934                continue;
3935            }
3936            rest_of_line.push(ch);
3937            if !ch.is_whitespace() {
3938                saw_non_whitespace_tail = true;
3939            }
3940            if ch == '\\' && !in_single_quote {
3941                consecutive_backslashes += 1;
3942            } else {
3943                consecutive_backslashes = 0;
3944            }
3945            previous_tail_char = Some(ch);
3946        }
3947
3948        // If we just drained a heredoc replay buffer (for example when multiple
3949        // heredocs share one command line), resume tracking from the true cursor
3950        // position before we measure the body span.
3951        self.sync_offset_to_cursor();
3952        let content_start = self.current_position();
3953        let mut current_line_start = content_start;
3954        let content_end;
3955
3956        // Read lines until we find the delimiter
3957        loop {
3958            if self.reinject_buf.is_empty() {
3959                // When the body reading drains a reinject buffer (from a
3960                // previous heredoc on the same command line), the virtual
3961                // offset drifts away from the cursor. Snap it back before
3962                // any source-based work so spans and `post_heredoc_offset`
3963                // stay within bounds.
3964                self.sync_offset_to_cursor();
3965                let rest = self.cursor.rest();
3966                if rest.is_empty() {
3967                    content_end = self.current_position();
3968                    break;
3969                }
3970
3971                let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
3972                let line = &rest[..line_len];
3973                let has_newline = line_len < rest.len();
3974
3975                if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
3976                    content_end = current_line_start;
3977                    self.consume_source_bytes(line_len);
3978                    if has_newline {
3979                        self.consume_ascii_chars(1);
3980                    }
3981                    break;
3982                }
3983
3984                content.push_str(line);
3985                self.consume_source_bytes(line_len);
3986
3987                if has_newline {
3988                    self.consume_ascii_chars(1);
3989                    content.push('\n');
3990                    current_line_start = self.current_position();
3991                    continue;
3992                }
3993
3994                content_end = self.current_position();
3995                break;
3996            }
3997
3998            match self.peek_char() {
3999                Some('\n') => {
4000                    self.advance();
4001                    // Check if current line matches delimiter
4002                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
4003                        content_end = current_line_start;
4004                        break;
4005                    }
4006                    content.push_str(&current_line);
4007                    content.push('\n');
4008                    current_line.clear();
4009                    current_line_start = self.current_position();
4010                }
4011                Some(ch) => {
4012                    current_line.push(ch);
4013                    self.advance();
4014                }
4015                None => {
4016                    // End of input - check last line
4017                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
4018                        content_end = current_line_start;
4019                        break;
4020                    }
4021                    if !current_line.is_empty() {
4022                        content.push_str(&current_line);
4023                    }
4024                    content_end = self.current_position();
4025                    break;
4026                }
4027            }
4028        }
4029
4030        // Re-inject the command-line tail so subsequent same-line tokens (pipes,
4031        // redirects, command words, additional heredocs) stay visible to the
4032        // parser. Always replay a terminating newline so parsing stops before
4033        // tokens that originally lived on later source lines, like `}` or `do`.
4034        let post_heredoc_offset = self.offset;
4035        self.offset = rest_of_line_start.offset;
4036        for ch in rest_of_line.chars() {
4037            self.reinject_buf.push_back(ch);
4038        }
4039        self.reinject_buf.push_back('\n');
4040        self.reinject_resume_offset = Some(post_heredoc_offset);
4041
4042        HeredocRead {
4043            content,
4044            content_span: Span::from_positions(content_start, content_end),
4045        }
4046    }
4047
4048    fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
4049        let mut chars = self.cursor.rest().chars();
4050        if chars.next() != Some('\n') {
4051            return false;
4052        }
4053
4054        for ch in chars {
4055            if matches!(ch, ' ' | '\t') {
4056                continue;
4057            }
4058            if ch == '\n' {
4059                return false;
4060            }
4061            return matches!(ch, '|' | '&' | ';' | '<' | '>')
4062                || (ch == '#' && self.comments_enabled());
4063        }
4064
4065        false
4066    }
4067}
4068
/// Returns `true` when `line` terminates a heredoc body for `delimiter`.
///
/// With `strip_tabs` (the `<<-` form), any leading tab indentation is
/// ignored. A line matches when it is the delimiter itself, optionally
/// followed by nothing but blanks.
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    // `<<-` heredocs allow the terminator to be tab-indented.
    let candidate = if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    };

    // An exact match leaves an empty remainder, which trivially satisfies
    // the trailing-blanks check below.
    match candidate.strip_prefix(delimiter) {
        Some(rest) => rest.chars().all(|ch| ch == ' ' || ch == '\t'),
        None => false,
    }
}
4086
/// Returns `true` when a `#` seen in a heredoc command-line tail opens a
/// comment: at the start of the tail, or right after whitespace or a
/// shell operator character.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(prev) => prev.is_whitespace() || ";|&<>)".contains(prev),
    }
}
4092
/// Reads the character starting at byte offset `index` and returns it
/// together with the offset just past it. `None` at end of input or when
/// `index` is not a UTF-8 character boundary.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    let rest = input.get(index..)?;
    let ch = rest.chars().next()?;
    Some((ch, index + ch.len_utf8()))
}
4097
/// Returns `true` when `prefix` (a single line up to some point) contains
/// more unquoted `((` openers than `))` closers, i.e. the point lies
/// inside an arithmetic `((...))` construct.
///
/// Single quotes, double quotes, backticks, and backslash escapes are
/// tracked so quoted or escaped parens do not affect the depth.
fn line_has_unclosed_double_paren(prefix: &str) -> bool {
    let mut index = 0usize;
    let mut depth = 0usize;
    let mut single = false;
    let mut double = false;
    let mut backtick = false;
    let mut escape_pending = false;

    while let Some(ch) = prefix.get(index..).and_then(|rest| rest.chars().next()) {
        let after = index + ch.len_utf8();
        let was_escaped = escape_pending;

        // Backslash toggles so consecutive backslashes pair up; it is
        // literal inside single quotes.
        if ch == '\\' && !single {
            escape_pending = !escape_pending;
            index = after;
            continue;
        }
        escape_pending = false;

        let quoted = single || double || backtick;
        match ch {
            '\'' if !double && !backtick && !was_escaped => single = !single,
            '"' if !single && !backtick && !was_escaped => double = !double,
            '`' if !single && !double && !was_escaped => backtick = !backtick,
            // Only the two-character sequences `((` / `))` change depth.
            '(' if !quoted && !was_escaped && prefix[after..].starts_with('(') => {
                depth += 1;
                index = after + '('.len_utf8();
                continue;
            }
            ')' if !quoted && !was_escaped && prefix[after..].starts_with(')') => {
                depth = depth.saturating_sub(1);
                index = after + ')'.len_utf8();
                continue;
            }
            _ => {}
        }

        index = after;
    }

    depth > 0
}
4147
4148fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4149    let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4150    let prefix = &input[line_start..index];
4151    line_has_unclosed_double_paren(prefix)
4152}
4153
4154fn hash_starts_comment(input: &str, index: usize) -> bool {
4155    if inside_unclosed_double_paren_on_line(input, index) {
4156        return false;
4157    }
4158
4159    let next = &input[index + '#'.len_utf8()..];
4160    input[..index]
4161        .chars()
4162        .next_back()
4163        .is_none_or(|prev| match prev {
4164            '(' => {
4165                let whitespace_index = next.find(char::is_whitespace);
4166                let close_index = next.find(')');
4167
4168                match (whitespace_index, close_index) {
4169                    (Some(whitespace), Some(close)) => whitespace < close,
4170                    (Some(_), None) | (None, None) => true,
4171                    (None, Some(_)) => false,
4172                }
4173            }
4174            _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4175        })
4176}
4177
/// Returns `true` when `ch` ends a heredoc delimiter word: an unquoted,
/// unescaped blank or shell operator character.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    // Quoted or escaped characters are always part of the delimiter.
    if in_single || in_double || escaped {
        return false;
    }
    ch.is_whitespace() || "|&;<>()".contains(ch)
}
4189
4190fn scan_double_quoted_command_substitution_segment(
4191    input: &str,
4192    mut index: usize,
4193    subst_depth: usize,
4194) -> Option<usize> {
4195    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4196        match ch {
4197            '"' => return Some(next_index),
4198            '\\' => {
4199                index = next_index;
4200                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4201                    index = escaped_next;
4202                }
4203            }
4204            '$' if input[next_index..].starts_with('{') => {
4205                let consumed = scan_command_subst_parameter_expansion_len(
4206                    &input[next_index + '{'.len_utf8()..],
4207                    subst_depth,
4208                )?;
4209                index = next_index + '{'.len_utf8() + consumed;
4210            }
4211            '$' if input[next_index..].starts_with('(')
4212                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4213            {
4214                let consumed = scan_command_substitution_body_len_inner(
4215                    &input[next_index + '('.len_utf8()..],
4216                    subst_depth + 1,
4217                )?;
4218                index = next_index + '('.len_utf8() + consumed;
4219            }
4220            _ => index = next_index,
4221        }
4222    }
4223
4224    None
4225}
4226
/// Measures a `${...}` parameter expansion while scanning a command
/// substitution, returning the number of bytes consumed through the
/// closing `}` (relative to `input`, which begins just after the `{`).
///
/// Single quotes, double quotes, ANSI-C `$'...'` strings, backticks, and
/// backslash escapes are tracked so a `}` inside any quoted region does
/// not terminate the expansion. Nested `${...}`, `$(...)`, and process
/// substitutions `<(...)` / `>(...)` are skipped by recursing into the
/// matching scanners. Returns `None` when no unquoted `}` is found.
fn scan_command_subst_parameter_expansion_len(input: &str, subst_depth: usize) -> Option<usize> {
    let mut index = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // True when the previous character was an unquoted `$`, so that a
    // following `'` opens an ANSI-C string rather than a plain single quote.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        if ch == '\\' && !in_single {
            // Toggle so consecutive backslashes pair up (`\\` escapes nothing
            // that follows it).
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            // Nested `${...}`: skip over its entire body via recursion.
            if input[next_index..].starts_with('{')
                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )
            {
                index = next_index + '{'.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }

            // Nested `$(...)` — but not `$((` arithmetic — skip its body too.
            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // Process substitution `<(...)` / `>(...)` outside all quoting: its
        // body is measured like a nested command substitution.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    // This quote follows an unquoted `$`: open `$'...'`.
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            // An unquoted, unescaped `}` closes the expansion.
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                return Some(next_index);
            }
            _ => {}
        }

        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    // Ran out of input before an unquoted `}`: unterminated expansion.
    None
}
4326
4327fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
4328    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4329        if !matches!(ch, ' ' | '\t') {
4330            break;
4331        }
4332        index = next_index;
4333    }
4334
4335    let start = index;
4336    let mut cooked = String::new();
4337    let mut in_single = false;
4338    let mut in_double = false;
4339    let mut escaped = false;
4340
4341    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4342        if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
4343            break;
4344        }
4345
4346        index = next_index;
4347        if escaped {
4348            cooked.push(ch);
4349            escaped = false;
4350            continue;
4351        }
4352
4353        match ch {
4354            '\\' if !in_single => escaped = true,
4355            '\'' if !in_double => in_single = !in_single,
4356            '"' if !in_single => in_double = !in_double,
4357            _ => cooked.push(ch),
4358        }
4359    }
4360
4361    (index > start).then_some((index, cooked))
4362}
4363
4364fn skip_command_subst_pending_heredoc(
4365    input: &str,
4366    mut index: usize,
4367    delimiter: &str,
4368    strip_tabs: bool,
4369) -> usize {
4370    while index <= input.len() {
4371        let rest = &input[index..];
4372        let line_len = rest.find('\n').unwrap_or(rest.len());
4373        let line = &rest[..line_len];
4374        let has_newline = line_len < rest.len();
4375
4376        index += line_len;
4377        if has_newline {
4378            index += '\n'.len_utf8();
4379        }
4380
4381        if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4382            return index;
4383        }
4384    }
4385
4386    index
4387}
4388
/// Scans an ANSI-C `$'...'` string whose opening quote sits at byte
/// offset `quote_index`, and returns the offset just past the closing
/// quote. Backslash escapes (including `\'`) are honored. `None` when the
/// string is unterminated.
fn scan_command_subst_ansi_c_single_quoted_segment(
    input: &str,
    quote_index: usize,
) -> Option<usize> {
    let body_start = quote_index + '\''.len_utf8();
    let tail = input.get(body_start..)?;
    let mut chars = tail.char_indices();

    while let Some((offset, ch)) = chars.next() {
        match ch {
            // Backslash escapes the next character, even a quote.
            '\\' => {
                chars.next();
            }
            '\'' => return Some(body_start + offset + '\''.len_utf8()),
            _ => {}
        }
    }

    None
}
4411
/// Scans a backtick command substitution starting just past the opening
/// backtick and returns the byte offset just past the closing backtick.
/// Backslash escapes the following character. `None` when unterminated.
fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
    let tail = input.get(start..)?;
    let mut chars = tail.char_indices();

    while let Some((offset, ch)) = chars.next() {
        match ch {
            // Skip the escaped character along with the backslash.
            '\\' => {
                chars.next();
            }
            '`' => return Some(start + offset + '`'.len_utf8()),
            _ => {}
        }
    }

    None
}
4431
4432fn flush_scanned_command_subst_keyword(
4433    current_word: &mut String,
4434    pending_case_headers: &mut usize,
4435    case_clause_depths: &mut SmallVec<[usize; 4]>,
4436    depth: usize,
4437    word_started_at_command_start: &mut bool,
4438) {
4439    if current_word.is_empty() {
4440        *word_started_at_command_start = false;
4441        return;
4442    }
4443
4444    match current_word.as_str() {
4445        "case" if *word_started_at_command_start => *pending_case_headers += 1,
4446        "in" if *pending_case_headers > 0 => {
4447            *pending_case_headers -= 1;
4448            case_clause_depths.push(depth);
4449        }
4450        "esac" if *word_started_at_command_start => {
4451            case_clause_depths.pop();
4452        }
4453        _ => {}
4454    }
4455
4456    current_word.clear();
4457    *word_started_at_command_start = false;
4458}
4459
4460fn scan_command_substitution_body_len_inner(input: &str, subst_depth: usize) -> Option<usize> {
4461    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
4462        return None;
4463    }
4464
4465    let mut index = 0usize;
4466    let mut depth = 1;
4467    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
4468    let mut pending_case_headers = 0usize;
4469    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
4470    let mut current_word = String::with_capacity(16);
4471    let mut at_command_start = true;
4472    let mut expecting_redirection_target = false;
4473    let mut current_word_started_at_command_start = false;
4474
4475    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4476        match ch {
4477            '#' if hash_starts_comment(input, index) => {
4478                let had_word = !current_word.is_empty();
4479                flush_scanned_command_subst_keyword(
4480                    &mut current_word,
4481                    &mut pending_case_headers,
4482                    &mut case_clause_depths,
4483                    depth,
4484                    &mut current_word_started_at_command_start,
4485                );
4486                if had_word && expecting_redirection_target {
4487                    expecting_redirection_target = false;
4488                }
4489                index = next_index;
4490                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
4491                    index = comment_next;
4492                    if comment_ch == '\n' {
4493                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
4494                            index = skip_command_subst_pending_heredoc(
4495                                input, index, &delimiter, strip_tabs,
4496                            );
4497                        }
4498                        at_command_start = true;
4499                        expecting_redirection_target = false;
4500                        break;
4501                    }
4502                }
4503            }
4504            '(' => {
4505                flush_scanned_command_subst_keyword(
4506                    &mut current_word,
4507                    &mut pending_case_headers,
4508                    &mut case_clause_depths,
4509                    depth,
4510                    &mut current_word_started_at_command_start,
4511                );
4512                depth += 1;
4513                index = next_index;
4514                at_command_start = true;
4515                expecting_redirection_target = false;
4516            }
4517            ')' => {
4518                flush_scanned_command_subst_keyword(
4519                    &mut current_word,
4520                    &mut pending_case_headers,
4521                    &mut case_clause_depths,
4522                    depth,
4523                    &mut current_word_started_at_command_start,
4524                );
4525                if case_clause_depths
4526                    .last()
4527                    .is_some_and(|case_depth| *case_depth == depth)
4528                {
4529                    index = next_index;
4530                    at_command_start = true;
4531                    expecting_redirection_target = false;
4532                    continue;
4533                }
4534                depth -= 1;
4535                index = next_index;
4536                if depth == 0 {
4537                    return Some(index);
4538                }
4539                at_command_start = false;
4540                expecting_redirection_target = false;
4541            }
4542            '"' => {
4543                let had_word = !current_word.is_empty();
4544                flush_scanned_command_subst_keyword(
4545                    &mut current_word,
4546                    &mut pending_case_headers,
4547                    &mut case_clause_depths,
4548                    depth,
4549                    &mut current_word_started_at_command_start,
4550                );
4551                if had_word && expecting_redirection_target {
4552                    expecting_redirection_target = false;
4553                }
4554                index = scan_double_quoted_command_substitution_segment(
4555                    input,
4556                    next_index,
4557                    subst_depth,
4558                )?;
4559                if expecting_redirection_target {
4560                    expecting_redirection_target = false;
4561                } else {
4562                    at_command_start = false;
4563                }
4564            }
4565            '\'' => {
4566                let had_word = !current_word.is_empty();
4567                flush_scanned_command_subst_keyword(
4568                    &mut current_word,
4569                    &mut pending_case_headers,
4570                    &mut case_clause_depths,
4571                    depth,
4572                    &mut current_word_started_at_command_start,
4573                );
4574                if had_word && expecting_redirection_target {
4575                    expecting_redirection_target = false;
4576                }
4577                index = next_index;
4578                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
4579                    index = quoted_next;
4580                    if quoted_ch == '\'' {
4581                        break;
4582                    }
4583                }
4584                if expecting_redirection_target {
4585                    expecting_redirection_target = false;
4586                } else {
4587                    at_command_start = false;
4588                }
4589            }
4590            '`' => {
4591                let had_word = !current_word.is_empty();
4592                flush_scanned_command_subst_keyword(
4593                    &mut current_word,
4594                    &mut pending_case_headers,
4595                    &mut case_clause_depths,
4596                    depth,
4597                    &mut current_word_started_at_command_start,
4598                );
4599                if had_word && expecting_redirection_target {
4600                    expecting_redirection_target = false;
4601                }
4602                index = scan_command_subst_backtick_segment(input, next_index)?;
4603                if expecting_redirection_target {
4604                    expecting_redirection_target = false;
4605                } else {
4606                    at_command_start = false;
4607                }
4608            }
4609            '$' if input[next_index..].starts_with('\'') => {
4610                let had_word = !current_word.is_empty();
4611                flush_scanned_command_subst_keyword(
4612                    &mut current_word,
4613                    &mut pending_case_headers,
4614                    &mut case_clause_depths,
4615                    depth,
4616                    &mut current_word_started_at_command_start,
4617                );
4618                if had_word && expecting_redirection_target {
4619                    expecting_redirection_target = false;
4620                }
4621                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
4622                if expecting_redirection_target {
4623                    expecting_redirection_target = false;
4624                } else {
4625                    at_command_start = false;
4626                }
4627            }
4628            '\\' => {
4629                let had_word = !current_word.is_empty();
4630                flush_scanned_command_subst_keyword(
4631                    &mut current_word,
4632                    &mut pending_case_headers,
4633                    &mut case_clause_depths,
4634                    depth,
4635                    &mut current_word_started_at_command_start,
4636                );
4637                if had_word && expecting_redirection_target {
4638                    expecting_redirection_target = false;
4639                }
4640                index = next_index;
4641                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4642                    index = escaped_next;
4643                }
4644                if expecting_redirection_target {
4645                    expecting_redirection_target = false;
4646                } else {
4647                    at_command_start = false;
4648                }
4649            }
4650            '>' => {
4651                let word_was_redirection_fd = current_word_started_at_command_start
4652                    && !current_word.is_empty()
4653                    && current_word.chars().all(|current| current.is_ascii_digit());
4654                flush_scanned_command_subst_keyword(
4655                    &mut current_word,
4656                    &mut pending_case_headers,
4657                    &mut case_clause_depths,
4658                    depth,
4659                    &mut current_word_started_at_command_start,
4660                );
4661                if word_was_redirection_fd {
4662                    at_command_start = true;
4663                }
4664                index = next_index;
4665                expecting_redirection_target = true;
4666            }
4667            '<' if input[next_index..].starts_with('<') => {
4668                let word_was_redirection_fd = current_word_started_at_command_start
4669                    && !current_word.is_empty()
4670                    && current_word.chars().all(|current| current.is_ascii_digit());
4671                let had_word = !current_word.is_empty();
4672                flush_scanned_command_subst_keyword(
4673                    &mut current_word,
4674                    &mut pending_case_headers,
4675                    &mut case_clause_depths,
4676                    depth,
4677                    &mut current_word_started_at_command_start,
4678                );
4679                if had_word && expecting_redirection_target {
4680                    expecting_redirection_target = false;
4681                }
4682                if word_was_redirection_fd {
4683                    at_command_start = true;
4684                }
4685                if inside_unclosed_double_paren_on_line(input, index) {
4686                    index = next_index + '<'.len_utf8();
4687                    continue;
4688                }
4689
4690                if input[next_index + '<'.len_utf8()..].starts_with('<') {
4691                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
4692                    expecting_redirection_target = true;
4693                    continue;
4694                }
4695
4696                let strip_tabs = input[next_index..].starts_with("<-");
4697                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
4698                if let Some((delimiter_index, delimiter)) =
4699                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
4700                {
4701                    pending_heredocs.push((delimiter, strip_tabs));
4702                    index = delimiter_index;
4703                    expecting_redirection_target = false;
4704                } else {
4705                    index = next_index;
4706                    expecting_redirection_target = true;
4707                }
4708            }
4709            '\n' => {
4710                flush_scanned_command_subst_keyword(
4711                    &mut current_word,
4712                    &mut pending_case_headers,
4713                    &mut case_clause_depths,
4714                    depth,
4715                    &mut current_word_started_at_command_start,
4716                );
4717                index = next_index;
4718                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
4719                    index =
4720                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
4721                }
4722                at_command_start = true;
4723                expecting_redirection_target = false;
4724            }
4725            '$' if input[next_index..].starts_with('{') => {
4726                let had_word = !current_word.is_empty();
4727                flush_scanned_command_subst_keyword(
4728                    &mut current_word,
4729                    &mut pending_case_headers,
4730                    &mut case_clause_depths,
4731                    depth,
4732                    &mut current_word_started_at_command_start,
4733                );
4734                if had_word && expecting_redirection_target {
4735                    expecting_redirection_target = false;
4736                }
4737                let consumed = scan_command_subst_parameter_expansion_len(
4738                    &input[next_index + '{'.len_utf8()..],
4739                    subst_depth,
4740                )?;
4741                index = next_index + '{'.len_utf8() + consumed;
4742                if expecting_redirection_target {
4743                    expecting_redirection_target = false;
4744                } else {
4745                    at_command_start = false;
4746                }
4747            }
4748            '$' if input[next_index..].starts_with('(')
4749                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4750            {
4751                let had_word = !current_word.is_empty();
4752                flush_scanned_command_subst_keyword(
4753                    &mut current_word,
4754                    &mut pending_case_headers,
4755                    &mut case_clause_depths,
4756                    depth,
4757                    &mut current_word_started_at_command_start,
4758                );
4759                if had_word && expecting_redirection_target {
4760                    expecting_redirection_target = false;
4761                }
4762                let consumed = scan_command_substitution_body_len_inner(
4763                    &input[next_index + '('.len_utf8()..],
4764                    subst_depth + 1,
4765                )?;
4766                index = next_index + '('.len_utf8() + consumed;
4767                if expecting_redirection_target {
4768                    expecting_redirection_target = false;
4769                } else {
4770                    at_command_start = false;
4771                }
4772            }
4773            _ => {
4774                if ch.is_ascii_alphanumeric() || ch == '_' {
4775                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
4776                    {
4777                        current_word_started_at_command_start = true;
4778                        at_command_start = false;
4779                    }
4780                    current_word.push(ch);
4781                } else {
4782                    let had_word = !current_word.is_empty();
4783                    flush_scanned_command_subst_keyword(
4784                        &mut current_word,
4785                        &mut pending_case_headers,
4786                        &mut case_clause_depths,
4787                        depth,
4788                        &mut current_word_started_at_command_start,
4789                    );
4790                    if had_word && expecting_redirection_target {
4791                        expecting_redirection_target = false;
4792                    }
4793                    match ch {
4794                        ' ' | '\t' => {}
4795                        ';' | '|' | '&' => {
4796                            at_command_start = true;
4797                            expecting_redirection_target = false;
4798                        }
4799                        _ => {
4800                            if !expecting_redirection_target {
4801                                at_command_start = false;
4802                            }
4803                        }
4804                    }
4805                }
4806                index = next_index;
4807            }
4808        }
4809    }
4810
4811    None
4812}
4813
/// Scans a command-substitution body (the text immediately following the
/// opening `$(`) with no pre-existing nesting and returns the number of bytes
/// consumed, up to and including the matching closing `)`.
///
/// Returns `None` when no matching closing paren is found.
pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
    scan_command_substitution_body_len_inner(input, 0)
}
4817
4818#[cfg(test)]
4819mod tests {
4820    use super::*;
4821
4822    fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
4823        match token.kind {
4824            kind if kind.is_word_like() => token.word_string(),
4825            TokenKind::Comment => token
4826                .span
4827                .slice(source)
4828                .strip_prefix('#')
4829                .map(str::to_string),
4830            TokenKind::Error => token
4831                .error_kind()
4832                .map(LexerErrorKind::message)
4833                .map(str::to_string),
4834            _ => None,
4835        }
4836    }
4837
4838    fn assert_next_token(
4839        lexer: &mut Lexer<'_>,
4840        expected_kind: TokenKind,
4841        expected_text: Option<&str>,
4842    ) {
4843        let token = lexer.next_lexed_token().unwrap();
4844        assert_eq!(token.kind, expected_kind);
4845        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4846    }
4847
4848    fn assert_next_token_with_comments(
4849        lexer: &mut Lexer<'_>,
4850        expected_kind: TokenKind,
4851        expected_text: Option<&str>,
4852    ) {
4853        let token = lexer.next_lexed_token_with_comments().unwrap();
4854        assert_eq!(token.kind, expected_kind);
4855        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4856    }
4857
4858    fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
4859        let mut lexer = Lexer::new(input);
4860
4861        while let Some(token) = lexer.next_lexed_token() {
4862            if token.kind == TokenKind::Newline {
4863                continue;
4864            }
4865
4866            assert_eq!(
4867                token.span.start.line, token.span.end.line,
4868                "token should stay on one line: {:?}",
4869                token
4870            );
4871        }
4872    }
4873
    // Plain unquoted words each tokenize as `Word`.
    #[test]
    fn test_simple_words() {
        let mut lexer = Lexer::new("echo hello world");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
        assert!(lexer.next_lexed_token().is_none());
    }

    // Single quotes suppress expansion, so the token kind is `LiteralWord`.
    #[test]
    fn test_single_quoted_string() {
        let mut lexer = Lexer::new("echo 'hello world'");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        // Single-quoted strings return LiteralWord (no variable expansion)
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("hello world"));
        assert!(lexer.next_lexed_token().is_none());
    }

    // Double quotes keep the word expandable: the token kind is `QuotedWord`.
    #[test]
    fn test_double_quoted_string() {
        let mut lexer = Lexer::new("echo \"hello world\"");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("hello world"));
        assert!(lexer.next_lexed_token().is_none());
    }
4902
    // A quoted `}` inside a brace expansion must not close the expansion early.
    #[test]
    fn test_brace_expansion_token_ignores_quoted_closers() {
        let mut lexer = Lexer::new("echo {\"}\",a}\n");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{"}",a}"#));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }

    // A trailing backslash inside single quotes is literal; the brace word
    // still ends at `}` and the following word is a separate token.
    #[test]
    fn test_brace_expansion_token_preserves_single_quoted_backslash_member_boundary() {
        let mut lexer = Lexer::new("echo {'a\\',b} next\n");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{'a\',b}"#));
        assert_next_token(&mut lexer, TokenKind::Word, Some("next"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
4923
    // A double-quoted expansion keeps a span that slices back into `source`.
    #[test]
    fn test_double_quoted_expansion_token_keeps_source_backing() {
        let source = r#""$bar""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(token.word_text(), Some("$bar"));

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
        assert_eq!(segment.span().unwrap().slice(source), "$bar");
    }

    // Inner double quotes inside `$(...)` within a quoted word stay part of
    // the same token text.
    #[test]
    fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
        let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(
            token.word_text(),
            Some(r#"$(echo "$line" | cut -d' ' -f2-)"#)
        );
    }

    // Same idea with a braced parameter expansion piped through `tr`.
    #[test]
    fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
        let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(
            token.word_text(),
            Some(r#"$(echo "${@}" | tr -d '[:space:]')"#)
        );
    }

    // Adjacent plain/double-quoted/single-quoted segments keep their kinds,
    // join into one text, and the first segment's span maps back to source.
    #[test]
    fn test_mixed_word_keeps_segment_kinds() {
        let source = r#"foo"bar"'baz'"#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();

        assert_eq!(
            segments,
            vec![
                (LexedWordSegmentKind::Plain, "foo".to_string()),
                (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
                (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
            ]
        );
        assert_eq!(word.joined_text(), "foobarbaz");
        assert_eq!(
            word.segments()
                .next()
                .and_then(LexedWordSegment::span)
                .unwrap()
                .slice(source),
            "foo"
        );
    }
4997
    // A `<<-` heredoc (tab-stripped delimiter) inside the body must be skipped
    // so its `)`-free content cannot terminate the scan early.
    #[test]
    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("field, direction"));
        assert!(body.ends_with(')'));
    }

    // A `#` right after `;` starts a comment; the `)` inside it is ignored.
    #[test]
    fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
        let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf '%s' y"));
        assert!(body.ends_with(')'));
    }

    // A comment immediately after a grouping `(` is still a comment.
    #[test]
    fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
        let source = " (# comment with )\nprintf %s 1,2\n) )\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // Heredoc detection must work when `|` directly follows the delimiter.
    #[test]
    fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
        let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("field, direction"));
        assert!(body.ends_with(')'));
    }

    // A `)` inside `${x//foo/)}` belongs to the parameter expansion, not the
    // substitution terminator.
    #[test]
    fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
        let source = "printf %s ${x//foo/)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x//foo/)},1"));
        assert!(body.ends_with(')'));
    }

    // A comment right after a case-pattern `)` must not hide the real `esac`.
    #[test]
    fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
        let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }
5063
    // A zsh glob-flag group like `(#b)` is not a comment even though `#`
    // follows `(`.
    #[test]
    fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
        let source = "[[ \"$buf\" == (#b)(*) ]]";
        let index = source.find('#').expect("expected hash");

        assert!(!hash_starts_comment(source, index));
    }

    // After a bare grouping `(`, a `#` with no space still begins a comment.
    #[test]
    fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
        let source = "(#comment with )";
        let index = source.find('#').expect("expected hash");

        assert!(hash_starts_comment(source, index));
    }

    // Inside an open `(( ... ))` arithmetic context, `#` is not a comment.
    #[test]
    fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
        let source = "(( #c < 256 ))";
        let index = source.find('#').expect("expected hash");

        assert!(!hash_starts_comment(source, index));
    }

    // A quoted `((` does not open an arithmetic context, so `#` is a comment.
    #[test]
    fn test_hash_starts_comment_respects_quoted_double_parens() {
        let source = "printf '((' # comment";
        let index = source.find('#').expect("expected hash");

        assert!(hash_starts_comment(source, index));
    }
5095
    // A quoted `((` before a comment must not suppress comment handling.
    #[test]
    fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
        let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // `(#comment ...` (no space after the hash) is still a comment in a group.
    #[test]
    fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
        let source = " (#comment with )\nprintf %s 1,2\n) )\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // `<<` inside arithmetic is a shift operator, not a heredoc introducer.
    #[test]
    fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
        let source = "((x<<2))\nprintf %s 1,2\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // A case-pattern `)` nested inside a subshell must not end the body.
    #[test]
    fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
        let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with("))"));
    }

    // `case`/`in` as ordinary command arguments do not open a case construct.
    #[test]
    fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
        let source = "printf %s 1,2; echo case in)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("echo case in"));
        assert!(body.ends_with(')'));
    }
5150
    // In `$'...'` ANSI-C quoting, `\'` is an escaped quote, not a terminator.
    #[test]
    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
        let source = "printf %s $'a\\'b'; printf %s 1,2)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("$'a\\'b'"));
        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // A `)` inside backticks is part of that nested command, not our closer.
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
        let source = "printf %s `echo foo)`; printf %s ok)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("`echo foo)`"));
        assert!(body.contains("printf %s ok"));
        assert!(body.ends_with(')'));
    }

    // Backticks nested inside `${...}` keep their `}` and `)` from closing
    // the outer constructs.
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
        let source = "printf %s ${x/`echo }`/foo)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x/`echo }`/foo)},1"));
        assert!(body.ends_with(')'));
    }

    // A process substitution `<(...)` nested inside `${...}` is likewise opaque.
    #[test]
    fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
     {
        let source = "printf %s ${x/<(echo })/foo)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x/<(echo })/foo)},1"));
        assert!(body.ends_with(')'));
    }
5197
    // The following *_at_eof variants re-run earlier scenarios with the
    // closing `)` as the very last byte of input: the scan must consume the
    // entire source in each case.

    // Plain `case`/`in` argument words, closer at EOF.
    #[test]
    fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
        let source = "printf %s 1,2; echo case in)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // ANSI-C quoting with escaped single quote, closer at EOF.
    #[test]
    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
        let source = "printf %s $'a\\'b'; printf %s 1,2)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // Backticks containing `)`, closer at EOF.
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
        let source = "printf %s `echo foo)`; printf %s ok)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // Inner quotes in a pipeline, closer at EOF.
    #[test]
    fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
        let source = "echo \"$line\" | cut -d' ' -f2-)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // Braced parameter in a pipeline, closer at EOF.
    #[test]
    fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
        let source = "echo \"${@}\" | tr -d '[:space:]')";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // Tab-stripped heredoc, closer at EOF.
    #[test]
    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // Piped heredoc with no space before `|`, closer at EOF.
    #[test]
    fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
        let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }
5267
    // A quoted `)` produced by `$(printf ')')` inside `$(( ... ))` inside an
    // outer `$(...)` must not end any of the enclosing constructs.
    #[test]
    fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
        let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
        let mut lexer = Lexer::new(source);

        let first = lexer.next_lexed_token().expect("expected first token");
        assert!(first.kind.is_word_like(), "{:?}", first.kind);
        assert_eq!(first.word_string().as_deref(), Some("echo"));

        let second = lexer.next_lexed_token().expect("expected second token");
        assert!(second.kind.is_word_like(), "{:?}", second.kind);
        assert_eq!(
            second.word_string().as_deref(),
            Some("$(echo \"$(( $(printf ')') + 1 ))\")")
        );
    }

    // Escaped double quotes before the `$(` must not confuse quote tracking:
    // the body ends at the first unquoted `)`.
    #[test]
    fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
        let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
        let start = source.find("$(").expect("expected command substitution") + 2;
        let consumed =
            scan_command_substitution_body_len(&source[start..]).expect("expected match");
        assert_eq!(&source[start..start + consumed], "echo $var | tr A-Z a-z)");
    }

    // A nested `$(...)` is consumed whole; the outer scan stops at its own `)`.
    #[test]
    fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
        let source = "echo $(echo $(basename $filename .fuzz))";
        let start = source.find("$(").expect("expected command substitution") + 2;
        let consumed =
            scan_command_substitution_body_len(&source[start..]).expect("expected match");
        assert_eq!(
            &source[start..start + consumed],
            "echo $(basename $filename .fuzz))"
        );
    }

    // `[[ ... ]] && echo "$(...)"` inside the body: the whole input is the body.
    #[test]
    fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
        let source = "\n       [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n    )";
        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        assert_eq!(consumed, source.len());
    }
5312
    // `'foo'bar` yields a single LiteralWord with a single-quoted segment
    // followed by a plain segment, both mapping back into the source.
    #[test]
    fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
        let source = "'foo'bar";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::LiteralWord);

        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();

        assert_eq!(
            segments,
            vec![
                (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
                (LexedWordSegmentKind::Plain, "bar".to_string()),
            ]
        );
        assert_eq!(word.joined_text(), "foobar");
        assert_eq!(
            word.segments()
                .nth(1)
                .and_then(LexedWordSegment::span)
                .unwrap()
                .slice(source),
            "bar"
        );
    }

    // An unquoted `$(...)` word is one plain segment backed by the source.
    #[test]
    fn test_unquoted_command_substitution_word_keeps_source_backing() {
        let source = "$(printf hi)";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(segment.as_str(), source);
        assert_eq!(segment.span().unwrap().slice(source), source);
    }

    // A nested parameter expansion (`${#arr[@]}` inside the subscript) stays
    // one plain segment backed by the source.
    #[test]
    fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
        let source = "${arr[$RANDOM % ${#arr[@]}]}";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(segment.as_str(), source);
        assert_eq!(segment.span().unwrap().slice(source), source);
    }

    // A `$(...)` continuation after a quoted prefix is its own plain segment
    // with a span back into the source.
    #[test]
    fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
        let source = "\"foo\"$(printf hi)";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let continuation = word.segments().nth(1).unwrap();
        assert_eq!(continuation.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(continuation.as_str(), "$(printf hi)");
        assert_eq!(continuation.span().unwrap().slice(source), "$(printf hi)");
    }

    // The double-quoted variant of the nested-expansion case also keeps a
    // single source-backed segment (minus the outer quotes).
    #[test]
    fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
        let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
        assert_eq!(segment.as_str(), "${arr[$RANDOM % ${#arr[@]}]}");
        assert_eq!(
            segment.span().unwrap().slice(source),
            "${arr[$RANDOM % ${#arr[@]}]}"
        );
    }
5407
    // `$'\c''`: the `\c` control escape consumes the following quote character,
    // producing BEL (0x07) rather than ending the string early.
    #[test]
    fn test_ansi_c_control_escape_can_consume_quote() {
        let mut lexer = Lexer::new("echo $'\\c''");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("\x07"));
        assert!(lexer.next_lexed_token().is_none());
    }

    // `${out_line//'"'/'\"'}` contains quoted quote characters; the whole
    // assignment must remain a single token on one line.
    #[test]
    fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
        let source = r#"out_line="${out_line//'"'/'\"'}"
"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(r#"out_line=${out_line//'"'/'"'}"#),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }

    // The same expansion must not leave the lexer in a bad quote state that
    // swallows the commands (and heredoc) on the following lines.
    #[test]
    fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
        let source = r#"out_line="${out_line//'"'/'\"'}"
echo "Error: Missing python3!"
cat << 'EOF' > "${pywrapper}"
import os
EOF
"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(r#"out_line=${out_line//'"'/'"'}"#),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("Error: Missing python3!"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("EOF"));
        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("${pywrapper}"));
    }

    // `${crypt//\\/\\\\}`: escaped backslashes in pattern and replacement keep
    // the assignment a single token; the cooked text halves the backslashes.
    #[test]
    fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
        let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);
        assert_eq!(token.span.slice(source), "crypt=${crypt//\\\\/\\\\\\\\}");
        assert!(token.source_slice(source).is_none());
        assert_eq!(
            token.word_string().as_deref(),
            Some("crypt=${crypt//\\/\\\\}")
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5478
    // A literal `{` inside a `${response#*{...}` trim pattern must not open a
    // brace group and swallow the rest of the function body.
    #[test]
    fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
        let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n  if true; then\n    txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n  fi\n}\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("true"));
        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(
                "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
            ),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("fi"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }

    // A case pattern consisting of a bare `{` is a word, not a brace group;
    // the following `)` and the later arms must still tokenize normally.
    #[test]
    fn test_case_pattern_literal_left_brace_does_not_swallow_following_arms() {
        let source = "case \"$word\" in\n  {) : ;;\n  :) : ;;\nesac\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("case"));
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$word"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("in"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("esac"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5539
5540    #[test]
5541    fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
5542        let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
5543        let mut lexer = Lexer::new(source);
5544
5545        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5546        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5547        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5548        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5549        assert_next_token(&mut lexer, TokenKind::And, None);
5550        assert_next_token(&mut lexer, TokenKind::Word, Some("!"));
5551        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5552        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5553        assert_next_token(&mut lexer, TokenKind::Word, Some("=~"));
5554        assert_next_token(&mut lexer, TokenKind::Word, Some("^{"));
5555        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5556        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5557        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5558        assert_next_token(&mut lexer, TokenKind::Newline, None);
5559        assert!(lexer.next_lexed_token().is_none());
5560    }
5561
5562    #[test]
5563    fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
5564        let source = "echo -{$(echo a),b}-\n";
5565        let mut lexer = Lexer::new(source);
5566
5567        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5568        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$(echo a),b}-"));
5569        assert_next_token(&mut lexer, TokenKind::Newline, None);
5570        assert!(lexer.next_lexed_token().is_none());
5571    }
5572
5573    #[test]
5574    fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
5575        let source = "echo -{$((1 + 2)),b}-\n";
5576        let mut lexer = Lexer::new(source);
5577
5578        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5579        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$((1 + 2)),b}-"));
5580        assert_next_token(&mut lexer, TokenKind::Newline, None);
5581        assert!(lexer.next_lexed_token().is_none());
5582    }
5583
5584    #[test]
5585    fn test_operators() {
5586        let mut lexer = Lexer::new("a |& b | c && d || e; f &");
5587
5588        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5589        assert_next_token(&mut lexer, TokenKind::PipeBoth, None);
5590        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5591        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5592        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5593        assert_next_token(&mut lexer, TokenKind::And, None);
5594        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5595        assert_next_token(&mut lexer, TokenKind::Or, None);
5596        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5597        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5598        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5599        assert_next_token(&mut lexer, TokenKind::Background, None);
5600        assert!(lexer.next_lexed_token().is_none());
5601    }
5602
5603    #[test]
5604    fn test_double_left_bracket_requires_separator() {
5605        let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");
5606
5607        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5608        assert_next_token(&mut lexer, TokenKind::Word, Some("foo"));
5609        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5610        assert_next_token(&mut lexer, TokenKind::Newline, None);
5611        assert_next_token(&mut lexer, TokenKind::Word, Some("[[z]"));
5612        assert_next_token(&mut lexer, TokenKind::Newline, None);
5613        assert!(lexer.next_lexed_token().is_none());
5614    }
5615
5616    #[test]
5617    fn test_redirects() {
5618        let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");
5619
5620        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5621        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5622        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5623        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5624        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5625        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5626        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5627        assert_next_token(&mut lexer, TokenKind::RedirectFdAppend, None);
5628        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5629        let token = lexer.next_lexed_token().unwrap();
5630        assert_eq!(token.kind, TokenKind::Clobber);
5631        assert_eq!(token.fd_value(), Some(2));
5632        assert_eq!(token_text(&token, lexer.input), None);
5633        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5634        assert_next_token(&mut lexer, TokenKind::RedirectIn, None);
5635        assert_next_token(&mut lexer, TokenKind::Word, Some("g"));
5636        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5637        assert_next_token(&mut lexer, TokenKind::Word, Some("h"));
5638        assert_next_token(&mut lexer, TokenKind::HereString, None);
5639        assert_next_token(&mut lexer, TokenKind::Word, Some("i"));
5640        assert_next_token(&mut lexer, TokenKind::RedirectBothAppend, None);
5641        assert_next_token(&mut lexer, TokenKind::Word, Some("j"));
5642        assert_next_token(&mut lexer, TokenKind::RedirectReadWrite, None);
5643        assert_next_token(&mut lexer, TokenKind::Word, Some("k"));
5644    }
5645
5646    #[test]
5647    fn test_comment() {
5648        let mut lexer = Lexer::new("echo hello # this is a comment\necho world");
5649
5650        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5651        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5652        assert_next_token(&mut lexer, TokenKind::Newline, None);
5653        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5654        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5655    }
5656
5657    #[test]
5658    fn test_comment_token_with_span() {
5659        let mut lexer = Lexer::new("# lead\necho hi # tail");
5660
5661        let comment = lexer.next_lexed_token_with_comments().unwrap();
5662        assert_eq!(comment.kind, TokenKind::Comment);
5663        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
5664        assert_eq!(comment.span.start.line, 1);
5665        assert_eq!(comment.span.start.column, 1);
5666        assert_eq!(comment.span.end.line, 1);
5667        assert_eq!(comment.span.end.column, 7);
5668
5669        assert_next_token(&mut lexer, TokenKind::Newline, None);
5670        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5671        assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));
5672
5673        let inline = lexer.next_lexed_token_with_comments().unwrap();
5674        assert_eq!(inline.kind, TokenKind::Comment);
5675        assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
5676        assert_eq!(inline.span.start.line, 2);
5677        assert_eq!(inline.span.start.column, 9);
5678    }
5679
5680    #[test]
5681    fn test_comment_token_preserves_hash_boundaries() {
5682        let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");
5683
5684        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5685        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
5686        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
5687        assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
5688        assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
5689        assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
5690        assert!(lexer.next_lexed_token_with_comments().is_none());
5691    }
5692
5693    #[test]
5694    fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5695        let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5696
5697        let mut saw_comment = false;
5698        while let Some(token) = lexer.next_lexed_token_with_comments() {
5699            if token.kind == TokenKind::Comment {
5700                saw_comment = true;
5701                break;
5702            }
5703        }
5704
5705        assert!(
5706            !saw_comment,
5707            "zsh inline glob controls inside [[ ]] should not lex as comments"
5708        );
5709    }
5710
5711    #[test]
5712    fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5713        let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5714
5715        let mut saw_comment = false;
5716        while let Some(token) = lexer.next_lexed_token_with_comments() {
5717            if token.kind == TokenKind::Comment {
5718                saw_comment = true;
5719                break;
5720            }
5721        }
5722
5723        assert!(
5724            !saw_comment,
5725            "zsh arithmetic char literals inside (( )) should not lex as comments"
5726        );
5727    }
5728
    #[test]
    fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
        // A zsh-style `${1//pattern/replacement}` with embedded double quotes,
        // a `(#m)` glob flag, and nested `${(...)...}` / `$(( ))` inside a
        // double-quoted word must lex as ONE QuotedWord rather than splitting
        // at the inner quote characters.
        let mut lexer = Lexer::new(
            "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
        );

        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
        // Single-quoted argument surfaces as a LiteralWord.
        assert_next_token(
            &mut lexer,
            TokenKind::LiteralWord,
            Some("\\e]133;C;cmdline_url=%s\\a"),
        );
        // The whole replacement expression stays a single QuotedWord.
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
    }
5749
    #[test]
    fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
        // zsh anonymous function `() { ... } "$1"`: the deeply nested
        // replacement word inside the body must not desynchronize brace
        // tracking — the closing `}` still lexes as RightBrace and the
        // trailing `"$1"` argument survives.
        let mut lexer = Lexer::new(
            "() {\n  builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
        );

        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
        assert_next_token(
            &mut lexer,
            TokenKind::LiteralWord,
            Some("\\e]133;C;cmdline_url=%s\\a"),
        );
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        // Closing brace and trailing argument after the function body.
        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
    }
5777
5778    #[test]
5779    fn test_variable_words() {
5780        let mut lexer = Lexer::new("echo $HOME $USER");
5781
5782        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5783        assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
5784        assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
5785        assert!(lexer.next_lexed_token().is_none());
5786    }
5787
5788    #[test]
5789    fn test_pipeline_tokens() {
5790        let mut lexer = Lexer::new("echo hello | cat");
5791
5792        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5793        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5794        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5795        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5796        assert!(lexer.next_lexed_token().is_none());
5797    }
5798
5799    #[test]
5800    fn test_read_heredoc() {
5801        // Simulate state after reading "cat <<EOF" - positioned at newline before content
5802        let mut lexer = Lexer::new("\nhello\nworld\nEOF");
5803        let content = lexer.read_heredoc("EOF", false);
5804        assert_eq!(content.content, "hello\nworld\n");
5805    }
5806
5807    #[test]
5808    fn test_read_heredoc_single_line() {
5809        let mut lexer = Lexer::new("\ntest\nEOF");
5810        let content = lexer.read_heredoc("EOF", false);
5811        assert_eq!(content.content, "test\n");
5812    }
5813
5814    #[test]
5815    fn test_read_heredoc_full_scenario() {
5816        // Full scenario: "cat <<EOF\nhello\nworld\nEOF"
5817        let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
5818
5819        // Parser would read these tokens
5820        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5821        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5822        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5823
5824        // Now read heredoc content
5825        let content = lexer.read_heredoc("EOF", false);
5826        assert_eq!(content.content, "hello\nworld\n");
5827    }
5828
5829    #[test]
5830    fn test_read_heredoc_with_redirect() {
5831        // Rest-of-line (> file.txt) is re-injected into the lexer buffer
5832        let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
5833        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5834        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5835        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5836        let content = lexer.read_heredoc("EOF", false);
5837        assert_eq!(content.content, "hello\n");
5838        // The redirect tokens are now available from the lexer
5839        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5840        assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
5841    }
5842
5843    #[test]
5844    fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
5845        let source = "cat <<EOF | grep hello \\\n  | sort \\\n  > out.txt\nhello\nEOF\n";
5846        let mut lexer = Lexer::new(source);
5847
5848        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5849        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5850        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5851
5852        let heredoc = lexer.read_heredoc("EOF", false);
5853        assert_eq!(heredoc.content, "hello\n");
5854
5855        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5856        assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
5857        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5858        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5859        assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
5860        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5861        assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
5862    }
5863
5864    #[test]
5865    fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
5866        let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
5867        let mut lexer = Lexer::new(source);
5868
5869        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5870        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5871        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5872
5873        let heredoc = lexer.read_heredoc("EOF", false);
5874        assert_eq!(heredoc.content, "1\n2\n3\n");
5875    }
5876
5877    #[test]
5878    fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
5879        let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
5880        let mut lexer = Lexer::new(source);
5881
5882        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5883        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5884        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5885
5886        let heredoc = lexer.read_heredoc("EOF", false);
5887        assert_eq!(heredoc.content, "body\n");
5888    }
5889
5890    #[test]
5891    fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
5892        let source = "cat <<EOF # note \\\nbody\nEOF\n";
5893        let mut lexer = Lexer::new(source);
5894
5895        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5896        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5897        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5898
5899        let heredoc = lexer.read_heredoc("EOF", false);
5900        assert_eq!(heredoc.content, "body\n");
5901    }
5902
5903    #[test]
5904    fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
5905        let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
5906        let mut lexer = Lexer::new(source);
5907
5908        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5909        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5910        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5911        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5912
5913        let heredoc = lexer.read_heredoc("EOF", false);
5914        assert_eq!(heredoc.content, "body\n");
5915
5916        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5917    }
5918
5919    #[test]
5920    fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
5921        let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
5922        let mut lexer = Lexer::new(source);
5923
5924        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5925        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5926        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5927
5928        let heredoc = lexer.read_heredoc("EOF", false);
5929        assert_eq!(heredoc.content, "1\n");
5930
5931        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5932        assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
5933    }
5934
5935    #[test]
5936    fn test_read_heredoc_with_redirect_preserves_following_spans() {
5937        let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
5938        let mut lexer = Lexer::new(source);
5939
5940        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5941        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5942        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5943
5944        let heredoc = lexer.read_heredoc("EOF", false);
5945        assert_eq!(heredoc.content, "hello\n");
5946
5947        let redirect = lexer.next_lexed_token_with_comments().unwrap();
5948        assert_eq!(redirect.kind, TokenKind::RedirectOut);
5949        assert_eq!(redirect.span.slice(source), ">");
5950
5951        let target = lexer.next_lexed_token_with_comments().unwrap();
5952        assert_eq!(target.kind, TokenKind::Word);
5953        assert_eq!(
5954            token_text(&target, lexer.input).as_deref(),
5955            Some("file.txt")
5956        );
5957        assert_eq!(target.span.slice(source), "file.txt");
5958
5959        let newline = lexer.next_lexed_token_with_comments().unwrap();
5960        assert_eq!(newline.kind, TokenKind::Newline);
5961        assert_eq!(newline.span.slice(source), "\n");
5962
5963        let comment = lexer.next_lexed_token_with_comments().unwrap();
5964        assert_eq!(comment.kind, TokenKind::Comment);
5965        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
5966        assert_eq!(comment.span.slice(source), "# done");
5967    }
5968
5969    #[test]
5970    fn test_comment_with_unicode() {
5971        // Comment containing multi-byte UTF-8 characters
5972        let source = "# café résumé\necho ok";
5973        let mut lexer = Lexer::new(source);
5974
5975        let comment = lexer.next_lexed_token_with_comments().unwrap();
5976        assert_eq!(comment.kind, TokenKind::Comment);
5977        assert_eq!(
5978            token_text(&comment, lexer.input).as_deref(),
5979            Some(" café résumé")
5980        );
5981        // Span should cover exactly the comment bytes (including #)
5982        let start = comment.span.start.offset;
5983        let end = comment.span.end.offset;
5984        assert_eq!(start, 0);
5985        assert_eq!(&source[start..end], "# café résumé");
5986        assert!(source.is_char_boundary(start));
5987        assert!(source.is_char_boundary(end));
5988
5989        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
5990        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5991    }
5992
5993    #[test]
5994    fn test_comment_with_cjk_characters() {
5995        // CJK characters are 3-byte UTF-8; offsets must land on char boundaries
5996        let source = "# 你好世界\necho ok";
5997        let mut lexer = Lexer::new(source);
5998
5999        let comment = lexer.next_lexed_token_with_comments().unwrap();
6000        assert_eq!(comment.kind, TokenKind::Comment);
6001        assert_eq!(
6002            token_text(&comment, lexer.input).as_deref(),
6003            Some(" 你好世界")
6004        );
6005        let start = comment.span.start.offset;
6006        let end = comment.span.end.offset;
6007        assert_eq!(&source[start..end], "# 你好世界");
6008        assert!(source.is_char_boundary(start));
6009        assert!(source.is_char_boundary(end));
6010    }
6011
6012    #[test]
6013    fn test_heredoc_with_comments_inside() {
6014        // Comments inside heredoc body should NOT appear as comment tokens
6015        let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
6016        let mut lexer = Lexer::new(source);
6017
6018        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6019        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6020        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6021
6022        let heredoc = lexer.read_heredoc("EOF", false);
6023        assert_eq!(heredoc.content, "# not a comment\nreal line\n");
6024
6025        // After heredoc, replayed line termination should appear before
6026        // tokens from following source lines.
6027        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6028        let comment = lexer.next_lexed_token_with_comments().unwrap();
6029        assert_eq!(comment.kind, TokenKind::Comment);
6030        assert_eq!(
6031            token_text(&comment, lexer.input).as_deref(),
6032            Some(" real comment")
6033        );
6034    }
6035
6036    #[test]
6037    fn test_heredoc_with_hash_in_variable() {
6038        // ${var#pattern} inside heredoc should not produce comment tokens
6039        let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
6040        let mut lexer = Lexer::new(source);
6041
6042        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6043        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6044        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6045
6046        let heredoc = lexer.read_heredoc("EOF", false);
6047        assert_eq!(heredoc.content, "val=${x#prefix}\n");
6048    }
6049
6050    #[test]
6051    fn test_heredoc_span_does_not_leak() {
6052        // Heredoc content span must be within source bounds and must not
6053        // overlap with content before or after.
6054        let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
6055        let mut lexer = Lexer::new(source);
6056
6057        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6058        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6059        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6060
6061        let heredoc = lexer.read_heredoc("EOF", false);
6062        let start = heredoc.content_span.start.offset;
6063        let end = heredoc.content_span.end.offset;
6064        assert!(
6065            end <= source.len(),
6066            "heredoc span end ({end}) exceeds source length ({})",
6067            source.len()
6068        );
6069        assert_eq!(&source[start..end], "hello\nworld\n");
6070
6071        // Tokens after heredoc should still parse correctly
6072        assert_next_token(&mut lexer, TokenKind::Newline, None);
6073        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6074        assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
6075    }
6076
    #[test]
    fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
        // A quoted delimiter (`<<\_ACEOF`) with a lone backtick inside the
        // body must not leak quoting state: the backtick-substitution words
        // on the lines AFTER the heredoc must keep accurate spans and
        // segment boundaries.
        let source = "\
cat <<\\_ACEOF
Use these variables to override the choices made by `configure' or to help
it to find libraries and programs with nonstandard names/locations.
_ACEOF
ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
";
        let mut lexer = Lexer::new(source);

        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
        // The delimiter token's span includes the leading escape backslash.
        let delimiter = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(delimiter.kind, TokenKind::Word);
        assert_eq!(delimiter.span.slice(source), "\\_ACEOF");

        let heredoc = lexer.read_heredoc("_ACEOF", false);
        assert_eq!(
            heredoc.content,
            "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
        );

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);

        // First post-heredoc word: a plain prefix followed by a backtick
        // command substitution, each with a span slicing back to source.
        let first = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(first.kind, TokenKind::Word);
        assert_eq!(
            first.span.slice(source),
            "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
        );
        let first_segments = first
            .word()
            .unwrap()
            .segments()
            .map(|segment| {
                (
                    segment.kind(),
                    segment.as_str().to_string(),
                    segment.span().map(|span| span.slice(source).to_string()),
                )
            })
            .collect::<Vec<_>>();
        assert_eq!(
            first_segments,
            vec![
                (
                    LexedWordSegmentKind::Plain,
                    "ac_dir_suffix=/".to_string(),
                    Some("ac_dir_suffix=/".to_string()),
                ),
                (
                    LexedWordSegmentKind::Plain,
                    "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
                    Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
                ),
            ]
        );

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);

        // Second post-heredoc word: same shape, different assignment.
        let second = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(second.kind, TokenKind::Word);
        assert_eq!(
            second.span.slice(source),
            "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
        );
        let second_segments = second
            .word()
            .unwrap()
            .segments()
            .map(|segment| {
                (
                    segment.kind(),
                    segment.as_str().to_string(),
                    segment.span().map(|span| span.slice(source).to_string()),
                )
            })
            .collect::<Vec<_>>();
        assert_eq!(
            second_segments,
            vec![
                (
                    LexedWordSegmentKind::Plain,
                    "ac_top_builddir_sub=".to_string(),
                    Some("ac_top_builddir_sub=".to_string()),
                ),
                (
                    LexedWordSegmentKind::Plain,
                    "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
                    Some(
                        "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
                            .to_string(),
                    ),
                ),
            ]
        );
    }
6176
6177    #[test]
6178    fn test_heredoc_with_unicode_content() {
6179        // Heredoc containing multi-byte characters; spans must be on char boundaries
6180        let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
6181        let mut lexer = Lexer::new(source);
6182
6183        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6184        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6185        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6186
6187        let heredoc = lexer.read_heredoc("EOF", false);
6188        assert_eq!(heredoc.content, "# 你好\ncafé\n");
6189        let start = heredoc.content_span.start.offset;
6190        let end = heredoc.content_span.end.offset;
6191        assert!(
6192            source.is_char_boundary(start),
6193            "heredoc span start ({start}) not on char boundary"
6194        );
6195        assert!(
6196            source.is_char_boundary(end),
6197            "heredoc span end ({end}) not on char boundary"
6198        );
6199        assert_eq!(&source[start..end], "# 你好\ncafé\n");
6200    }
6201
6202    #[test]
6203    fn test_assoc_compound_assignment() {
6204        // declare -A m=([foo]="bar" [baz]="qux") should keep the compound
6205        // assignment as a single Word token
6206        let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6207        assert_next_token(
6208            &mut lexer,
6209            TokenKind::Word,
6210            Some(r#"m=([foo]="bar" [baz]="qux")"#),
6211        );
6212        assert!(lexer.next_lexed_token().is_none());
6213    }
6214
6215    #[test]
6216    fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6217        let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6218        let mut lexer = Lexer::new(source);
6219
6220        let token = lexer.next_lexed_token().unwrap();
6221        assert_eq!(token.kind, TokenKind::Word);
6222        assert_eq!(token.span.slice(source), source);
6223        assert!(lexer.next_lexed_token().is_none());
6224    }
6225
6226    #[test]
6227    fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6228        let source = r#"foo\_bar@(baz|qux)"#;
6229        let mut lexer = Lexer::new(source);
6230
6231        let token = lexer.next_lexed_token().unwrap();
6232        assert_eq!(token.kind, TokenKind::Word);
6233        assert_eq!(token.span.slice(source), source);
6234        assert!(lexer.next_lexed_token().is_none());
6235    }
6236
6237    #[test]
6238    fn test_indexed_array_not_collapsed() {
6239        // arr=("hello world") should NOT be collapsed — parser handles
6240        // quoted elements token-by-token via the LeftParen path
6241        let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6242        assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6243        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6244    }
6245
6246    #[test]
6247    fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
6248        let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
6249        let mut lexer = Lexer::new(source);
6250
6251        assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
6252        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6253
6254        let token = lexer.next_lexed_token().unwrap();
6255        assert_eq!(token.kind, TokenKind::Word);
6256        assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);
6257
6258        let word = token.word().unwrap();
6259        let segments: Vec<_> = word
6260            .segments()
6261            .map(|segment| (segment.kind(), segment.as_str().to_string()))
6262            .collect();
6263        assert_eq!(
6264            segments,
6265            vec![
6266                (
6267                    LexedWordSegmentKind::DoubleQuoted,
6268                    "$plugin_dir".to_string()
6269                ),
6270                (LexedWordSegmentKind::Plain, "/*".to_string()),
6271                (LexedWordSegmentKind::Plain, "(:t)".to_string()),
6272            ]
6273        );
6274
6275        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6276        assert!(lexer.next_lexed_token().is_none());
6277    }
6278
6279    #[test]
6280    fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
6281        let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
6282        let mut lexer = Lexer::new(source);
6283
6284        assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
6285        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6286
6287        let token = lexer.next_lexed_token().unwrap();
6288        assert_eq!(token.kind, TokenKind::Word);
6289        assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);
6290
6291        let word = token.word().unwrap();
6292        let segments: Vec<_> = word
6293            .segments()
6294            .map(|segment| (segment.kind(), segment.as_str().to_string()))
6295            .collect();
6296        assert_eq!(
6297            segments,
6298            vec![
6299                (
6300                    LexedWordSegmentKind::DoubleQuoted,
6301                    "$__GREP_CACHE_FILE".to_string()
6302                ),
6303                (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
6304            ]
6305        );
6306
6307        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6308        assert!(lexer.next_lexed_token().is_none());
6309    }
6310
6311    #[test]
6312    fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6313        let source = r#"$dir/${~pats}(N)"#;
6314        let mut lexer = Lexer::new(source);
6315
6316        let token = lexer.next_lexed_token().unwrap();
6317        assert_eq!(token.kind, TokenKind::Word);
6318        assert_eq!(token.span.slice(source), source);
6319        assert!(lexer.next_lexed_token().is_none());
6320    }
6321
6322    #[test]
6323    fn test_dollar_word_does_not_absorb_function_parens() {
6324        let mut lexer = Lexer::new(r#"foo$x()"#);
6325
6326        assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6327        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6328        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6329        assert!(lexer.next_lexed_token().is_none());
6330    }
6331
6332    #[test]
6333    fn test_command_substitution_word_does_not_absorb_function_parens() {
6334        let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6335
6336        assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6337        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6338        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6339        assert!(lexer.next_lexed_token().is_none());
6340    }
6341
6342    /// Regression test for fuzz crash: single digit at EOF should not panic
6343    /// (crash-13c5f6f887a11b2296d67f9857975d63b205ac4b)
6344    #[test]
6345    fn test_digit_at_eof_no_panic() {
6346        // A lone digit with no following redirect operator must not panic
6347        let mut lexer = Lexer::new("2");
6348        let token = lexer.next_lexed_token();
6349        assert!(token.is_some());
6350    }
6351
6352    /// Issue #599: Nested ${...} inside unquoted ${...} must be a single token.
6353    #[test]
6354    fn test_nested_brace_expansion_single_token() {
6355        // ${arr[${#arr[@]} - 1]} should be ONE word token, not split at inner }
6356        let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6357        assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6358        // No more tokens — everything was consumed
6359        assert!(lexer.next_lexed_token().is_none());
6360    }
6361
6362    /// Simple ${var} still works after brace depth change.
6363    #[test]
6364    fn test_simple_brace_expansion_unchanged() {
6365        let mut lexer = Lexer::new("${foo}");
6366        assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6367        assert!(lexer.next_lexed_token().is_none());
6368    }
6369
6370    #[test]
6371    fn test_nvm_fixture_lexes_without_stalling() {
6372        let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6373        let mut lexer = Lexer::new(input);
6374        let mut tokens = 0usize;
6375
6376        while lexer.next_lexed_token().is_some() {
6377            tokens += 1;
6378            assert!(
6379                tokens < 100_000,
6380                "lexer should continue making progress on the nvm fixture"
6381            );
6382        }
6383
6384        assert!(tokens > 0, "nvm fixture should produce at least one token");
6385    }
6386
6387    #[test]
6388    fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
6389        let input = concat!(
6390            "case \"${_input_type:-}\" in\n",
6391            "  html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
6392            "  org)  _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
6393            "esac\n",
6394        );
6395
6396        assert_non_newline_tokens_stay_on_one_line(input);
6397
6398        let mut lexer = Lexer::new(input);
6399        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6400            .map(|token| (token.kind, token_text(&token, input)))
6401            .collect::<Vec<_>>();
6402        assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
6403        assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
6404    }
6405
6406    #[test]
6407    fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6408        let input = concat!(
6409            "case $2 in\n",
6410            "  cygwin*) bin='cygwin32/bin' ;|\n",
6411            "esac\n",
6412        );
6413
6414        let mut lexer = Lexer::new(input);
6415        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6416            .map(|token| (token.kind, token_text(&token, input)))
6417            .collect::<Vec<_>>();
6418
6419        assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6420        assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6421        assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6422    }
6423
6424    #[test]
6425    fn test_inline_if_with_array_append_stays_line_local() {
6426        let input = concat!(
6427            "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
6428            "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
6429        );
6430
6431        assert_non_newline_tokens_stay_on_one_line(input);
6432    }
6433
6434    #[test]
6435    fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6436        let source = "unsetopt interactive_comments\n#literal\n";
6437        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6438        let mut lexer = Lexer::with_profile(source, &profile);
6439
6440        assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6441        assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6442        assert_next_token(&mut lexer, TokenKind::Newline, None);
6443        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6444    }
6445
6446    #[test]
6447    fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
6448        let source = "setopt rc_quotes\nprint 'a''b'\n";
6449        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6450        let mut lexer = Lexer::with_profile(source, &profile);
6451
6452        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6453        assert_next_token(&mut lexer, TokenKind::Word, Some("rc_quotes"));
6454        assert_next_token(&mut lexer, TokenKind::Newline, None);
6455        assert_next_token(&mut lexer, TokenKind::Word, Some("print"));
6456        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("a'b"));
6457    }
6458
6459    #[test]
6460    fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
6461        let source = "setopt ignore_braces\n{ echo }\n";
6462        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6463        let mut lexer = Lexer::with_profile(source, &profile);
6464
6465        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6466        assert_next_token(&mut lexer, TokenKind::Word, Some("ignore_braces"));
6467        assert_next_token(&mut lexer, TokenKind::Newline, None);
6468        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
6469        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6470        assert_next_token(&mut lexer, TokenKind::Word, Some("}"));
6471    }
6472
    #[test]
    fn test_heredoc_in_arithmetic_fuzz_crash() {
        // Regression test: the fuzzer found that heredoc re-injection inside
        // arithmetic context can push self.offset past self.input.len(),
        // causing a panic in read_unquoted_segment's borrowed-slice path.
        //
        // The fuzzer-minimized input is kept as raw bytes because it contains
        // control characters (0x00, 0x04, 0x06, 0x0e, 0x1a) that are awkward
        // to spell in a string literal; it is still valid UTF-8 (checked by
        // from_utf8 below). Do not reformat or "clean up" these bytes — the
        // exact sequence is what reproduced the crash.
        let data: &[u8] = &[
            35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
            40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
            33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
            119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
            0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
            119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
            122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
            49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
            119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
            122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
            61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
            32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
            122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
            39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
            122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
            32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
            0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
        ];
        let input = std::str::from_utf8(data).unwrap();
        // Wrap the payload in an arithmetic expansion to hit the heredoc-in-
        // arithmetic path end-to-end through the parser.
        let script = format!("echo $(({input}))\n");
        // Must not panic. The parse result itself is irrelevant; reaching
        // this point without a panic is the assertion.
        let _ = crate::parser::Parser::new(&script).parse();
    }
6504}