Skip to main content

shuck_parser/parser/
lexer.rs

1//! Lexer for bash scripts
2//!
3//! Tokenizes input into a stream of tokens with source position tracking.
4
5use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Bit marking tokens whose text was decoded into an owned string.
    const COOKED_TEXT: u8 = 1 << 0;
    /// Bit marking tokens injected by the lexer/parser rather than read verbatim.
    const SYNTHETIC: u8 = 1 << 1;

    /// Flag set with no bits enabled.
    const fn empty() -> Self {
        Self(0)
    }

    /// Flag set with only the cooked-text bit enabled.
    const fn cooked_text() -> Self {
        Self(Self::COOKED_TEXT)
    }

    /// Copy of `self` with the synthetic bit additionally enabled.
    pub(crate) const fn with_synthetic(self) -> Self {
        Self(self.0 | Self::SYNTHETIC)
    }

    /// Whether the cooked-text bit is enabled.
    pub(crate) const fn has_cooked_text(self) -> bool {
        self.0 & Self::COOKED_TEXT == Self::COOKED_TEXT
    }

    /// Whether the synthetic bit is enabled.
    pub(crate) const fn is_synthetic(self) -> bool {
        self.0 & Self::SYNTHETIC == Self::SYNTHETIC
    }
}
40
/// Backing storage for a token's text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenText<'a> {
    /// Slice borrowed directly from the source being lexed.
    Borrowed(&'a str),
    /// Range into a reference-counted copy of the source; lets the text
    /// outlive the `'a` borrow without copying the characters.
    Shared {
        source: Arc<str>,
        range: Range<usize>,
    },
    /// Decoded ("cooked") text that no longer corresponds to a source slice.
    Owned(String),
}
50
51impl TokenText<'_> {
52    pub(crate) fn as_str(&self) -> &str {
53        match self {
54            Self::Borrowed(text) => text,
55            Self::Shared { source, range } => &source[range.clone()],
56            Self::Owned(text) => text,
57        }
58    }
59
60    fn into_owned<'a>(self) -> TokenText<'a> {
61        match self {
62            Self::Borrowed(text) => TokenText::Owned(text.to_string()),
63            Self::Shared { source, range } => TokenText::Shared { source, range },
64            Self::Owned(text) => TokenText::Owned(text),
65        }
66    }
67
68    fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
69        match self {
70            Self::Borrowed(text) => span
71                .filter(|span| span.end.offset <= source.len())
72                .map_or_else(
73                    || TokenText::Owned(text.to_string()),
74                    |span| TokenText::Shared {
75                        source: Arc::clone(source),
76                        range: span.start.offset..span.end.offset,
77                    },
78                ),
79            Self::Shared { source, range } => TokenText::Shared { source, range },
80            Self::Owned(text) => TokenText::Owned(text),
81        }
82    }
83}
84
/// Classification of one segment inside a lexed shell word.
///
/// Reported by [`LexedWordSegment::kind`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexedWordSegmentKind {
    /// Unquoted or otherwise plain text.
    Plain,
    /// Text from a single-quoted string.
    SingleQuoted,
    /// Text from a `$'...'` string.
    DollarSingleQuoted,
    /// Text from a double-quoted string.
    DoubleQuoted,
    /// Text from a `$"..."` string.
    DollarDoubleQuoted,
    /// Text composed from multiple lexical forms.
    Composite,
}
101
/// One segment of a lexed shell word, optionally backed by source text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedWordSegment<'a> {
    // Lexical classification (plain, single-quoted, ...).
    kind: LexedWordSegmentKind,
    // Cooked text; may borrow the source or own a decoded copy.
    text: TokenText<'a>,
    // Span of the inner text, when tracked.
    span: Option<Span>,
    // Span including surrounding quote syntax, when tracked.
    wrapper_span: Option<Span>,
}
110
111impl<'a> LexedWordSegment<'a> {
112    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
113        Self {
114            kind,
115            text: TokenText::Borrowed(text),
116            span,
117            wrapper_span: span,
118        }
119    }
120
121    fn borrowed_with_spans(
122        kind: LexedWordSegmentKind,
123        text: &'a str,
124        span: Option<Span>,
125        wrapper_span: Option<Span>,
126    ) -> Self {
127        Self {
128            kind,
129            text: TokenText::Borrowed(text),
130            span,
131            wrapper_span,
132        }
133    }
134
135    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
136        Self {
137            kind,
138            text: TokenText::Owned(text),
139            span: None,
140            wrapper_span: None,
141        }
142    }
143
144    fn owned_with_spans(
145        kind: LexedWordSegmentKind,
146        text: String,
147        span: Option<Span>,
148        wrapper_span: Option<Span>,
149    ) -> Self {
150        Self {
151            kind,
152            text: TokenText::Owned(text),
153            span,
154            wrapper_span,
155        }
156    }
157
158    /// Borrow this segment's cooked text.
159    pub fn as_str(&self) -> &str {
160        self.text.as_str()
161    }
162
163    pub(crate) const fn text_is_source_backed(&self) -> bool {
164        matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
165    }
166
167    /// Return the lexical classification of this segment.
168    pub const fn kind(&self) -> LexedWordSegmentKind {
169        self.kind
170    }
171
172    /// Return the span of the inner text, if it is tracked.
173    pub const fn span(&self) -> Option<Span> {
174        self.span
175    }
176
177    /// Return the span including surrounding quoting syntax when available.
178    pub fn wrapper_span(&self) -> Option<Span> {
179        self.wrapper_span.or(self.span)
180    }
181
182    fn rebased(mut self, base: Position) -> Self {
183        self.span = self.span.map(|span| span.rebased(base));
184        self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
185        self
186    }
187
188    fn into_owned<'b>(self) -> LexedWordSegment<'b> {
189        LexedWordSegment {
190            kind: self.kind,
191            text: self.text.into_owned(),
192            span: self.span,
193            wrapper_span: self.wrapper_span,
194        }
195    }
196
197    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
198        LexedWordSegment {
199            kind: self.kind,
200            text: self.text.into_shared(source, self.span),
201            span: self.span,
202            wrapper_span: self.wrapper_span,
203        }
204    }
205}
206
/// Source-backed representation of a shell word produced by the lexer.
///
/// Stored as a mandatory primary segment plus optional trailing segments, so
/// the common single-segment case keeps an empty `Vec`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedWord<'a> {
    primary_segment: LexedWordSegment<'a>,
    trailing_segments: Vec<LexedWordSegment<'a>>,
}
213
214impl<'a> LexedWord<'a> {
215    fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
216        Self {
217            primary_segment,
218            trailing_segments: Vec::new(),
219        }
220    }
221
222    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
223        Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
224    }
225
226    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
227        Self::from_segment(LexedWordSegment::owned(kind, text))
228    }
229
230    fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
231        self.trailing_segments.push(segment);
232    }
233
234    /// Iterate over the segments that make up this word.
235    pub fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
236        std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
237    }
238
239    /// Return the word text when it is represented by a single segment.
240    pub fn text(&self) -> Option<&str> {
241        self.single_segment().map(LexedWordSegment::as_str)
242    }
243
244    /// Join all segments into an owned string.
245    pub fn joined_text(&self) -> String {
246        let mut text = String::new();
247        for segment in self.segments() {
248            text.push_str(segment.as_str());
249        }
250        text
251    }
252
253    /// Return the only segment when this word is not segmented.
254    pub fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
255        self.trailing_segments
256            .is_empty()
257            .then_some(&self.primary_segment)
258    }
259
260    fn has_cooked_text(&self) -> bool {
261        self.segments()
262            .any(|segment| matches!(segment.text, TokenText::Owned(_)))
263    }
264
265    fn rebased(mut self, base: Position) -> Self {
266        self.primary_segment = self.primary_segment.rebased(base);
267        self.trailing_segments = self
268            .trailing_segments
269            .into_iter()
270            .map(|segment| segment.rebased(base))
271            .collect();
272        self
273    }
274
275    fn into_owned<'b>(self) -> LexedWord<'b> {
276        LexedWord {
277            primary_segment: self.primary_segment.into_owned(),
278            trailing_segments: self
279                .trailing_segments
280                .into_iter()
281                .map(LexedWordSegment::into_owned)
282                .collect(),
283        }
284    }
285
286    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
287        LexedWord {
288            primary_segment: self.primary_segment.into_shared(source),
289            trailing_segments: self
290                .trailing_segments
291                .into_iter()
292                .map(|segment| segment.into_shared(source))
293                .collect(),
294        }
295    }
296}
297
/// Kinds of lexer error payloads attached to `TokenKind::Error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexerErrorKind {
    /// Unterminated `$()` command substitution.
    CommandSubstitution,
    /// Unterminated backtick command substitution.
    BacktickSubstitution,
    /// Unterminated single-quoted string.
    SingleQuote,
    /// Unterminated double-quoted string.
    DoubleQuote,
}

impl LexerErrorKind {
    /// Human-readable message for this lexer error kind.
    pub const fn message(self) -> &'static str {
        match self {
            Self::SingleQuote => "unterminated single quote",
            Self::DoubleQuote => "unterminated double quote",
            Self::CommandSubstitution => "unterminated command substitution",
            Self::BacktickSubstitution => "unterminated backtick substitution",
        }
    }
}
322
/// Data attached to a token beyond its kind and span.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenPayload<'a> {
    /// No payload (punctuation, comments, ...).
    None,
    /// Word text for word-like tokens.
    Word(LexedWord<'a>),
    /// Single file descriptor for redirection tokens.
    Fd(i32),
    /// `(source_fd, target_fd)` pair for descriptor-pair redirections.
    FdPair(i32, i32),
    /// Error classification for `TokenKind::Error` tokens.
    Error(LexerErrorKind),
}
331
/// Token produced by the shell lexer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedToken<'a> {
    /// Token kind used by the parser.
    pub kind: TokenKind,
    /// Source span covered by the token.
    pub span: Span,
    // Cooked-text / synthetic markers; see `TokenFlags`.
    pub(crate) flags: TokenFlags,
    // Kind-specific data (word text, fds, error info).
    payload: TokenPayload<'a>,
}
342
343impl<'a> LexedToken<'a> {
344    fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
345        match kind {
346            TokenKind::Word => LexedWordSegmentKind::Plain,
347            TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
348            TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
349            _ => LexedWordSegmentKind::Composite,
350        }
351    }
352
353    pub(crate) fn punctuation(kind: TokenKind) -> Self {
354        Self {
355            kind,
356            span: Span::new(),
357            flags: TokenFlags::empty(),
358            payload: TokenPayload::None,
359        }
360    }
361
362    fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
363        let flags = if word.has_cooked_text() {
364            TokenFlags::cooked_text()
365        } else {
366            TokenFlags::empty()
367        };
368
369        Self {
370            kind,
371            span: Span::new(),
372            flags,
373            payload: TokenPayload::Word(word),
374        }
375    }
376
377    fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
378        Self::with_word_payload(
379            kind,
380            LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
381        )
382    }
383
384    fn owned_word(kind: TokenKind, text: String) -> Self {
385        Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
386    }
387
388    fn comment() -> Self {
389        Self {
390            kind: TokenKind::Comment,
391            span: Span::new(),
392            flags: TokenFlags::empty(),
393            payload: TokenPayload::None,
394        }
395    }
396
397    fn fd(kind: TokenKind, fd: i32) -> Self {
398        Self {
399            kind,
400            span: Span::new(),
401            flags: TokenFlags::empty(),
402            payload: TokenPayload::Fd(fd),
403        }
404    }
405
406    fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
407        Self {
408            kind,
409            span: Span::new(),
410            flags: TokenFlags::empty(),
411            payload: TokenPayload::FdPair(src_fd, dst_fd),
412        }
413    }
414
415    fn error(kind: LexerErrorKind) -> Self {
416        Self {
417            kind: TokenKind::Error,
418            span: Span::new(),
419            flags: TokenFlags::empty(),
420            payload: TokenPayload::Error(kind),
421        }
422    }
423
424    pub(crate) fn with_span(mut self, span: Span) -> Self {
425        self.span = span;
426        self
427    }
428
429    pub(crate) fn rebased(mut self, base: Position) -> Self {
430        self.span = self.span.rebased(base);
431        self.payload = match self.payload {
432            TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
433            payload => payload,
434        };
435        self
436    }
437
438    pub(crate) fn with_synthetic_flag(mut self) -> Self {
439        self.flags = self.flags.with_synthetic();
440        self
441    }
442
443    pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
444        let payload = match self.payload {
445            TokenPayload::None => TokenPayload::None,
446            TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
447            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
448            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
449            TokenPayload::Error(kind) => TokenPayload::Error(kind),
450        };
451
452        LexedToken {
453            kind: self.kind,
454            span: self.span,
455            flags: self.flags,
456            payload,
457        }
458    }
459
460    pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
461        let payload = match self.payload {
462            TokenPayload::None => TokenPayload::None,
463            TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
464            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
465            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
466            TokenPayload::Error(kind) => TokenPayload::Error(kind),
467        };
468
469        LexedToken {
470            kind: self.kind,
471            span: self.span,
472            flags: self.flags,
473            payload,
474        }
475    }
476
477    /// Borrow the token text when it is a single-segment word token.
478    pub fn word_text(&self) -> Option<&str> {
479        self.kind
480            .is_word_like()
481            .then_some(())
482            .and_then(|_| match &self.payload {
483                TokenPayload::Word(word) => word.text(),
484                _ => None,
485            })
486    }
487
488    /// Return an owned string containing the token's word text.
489    pub fn word_string(&self) -> Option<String> {
490        self.kind
491            .is_word_like()
492            .then_some(())
493            .and_then(|_| match &self.payload {
494                TokenPayload::Word(word) => Some(word.joined_text()),
495                _ => None,
496            })
497    }
498
499    /// Borrow the structured word payload for word-like tokens.
500    pub fn word(&self) -> Option<&LexedWord<'a>> {
501        match &self.payload {
502            TokenPayload::Word(word) => Some(word),
503            _ => None,
504        }
505    }
506
507    /// Borrow the original source slice when the token is source-backed and uncooked.
508    pub fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
509        if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
510            return None;
511        }
512
513        (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
514            .then(|| &source[self.span.start.offset..self.span.end.offset])
515    }
516
517    /// Return the file-descriptor payload for redirection tokens that carry one.
518    pub fn fd_value(&self) -> Option<i32> {
519        match self.payload {
520            TokenPayload::Fd(fd) => Some(fd),
521            _ => None,
522        }
523    }
524
525    /// Return the `(source_fd, target_fd)` payload for descriptor-pair redirections.
526    pub fn fd_pair_value(&self) -> Option<(i32, i32)> {
527        match self.payload {
528            TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
529            _ => None,
530        }
531    }
532
533    /// Return the lexer error payload when this token represents `TokenKind::Error`.
534    pub fn error_kind(&self) -> Option<LexerErrorKind> {
535        match self.payload {
536            TokenPayload::Error(kind) => Some(kind),
537            _ => None,
538        }
539    }
540}
541
/// Result of reading a heredoc body from the source.
///
/// The content is owned because heredoc decoding may transform the raw
/// source text (so it cannot always be borrowed back from the input).
#[derive(Debug, Clone, PartialEq)]
pub struct HeredocRead {
    /// Decoded heredoc content.
    pub content: String,
    /// Source span covering the heredoc body content.
    pub content_span: Span,
}
550
/// Maximum nesting depth for command substitution in the lexer.
/// Prevents stack overflow from deeply nested $() patterns.
/// Used by constructors that do not take an explicit depth.
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
554
555#[derive(Clone, Debug)]
556struct Cursor<'a> {
557    rest: &'a str,
558}
559
560impl<'a> Cursor<'a> {
561    fn new(source: &'a str) -> Self {
562        Self { rest: source }
563    }
564
565    fn first(&self) -> Option<char> {
566        self.rest.chars().next()
567    }
568
569    fn second(&self) -> Option<char> {
570        let mut chars = self.rest.chars();
571        chars.next()?;
572        chars.next()
573    }
574
575    fn third(&self) -> Option<char> {
576        let mut chars = self.rest.chars();
577        chars.next()?;
578        chars.next()?;
579        chars.next()
580    }
581
582    fn bump(&mut self) -> Option<char> {
583        let ch = self.first()?;
584        self.rest = &self.rest[ch.len_utf8()..];
585        Some(ch)
586    }
587
588    fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
589        let start = self.rest;
590        let mut end = 0;
591
592        for ch in start.chars() {
593            if !predicate(ch) {
594                break;
595            }
596            end += ch.len_utf8();
597        }
598
599        self.rest = &start[end..];
600        &start[..end]
601    }
602
603    fn rest(&self) -> &'a str {
604        self.rest
605    }
606
607    fn skip_bytes(&mut self, count: usize) {
608        self.rest = &self.rest[count..];
609    }
610
611    fn find_byte(&self, byte: u8) -> Option<usize> {
612        memchr(byte, self.rest.as_bytes())
613    }
614}
615
/// Resolves byte offsets in `source` to line/column positions.
#[derive(Clone, Debug)]
struct PositionMap<'a> {
    source: &'a str,
    // Byte offset of the first character of every line (line 1 at offset 0).
    line_starts: Arc<[usize]>,
    // Most recently resolved position; speeds up monotonic lookups.
    cached: Position,
}
622
/// Counters collected when the `benchmarking` feature is enabled.
#[cfg(feature = "benchmarking")]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct LexerBenchmarkCounters {
    /// Number of times `Lexer::current_position` was invoked.
    pub(crate) current_position_calls: u64,
}
628
impl<'a> PositionMap<'a> {
    /// Precompute the byte offset of every line start for fast lookups.
    fn new(source: &'a str) -> Self {
        // First pass sizes the vector exactly; second pass fills it.
        let mut line_starts =
            Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
        line_starts.push(0);
        line_starts.extend(
            source
                .bytes()
                .enumerate()
                .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
        );

        Self {
            source,
            line_starts: line_starts.into(),
            cached: Position::new(),
        }
    }

    /// Resolve `offset` to a position, fast-pathing the common case of
    /// monotonically increasing offsets via the cached last result.
    fn position(&mut self, offset: usize) -> Position {
        if offset == self.cached.offset {
            return self.cached;
        }

        // Moving forward: advance incrementally from the cached position
        // instead of re-deriving line/column from scratch.
        let position = if offset > self.cached.offset && offset <= self.source.len() {
            Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
        } else {
            self.position_uncached(offset)
        };
        self.cached = position;
        position
    }

    /// Resolve `offset` by binary-searching the precomputed line starts.
    fn position_uncached(&self, offset: usize) -> Position {
        let offset = offset.min(self.source.len());
        // Index of the line containing `offset`.
        let line_index = self
            .line_starts
            .partition_point(|start| *start <= offset)
            .saturating_sub(1);
        let line_start = self.line_starts[line_index];
        let line_text = &self.source[line_start..offset];
        // Columns are 1-based and counted in chars; skip the char count for
        // pure-ASCII prefixes where bytes == chars.
        let column = if line_text.is_ascii() {
            line_text.len() + 1
        } else {
            line_text.chars().count() + 1
        };

        Position {
            line: line_index + 1,
            column,
            offset,
        }
    }

    /// Advance `position` across `text`, updating offset, line, and column.
    fn advance_from(mut position: Position, text: &str) -> Position {
        position.offset += text.len();
        let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
        if newline_count == 0 {
            position.column += if text.is_ascii() {
                text.len()
            } else {
                text.chars().count()
            };
            return position;
        }

        position.line += newline_count;
        // The column restarts after the last newline in `text`.
        let tail_start = memrchr(b'\n', text.as_bytes())
            .map(|index| index + 1)
            .unwrap_or_default();
        let tail = &text[tail_start..];
        position.column = if tail.is_ascii() {
            tail.len() + 1
        } else {
            tail.chars().count() + 1
        };
        position
    }
}
708
/// Lexer for bash scripts.
#[derive(Clone)]
pub struct Lexer<'a> {
    #[allow(dead_code)] // Stored for error reporting in future
    input: &'a str,
    /// Current byte offset in the input/reinjected stream.
    offset: usize,
    // Cursor over the unconsumed source text.
    cursor: Cursor<'a>,
    // Offset -> line/column resolver for `input`.
    position_map: PositionMap<'a>,
    /// Buffer for re-injected characters (e.g., rest-of-line after heredoc delimiter).
    /// Consumed before `cursor`.
    reinject_buf: VecDeque<char>,
    /// Cursor byte offset to restore once a heredoc replay buffer is exhausted.
    reinject_resume_offset: Option<usize>,
    /// Maximum allowed nesting depth for command substitution
    max_subst_depth: usize,
    // Zsh option state at the start of input, if the profile provides one.
    initial_zsh_options: Option<ZshOptionState>,
    // Prescanned, offset-ordered zsh option changes (zsh dialect only).
    zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    // Index of the next not-yet-applied timeline entry.
    zsh_timeline_index: usize,
    #[cfg(feature = "benchmarking")]
    benchmark_counters: Option<LexerBenchmarkCounters>,
}
731
732impl<'a> Lexer<'a> {
    /// Create a new lexer for the given input.
    ///
    /// Uses the native Bash profile and the default substitution depth limit.
    pub fn new(input: &'a str) -> Self {
        Self::with_max_subst_depth_and_profile(
            input,
            DEFAULT_MAX_SUBST_DEPTH,
            &ShellProfile::native(super::ShellDialect::Bash),
            None,
        )
    }
742
    /// Create a new lexer with a custom max substitution nesting depth.
    /// Limits recursion in read_command_subst_into().
    ///
    /// Uses the native Bash profile; no zsh option timeline is built.
    pub fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
        Self::with_max_subst_depth_and_profile(
            input,
            max_depth,
            &ShellProfile::native(super::ShellDialect::Bash),
            None,
        )
    }
753
    /// Create a new lexer using the provided shell profile.
    pub fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
        // For zsh only, prescan the input into an offset-indexed option
        // timeline (when `build` yields one) so lexing can track options
        // that change mid-script.
        let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
            .then(|| ZshOptionTimeline::build(input, shell_profile))
            .flatten()
            .map(Arc::new);
        Self::with_max_subst_depth_and_profile(
            input,
            DEFAULT_MAX_SUBST_DEPTH,
            shell_profile,
            zsh_timeline,
        )
    }
767
    /// Shared constructor backing all public constructors.
    ///
    /// `zsh_timeline`, when present, is a prescanned, offset-ordered record
    /// of zsh option changes consulted during lexing.
    pub(crate) fn with_max_subst_depth_and_profile(
        input: &'a str,
        max_depth: usize,
        shell_profile: &ShellProfile,
        zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    ) -> Self {
        Self {
            input,
            offset: 0,
            cursor: Cursor::new(input),
            position_map: PositionMap::new(input),
            reinject_buf: VecDeque::new(),
            reinject_resume_offset: None,
            max_subst_depth: max_depth,
            initial_zsh_options: shell_profile.zsh_options().cloned(),
            zsh_timeline,
            zsh_timeline_index: 0,
            #[cfg(feature = "benchmarking")]
            benchmark_counters: None,
        }
    }
789
    /// Get the current position in the input.
    ///
    /// Takes `&self`, so it resolves the position without updating the
    /// internal position cache (unlike `current_position`).
    pub fn position(&self) -> Position {
        self.position_map.position_uncached(self.offset)
    }
794
    // Cached-position lookup used on the hot lexing path.
    fn current_position(&mut self) -> Position {
        #[cfg(feature = "benchmarking")]
        self.maybe_record_current_position_call();
        self.position_map.position(self.offset)
    }
800
    /// Start collecting benchmark counters (resets any previous counts).
    #[cfg(feature = "benchmarking")]
    pub(crate) fn enable_benchmark_counters(&mut self) {
        self.benchmark_counters = Some(LexerBenchmarkCounters::default());
    }
805
    /// Snapshot the collected counters; zeros when collection is disabled.
    #[cfg(feature = "benchmarking")]
    pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
        self.benchmark_counters.unwrap_or_default()
    }
810
    // Bump the `current_position` call counter when collection is enabled.
    #[cfg(feature = "benchmarking")]
    fn maybe_record_current_position_call(&mut self) {
        if let Some(counters) = &mut self.benchmark_counters {
            counters.current_position_calls += 1;
        }
    }
817
    // Once the heredoc replay buffer has drained, restore `offset` to the
    // saved cursor offset so positions track the real source again.
    fn sync_offset_to_cursor(&mut self) {
        if self.reinject_buf.is_empty()
            && let Some(offset) = self.reinject_resume_offset.take()
        {
            self.offset = offset;
        }
    }
825
    /// Get the next token kind from the input without decoding or materializing
    /// any payload text.
    // NOTE(review): currently delegates to `next_lexed_token`, which builds
    // the full token; the payload is then discarded. Confirm whether the
    // no-materialization claim above still holds.
    pub fn next_token_kind(&mut self) -> Option<TokenKind> {
        self.next_lexed_token().map(|token| token.kind)
    }
831
832    fn peek_char(&mut self) -> Option<char> {
833        self.sync_offset_to_cursor();
834        if let Some(&ch) = self.reinject_buf.front() {
835            Some(ch)
836        } else {
837            self.cursor.first()
838        }
839    }
840
841    fn advance(&mut self) -> Option<char> {
842        self.sync_offset_to_cursor();
843        let ch = if !self.reinject_buf.is_empty() {
844            self.reinject_buf.pop_front()
845        } else {
846            self.cursor.bump()
847        };
848        if let Some(c) = ch {
849            self.offset += c.len_utf8();
850        }
851        ch
852    }
853
    // Iterate the upcoming characters: replayed characters first, then the
    // raw source; nothing is consumed.
    fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
        self.reinject_buf
            .iter()
            .copied()
            .chain(self.cursor.rest().chars())
    }
860
    // Peek the second upcoming character without consuming, dispatching on
    // how many replayed characters sit ahead of the cursor.
    fn second_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.second(),
            1 => self.cursor.first(),
            _ => self.reinject_buf.get(1).copied(),
        }
    }
868
    // Peek the third upcoming character without consuming; see `second_char`.
    fn third_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.third(),
            1 => self.cursor.second(),
            2 => self.cursor.first(),
            _ => self.reinject_buf.get(2).copied(),
        }
    }
877
    // Peek the fourth upcoming character without consuming; see `second_char`.
    fn fourth_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.rest().chars().nth(3),
            1 => self.cursor.third(),
            2 => self.cursor.second(),
            3 => self.cursor.first(),
            _ => self.reinject_buf.get(3).copied(),
        }
    }
887
    // Consume `byte_len` bytes straight from the source cursor.
    // Only valid while no replayed characters are pending.
    fn consume_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.sync_offset_to_cursor();
        self.offset += byte_len;
        self.cursor.skip_bytes(byte_len);
    }
894
    // Advance only the offset, without moving the cursor.
    // NOTE(review): presumably used when a caller has already moved the
    // cursor past these bytes via a scanning helper — confirm at call sites.
    fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.offset += byte_len;
    }
899
    // Consume `count` ASCII characters (one byte each), taking the byte
    // fast path when no replayed characters are pending.
    fn consume_ascii_chars(&mut self, count: usize) {
        if self.reinject_buf.is_empty() {
            self.consume_source_bytes(count);
            return;
        }

        for _ in 0..count {
            self.advance();
        }
    }
910
911    fn source_horizontal_whitespace_len(&self) -> usize {
912        self.cursor
913            .rest()
914            .as_bytes()
915            .iter()
916            .take_while(|byte| matches!(**byte, b' ' | b'\t'))
917            .count()
918    }
919
    // Byte length of the run of plain-word ASCII bytes at the front of the
    // raw source (classification per `is_ascii_plain_word_byte`).
    fn source_ascii_plain_word_len(&self) -> usize {
        self.cursor
            .rest()
            .as_bytes()
            .iter()
            .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
            .count()
    }
928
929    fn find_double_quote_special(source: &str) -> Option<usize> {
930        source
931            .as_bytes()
932            .iter()
933            .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
934    }
935
    // Seed `capture` with the raw source text between `start` and `end`
    // if it has not been materialized yet; no-op otherwise.
    fn ensure_capture_from_source(
        &self,
        capture: &mut Option<String>,
        start: Position,
        end: Position,
    ) {
        if capture.is_none() {
            *capture = Some(self.input[start.offset..end.offset].to_string());
        }
    }
946
947    fn push_capture_char(capture: &mut Option<String>, ch: char) {
948        if let Some(text) = capture.as_mut() {
949            text.push(ch);
950        }
951    }
952
953    fn push_capture_str(capture: &mut Option<String>, text: &str) {
954        if let Some(current) = capture.as_mut() {
955            current.push_str(text);
956        }
957    }
958
    // Resolve the zsh option state in effect at the current offset by
    // advancing through the prescanned timeline entries; falls back to the
    // profile's initial options when no timeline entry applies yet (or no
    // timeline exists at all).
    fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
        if let Some(timeline) = self.zsh_timeline.as_ref() {
            // Skip entries at or before the current offset; the last one
            // skipped is the state currently in effect.
            while self.zsh_timeline_index < timeline.entries.len()
                && timeline.entries[self.zsh_timeline_index].offset <= self.offset
            {
                self.zsh_timeline_index += 1;
            }
            return if self.zsh_timeline_index == 0 {
                self.initial_zsh_options.as_ref()
            } else {
                Some(&timeline.entries[self.zsh_timeline_index - 1].state)
            };
        }

        self.initial_zsh_options.as_ref()
    }
975
    // Comments are lexed unless zsh's interactive_comments option is
    // definitely off.
    fn comments_enabled(&mut self) -> bool {
        !self
            .current_zsh_options()
            .is_some_and(|options| options.interactive_comments.is_definitely_off())
    }
981
    // Whether zsh's rc_quotes option is definitely on.
    fn rc_quotes_enabled(&mut self) -> bool {
        self.current_zsh_options()
            .is_some_and(|options| options.rc_quotes.is_definitely_on())
    }
986
987    fn ignore_braces_enabled(&mut self) -> bool {
988        self.current_zsh_options()
989            .is_some_and(|options| options.ignore_braces.is_definitely_on())
990    }
991
992    fn ignore_close_braces_enabled(&mut self) -> bool {
993        self.current_zsh_options().is_some_and(|options| {
994            options.ignore_braces.is_definitely_on()
995                || options.ignore_close_braces.is_definitely_on()
996        })
997    }
998
999    fn should_treat_hash_as_word_char(&mut self) -> bool {
1000        if !self.comments_enabled() {
1001            return true;
1002        }
1003        self.reinject_buf.is_empty()
1004            && (self
1005                .input
1006                .get(..self.offset)
1007                .and_then(|prefix| prefix.chars().next_back())
1008                .is_some_and(|prev| {
1009                    !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
1010                })
1011                || self.is_inside_unclosed_double_paren_on_line())
1012    }
1013
1014    fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
1015        capture
1016            .as_deref()
1017            .unwrap_or(&self.input[start.offset..self.offset])
1018    }
1019
1020    fn current_word_surface_is_single_char(
1021        &self,
1022        start: Position,
1023        capture: &Option<String>,
1024        target: char,
1025    ) -> bool {
1026        let text = self.current_word_text(start, capture);
1027        if !text.contains('\x00') {
1028            let mut encoded = [0; 4];
1029            return text == target.encode_utf8(&mut encoded);
1030        }
1031
1032        let mut chars = text.chars().filter(|&ch| ch != '\x00');
1033        matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
1034    }
1035
1036    fn current_word_surface_last_char<'b>(
1037        &'b self,
1038        start: Position,
1039        capture: &'b Option<String>,
1040    ) -> Option<char> {
1041        self.current_word_text(start, capture)
1042            .chars()
1043            .rev()
1044            .find(|&ch| ch != '\x00')
1045    }
1046
1047    fn current_word_surface_ends_with_char(
1048        &self,
1049        start: Position,
1050        capture: &Option<String>,
1051        target: char,
1052    ) -> bool {
1053        self.current_word_surface_last_char(start, capture) == Some(target)
1054    }
1055
1056    fn current_word_surface_ends_with_extglob_prefix(
1057        &self,
1058        start: Position,
1059        capture: &Option<String>,
1060    ) -> bool {
1061        self.current_word_surface_last_char(start, capture)
1062            .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
1063    }
1064
1065    /// Get the next source-backed token from the input, skipping line comments.
1066    pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
1067        self.skip_whitespace();
1068        let start = self.current_position();
1069        let token = self.next_lexed_token_inner(false)?;
1070        let end = self.current_position();
1071        Some(token.with_span(Span::from_positions(start, end)))
1072    }
1073
1074    /// Get the next source-backed token from the input, preserving line comments.
1075    pub fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
1076        self.skip_whitespace();
1077        let start = self.current_position();
1078        let token = self.next_lexed_token_inner(true)?;
1079        let end = self.current_position();
1080        Some(token.with_span(Span::from_positions(start, end)))
1081    }
1082
    /// Internal: get next token without recording position (called after whitespace skip)
    ///
    /// Dispatches on the first character. Multi-character operators are
    /// matched longest-first within each arm (e.g. `;;&` before `;;` before
    /// `;`); anything not recognized as punctuation falls through to the
    /// word readers at the bottom.
    fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
        let ch = self.peek_char()?;

        match ch {
            '\n' => {
                self.consume_ascii_chars(1);
                Some(LexedToken::punctuation(TokenKind::Newline))
            }
            // Separators and case terminators: ;;& ;; ;| ;& ;
            ';' => {
                if self.second_char() == Some(';') {
                    if self.third_char() == Some('&') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp)) // ;;&
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemicolon)) // ;;
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiPipe)) // ;|
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiAmp)) // ;&
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Semicolon))
                }
            }
            // Pipes and logical OR: || |& |
            '|' => {
                if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Or))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::PipeBoth))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Pipe))
                }
            }
            // Background / logical AND / combined redirects: && &>> &> &| &! &
            '&' => {
                if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::And))
                } else if self.second_char() == Some('>') {
                    if self.third_char() == Some('>') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::RedirectBoth))
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
                } else if self.second_char() == Some('!') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundBang))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Background))
                }
            }
            // Output redirects: >> (also >>|, consumed as append) >| >( >& >
            '>' => {
                if self.second_char() == Some('>') {
                    if self.third_char() == Some('|') {
                        self.consume_ascii_chars(3);
                    } else {
                        self.consume_ascii_chars(2);
                    }
                    Some(LexedToken::punctuation(TokenKind::RedirectAppend))
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Clobber))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupOutput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectOut))
                }
            }
            // Input redirects and heredocs: <<< <<- << <> <( <& <
            '<' => {
                if self.second_char() == Some('<') {
                    if self.third_char() == Some('<') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereString))
                    } else if self.third_char() == Some('-') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereDocStrip))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::HereDoc))
                    }
                } else if self.second_char() == Some('>') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupInput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectIn))
                }
            }
            '(' => {
                if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::LeftParen))
                }
            }
            ')' => {
                if self.second_char() == Some(')') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RightParen))
                }
            }
            // `{` is ambiguous: brace expansion, brace group, or literal word
            // text — and zsh's IGNORE_BRACES option forces the literal reading.
            '{' => {
                let start = self.current_position();
                if self.ignore_braces_enabled() {
                    self.consume_ascii_chars(1);
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
                        }
                        _ => self.read_word_starting_with("{", start),
                    }
                } else if self.looks_like_brace_expansion() {
                    // Look ahead to see if this is a brace expansion like {a,b,c} or {1..5}
                    // vs a brace group like { cmd; }
                    // Note: { must be followed by space/newline to be a brace group
                    self.read_brace_expansion_word()
                } else if self.is_brace_group_start() {
                    self.advance();
                    Some(LexedToken::punctuation(TokenKind::LeftBrace))
                } else if self.brace_literal_starts_case_pattern_delimiter() {
                    self.read_word_starting_with("{", start)
                } else {
                    self.read_brace_literal_word()
                }
            }
            '}' => {
                self.consume_ascii_chars(1);
                if self.ignore_close_braces_enabled() {
                    Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
                } else {
                    Some(LexedToken::punctuation(TokenKind::RightBrace))
                }
            }
            // `[[` only opens a conditional when followed by whitespace/EOF;
            // otherwise `[` is ordinary word text (test command or glob).
            '[' => {
                let start = self.current_position();
                self.consume_ascii_chars(1);
                if self.peek_char() == Some('[')
                    && matches!(
                        self.second_char(),
                        Some(' ') | Some('\t') | Some('\n') | None
                    )
                {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
                } else {
                    // `[` can start the test command when followed by whitespace, or it can be
                    // ordinary word text such as a glob bracket expression.
                    //
                    // Read the whole token with the normal word scanner so forms like `[[z]`,
                    // `[hello"]"`, and `[+(])` stay attached to one word instead of producing
                    // structural tokens mid-word.
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
                        }
                        _ => self.read_word_starting_with("[", start),
                    }
                }
            }
            ']' => {
                if self.second_char() == Some(']') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
                } else {
                    // A lone `]` is word text, not punctuation.
                    self.consume_ascii_chars(1);
                    Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
                }
            }
            '\'' => self.read_single_quoted_string(),
            '"' => self.read_double_quoted_string(),
            // `#` starts a comment only when comments are enabled and it is
            // not glued to the preceding word (see should_treat_hash_as_word_char).
            '#' => {
                if self.should_treat_hash_as_word_char() {
                    let start = self.current_position();
                    return self.read_word_starting_with("#", start);
                }
                if preserve_comments {
                    self.read_comment();
                    Some(LexedToken::comment())
                } else {
                    // Skip the comment and lex whatever follows it.
                    self.skip_comment();
                    self.next_lexed_token_inner(false)
                }
            }
            // Handle file descriptor redirects like 2> or 2>&1
            '0'..='9' => self.read_word_or_fd_redirect(),
            _ => self.read_word(),
        }
    }
1300
1301    fn skip_whitespace(&mut self) {
1302        while let Some(ch) = self.peek_char() {
1303            if self.reinject_buf.is_empty() {
1304                let whitespace_len = self.source_horizontal_whitespace_len();
1305                if whitespace_len > 0 {
1306                    self.consume_source_bytes(whitespace_len);
1307                    continue;
1308                }
1309
1310                if self.cursor.rest().starts_with("\\\n") {
1311                    self.consume_source_bytes(2);
1312                    continue;
1313                }
1314            }
1315
1316            if ch == ' ' || ch == '\t' {
1317                self.consume_ascii_chars(1);
1318            } else if ch == '\\' {
1319                // Check for backslash-newline (line continuation) between tokens
1320                if self.second_char() == Some('\n') {
1321                    self.consume_ascii_chars(2);
1322                } else {
1323                    break;
1324                }
1325            } else {
1326                break;
1327            }
1328        }
1329    }
1330
1331    fn skip_comment(&mut self) {
1332        if self.reinject_buf.is_empty() {
1333            let end = self
1334                .cursor
1335                .find_byte(b'\n')
1336                .unwrap_or(self.cursor.rest().len());
1337            self.consume_source_bytes(end);
1338            return;
1339        }
1340
1341        while let Some(ch) = self.peek_char() {
1342            if ch == '\n' {
1343                break;
1344            }
1345            self.advance();
1346        }
1347    }
1348
1349    fn read_comment(&mut self) {
1350        debug_assert_eq!(self.peek_char(), Some('#'));
1351
1352        if self.reinject_buf.is_empty() {
1353            let rest = self.cursor.rest();
1354            let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1355            self.consume_source_bytes(end);
1356            return;
1357        }
1358
1359        self.advance(); // consume '#'
1360
1361        while let Some(ch) = self.peek_char() {
1362            if ch == '\n' {
1363                break;
1364            }
1365            self.advance();
1366        }
1367    }
1368
1369    fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1370        if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1371            return false;
1372        }
1373
1374        let line_start = self.input[..self.offset]
1375            .rfind('\n')
1376            .map_or(0, |index| index + 1);
1377        let prefix = &self.input[line_start..self.offset];
1378        line_has_unclosed_double_paren(prefix)
1379    }
1380
    /// Check if this is a file descriptor redirect (e.g., 2>, 2>>, 2>&1)
    /// or just a regular word starting with a digit
    ///
    /// Only a single leading digit is used as the fd number; if no redirect
    /// operator follows, the digit falls through to the normal word reader.
    fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
        if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
            let Some(fd) = first_digit.to_digit(10) else {
                unreachable!("peeked ASCII digit should convert to a base-10 digit");
            };
            let fd = fd as i32;

            match (self.second_char(), self.third_char()) {
                // N>> append; N>>| consumes the extra '|' but is still append.
                (Some('>'), Some('>')) => {
                    if self.fourth_char() == Some('|') {
                        self.consume_ascii_chars(4);
                    } else {
                        self.consume_ascii_chars(3);
                    }
                    return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
                }
                // N>| clobber.
                (Some('>'), Some('|')) => {
                    self.consume_ascii_chars(3);
                    return Some(LexedToken::fd(TokenKind::Clobber, fd));
                }
                // N>&M duplicates output; N>& with no digits falls back to a
                // plain fd redirect.
                (Some('>'), Some('&')) => {
                    self.consume_ascii_chars(3);

                    let mut target_str = String::with_capacity(4);
                    while let Some(c) = self.peek_char() {
                        if c.is_ascii_digit() {
                            target_str.push(c);
                            self.advance();
                        } else {
                            break;
                        }
                    }

                    if target_str.is_empty() {
                        return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
                    }

                    // NOTE(review): target_str is nonempty all-digits here, so
                    // parse can only fail on i32 overflow — falls back to fd 1.
                    let target_fd: i32 = target_str.parse().unwrap_or(1);
                    return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
                }
                (Some('>'), _) => {
                    self.consume_ascii_chars(2);
                    return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
                }
                // N<&M duplicates input; N<&- closes the descriptor.
                (Some('<'), Some('&')) => {
                    self.consume_ascii_chars(3);

                    let mut target_str = String::with_capacity(4);
                    while let Some(c) = self.peek_char() {
                        if c.is_ascii_digit() || c == '-' {
                            target_str.push(c);
                            self.advance();
                            if c == '-' {
                                break;
                            }
                        } else {
                            break;
                        }
                    }

                    if target_str == "-" {
                        return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
                    }
                    // NOTE(review): a digits-then-'-' target (e.g. 2<&3-, the
                    // move-fd form) fails to parse and becomes fd 0 here —
                    // confirm this is intended.
                    let target_fd: i32 = target_str.parse().unwrap_or(0);
                    return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
                }
                // N<> read-write redirect.
                (Some('<'), Some('>')) => {
                    self.consume_ascii_chars(3);
                    return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
                }
                // N<< is deliberately not consumed here; the digit falls
                // through to the word reader below.
                (Some('<'), Some('<')) => {}
                (Some('<'), _) => {
                    self.consume_ascii_chars(2);
                    return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
                }
                _ => {}
            }
        }

        // Not a fd redirect pattern, read as regular word
        self.read_word()
    }
1465
1466    fn read_word_starting_with(
1467        &mut self,
1468        _prefix: &str,
1469        start: Position,
1470    ) -> Option<LexedToken<'a>> {
1471        let segment = match self.read_unquoted_segment(start) {
1472            Ok(segment) => segment,
1473            Err(kind) => return Some(LexedToken::error(kind)),
1474        };
1475        if segment.as_str().is_empty() {
1476            return None;
1477        }
1478        let mut lexed_word = LexedWord::from_segment(segment);
1479        if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1480            return Some(LexedToken::error(kind));
1481        }
1482        Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1483    }
1484
1485    fn read_word(&mut self) -> Option<LexedToken<'a>> {
1486        let start = self.current_position();
1487
1488        if self.reinject_buf.is_empty() {
1489            let ascii_len = self.source_ascii_plain_word_len();
1490            let chunk = if ascii_len > 0
1491                && self
1492                    .cursor
1493                    .rest()
1494                    .as_bytes()
1495                    .get(ascii_len)
1496                    .is_none_or(|byte| byte.is_ascii())
1497            {
1498                self.consume_source_bytes(ascii_len);
1499                &self.input[start.offset..self.offset]
1500            } else {
1501                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1502                self.advance_scanned_source_bytes(chunk.len());
1503                chunk
1504            };
1505            if !chunk.is_empty() {
1506                let continues = matches!(
1507                    self.peek_char(),
1508                    Some(next)
1509                        if Self::is_word_char(next)
1510                            || next == '$'
1511                            || matches!(next, '\'' | '"')
1512                            || next == '{'
1513                            || (next == '\\' && self.second_char() == Some('\n'))
1514                            || (next == '('
1515                                && (chunk.ends_with('=')
1516                                    || Self::word_can_take_parenthesized_suffix(chunk)))
1517                );
1518
1519                if !continues {
1520                    let end = self.current_position();
1521                    return Some(LexedToken::borrowed_word(
1522                        TokenKind::Word,
1523                        &self.input[start.offset..self.offset],
1524                        Some(Span::from_positions(start, end)),
1525                    ));
1526                }
1527
1528                if self.peek_char() == Some('(')
1529                    && (chunk.ends_with('=') || Self::word_can_take_parenthesized_suffix(chunk))
1530                {
1531                    return self.read_complex_word(start);
1532                }
1533
1534                let end = self.current_position();
1535                return self.finish_segmented_word(LexedWord::borrowed(
1536                    LexedWordSegmentKind::Plain,
1537                    &self.input[start.offset..self.offset],
1538                    Some(Span::from_positions(start, end)),
1539                ));
1540            }
1541        }
1542
1543        self.read_complex_word(start)
1544    }
1545
1546    fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1547        if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1548            return Some(LexedToken::error(kind));
1549        }
1550
1551        Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1552    }
1553
1554    fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1555        if self.peek_char() == Some('$') {
1556            match self.second_char() {
1557                Some('\'') => return self.read_dollar_single_quoted_string(),
1558                Some('"') => return self.read_dollar_double_quoted_string(),
1559                _ => {}
1560            }
1561        }
1562
1563        let segment = match self.read_unquoted_segment(start) {
1564            Ok(segment) => segment,
1565            Err(kind) => return Some(LexedToken::error(kind)),
1566        };
1567
1568        if segment.as_str().is_empty() {
1569            return None;
1570        }
1571
1572        self.finish_segmented_word(LexedWord::from_segment(segment))
1573    }
1574
1575    fn read_unquoted_segment(
1576        &mut self,
1577        start: Position,
1578    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1579        let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
1580        while let Some(ch) = self.peek_char() {
1581            if ch == '"' || ch == '\'' {
1582                break;
1583            } else if ch == '$' {
1584                if matches!(self.second_char(), Some('\'') | Some('"'))
1585                    && (self.current_position().offset > start.offset
1586                        || word.as_ref().is_some_and(|word| !word.is_empty()))
1587                {
1588                    break;
1589                }
1590
1591                // Handle variable references and command substitution
1592                self.advance();
1593
1594                Self::push_capture_char(&mut word, ch); // push the '$'
1595
1596                // Check for $[ / $( / ${ forms before falling back to variable text.
1597                if self.peek_char() == Some('[') {
1598                    Self::push_capture_char(&mut word, '[');
1599                    self.advance();
1600                    if !self.read_legacy_arithmetic_into(&mut word, start) {
1601                        return Err(LexerErrorKind::CommandSubstitution);
1602                    }
1603                } else if self.peek_char() == Some('(') {
1604                    if self.second_char() == Some('(') {
1605                        if !self.read_arithmetic_expansion_into(&mut word) {
1606                            return Err(LexerErrorKind::CommandSubstitution);
1607                        }
1608                    } else {
1609                        Self::push_capture_char(&mut word, '(');
1610                        self.advance();
1611                        if !self.read_command_subst_into(&mut word) {
1612                            return Err(LexerErrorKind::CommandSubstitution);
1613                        }
1614                    }
1615                } else if self.peek_char() == Some('{') {
1616                    // ${VAR} format — track nested braces so ${a[${#b[@]}]}
1617                    // doesn't stop at the inner }.
1618                    Self::push_capture_char(&mut word, '{');
1619                    self.advance();
1620                    let _ = self.read_param_expansion_into(&mut word, start);
1621                } else {
1622                    // Check for special single-character variables ($?, $#, $@, $*, $!, $$, $-, $0-$9)
1623                    if let Some(c) = self.peek_char() {
1624                        if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
1625                            || c.is_ascii_digit()
1626                        {
1627                            Self::push_capture_char(&mut word, c);
1628                            self.advance();
1629                        } else {
1630                            // Read variable name (alphanumeric + _)
1631                            while let Some(c) = self.peek_char() {
1632                                if c.is_ascii_alphanumeric() || c == '_' {
1633                                    Self::push_capture_char(&mut word, c);
1634                                    self.advance();
1635                                } else {
1636                                    break;
1637                                }
1638                            }
1639                        }
1640                    }
1641                }
1642            } else if ch == '{' {
1643                if self.looks_like_mid_word_brace_segment() {
1644                    // Keep balanced {...} forms attached to the current word so
1645                    // plain literals like foo{bar} and brace expansions stay intact.
1646                    Self::push_capture_char(&mut word, ch);
1647                    self.advance();
1648                    self.consume_mid_word_brace_segment(&mut word);
1649                } else {
1650                    // Unmatched literal braces in regexes like ^{ should not swallow
1651                    // trailing delimiters such as ]] or then.
1652                    Self::push_capture_char(&mut word, ch);
1653                    self.advance();
1654                }
1655            } else if ch == '`' {
1656                // Preserve legacy backticks verbatim so the parser can keep the
1657                // original syntax form.
1658                let capture_end = self.current_position();
1659                self.ensure_capture_from_source(&mut word, start, capture_end);
1660                Self::push_capture_char(&mut word, ch);
1661                self.advance(); // consume opening `
1662                let mut closed = false;
1663                while let Some(c) = self.peek_char() {
1664                    Self::push_capture_char(&mut word, c);
1665                    self.advance();
1666                    if c == '`' {
1667                        closed = true;
1668                        break;
1669                    }
1670                    if c == '\\'
1671                        && let Some(next) = self.peek_char()
1672                    {
1673                        Self::push_capture_char(&mut word, next);
1674                        self.advance();
1675                    }
1676                }
1677                if !closed {
1678                    return Err(LexerErrorKind::BacktickSubstitution);
1679                }
1680            } else if ch == '\\' {
1681                let capture_end = self.current_position();
1682                self.ensure_capture_from_source(&mut word, start, capture_end);
1683                self.advance();
1684                if let Some(next) = self.peek_char() {
1685                    if next == '\n' {
1686                        // Line continuation: skip backslash + newline
1687                        self.advance();
1688                    } else {
1689                        // Escaped character: backslash quotes the next char
1690                        // (quote removal — only the literal char survives).
1691                        // Preserve source/decoded alignment with a sentinel so
1692                        // downstream word decoding keeps later spans anchored.
1693                        Self::push_capture_char(&mut word, '\x00');
1694                        Self::push_capture_char(&mut word, next);
1695                        self.advance();
1696                        if next == '{'
1697                            && self.current_word_surface_is_single_char(start, &word, '{')
1698                            && self.escaped_brace_sequence_looks_like_brace_expansion()
1699                        {
1700                            let mut depth = 1;
1701                            while let Some(c) = self.peek_char() {
1702                                Self::push_capture_char(&mut word, c);
1703                                self.advance();
1704                                match c {
1705                                    '{' => depth += 1,
1706                                    '}' => {
1707                                        depth -= 1;
1708                                        if depth == 0 {
1709                                            break;
1710                                        }
1711                                    }
1712                                    _ => {}
1713                                }
1714                            }
1715                        }
1716                    }
1717                } else {
1718                    Self::push_capture_char(&mut word, '\\');
1719                }
1720            } else if ch == '('
1721                && self.current_word_surface_ends_with_char(start, &word, '=')
1722                && self.looks_like_assoc_assign()
1723            {
1724                // Associative compound assignment: var=([k]="v" ...) — keep entire
1725                // (...) as part of word so declare -A m=([k]="v") stays one token.
1726                Self::push_capture_char(&mut word, ch);
1727                self.advance();
1728                let mut depth = 1;
1729                while let Some(c) = self.peek_char() {
1730                    Self::push_capture_char(&mut word, c);
1731                    self.advance();
1732                    match c {
1733                        '(' => depth += 1,
1734                        ')' => {
1735                            depth -= 1;
1736                            if depth == 0 {
1737                                break;
1738                            }
1739                        }
1740                        '"' => {
1741                            while let Some(qc) = self.peek_char() {
1742                                Self::push_capture_char(&mut word, qc);
1743                                self.advance();
1744                                if qc == '"' {
1745                                    break;
1746                                }
1747                                if qc == '\\'
1748                                    && let Some(esc) = self.peek_char()
1749                                {
1750                                    Self::push_capture_char(&mut word, esc);
1751                                    self.advance();
1752                                }
1753                            }
1754                        }
1755                        '\'' => {
1756                            while let Some(qc) = self.peek_char() {
1757                                Self::push_capture_char(&mut word, qc);
1758                                self.advance();
1759                                if qc == '\'' {
1760                                    break;
1761                                }
1762                            }
1763                        }
1764                        '\\' => {
1765                            if let Some(esc) = self.peek_char() {
1766                                Self::push_capture_char(&mut word, esc);
1767                                self.advance();
1768                            }
1769                        }
1770                        _ => {}
1771                    }
1772                }
1773            } else if ch == '(' && self.current_word_surface_ends_with_extglob_prefix(start, &word)
1774            {
1775                // Extglob: @(...), ?(...), *(...), +(...), !(...)
1776                // Consume through matching ) including nested parens
1777                Self::push_capture_char(&mut word, ch);
1778                self.advance();
1779                let mut depth = 1;
1780                while let Some(c) = self.peek_char() {
1781                    Self::push_capture_char(&mut word, c);
1782                    self.advance();
1783                    match c {
1784                        '(' => depth += 1,
1785                        ')' => {
1786                            depth -= 1;
1787                            if depth == 0 {
1788                                break;
1789                            }
1790                        }
1791                        '\\' => {
1792                            if let Some(esc) = self.peek_char() {
1793                                Self::push_capture_char(&mut word, esc);
1794                                self.advance();
1795                            }
1796                        }
1797                        _ => {}
1798                    }
1799                }
1800            } else if Self::is_plain_word_char(ch) {
1801                if self.reinject_buf.is_empty() {
1802                    let ascii_len = self.source_ascii_plain_word_len();
1803                    let chunk = if ascii_len > 0
1804                        && self
1805                            .cursor
1806                            .rest()
1807                            .as_bytes()
1808                            .get(ascii_len)
1809                            .is_none_or(|byte| byte.is_ascii())
1810                    {
1811                        self.consume_source_bytes(ascii_len);
1812                        &self.input[self.offset - ascii_len..self.offset]
1813                    } else {
1814                        let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1815                        self.advance_scanned_source_bytes(chunk.len());
1816                        chunk
1817                    };
1818                    Self::push_capture_str(&mut word, chunk);
1819                } else {
1820                    Self::push_capture_char(&mut word, ch);
1821                    self.advance();
1822                }
1823            } else {
1824                break;
1825            }
1826        }
1827
1828        if let Some(word) = word {
1829            let span = Some(Span::from_positions(start, self.current_position()));
1830            Ok(LexedWordSegment::owned_with_spans(
1831                LexedWordSegmentKind::Plain,
1832                word,
1833                span,
1834                span,
1835            ))
1836        } else {
1837            let end = self.current_position();
1838            Ok(LexedWordSegment::borrowed(
1839                LexedWordSegmentKind::Plain,
1840                &self.input[start.offset..self.offset],
1841                Some(Span::from_positions(start, end)),
1842            ))
1843        }
1844    }
1845
1846    fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1847        let segment = match self.read_single_quoted_segment() {
1848            Ok(segment) => segment,
1849            Err(kind) => return Some(LexedToken::error(kind)),
1850        };
1851        let mut word = LexedWord::from_segment(segment);
1852        if let Err(kind) = self.append_segmented_continuation(&mut word) {
1853            return Some(LexedToken::error(kind));
1854        }
1855
1856        Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1857    }
1858
    /// Lexes a `'...'` segment starting at the opening quote.
    ///
    /// When no reinjected characters are pending and rc-style quote doubling
    /// is disabled, the content can be borrowed straight from the source and
    /// a `memchr` fast path jumps to the closing quote. Otherwise the content
    /// is accumulated char-by-char into an owned buffer (with rc quoting, a
    /// doubled `''` inside the string decodes to one literal `'`).
    ///
    /// Returns `LexerErrorKind::SingleQuote` if the closing quote is missing.
    fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        debug_assert_eq!(self.peek_char(), Some('\''));

        let wrapper_start = self.current_position();
        self.consume_ascii_chars(1); // consume opening '
        let content_start = self.current_position();
        // Borrowing is only valid when reading directly from the source with
        // no rc-quote decoding to apply.
        let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
        let mut content_end = content_start;
        let mut content = String::with_capacity(16);
        let mut closed = false;

        if can_borrow {
            // Fast path: locate the closing quote in one memchr pass.
            let rest = self.cursor.rest();
            if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
                self.consume_source_bytes(quote_index);
                content_end = self.current_position();
                self.consume_ascii_chars(1); // consume closing '
                closed = true;
            } else {
                // No closing quote anywhere: consume to EOF so the check
                // below reports the unterminated-quote error.
                self.consume_source_bytes(rest.len());
            }
        }

        // Slow path (skipped entirely when the fast path already closed or
        // exhausted the input): walk chars, accumulating into `content` only
        // when the text cannot be borrowed.
        while let Some(ch) = self.peek_char() {
            if closed {
                break;
            }
            if ch == '\'' {
                // rc quoting: '' inside the string is an escaped quote, not
                // a terminator.
                if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
                    if !can_borrow {
                        content.push('\'');
                    }
                    self.advance();
                    self.advance();
                    continue;
                }
                content_end = self.current_position();
                self.consume_ascii_chars(1); // consume closing '
                closed = true;
                break;
            }
            if !can_borrow {
                content.push(ch);
            }
            self.advance();
        }

        if !closed {
            return Err(LexerErrorKind::SingleQuote);
        }

        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
        let content_span = Some(Span::from_positions(content_start, content_end));

        if can_borrow {
            Ok(LexedWordSegment::borrowed_with_spans(
                LexedWordSegmentKind::SingleQuoted,
                &self.input[content_start.offset..content_end.offset],
                content_span,
                wrapper_span,
            ))
        } else {
            Ok(LexedWordSegment::owned_with_spans(
                LexedWordSegmentKind::SingleQuoted,
                content,
                content_span,
                wrapper_span,
            ))
        }
    }
1929
1930    fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1931        let segment = match self.read_dollar_single_quoted_segment() {
1932            Ok(segment) => segment,
1933            Err(kind) => return Some(LexedToken::error(kind)),
1934        };
1935        let mut word = LexedWord::from_segment(segment);
1936        if let Err(kind) = self.append_segmented_continuation(&mut word) {
1937            return Some(LexedToken::error(kind));
1938        }
1939
1940        let kind = if word.single_segment().is_some() {
1941            TokenKind::LiteralWord
1942        } else {
1943            TokenKind::Word
1944        };
1945
1946        Some(LexedToken::with_word_payload(kind, word))
1947    }
1948
1949    fn read_dollar_single_quoted_segment(
1950        &mut self,
1951    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1952        debug_assert_eq!(self.peek_char(), Some('$'));
1953        debug_assert_eq!(self.second_char(), Some('\''));
1954
1955        let wrapper_start = self.current_position();
1956        self.consume_ascii_chars(2); // consume $'
1957        let content_start = self.current_position();
1958        let mut out = String::with_capacity(16);
1959
1960        while let Some(ch) = self.peek_char() {
1961            if ch == '\'' {
1962                let content_end = self.current_position();
1963                self.advance();
1964                let wrapper_span =
1965                    Some(Span::from_positions(wrapper_start, self.current_position()));
1966                let content_span = Some(Span::from_positions(content_start, content_end));
1967                return Ok(LexedWordSegment::owned_with_spans(
1968                    LexedWordSegmentKind::DollarSingleQuoted,
1969                    out,
1970                    content_span,
1971                    wrapper_span,
1972                ));
1973            }
1974
1975            if ch == '\\' {
1976                self.advance();
1977                if let Some(esc) = self.peek_char() {
1978                    self.advance();
1979                    match esc {
1980                        'n' => out.push('\n'),
1981                        't' => out.push('\t'),
1982                        'r' => out.push('\r'),
1983                        'a' => out.push('\x07'),
1984                        'b' => out.push('\x08'),
1985                        'f' => out.push('\x0C'),
1986                        'v' => out.push('\x0B'),
1987                        'e' | 'E' => out.push('\x1B'),
1988                        '\\' => out.push('\\'),
1989                        '\'' => out.push('\''),
1990                        '"' => out.push('"'),
1991                        '?' => out.push('?'),
1992                        'c' => {
1993                            if let Some(control) = self.peek_char() {
1994                                self.advance();
1995                                out.push(((control as u32 & 0x1F) as u8) as char);
1996                            } else {
1997                                out.push('\\');
1998                                out.push('c');
1999                            }
2000                        }
2001                        'x' => {
2002                            let mut hex = String::new();
2003                            for _ in 0..2 {
2004                                if let Some(h) = self.peek_char() {
2005                                    if h.is_ascii_hexdigit() {
2006                                        hex.push(h);
2007                                        self.advance();
2008                                    } else {
2009                                        break;
2010                                    }
2011                                }
2012                            }
2013                            if let Ok(val) = u8::from_str_radix(&hex, 16) {
2014                                out.push(val as char);
2015                            }
2016                        }
2017                        'u' => {
2018                            let mut hex = String::new();
2019                            for _ in 0..4 {
2020                                if let Some(h) = self.peek_char() {
2021                                    if h.is_ascii_hexdigit() {
2022                                        hex.push(h);
2023                                        self.advance();
2024                                    } else {
2025                                        break;
2026                                    }
2027                                }
2028                            }
2029                            if let Ok(val) = u32::from_str_radix(&hex, 16)
2030                                && let Some(c) = char::from_u32(val)
2031                            {
2032                                out.push(c);
2033                            }
2034                        }
2035                        'U' => {
2036                            let mut hex = String::new();
2037                            for _ in 0..8 {
2038                                if let Some(h) = self.peek_char() {
2039                                    if h.is_ascii_hexdigit() {
2040                                        hex.push(h);
2041                                        self.advance();
2042                                    } else {
2043                                        break;
2044                                    }
2045                                }
2046                            }
2047                            if let Ok(val) = u32::from_str_radix(&hex, 16)
2048                                && let Some(c) = char::from_u32(val)
2049                            {
2050                                out.push(c);
2051                            }
2052                        }
2053                        '0'..='7' => {
2054                            let mut oct = String::new();
2055                            oct.push(esc);
2056                            for _ in 0..2 {
2057                                if let Some(o) = self.peek_char() {
2058                                    if o.is_ascii_digit() && o < '8' {
2059                                        oct.push(o);
2060                                        self.advance();
2061                                    } else {
2062                                        break;
2063                                    }
2064                                }
2065                            }
2066                            if let Ok(val) = u8::from_str_radix(&oct, 8) {
2067                                out.push(val as char);
2068                            }
2069                        }
2070                        _ => {
2071                            out.push('\\');
2072                            out.push(esc);
2073                        }
2074                    }
2075                } else {
2076                    out.push('\\');
2077                }
2078                continue;
2079            }
2080
2081            out.push(ch);
2082            self.advance();
2083        }
2084
2085        Err(LexerErrorKind::SingleQuote)
2086    }
2087
    /// Reads a run of plain (unquoted, non-special) word characters as a
    /// continuation segment, or `None` when no plain word char is next.
    fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
        let start = self.current_position();

        if self.reinject_buf.is_empty() {
            // Borrowed fast path: use the precomputed ASCII run only when it
            // is non-empty and the byte after it is ASCII/EOF (i.e. the run
            // ended at a real boundary, not a non-ASCII cut); otherwise fall
            // back to the char-wise cursor scan.
            let ascii_len = self.source_ascii_plain_word_len();
            let chunk = if ascii_len > 0
                && self
                    .cursor
                    .rest()
                    .as_bytes()
                    .get(ascii_len)
                    .is_none_or(|byte| byte.is_ascii())
            {
                self.consume_source_bytes(ascii_len);
                &self.input[start.offset..self.offset]
            } else {
                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                self.advance_scanned_source_bytes(chunk.len());
                chunk
            };
            if chunk.is_empty() {
                return None;
            }

            let end = self.current_position();
            return Some(LexedWordSegment::borrowed(
                LexedWordSegmentKind::Plain,
                &self.input[start.offset..self.offset],
                Some(Span::from_positions(start, end)),
            ));
        }

        // Reinjected characters pending: read char-by-char into an owned
        // buffer (built without spans, since reinjected text need not map
        // back to source offsets).
        let ch = self.peek_char()?;
        if !Self::is_plain_word_char(ch) {
            return None;
        }

        let mut text = String::with_capacity(16);
        while let Some(ch) = self.peek_char() {
            if !Self::is_plain_word_char(ch) {
                break;
            }
            text.push(ch);
            self.advance();
        }

        Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
    }
2136
2137    /// After a closing quote, read any adjacent quoted or unquoted word chars
2138    /// into `word`. Handles concatenation like `'foo'"bar"baz`.
2139    fn append_segmented_continuation(
2140        &mut self,
2141        word: &mut LexedWord<'a>,
2142    ) -> Result<(), LexerErrorKind> {
2143        loop {
2144            match self.peek_char() {
2145                Some('\\') if self.second_char() == Some('\n') => {
2146                    self.advance();
2147                    self.advance();
2148                    continue;
2149                }
2150                Some('\'') => {
2151                    word.push_segment(self.read_single_quoted_segment()?);
2152                }
2153                Some('"') => {
2154                    word.push_segment(self.read_double_quoted_segment()?);
2155                }
2156                Some('$') if self.second_char() == Some('\'') => {
2157                    word.push_segment(self.read_dollar_single_quoted_segment()?);
2158                }
2159                Some('$') if self.second_char() == Some('"') => {
2160                    word.push_segment(self.read_dollar_double_quoted_segment()?);
2161                }
2162                Some('(') if Self::lexed_word_can_take_parenthesized_suffix(word) => {
2163                    let Some(segment) = self.read_parenthesized_word_suffix_segment() else {
2164                        unreachable!("peeked '(' should produce a suffix segment");
2165                    };
2166                    word.push_segment(segment);
2167                }
2168                _ => {
2169                    if let Some(segment) = self.read_plain_continuation_segment() {
2170                        word.push_segment(segment);
2171                        continue;
2172                    }
2173
2174                    let start = self.current_position();
2175                    let plain = self.read_unquoted_segment(start)?;
2176                    if plain.as_str().is_empty() {
2177                        break;
2178                    }
2179                    word.push_segment(plain);
2180                }
2181            }
2182        }
2183
2184        Ok(())
2185    }
2186
2187    fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2188        debug_assert_eq!(self.peek_char(), Some('('));
2189
2190        let start = self.current_position();
2191        let mut depth = 0usize;
2192        let mut escaped = false;
2193        let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2194
2195        while let Some(ch) = self.peek_char() {
2196            if let Some(text) = text.as_mut() {
2197                text.push(ch);
2198            }
2199            self.advance();
2200
2201            if escaped {
2202                escaped = false;
2203                continue;
2204            }
2205
2206            match ch {
2207                '\\' => escaped = true,
2208                '(' => depth += 1,
2209                ')' => {
2210                    depth = depth.saturating_sub(1);
2211                    if depth == 0 {
2212                        break;
2213                    }
2214                }
2215                _ => {}
2216            }
2217        }
2218
2219        let end = self.current_position();
2220        let span = Some(Span::from_positions(start, end));
2221        if let Some(text) = text {
2222            Some(LexedWordSegment::owned_with_spans(
2223                LexedWordSegmentKind::Plain,
2224                text,
2225                span,
2226                span,
2227            ))
2228        } else {
2229            Some(LexedWordSegment::borrowed_with_spans(
2230                LexedWordSegmentKind::Plain,
2231                &self.input[start.offset..end.offset],
2232                span,
2233                span,
2234            ))
2235        }
2236    }
2237
    /// Lexes a `"..."` string (plus any adjacent word material) into a token.
    fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(false)
    }
2241
    /// Lexes a `$"..."` string (plus any adjacent word material) into a token.
    fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(true)
    }
2245
2246    fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2247        let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2248            Ok(segment) => segment,
2249            Err(kind) => return Some(LexedToken::error(kind)),
2250        };
2251        let mut word = LexedWord::from_segment(segment);
2252        if let Err(kind) = self.append_segmented_continuation(&mut word) {
2253            return Some(LexedToken::error(kind));
2254        }
2255
2256        let kind = if word.single_segment().is_some() {
2257            TokenKind::QuotedWord
2258        } else {
2259            TokenKind::Word
2260        };
2261
2262        Some(LexedToken::with_word_payload(kind, word))
2263    }
2264
    /// Lexes a single `"..."` segment (no `$` prefix, no continuation).
    fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(false)
    }
2268
    /// Lexes a single `$"..."` segment (no continuation).
    fn read_dollar_double_quoted_segment(
        &mut self,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(true)
    }
2274
2275    fn read_double_quoted_segment_with_dollar(
2276        &mut self,
2277        dollar: bool,
2278    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2279        if dollar {
2280            debug_assert_eq!(self.peek_char(), Some('$'));
2281            debug_assert_eq!(self.second_char(), Some('"'));
2282        } else {
2283            debug_assert_eq!(self.peek_char(), Some('"'));
2284        }
2285
2286        let wrapper_start = self.current_position();
2287        if dollar {
2288            self.consume_ascii_chars(2); // consume $"
2289        } else {
2290            self.consume_ascii_chars(1); // consume opening "
2291        }
2292        let content_start = self.current_position();
2293        let mut content_end = content_start;
2294        let mut simple = self.reinject_buf.is_empty();
2295        let mut borrowable = self.reinject_buf.is_empty();
2296        let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2297        let mut closed = false;
2298
2299        while let Some(ch) = self.peek_char() {
2300            if simple {
2301                if self.reinject_buf.is_empty() {
2302                    let rest = self.cursor.rest();
2303                    match Self::find_double_quote_special(rest) {
2304                        Some(index) if index > 0 => {
2305                            self.consume_source_bytes(index);
2306                            continue;
2307                        }
2308                        None => {
2309                            self.consume_source_bytes(rest.len());
2310                            return Err(LexerErrorKind::DoubleQuote);
2311                        }
2312                        _ => {}
2313                    }
2314                }
2315
2316                match ch {
2317                    '"' => {
2318                        content_end = self.current_position();
2319                        self.consume_ascii_chars(1); // consume closing "
2320                        closed = true;
2321                        break;
2322                    }
2323                    '\\' | '$' | '`' => {
2324                        simple = false;
2325                        if ch == '`' {
2326                            borrowable = false;
2327                            let capture_end = self.current_position();
2328                            self.ensure_capture_from_source(
2329                                &mut content,
2330                                content_start,
2331                                capture_end,
2332                            );
2333                        }
2334                    }
2335                    _ => {
2336                        self.advance();
2337                    }
2338                }
2339                if simple {
2340                    continue;
2341                }
2342            }
2343
2344            match ch {
2345                '"' => {
2346                    if borrowable {
2347                        content_end = self.current_position();
2348                    }
2349                    self.consume_ascii_chars(1); // consume closing "
2350                    closed = true;
2351                    break;
2352                }
2353                '\\' => {
2354                    let escape_start = self.current_position();
2355                    self.advance();
2356                    if let Some(next) = self.peek_char() {
2357                        match next {
2358                            '\n' => {
2359                                borrowable = false;
2360                                self.ensure_capture_from_source(
2361                                    &mut content,
2362                                    content_start,
2363                                    escape_start,
2364                                );
2365                                self.advance();
2366                            }
2367                            '$' => {
2368                                borrowable = false;
2369                                self.ensure_capture_from_source(
2370                                    &mut content,
2371                                    content_start,
2372                                    escape_start,
2373                                );
2374                                Self::push_capture_char(&mut content, '\x00');
2375                                Self::push_capture_char(&mut content, '$');
2376                                self.advance();
2377                            }
2378                            '"' | '\\' | '`' => {
2379                                borrowable = false;
2380                                self.ensure_capture_from_source(
2381                                    &mut content,
2382                                    content_start,
2383                                    escape_start,
2384                                );
2385                                if next == '\\' {
2386                                    Self::push_capture_char(&mut content, '\x00');
2387                                }
2388                                if next == '`' {
2389                                    Self::push_capture_char(&mut content, '\x00');
2390                                }
2391                                Self::push_capture_char(&mut content, next);
2392                                self.advance();
2393                                content_end = self.current_position();
2394                            }
2395                            _ => {
2396                                Self::push_capture_char(&mut content, '\\');
2397                                Self::push_capture_char(&mut content, next);
2398                                self.advance();
2399                                content_end = self.current_position();
2400                            }
2401                        }
2402                    }
2403                }
2404                '$' => {
2405                    Self::push_capture_char(&mut content, '$');
2406                    self.advance();
2407                    if self.peek_char() == Some('(') {
2408                        if self.second_char() == Some('(') {
2409                            self.read_arithmetic_expansion_into(&mut content);
2410                        } else {
2411                            Self::push_capture_char(&mut content, '(');
2412                            self.advance();
2413                            self.read_command_subst_into(&mut content);
2414                        }
2415                    } else if self.peek_char() == Some('{') {
2416                        Self::push_capture_char(&mut content, '{');
2417                        self.advance();
2418                        borrowable &= self.read_param_expansion_into(&mut content, content_start);
2419                    }
2420                    content_end = self.current_position();
2421                }
2422                '`' => {
2423                    borrowable = false;
2424                    let capture_end = self.current_position();
2425                    self.ensure_capture_from_source(&mut content, content_start, capture_end);
2426                    Self::push_capture_char(&mut content, '`');
2427                    self.advance(); // consume opening `
2428                    while let Some(c) = self.peek_char() {
2429                        Self::push_capture_char(&mut content, c);
2430                        self.advance();
2431                        if c == '`' {
2432                            break;
2433                        }
2434                        if c == '\\'
2435                            && let Some(next) = self.peek_char()
2436                        {
2437                            Self::push_capture_char(&mut content, next);
2438                            self.advance();
2439                        }
2440                    }
2441                    content_end = self.current_position();
2442                }
2443                _ => {
2444                    Self::push_capture_char(&mut content, ch);
2445                    self.advance();
2446                    content_end = self.current_position();
2447                }
2448            }
2449        }
2450
2451        if !closed {
2452            return Err(LexerErrorKind::DoubleQuote);
2453        }
2454
2455        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
2456        let content_span = Some(Span::from_positions(content_start, content_end));
2457
2458        if borrowable {
2459            Ok(LexedWordSegment::borrowed_with_spans(
2460                if dollar {
2461                    LexedWordSegmentKind::DollarDoubleQuoted
2462                } else {
2463                    LexedWordSegmentKind::DoubleQuoted
2464                },
2465                &self.input[content_start.offset..content_end.offset],
2466                content_span,
2467                wrapper_span,
2468            ))
2469        } else {
2470            Ok(LexedWordSegment::owned_with_spans(
2471                if dollar {
2472                    LexedWordSegmentKind::DollarDoubleQuoted
2473                } else {
2474                    LexedWordSegmentKind::DoubleQuoted
2475                },
2476                content.unwrap_or_default(),
2477                content_span,
2478                wrapper_span,
2479            ))
2480        }
2481    }
2482
    /// Read an arithmetic expansion body `$(( ... ))` after the leading `$`
    /// has been consumed; both opening parens are still pending in the input
    /// (asserted below).
    ///
    /// Every consumed character, including the final `))`, is appended to
    /// `content`. Quoted regions and backslash escapes are copied verbatim so
    /// that parens inside them cannot affect the depth count.
    ///
    /// Returns `true` once the balancing `))` is found, `false` on EOF.
    fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
        debug_assert_eq!(self.peek_char(), Some('('));
        debug_assert_eq!(self.second_char(), Some('('));

        Self::push_capture_char(content, '(');
        self.advance();
        Self::push_capture_char(content, '(');
        self.advance();

        // Both opening parens count toward the depth; the expansion closes
        // when the matching `))` brings it back to zero.
        let mut depth = 2;
        while let Some(c) = self.peek_char() {
            match c {
                // Backslash: copy the escape and the escaped character, so an
                // escaped paren or quote cannot change lexer state.
                '\\' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        Self::push_capture_char(content, next);
                        self.advance();
                    }
                }
                // Single quotes: no escapes inside; scan to the closing quote.
                '\'' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if quoted == '\'' {
                            break;
                        }
                    }
                }
                // Double quotes: backslash escapes the next character, so an
                // escaped `"` does not terminate the quoted region.
                '"' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '"' => break,
                            _ => {}
                        }
                    }
                }
                // Backticks: same escape handling as double quotes.
                '`' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '`' => break,
                            _ => {}
                        }
                    }
                }
                '(' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    depth += 1;
                }
                ')' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    depth -= 1;
                    if depth == 0 {
                        return true;
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        // EOF before the closing `))`.
        false
    }
2572
    /// Read a legacy arithmetic expansion body `$[ ... ]` after the opening
    /// `$[` has already been consumed (initial bracket depth is 1).
    ///
    /// Every consumed character, including the closing `]`, is appended to
    /// `content`. Quoted regions are copied verbatim so brackets inside them
    /// cannot affect the depth, and nested expansions (`$((..))`, `$(..)`,
    /// `${..}`, `$[..]`) are delegated to their dedicated readers.
    ///
    /// Returns `true` on the matching `]`; `false` on EOF or when a nested
    /// expansion fails to close.
    fn read_legacy_arithmetic_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        let mut bracket_depth = 1;

        while let Some(c) = self.peek_char() {
            match c {
                // Backslash: copy the escape and the escaped character, so an
                // escaped bracket or quote cannot change lexer state.
                '\\' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        Self::push_capture_char(content, next);
                        self.advance();
                    }
                }
                // Single quotes: no escapes inside; scan to the closing quote.
                '\'' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if quoted == '\'' {
                            break;
                        }
                    }
                }
                // Double quotes: an escaped `"` does not close the region.
                '"' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '"' => break,
                            _ => {}
                        }
                    }
                }
                // Backticks: same escape handling as double quotes.
                '`' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '`' => break,
                            _ => {}
                        }
                    }
                }
                '[' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    bracket_depth += 1;
                }
                ']' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    bracket_depth -= 1;
                    if bracket_depth == 0 {
                        return true;
                    }
                }
                // `$` may start a nested expansion; each kind is read by its
                // own routine so its brackets don't confuse our depth count.
                '$' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            if !self.read_arithmetic_expansion_into(content) {
                                return false;
                            }
                        } else {
                            Self::push_capture_char(content, '(');
                            self.advance();
                            if !self.read_command_subst_into(content) {
                                return false;
                            }
                        }
                    } else if self.peek_char() == Some('{') {
                        Self::push_capture_char(content, '{');
                        self.advance();
                        // NOTE(review): `segment_start` is forwarded here —
                        // presumably for the nested reader's span/position
                        // bookkeeping; confirm in `read_param_expansion_into`.
                        if !self.read_param_expansion_into(content, segment_start) {
                            return false;
                        }
                    } else if self.peek_char() == Some('[') {
                        Self::push_capture_char(content, '[');
                        self.advance();
                        if !self.read_legacy_arithmetic_into(content, segment_start) {
                            return false;
                        }
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        // EOF before the closing `]`.
        false
    }
2688
    /// Read command substitution content after `$(`, handling nested parens and quotes.
    /// Appends chars to `content` and adds the closing `)`.
    ///
    /// Delegates to `read_command_subst_into_depth` starting at nesting depth 0;
    /// that depth counter limits recursion to prevent stack overflow.
    /// Returns `true` when the matching `)` was found, `false` on EOF.
    fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
        self.read_command_subst_into_depth(content, 0)
    }
2695
2696    fn flush_command_subst_keyword(
2697        current_word: &mut String,
2698        pending_case_headers: &mut usize,
2699        case_clause_depths: &mut SmallVec<[usize; 4]>,
2700        depth: usize,
2701        word_started_at_command_start: &mut bool,
2702    ) {
2703        if current_word.is_empty() {
2704            *word_started_at_command_start = false;
2705            return;
2706        }
2707
2708        match current_word.as_str() {
2709            "case" if *word_started_at_command_start => *pending_case_headers += 1,
2710            "in" if *pending_case_headers > 0 => {
2711                *pending_case_headers -= 1;
2712                case_clause_depths.push(depth);
2713            }
2714            "esac" if *word_started_at_command_start => {
2715                case_clause_depths.pop();
2716            }
2717            _ => {}
2718        }
2719
2720        current_word.clear();
2721        *word_started_at_command_start = false;
2722    }
2723
    /// After `<<`/`<<-` inside a command substitution, read the heredoc
    /// delimiter word.
    ///
    /// Leading spaces/tabs and the raw (still-quoted) delimiter text are
    /// appended to `content`; the returned value is the "cooked" delimiter
    /// with quote characters and escapes removed, which heredoc body lines
    /// are later compared against. Returns `None` when no delimiter
    /// character was consumed.
    fn read_command_subst_heredoc_delimiter_into(
        &mut self,
        content: &mut Option<String>,
    ) -> Option<String> {
        // Skip — but still capture — whitespace between `<<` and the word.
        while let Some(ch) = self.peek_char() {
            if !matches!(ch, ' ' | '\t') {
                break;
            }
            Self::push_capture_char(content, ch);
            self.advance();
        }

        let mut cooked = String::new();
        let mut in_single = false;
        let mut in_double = false;
        let mut escaped = false;
        let mut saw_any = false;

        while let Some(ch) = self.peek_char() {
            // Whether `ch` ends the word depends on current quote/escape
            // state (an escaped or quoted terminator is still delimiter text).
            if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
                break;
            }

            saw_any = true;
            Self::push_capture_char(content, ch);
            self.advance();

            // An escaped character is always literal in the cooked form.
            if escaped {
                cooked.push(ch);
                escaped = false;
                continue;
            }

            // Quoting characters toggle state and are dropped from the
            // cooked delimiter; everything else is kept literally.
            match ch {
                '\\' if !in_single => escaped = true,
                '\'' if !in_double => in_single = !in_single,
                '"' if !in_single => in_double = !in_double,
                _ => cooked.push(ch),
            }
        }

        saw_any.then_some(cooked)
    }
2767
2768    fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2769        Self::push_capture_char(content, '`');
2770        self.advance();
2771        while let Some(ch) = self.peek_char() {
2772            Self::push_capture_char(content, ch);
2773            self.advance();
2774            if ch == '\\' {
2775                if let Some(esc) = self.peek_char() {
2776                    Self::push_capture_char(content, esc);
2777                    self.advance();
2778                }
2779                continue;
2780            }
2781            if ch == '`' {
2782                break;
2783            }
2784        }
2785    }
2786
2787    fn read_command_subst_pending_heredoc_into(
2788        &mut self,
2789        content: &mut Option<String>,
2790        delimiter: &str,
2791        strip_tabs: bool,
2792    ) -> bool {
2793        loop {
2794            let mut line = String::new();
2795            let mut saw_newline = false;
2796
2797            while let Some(ch) = self.peek_char() {
2798                self.advance();
2799                if ch == '\n' {
2800                    saw_newline = true;
2801                    break;
2802                }
2803                line.push(ch);
2804            }
2805
2806            Self::push_capture_str(content, &line);
2807            if saw_newline {
2808                Self::push_capture_char(content, '\n');
2809            }
2810
2811            if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2812                return true;
2813            }
2814        }
2815    }
2816
2817    fn read_command_subst_into_depth(
2818        &mut self,
2819        content: &mut Option<String>,
2820        subst_depth: usize,
2821    ) -> bool {
2822        if subst_depth >= self.max_subst_depth {
2823            // Depth limit exceeded — consume until matching ')' and emit error token
2824            let mut depth = 1;
2825            while let Some(c) = self.peek_char() {
2826                self.advance();
2827                match c {
2828                    '(' => depth += 1,
2829                    ')' => {
2830                        depth -= 1;
2831                        if depth == 0 {
2832                            Self::push_capture_char(content, ')');
2833                            return true;
2834                        }
2835                    }
2836                    _ => {}
2837                }
2838            }
2839            return false;
2840        }
2841
2842        let mut depth = 1;
2843        let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
2844        let mut pending_case_headers = 0usize;
2845        let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
2846        let mut current_word = String::with_capacity(16);
2847        let mut at_command_start = true;
2848        let mut expecting_redirection_target = false;
2849        let mut current_word_started_at_command_start = false;
2850        while let Some(c) = self.peek_char() {
2851            match c {
2852                '#' if !self.should_treat_hash_as_word_char() => {
2853                    let had_word = !current_word.is_empty();
2854                    Self::flush_command_subst_keyword(
2855                        &mut current_word,
2856                        &mut pending_case_headers,
2857                        &mut case_clause_depths,
2858                        depth,
2859                        &mut current_word_started_at_command_start,
2860                    );
2861                    if had_word && expecting_redirection_target {
2862                        expecting_redirection_target = false;
2863                    }
2864                    Self::push_capture_char(content, '#');
2865                    self.advance();
2866                    while let Some(comment_ch) = self.peek_char() {
2867                        Self::push_capture_char(content, comment_ch);
2868                        self.advance();
2869                        if comment_ch == '\n' {
2870                            for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
2871                                if !self.read_command_subst_pending_heredoc_into(
2872                                    content, &delimiter, strip_tabs,
2873                                ) {
2874                                    return false;
2875                                }
2876                            }
2877                            at_command_start = true;
2878                            expecting_redirection_target = false;
2879                            break;
2880                        }
2881                    }
2882                }
2883                '(' => {
2884                    Self::flush_command_subst_keyword(
2885                        &mut current_word,
2886                        &mut pending_case_headers,
2887                        &mut case_clause_depths,
2888                        depth,
2889                        &mut current_word_started_at_command_start,
2890                    );
2891                    depth += 1;
2892                    Self::push_capture_char(content, c);
2893                    self.advance();
2894                    at_command_start = true;
2895                    expecting_redirection_target = false;
2896                }
2897                ')' => {
2898                    Self::flush_command_subst_keyword(
2899                        &mut current_word,
2900                        &mut pending_case_headers,
2901                        &mut case_clause_depths,
2902                        depth,
2903                        &mut current_word_started_at_command_start,
2904                    );
2905                    if case_clause_depths
2906                        .last()
2907                        .is_some_and(|case_depth| *case_depth == depth)
2908                    {
2909                        Self::push_capture_char(content, ')');
2910                        self.advance();
2911                        at_command_start = true;
2912                        expecting_redirection_target = false;
2913                        continue;
2914                    }
2915                    depth -= 1;
2916                    self.advance();
2917                    if depth == 0 {
2918                        Self::push_capture_char(content, ')');
2919                        return true;
2920                    }
2921                    Self::push_capture_char(content, c);
2922                    at_command_start = false;
2923                    expecting_redirection_target = false;
2924                }
2925                '"' => {
2926                    let had_word = !current_word.is_empty();
2927                    Self::flush_command_subst_keyword(
2928                        &mut current_word,
2929                        &mut pending_case_headers,
2930                        &mut case_clause_depths,
2931                        depth,
2932                        &mut current_word_started_at_command_start,
2933                    );
2934                    if had_word && expecting_redirection_target {
2935                        expecting_redirection_target = false;
2936                    }
2937                    // Nested double-quoted string inside $()
2938                    Self::push_capture_char(content, '"');
2939                    self.advance();
2940                    while let Some(qc) = self.peek_char() {
2941                        match qc {
2942                            '"' => {
2943                                Self::push_capture_char(content, '"');
2944                                self.advance();
2945                                break;
2946                            }
2947                            '\\' => {
2948                                Self::push_capture_char(content, '\\');
2949                                self.advance();
2950                                if let Some(esc) = self.peek_char() {
2951                                    Self::push_capture_char(content, esc);
2952                                    self.advance();
2953                                }
2954                            }
2955                            '$' => {
2956                                Self::push_capture_char(content, '$');
2957                                self.advance();
2958                                if self.peek_char() == Some('(') {
2959                                    if self.second_char() == Some('(') {
2960                                        if !self.read_arithmetic_expansion_into(content) {
2961                                            return false;
2962                                        }
2963                                    } else {
2964                                        Self::push_capture_char(content, '(');
2965                                        self.advance();
2966                                        if !self
2967                                            .read_command_subst_into_depth(content, subst_depth + 1)
2968                                        {
2969                                            return false;
2970                                        }
2971                                    }
2972                                }
2973                            }
2974                            _ => {
2975                                Self::push_capture_char(content, qc);
2976                                self.advance();
2977                            }
2978                        }
2979                    }
2980                    if expecting_redirection_target {
2981                        expecting_redirection_target = false;
2982                    } else {
2983                        at_command_start = false;
2984                    }
2985                }
2986                '\'' => {
2987                    let had_word = !current_word.is_empty();
2988                    Self::flush_command_subst_keyword(
2989                        &mut current_word,
2990                        &mut pending_case_headers,
2991                        &mut case_clause_depths,
2992                        depth,
2993                        &mut current_word_started_at_command_start,
2994                    );
2995                    if had_word && expecting_redirection_target {
2996                        expecting_redirection_target = false;
2997                    }
2998                    // Single-quoted string inside $()
2999                    Self::push_capture_char(content, '\'');
3000                    self.advance();
3001                    while let Some(qc) = self.peek_char() {
3002                        Self::push_capture_char(content, qc);
3003                        self.advance();
3004                        if qc == '\'' {
3005                            break;
3006                        }
3007                    }
3008                    if expecting_redirection_target {
3009                        expecting_redirection_target = false;
3010                    } else {
3011                        at_command_start = false;
3012                    }
3013                }
3014                '`' => {
3015                    let had_word = !current_word.is_empty();
3016                    Self::flush_command_subst_keyword(
3017                        &mut current_word,
3018                        &mut pending_case_headers,
3019                        &mut case_clause_depths,
3020                        depth,
3021                        &mut current_word_started_at_command_start,
3022                    );
3023                    if had_word && expecting_redirection_target {
3024                        expecting_redirection_target = false;
3025                    }
3026                    self.read_command_subst_backtick_segment_into(content);
3027                    if expecting_redirection_target {
3028                        expecting_redirection_target = false;
3029                    } else {
3030                        at_command_start = false;
3031                    }
3032                }
3033                '$' if self.second_char() == Some('\'') => {
3034                    let had_word = !current_word.is_empty();
3035                    Self::flush_command_subst_keyword(
3036                        &mut current_word,
3037                        &mut pending_case_headers,
3038                        &mut case_clause_depths,
3039                        depth,
3040                        &mut current_word_started_at_command_start,
3041                    );
3042                    if had_word && expecting_redirection_target {
3043                        expecting_redirection_target = false;
3044                    }
3045                    Self::push_capture_char(content, '$');
3046                    self.advance();
3047                    Self::push_capture_char(content, '\'');
3048                    self.advance();
3049                    while let Some(qc) = self.peek_char() {
3050                        Self::push_capture_char(content, qc);
3051                        self.advance();
3052                        if qc == '\\' {
3053                            if let Some(esc) = self.peek_char() {
3054                                Self::push_capture_char(content, esc);
3055                                self.advance();
3056                            }
3057                            continue;
3058                        }
3059                        if qc == '\'' {
3060                            break;
3061                        }
3062                    }
3063                    if expecting_redirection_target {
3064                        expecting_redirection_target = false;
3065                    } else {
3066                        at_command_start = false;
3067                    }
3068                }
3069                '\\' => {
3070                    let had_word = !current_word.is_empty();
3071                    Self::flush_command_subst_keyword(
3072                        &mut current_word,
3073                        &mut pending_case_headers,
3074                        &mut case_clause_depths,
3075                        depth,
3076                        &mut current_word_started_at_command_start,
3077                    );
3078                    if had_word && expecting_redirection_target {
3079                        expecting_redirection_target = false;
3080                    }
3081                    Self::push_capture_char(content, '\\');
3082                    self.advance();
3083                    if let Some(esc) = self.peek_char() {
3084                        Self::push_capture_char(content, esc);
3085                        self.advance();
3086                    }
3087                    if expecting_redirection_target {
3088                        expecting_redirection_target = false;
3089                    } else {
3090                        at_command_start = false;
3091                    }
3092                }
3093                '<' if self.second_char() == Some('<') => {
3094                    let word_was_redirection_fd = current_word_started_at_command_start
3095                        && !current_word.is_empty()
3096                        && current_word.chars().all(|current| current.is_ascii_digit());
3097                    Self::flush_command_subst_keyword(
3098                        &mut current_word,
3099                        &mut pending_case_headers,
3100                        &mut case_clause_depths,
3101                        depth,
3102                        &mut current_word_started_at_command_start,
3103                    );
3104                    if word_was_redirection_fd {
3105                        at_command_start = true;
3106                    }
3107
3108                    Self::push_capture_char(content, '<');
3109                    self.advance();
3110                    Self::push_capture_char(content, '<');
3111                    self.advance();
3112
3113                    if self.peek_char() == Some('<') {
3114                        Self::push_capture_char(content, '<');
3115                        self.advance();
3116                        expecting_redirection_target = true;
3117                        continue;
3118                    }
3119
3120                    let strip_tabs = if self.peek_char() == Some('-') {
3121                        Self::push_capture_char(content, '-');
3122                        self.advance();
3123                        true
3124                    } else {
3125                        false
3126                    };
3127
3128                    if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
3129                    {
3130                        pending_heredocs.push((delimiter, strip_tabs));
3131                        expecting_redirection_target = false;
3132                    } else {
3133                        expecting_redirection_target = true;
3134                    }
3135                }
3136                '>' | '<' => {
3137                    let word_was_redirection_fd = current_word_started_at_command_start
3138                        && !current_word.is_empty()
3139                        && current_word.chars().all(|current| current.is_ascii_digit());
3140                    Self::flush_command_subst_keyword(
3141                        &mut current_word,
3142                        &mut pending_case_headers,
3143                        &mut case_clause_depths,
3144                        depth,
3145                        &mut current_word_started_at_command_start,
3146                    );
3147                    if word_was_redirection_fd {
3148                        at_command_start = true;
3149                    }
3150                    Self::push_capture_char(content, c);
3151                    self.advance();
3152                    expecting_redirection_target = true;
3153                }
3154                '\n' => {
3155                    Self::flush_command_subst_keyword(
3156                        &mut current_word,
3157                        &mut pending_case_headers,
3158                        &mut case_clause_depths,
3159                        depth,
3160                        &mut current_word_started_at_command_start,
3161                    );
3162                    Self::push_capture_char(content, '\n');
3163                    self.advance();
3164                    for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
3165                        if !self.read_command_subst_pending_heredoc_into(
3166                            content, &delimiter, strip_tabs,
3167                        ) {
3168                            return false;
3169                        }
3170                    }
3171                    at_command_start = true;
3172                    expecting_redirection_target = false;
3173                }
3174                _ => {
3175                    if c.is_ascii_alphanumeric() || c == '_' {
3176                        if current_word.is_empty()
3177                            && !expecting_redirection_target
3178                            && at_command_start
3179                        {
3180                            current_word_started_at_command_start = true;
3181                            at_command_start = false;
3182                        }
3183                        current_word.push(c);
3184                    } else {
3185                        let had_word = !current_word.is_empty();
3186                        Self::flush_command_subst_keyword(
3187                            &mut current_word,
3188                            &mut pending_case_headers,
3189                            &mut case_clause_depths,
3190                            depth,
3191                            &mut current_word_started_at_command_start,
3192                        );
3193                        if had_word && expecting_redirection_target {
3194                            expecting_redirection_target = false;
3195                        }
3196                        match c {
3197                            ' ' | '\t' => {}
3198                            ';' | '|' | '&' => {
3199                                at_command_start = true;
3200                                expecting_redirection_target = false;
3201                            }
3202                            _ => {
3203                                if !expecting_redirection_target {
3204                                    at_command_start = false;
3205                                }
3206                            }
3207                        }
3208                    }
3209                    Self::push_capture_char(content, c);
3210                    self.advance();
3211                }
3212            }
3213        }
3214
3215        false
3216    }
3217
    /// Read parameter expansion content after `${`, handling nested braces and quotes.
    /// In bash, quotes inside `${...}` (e.g. `${arr["key"]}`) don't terminate the
    /// outer double-quoted string. Appends chars including closing `}` to `content`.
    ///
    /// Returns `true` while the captured text is still byte-identical to the
    /// source (so the caller may borrow it); returns `false` once an escape
    /// sequence has been rewritten into `content`.
    fn read_param_expansion_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        let mut borrowable = true;
        // `${` nesting depth; we are called just after the opening brace.
        let mut depth = 1;
        // Count of bare `{` chars seen inside the expansion whose matching `}`
        // must be treated as literal text rather than as the expansion closer.
        let mut literal_brace_depth = 0usize;
        let mut in_single = false;
        let mut in_double = false;
        // Expansion depth at which the current `"` opened: a `}` inside the
        // quotes may only close expansions opened after the quote started.
        let mut double_quote_depth = 0usize;
        while let Some(c) = self.peek_char() {
            if in_single {
                match c {
                    '\\' => {
                        let escape_start = self.current_position();
                        if self.second_char() == Some('"') {
                            // `\"`: emit the quote without the backslash, which
                            // forces an owned (non-borrowable) capture.
                            self.advance();
                            borrowable = false;
                            self.ensure_capture_from_source(content, segment_start, escape_start);
                            Self::push_capture_char(content, '"');
                            self.advance();
                        } else {
                            // Any other backslash is literal inside single quotes.
                            Self::push_capture_char(content, '\\');
                            self.advance();
                        }
                    }
                    '\'' => {
                        Self::push_capture_char(content, c);
                        self.advance();
                        in_single = false;
                    }
                    _ => {
                        Self::push_capture_char(content, c);
                        self.advance();
                    }
                }
                continue;
            }

            match c {
                '}' if !in_single && (!in_double || depth > double_quote_depth) => {
                    self.advance();
                    Self::push_capture_char(content, '}');
                    if depth == 1
                        && literal_brace_depth > 0
                        && self.has_later_top_level_param_expansion_closer(depth)
                    {
                        // This `}` pairs with a literal `{` and a real closer is
                        // still ahead, so keep the expansion open.
                        literal_brace_depth -= 1;
                        continue;
                    }
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                '{' if !in_single && !in_double => {
                    literal_brace_depth += 1;
                    Self::push_capture_char(content, '{');
                    self.advance();
                }
                '"' => {
                    // Quotes inside ${...} are part of the expansion, not string delimiters
                    Self::push_capture_char(content, '"');
                    self.advance();
                    in_double = !in_double;
                    double_quote_depth = if in_double { depth } else { 0 };
                }
                '\'' => {
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    if !in_double {
                        in_single = true;
                    }
                }
                '\\' => {
                    // Inside ${...} within double quotes, same escape rules apply:
                    // \", \\, \$, \` produce the escaped char; others keep backslash
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        match esc {
                            '$' => {
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                // NUL sentinel marks this `$` as escaped so a
                                // later stage won't treat it as an expansion —
                                // presumably stripped downstream; TODO confirm
                                // against the consumer of captured text.
                                Self::push_capture_char(content, '\x00');
                                Self::push_capture_char(content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            '}' => {
                                // \} should be a literal } without closing the expansion
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, '}');
                                self.advance();
                                literal_brace_depth = literal_brace_depth.saturating_sub(1);
                            }
                            _ => {
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                        }
                    } else {
                        // Trailing backslash at end of input stays literal.
                        Self::push_capture_char(content, '\\');
                    }
                }
                '$' => {
                    Self::push_capture_char(content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            // `$((...))` arithmetic expansion.
                            if !self.read_arithmetic_expansion_into(content) {
                                borrowable = false;
                            }
                        } else {
                            // `$(...)` command substitution.
                            Self::push_capture_char(content, '(');
                            self.advance();
                            self.read_command_subst_into(content);
                        }
                    } else if self.peek_char() == Some('{') {
                        // Nested `${...}` — recurse with the same segment start.
                        Self::push_capture_char(content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(content, segment_start);
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }
        borrowable
    }
3368
    /// Lookahead-only scan: is there an upcoming `}` that would close the
    /// parameter expansion currently open at `target_depth`?
    ///
    /// Mirrors the quote/escape rules of `read_param_expansion_into` so that a
    /// literal `{` inside `${...}` is only treated as literal when a closer is
    /// actually still ahead. A newline seen at `target_depth` aborts the search.
    fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
        let mut chars = self.lookahead_chars().peekable();
        let mut depth = target_depth;
        let mut in_single = false;
        let mut in_double = false;
        // Depth at which the current double quote opened; a `}` inside quotes
        // only closes expansions opened after the quote started.
        let mut double_quote_depth = 0usize;

        while let Some(ch) = chars.next() {
            if in_single {
                match ch {
                    '\'' => in_single = false,
                    // `\"` inside single quotes consumes the quote too (matches
                    // the special case in `read_param_expansion_into`).
                    '\\' if chars.peek() == Some(&'"') => {
                        chars.next();
                    }
                    '\\' => {}
                    _ => {}
                }
                continue;
            }

            if in_double {
                match ch {
                    '"' => {
                        in_double = false;
                        double_quote_depth = 0;
                    }
                    // Backslash escapes the following char inside double quotes.
                    '\\' => {
                        chars.next();
                    }
                    '$' if chars.peek() == Some(&'{') => {
                        chars.next();
                        depth += 1;
                    }
                    '}' if depth > double_quote_depth => {
                        depth -= 1;
                    }
                    _ => {}
                }
                continue;
            }

            match ch {
                // An expansion closer must appear on the same line.
                '\n' if depth == target_depth => return false,
                '\'' => in_single = true,
                '"' => {
                    in_double = true;
                    double_quote_depth = depth;
                }
                '\\' => {
                    chars.next();
                }
                '$' if chars.peek() == Some(&'{') => {
                    chars.next();
                    depth += 1;
                }
                '}' => {
                    if depth == target_depth {
                        return true;
                    }
                    depth -= 1;
                }
                _ => {}
            }
        }

        false
    }
3436
    /// Check if the content starting with { looks like a brace expansion
    /// Brace expansion: {a,b,c} or {1..5} (contains , or ..)
    /// Brace group: { cmd; } (contains spaces, semicolons, newlines)
    /// Caps lookahead to prevent O(n^2) scanning when input
    /// contains many unmatched `{` characters (issue #997).
    fn looks_like_brace_expansion(&self) -> bool {
        const MAX_LOOKAHEAD: usize = 10_000;

        let mut chars = self.lookahead_chars();

        // Skip the opening {
        if chars.next() != Some('{') {
            return false;
        }

        let mut depth = 1;
        // `$(...)` nesting: commas inside command substitution don't count as
        // expansion separators.
        let mut paren_depth = 0usize;
        let mut has_comma = false;
        let mut has_dot_dot = false;
        let mut escaped = false;
        let mut in_single = false;
        let mut in_double = false;
        let mut in_backtick = false;
        let mut prev_char = None;
        let mut scanned = 0usize;

        for ch in chars {
            scanned += 1;
            if scanned > MAX_LOOKAHEAD {
                return false;
            }

            // Braces/parens only count when not inside quotes or backticks.
            let brace_surface_active = !in_single && !in_double && !in_backtick;
            // `,` / `..` / boundary chars only matter at the outermost brace
            // level, outside parens and quoting.
            let at_top_level = depth == 1 && paren_depth == 0 && brace_surface_active;

            match ch {
                // Char after a backslash is literal; it still becomes
                // `prev_char` below so `..` detection stays consistent.
                _ if escaped => {
                    escaped = false;
                }
                '\\' if !in_single => escaped = true,
                '\'' if !in_double && !in_backtick => in_single = !in_single,
                '"' if !in_single && !in_backtick => in_double = !in_double,
                '`' if !in_single && !in_double => in_backtick = !in_backtick,
                // Only `$(` (or a paren within an open one) starts paren tracking.
                '(' if brace_surface_active && (paren_depth > 0 || prev_char == Some('$')) => {
                    paren_depth += 1
                }
                ')' if brace_surface_active && paren_depth > 0 => paren_depth -= 1,
                '{' if !in_single && !in_double && !in_backtick => depth += 1,
                '}' if !in_single && !in_double && !in_backtick => {
                    depth -= 1;
                    if depth == 0 {
                        // Found matching }, check if we have brace expansion markers
                        return has_comma || has_dot_dot;
                    }
                }
                ',' if at_top_level => has_comma = true,
                '.' if at_top_level && prev_char == Some('.') => has_dot_dot = true,
                // Brace groups have whitespace/newlines/semicolons at depth 1
                ' ' | '\t' | '\n' | ';' if at_top_level => return false,
                _ => {}
            }
            prev_char = Some(ch);
        }

        false
    }
3503
3504    fn consume_mid_word_brace_segment(&mut self, word: &mut Option<String>) {
3505        let mut brace_depth = 1usize;
3506        let mut paren_depth = 0usize;
3507        let mut escaped = false;
3508        let mut in_single = false;
3509        let mut in_double = false;
3510        let mut in_backtick = false;
3511        let mut prev_char = None;
3512
3513        while let Some(ch) = self.peek_char() {
3514            Self::push_capture_char(word, ch);
3515            self.advance();
3516
3517            if escaped {
3518                escaped = false;
3519                prev_char = Some(ch);
3520                continue;
3521            }
3522
3523            match ch {
3524                '\\' if !in_single => escaped = true,
3525                '\'' if !in_double && !in_backtick => in_single = !in_single,
3526                '"' if !in_single && !in_backtick => in_double = !in_double,
3527                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3528                '(' if !in_single
3529                    && !in_double
3530                    && !in_backtick
3531                    && (paren_depth > 0 || prev_char == Some('$')) =>
3532                {
3533                    paren_depth += 1
3534                }
3535                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3536                    paren_depth -= 1
3537                }
3538                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3539                '}' if !in_single && !in_double && !in_backtick => {
3540                    brace_depth -= 1;
3541                    if brace_depth == 0 {
3542                        break;
3543                    }
3544                }
3545                _ => {}
3546            }
3547
3548            prev_char = Some(ch);
3549        }
3550    }
3551
3552    fn consume_brace_word_body(&mut self, word: &mut String) {
3553        let mut brace_depth = 1usize;
3554        let mut paren_depth = 0usize;
3555        let mut escaped = false;
3556        let mut in_single = false;
3557        let mut in_double = false;
3558        let mut in_backtick = false;
3559        let mut prev_char = None;
3560
3561        while let Some(ch) = self.peek_char() {
3562            word.push(ch);
3563            self.advance();
3564
3565            if escaped {
3566                escaped = false;
3567                prev_char = Some(ch);
3568                continue;
3569            }
3570
3571            match ch {
3572                '\\' if !in_single => escaped = true,
3573                '\'' if !in_double && !in_backtick => in_single = !in_single,
3574                '"' if !in_single && !in_backtick => in_double = !in_double,
3575                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3576                '(' if !in_single
3577                    && !in_double
3578                    && !in_backtick
3579                    && (paren_depth > 0 || prev_char == Some('$')) =>
3580                {
3581                    paren_depth += 1
3582                }
3583                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3584                    paren_depth -= 1
3585                }
3586                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3587                '}' if !in_single && !in_double && !in_backtick => {
3588                    brace_depth -= 1;
3589                    if brace_depth == 0 {
3590                        break;
3591                    }
3592                }
3593                _ => {}
3594            }
3595
3596            prev_char = Some(ch);
3597        }
3598    }
3599
    /// Check whether a mid-word `{...}` segment can stay attached to the current
    /// word without crossing a top-level word boundary.
    fn looks_like_mid_word_brace_segment(&self) -> bool {
        // Cap the scan so pathological inputs stay linear in cost.
        const MAX_LOOKAHEAD: usize = 10_000;

        let mut chars = self.lookahead_chars();
        if chars.next() != Some('{') {
            return false;
        }

        let mut brace_depth = 1;
        // `$(...)` nesting; delimiters inside it are not word boundaries.
        let mut paren_depth = 0usize;
        let mut escaped = false;
        let mut in_single = false;
        let mut in_double = false;
        let mut in_backtick = false;
        let mut prev_char = None;
        let mut scanned = 0usize;

        for ch in chars {
            scanned += 1;
            if scanned > MAX_LOOKAHEAD {
                return false;
            }

            // A word-boundary char at the outermost, unquoted, unescaped level
            // means the segment would spill across a word break: not attachable.
            // Note this check runs before the escape handling below.
            if !in_single
                && !in_double
                && !in_backtick
                && !escaped
                && brace_depth == 1
                && paren_depth == 0
                && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
            {
                return false;
            }

            if escaped {
                // The char after a backslash is literal.
                escaped = false;
                prev_char = Some(ch);
                continue;
            }

            match ch {
                '\\' => escaped = true,
                '\'' if !in_double && !in_backtick => in_single = !in_single,
                '"' if !in_single && !in_backtick => in_double = !in_double,
                '`' if !in_single && !in_double => in_backtick = !in_backtick,
                // Only `$(` (or a paren within an open one) starts paren tracking.
                '(' if !in_single
                    && !in_double
                    && !in_backtick
                    && (paren_depth > 0 || prev_char == Some('$')) =>
                {
                    paren_depth += 1
                }
                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
                    paren_depth -= 1
                }
                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
                '}' if !in_single && !in_double && !in_backtick => {
                    brace_depth -= 1;
                    if brace_depth == 0 {
                        // The segment closes cleanly before any boundary char.
                        return true;
                    }
                }
                _ => {}
            }

            prev_char = Some(ch);
        }

        false
    }
3672
3673    /// Check if { is followed by whitespace (brace group start)
3674    fn is_brace_group_start(&self) -> bool {
3675        let mut chars = self.lookahead_chars();
3676        // Skip the opening {
3677        if chars.next() != Some('{') {
3678            return false;
3679        }
3680        // If next char is whitespace or newline, it's a brace group
3681        matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3682    }
3683
3684    /// Check whether the text after an escaped `{` looks like a brace-expansion
3685    /// surface that should stay attached to the current word, e.g. `\{a,b}`.
3686    fn escaped_brace_sequence_looks_like_brace_expansion(&self) -> bool {
3687        const MAX_LOOKAHEAD: usize = 10_000;
3688
3689        let mut chars = self.lookahead_chars();
3690        let mut depth = 1;
3691        let mut has_comma = false;
3692        let mut has_dot_dot = false;
3693        let mut prev_char = None;
3694        let mut scanned = 0usize;
3695
3696        for ch in chars.by_ref() {
3697            scanned += 1;
3698            if scanned > MAX_LOOKAHEAD {
3699                return false;
3700            }
3701            match ch {
3702                '{' => depth += 1,
3703                '}' => {
3704                    depth -= 1;
3705                    if depth == 0 {
3706                        return has_comma || has_dot_dot;
3707                    }
3708                }
3709                ',' if depth == 1 => has_comma = true,
3710                '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3711                ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3712                _ => {}
3713            }
3714            prev_char = Some(ch);
3715        }
3716
3717        false
3718    }
3719
3720    fn brace_literal_starts_case_pattern_delimiter(&self) -> bool {
3721        let mut chars = self.lookahead_chars();
3722        if chars.next() != Some('{') {
3723            return false;
3724        }
3725        chars.next() == Some(')')
3726    }
3727
    /// Read a {literal} pattern without comma/dot-dot as a word
    ///
    /// Consumes the opening `{`, the brace body through its matching `}`, and
    /// any trailing run of word characters, returning the whole run as one
    /// `Word` token. Returns `None` if the cursor is not on `{`.
    fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
        let mut word = String::with_capacity(16);

        if let Some('{') = self.peek_char() {
            word.push('{');
            self.advance();
        } else {
            return None;
        }

        self.consume_brace_word_body(&mut word);

        while let Some(ch) = self.peek_char() {
            if Self::is_word_char(ch) {
                if self.reinject_buf.is_empty() {
                    // Fast path: nothing reinjected is pending, so a whole run
                    // of word chars can be sliced straight off the cursor.
                    let chunk = self.cursor.eat_while(Self::is_word_char);
                    word.push_str(chunk);
                    self.advance_scanned_source_bytes(chunk.len());
                } else {
                    // Reinjected chars must be drained one at a time.
                    word.push(ch);
                    self.advance();
                }
            } else {
                break;
            }
        }

        Some(LexedToken::owned_word(TokenKind::Word, word))
    }
3758
3759    /// Read a brace expansion pattern as a word
3760    fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3761        let mut word = String::with_capacity(16);
3762
3763        // Read the opening {
3764        if let Some('{') = self.peek_char() {
3765            word.push('{');
3766            self.advance();
3767        } else {
3768            return None;
3769        }
3770
3771        // Read until matching }
3772        self.consume_brace_word_body(&mut word);
3773
3774        // Continue reading any suffix after the brace pattern
3775        while let Some(ch) = self.peek_char() {
3776            if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3777                if ch == '{' {
3778                    // Another brace pattern - include it
3779                    word.push(ch);
3780                    self.advance();
3781                    self.consume_brace_word_body(&mut word);
3782                } else {
3783                    word.push(ch);
3784                    self.advance();
3785                }
3786            } else {
3787                break;
3788            }
3789        }
3790
3791        Some(LexedToken::owned_word(TokenKind::Word, word))
3792    }
3793
3794    /// Peek ahead (without consuming) to see if `=(` starts an associative
3795    /// compound assignment like `([key]=val ...)`.  Returns true when the
3796    /// first non-whitespace char after `(` is `[`.
3797    fn looks_like_assoc_assign(&self) -> bool {
3798        let mut chars = self.lookahead_chars();
3799        // Skip the `(` we haven't consumed yet
3800        if chars.next() != Some('(') {
3801            return false;
3802        }
3803        // Skip optional whitespace
3804        for ch in chars {
3805            match ch {
3806                ' ' | '\t' => continue,
3807                '[' => return true,
3808                _ => return false,
3809            }
3810        }
3811        false
3812    }
3813
3814    fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3815        text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3816    }
3817
3818    fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3819        word.segments().any(|segment| {
3820            matches!(
3821                segment.kind(),
3822                LexedWordSegmentKind::SingleQuoted
3823                    | LexedWordSegmentKind::DollarSingleQuoted
3824                    | LexedWordSegmentKind::DoubleQuoted
3825                    | LexedWordSegmentKind::DollarDoubleQuoted
3826            )
3827        }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3828    }
3829
3830    fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3831        text.contains(['*', '?'])
3832            || text.ends_with('}') && text.contains("${")
3833            || text.ends_with(']')
3834                && text
3835                    .rfind('[')
3836                    .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3837    }
3838
3839    fn is_word_char(ch: char) -> bool {
3840        !matches!(
3841            ch,
3842            ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3843        )
3844    }
3845
3846    const fn is_ascii_word_byte(byte: u8) -> bool {
3847        !matches!(
3848            byte,
3849            b' ' | b'\t'
3850                | b'\n'
3851                | b';'
3852                | b'|'
3853                | b'&'
3854                | b'>'
3855                | b'<'
3856                | b'('
3857                | b')'
3858                | b'{'
3859                | b'}'
3860                | b'\''
3861                | b'"'
3862        )
3863    }
3864
3865    const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3866        Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3867    }
3868
3869    fn is_plain_word_char(ch: char) -> bool {
3870        Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3871    }
3872
3873    /// Read here document content until the delimiter line is found
3874    pub fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
3875        let mut content = String::with_capacity(64);
3876        let mut current_line = String::with_capacity(64);
3877
3878        // Save rest of current line (after the delimiter token on the command line).
3879        // For `cat <<EOF | sort`, this captures ` | sort` so the parser can
3880        // tokenize the pipe and subsequent command after the heredoc body.
3881        //
3882        // Quoted strings may span multiple lines (e.g., `cat <<EOF; echo "two\nthree"`),
3883        // so we track quoting state and continue across newlines until quotes close.
3884        let mut rest_of_line = String::with_capacity(32);
3885        let rest_of_line_start = self.current_position();
3886        let mut in_double_quote = false;
3887        let mut in_single_quote = false;
3888        let mut in_comment = false;
3889        let mut saw_non_whitespace_tail = false;
3890        let mut consecutive_backslashes = 0usize;
3891        let mut previous_tail_char = None;
3892        while let Some(ch) = self.peek_char() {
3893            self.advance();
3894            if in_comment {
3895                if ch == '\n' {
3896                    break;
3897                }
3898                rest_of_line.push(ch);
3899                previous_tail_char = Some(ch);
3900                continue;
3901            }
3902            if ch == '#'
3903                && !in_single_quote
3904                && !in_double_quote
3905                && self.comments_enabled()
3906                && heredoc_tail_hash_starts_comment(previous_tail_char)
3907            {
3908                in_comment = true;
3909                rest_of_line.push(ch);
3910                previous_tail_char = Some(ch);
3911                consecutive_backslashes = 0;
3912                continue;
3913            }
3914            let backslash_continues_line = ch == '\\'
3915                && !in_single_quote
3916                && self.peek_char() == Some('\n')
3917                && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
3918                && consecutive_backslashes.is_multiple_of(2);
3919            if backslash_continues_line {
3920                rest_of_line.push(ch);
3921                rest_of_line.push('\n');
3922                self.advance();
3923                consecutive_backslashes = 0;
3924                continue;
3925            }
3926            if ch == '\n' && !in_double_quote && !in_single_quote {
3927                break;
3928            }
3929            if ch == '"' && !in_single_quote {
3930                in_double_quote = !in_double_quote;
3931            } else if ch == '\'' && !in_double_quote {
3932                in_single_quote = !in_single_quote;
3933            } else if ch == '\\' && in_double_quote {
3934                // Escaped char inside double quotes — skip the next char too
3935                rest_of_line.push(ch);
3936                if let Some(next) = self.peek_char() {
3937                    rest_of_line.push(next);
3938                    self.advance();
3939                }
3940                continue;
3941            }
3942            rest_of_line.push(ch);
3943            if !ch.is_whitespace() {
3944                saw_non_whitespace_tail = true;
3945            }
3946            if ch == '\\' && !in_single_quote {
3947                consecutive_backslashes += 1;
3948            } else {
3949                consecutive_backslashes = 0;
3950            }
3951            previous_tail_char = Some(ch);
3952        }
3953
3954        // If we just drained a heredoc replay buffer (for example when multiple
3955        // heredocs share one command line), resume tracking from the true cursor
3956        // position before we measure the body span.
3957        self.sync_offset_to_cursor();
3958        let content_start = self.current_position();
3959        let mut current_line_start = content_start;
3960        let content_end;
3961
3962        // Read lines until we find the delimiter
3963        loop {
3964            if self.reinject_buf.is_empty() {
3965                // When the body reading drains a reinject buffer (from a
3966                // previous heredoc on the same command line), the virtual
3967                // offset drifts away from the cursor. Snap it back before
3968                // any source-based work so spans and `post_heredoc_offset`
3969                // stay within bounds.
3970                self.sync_offset_to_cursor();
3971                let rest = self.cursor.rest();
3972                if rest.is_empty() {
3973                    content_end = self.current_position();
3974                    break;
3975                }
3976
3977                let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
3978                let line = &rest[..line_len];
3979                let has_newline = line_len < rest.len();
3980
3981                if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
3982                    content_end = current_line_start;
3983                    self.consume_source_bytes(line_len);
3984                    if has_newline {
3985                        self.consume_ascii_chars(1);
3986                    }
3987                    break;
3988                }
3989
3990                content.push_str(line);
3991                self.consume_source_bytes(line_len);
3992
3993                if has_newline {
3994                    self.consume_ascii_chars(1);
3995                    content.push('\n');
3996                    current_line_start = self.current_position();
3997                    continue;
3998                }
3999
4000                content_end = self.current_position();
4001                break;
4002            }
4003
4004            match self.peek_char() {
4005                Some('\n') => {
4006                    self.advance();
4007                    // Check if current line matches delimiter
4008                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
4009                        content_end = current_line_start;
4010                        break;
4011                    }
4012                    content.push_str(&current_line);
4013                    content.push('\n');
4014                    current_line.clear();
4015                    current_line_start = self.current_position();
4016                }
4017                Some(ch) => {
4018                    current_line.push(ch);
4019                    self.advance();
4020                }
4021                None => {
4022                    // End of input - check last line
4023                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
4024                        content_end = current_line_start;
4025                        break;
4026                    }
4027                    if !current_line.is_empty() {
4028                        content.push_str(&current_line);
4029                    }
4030                    content_end = self.current_position();
4031                    break;
4032                }
4033            }
4034        }
4035
4036        // Re-inject the command-line tail so subsequent same-line tokens (pipes,
4037        // redirects, command words, additional heredocs) stay visible to the
4038        // parser. Always replay a terminating newline so parsing stops before
4039        // tokens that originally lived on later source lines, like `}` or `do`.
4040        let post_heredoc_offset = self.offset;
4041        self.offset = rest_of_line_start.offset;
4042        for ch in rest_of_line.chars() {
4043            self.reinject_buf.push_back(ch);
4044        }
4045        self.reinject_buf.push_back('\n');
4046        self.reinject_resume_offset = Some(post_heredoc_offset);
4047
4048        HeredocRead {
4049            content,
4050            content_span: Span::from_positions(content_start, content_end),
4051        }
4052    }
4053
4054    fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
4055        let mut chars = self.cursor.rest().chars();
4056        if chars.next() != Some('\n') {
4057            return false;
4058        }
4059
4060        for ch in chars {
4061            if matches!(ch, ' ' | '\t') {
4062                continue;
4063            }
4064            if ch == '\n' {
4065                return false;
4066            }
4067            return matches!(ch, '|' | '&' | ';' | '<' | '>')
4068                || (ch == '#' && self.comments_enabled());
4069        }
4070
4071        false
4072    }
4073}
4074
/// Reports whether a heredoc body line terminates the heredoc: after the
/// optional leading-tab stripping used by `<<-`, the line must be exactly the
/// delimiter, optionally followed by spaces and tabs only.
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    };

    // `strip_prefix` returns `Some("")` on an exact match, which trivially
    // satisfies the trailing-blank check, so no separate equality test is
    // needed.
    candidate
        .strip_prefix(delimiter)
        .is_some_and(|trailing| trailing.chars().all(|ch| ch == ' ' || ch == '\t'))
}
4092
/// Decides whether a `#` seen while collecting the heredoc command-line tail
/// opens a comment: only at the very start of the tail, or right after
/// whitespace or one of `;`, `|`, `&`, `<`, `>`, `)`.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(prev) => {
            prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')')
        }
    }
}
4098
/// Returns the character starting at byte `index` together with the byte
/// offset just past it. Yields `None` when `index` is out of range or does
/// not fall on a UTF-8 character boundary.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    let tail = input.get(index..)?;
    let ch = tail.chars().next()?;
    Some((ch, index + ch.len_utf8()))
}
4103
/// Reports whether `prefix` — the portion of a source line before the scan
/// position — opens a doubled-paren `((` group that is not yet closed by a
/// matching `))`. Quote-aware: pairs inside single/double quotes, backticks,
/// or behind a backslash escape are ignored, and only *doubled* parens change
/// the depth; lone `(` / `)` characters are skipped.
fn line_has_unclosed_double_paren(prefix: &str) -> bool {
    let mut index = 0usize;
    // Nesting depth of unquoted `((` groups seen so far.
    let mut depth = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_backtick = false;
    let mut escaped = false;

    while let Some((ch, next_index)) = next_char_boundary(prefix, index) {
        // Whether the previous character left us in an escaped state.
        let was_escaped = escaped;
        if ch == '\\' && !in_single {
            // Toggling (not setting) handles runs of backslashes: every
            // second backslash is a literal, not an escape.
            escaped = !escaped;
            index = next_index;
            continue;
        }
        escaped = false;

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => in_single = !in_single,
            '"' if !in_single && !in_backtick && !was_escaped => in_double = !in_double,
            '`' if !in_single && !in_double && !was_escaped => in_backtick = !in_backtick,
            // Unquoted `((`: consume both characters and go one level deeper.
            '(' if !in_single
                && !in_double
                && !in_backtick
                && !was_escaped
                && prefix[next_index..].starts_with('(') =>
            {
                depth += 1;
                index = next_index + '('.len_utf8();
                continue;
            }
            // Unquoted `))`: consume both characters and close one level.
            // Saturating, so stray closers never underflow the depth.
            ')' if !in_single
                && !in_double
                && !in_backtick
                && !was_escaped
                && prefix[next_index..].starts_with(')') =>
            {
                depth = depth.saturating_sub(1);
                index = next_index + ')'.len_utf8();
                continue;
            }
            _ => {}
        }

        index = next_index;
    }

    depth > 0
}
4153
4154fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4155    let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4156    let prefix = &input[line_start..index];
4157    line_has_unclosed_double_paren(prefix)
4158}
4159
4160fn hash_starts_comment(input: &str, index: usize) -> bool {
4161    if inside_unclosed_double_paren_on_line(input, index) {
4162        return false;
4163    }
4164
4165    let next = &input[index + '#'.len_utf8()..];
4166    input[..index]
4167        .chars()
4168        .next_back()
4169        .is_none_or(|prev| match prev {
4170            '(' => {
4171                let whitespace_index = next.find(char::is_whitespace);
4172                let close_index = next.find(')');
4173
4174                match (whitespace_index, close_index) {
4175                    (Some(whitespace), Some(close)) => whitespace < close,
4176                    (Some(_), None) | (None, None) => true,
4177                    (None, Some(_)) => false,
4178                }
4179            }
4180            _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4181        })
4182}
4183
/// True when `ch` ends a heredoc delimiter word: any whitespace or shell
/// operator/paren character, provided we are not inside quotes and the
/// character is not backslash-escaped.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }
    ch.is_whitespace() || matches!(ch, '|' | '&' | ';' | '<' | '>' | '(' | ')')
}
4195
4196fn scan_double_quoted_command_substitution_segment(
4197    input: &str,
4198    mut index: usize,
4199    subst_depth: usize,
4200) -> Option<usize> {
4201    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4202        match ch {
4203            '"' => return Some(next_index),
4204            '\\' => {
4205                index = next_index;
4206                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4207                    index = escaped_next;
4208                }
4209            }
4210            '$' if input[next_index..].starts_with('{') => {
4211                let consumed = scan_command_subst_parameter_expansion_len(
4212                    &input[next_index + '{'.len_utf8()..],
4213                    subst_depth,
4214                )?;
4215                index = next_index + '{'.len_utf8() + consumed;
4216            }
4217            '$' if input[next_index..].starts_with('(')
4218                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4219            {
4220                let consumed = scan_command_substitution_body_len_inner(
4221                    &input[next_index + '('.len_utf8()..],
4222                    subst_depth + 1,
4223                )?;
4224                index = next_index + '('.len_utf8() + consumed;
4225            }
4226            _ => index = next_index,
4227        }
4228    }
4229
4230    None
4231}
4232
/// Measures a `${...}` parameter-expansion body: `input` starts just past the
/// opening `{`, and the return value is the byte offset one past the matching
/// unquoted `}`. Returns `None` when the expansion never closes. Tracks
/// single/double/ANSI-C (`$'...'`) quoting, backticks, and backslash escapes,
/// and skips nested `${...}`, `$(...)`, and `<(...)`/`>(...)` forms wholesale
/// so their braces cannot terminate this expansion early.
fn scan_command_subst_parameter_expansion_len(input: &str, subst_depth: usize) -> Option<usize> {
    let mut index = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    // Inside `$'...'` — toggled separately from plain single quotes.
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // True right after an unquoted `$`, so a following `'` opens `$'...'`.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        // Whether the previous character left us in an escaped state.
        let was_escaped = escaped;
        if ch == '\\' && !in_single {
            // Toggling handles backslash runs: every second one is literal.
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        // Nested `${...}` / `$(...)` outside quotes: consume the whole nested
        // form in one step via the matching recursive scanner.
        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            if input[next_index..].starts_with('{')
                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )
            {
                index = next_index + '{'.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }

            // `$(` but not `$((` — treated as a command substitution body.
            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // Process substitutions `<(...)` / `>(...)` outside any quoting are
        // also skipped as whole units.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            // `'` closes an ANSI-C string, opens one after a pending `$`,
            // or otherwise toggles an ordinary single-quoted string.
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            // An unquoted, unescaped `}` ends the expansion being measured.
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                return Some(next_index);
            }
            _ => {}
        }

        // Remember an unquoted `$` so that the next `'` starts `$'...'`.
        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4332
4333fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
4334    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4335        if !matches!(ch, ' ' | '\t') {
4336            break;
4337        }
4338        index = next_index;
4339    }
4340
4341    let start = index;
4342    let mut cooked = String::new();
4343    let mut in_single = false;
4344    let mut in_double = false;
4345    let mut escaped = false;
4346
4347    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4348        if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
4349            break;
4350        }
4351
4352        index = next_index;
4353        if escaped {
4354            cooked.push(ch);
4355            escaped = false;
4356            continue;
4357        }
4358
4359        match ch {
4360            '\\' if !in_single => escaped = true,
4361            '\'' if !in_double => in_single = !in_single,
4362            '"' if !in_single => in_double = !in_double,
4363            _ => cooked.push(ch),
4364        }
4365    }
4366
4367    (index > start).then_some((index, cooked))
4368}
4369
4370fn skip_command_subst_pending_heredoc(
4371    input: &str,
4372    mut index: usize,
4373    delimiter: &str,
4374    strip_tabs: bool,
4375) -> usize {
4376    while index <= input.len() {
4377        let rest = &input[index..];
4378        let line_len = rest.find('\n').unwrap_or(rest.len());
4379        let line = &rest[..line_len];
4380        let has_newline = line_len < rest.len();
4381
4382        index += line_len;
4383        if has_newline {
4384            index += '\n'.len_utf8();
4385        }
4386
4387        if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4388            return index;
4389        }
4390    }
4391
4392    index
4393}
4394
4395fn scan_command_subst_ansi_c_single_quoted_segment(
4396    input: &str,
4397    quote_index: usize,
4398) -> Option<usize> {
4399    let mut index = quote_index + '\''.len_utf8();
4400
4401    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4402        index = next_index;
4403        if ch == '\\' {
4404            if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4405                index = escaped_next;
4406            }
4407            continue;
4408        }
4409
4410        if ch == '\'' {
4411            return Some(index);
4412        }
4413    }
4414
4415    None
4416}
4417
4418fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
4419    let mut index = start;
4420
4421    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4422        index = next_index;
4423        if ch == '\\' {
4424            if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4425                index = escaped_next;
4426            }
4427            continue;
4428        }
4429
4430        if ch == '`' {
4431            return Some(index);
4432        }
4433    }
4434
4435    None
4436}
4437
4438fn flush_scanned_command_subst_keyword(
4439    current_word: &mut String,
4440    pending_case_headers: &mut usize,
4441    case_clause_depths: &mut SmallVec<[usize; 4]>,
4442    depth: usize,
4443    word_started_at_command_start: &mut bool,
4444) {
4445    if current_word.is_empty() {
4446        *word_started_at_command_start = false;
4447        return;
4448    }
4449
4450    match current_word.as_str() {
4451        "case" if *word_started_at_command_start => *pending_case_headers += 1,
4452        "in" if *pending_case_headers > 0 => {
4453            *pending_case_headers -= 1;
4454            case_clause_depths.push(depth);
4455        }
4456        "esac" if *word_started_at_command_start => {
4457            case_clause_depths.pop();
4458        }
4459        _ => {}
4460    }
4461
4462    current_word.clear();
4463    *word_started_at_command_start = false;
4464}
4465
4466fn scan_command_substitution_body_len_inner(input: &str, subst_depth: usize) -> Option<usize> {
4467    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
4468        return None;
4469    }
4470
4471    let mut index = 0usize;
4472    let mut depth = 1;
4473    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
4474    let mut pending_case_headers = 0usize;
4475    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
4476    let mut current_word = String::with_capacity(16);
4477    let mut at_command_start = true;
4478    let mut expecting_redirection_target = false;
4479    let mut current_word_started_at_command_start = false;
4480
4481    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4482        match ch {
4483            '#' if hash_starts_comment(input, index) => {
4484                let had_word = !current_word.is_empty();
4485                flush_scanned_command_subst_keyword(
4486                    &mut current_word,
4487                    &mut pending_case_headers,
4488                    &mut case_clause_depths,
4489                    depth,
4490                    &mut current_word_started_at_command_start,
4491                );
4492                if had_word && expecting_redirection_target {
4493                    expecting_redirection_target = false;
4494                }
4495                index = next_index;
4496                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
4497                    index = comment_next;
4498                    if comment_ch == '\n' {
4499                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
4500                            index = skip_command_subst_pending_heredoc(
4501                                input, index, &delimiter, strip_tabs,
4502                            );
4503                        }
4504                        at_command_start = true;
4505                        expecting_redirection_target = false;
4506                        break;
4507                    }
4508                }
4509            }
4510            '(' => {
4511                flush_scanned_command_subst_keyword(
4512                    &mut current_word,
4513                    &mut pending_case_headers,
4514                    &mut case_clause_depths,
4515                    depth,
4516                    &mut current_word_started_at_command_start,
4517                );
4518                depth += 1;
4519                index = next_index;
4520                at_command_start = true;
4521                expecting_redirection_target = false;
4522            }
4523            ')' => {
4524                flush_scanned_command_subst_keyword(
4525                    &mut current_word,
4526                    &mut pending_case_headers,
4527                    &mut case_clause_depths,
4528                    depth,
4529                    &mut current_word_started_at_command_start,
4530                );
4531                if case_clause_depths
4532                    .last()
4533                    .is_some_and(|case_depth| *case_depth == depth)
4534                {
4535                    index = next_index;
4536                    at_command_start = true;
4537                    expecting_redirection_target = false;
4538                    continue;
4539                }
4540                depth -= 1;
4541                index = next_index;
4542                if depth == 0 {
4543                    return Some(index);
4544                }
4545                at_command_start = false;
4546                expecting_redirection_target = false;
4547            }
4548            '"' => {
4549                let had_word = !current_word.is_empty();
4550                flush_scanned_command_subst_keyword(
4551                    &mut current_word,
4552                    &mut pending_case_headers,
4553                    &mut case_clause_depths,
4554                    depth,
4555                    &mut current_word_started_at_command_start,
4556                );
4557                if had_word && expecting_redirection_target {
4558                    expecting_redirection_target = false;
4559                }
4560                index = scan_double_quoted_command_substitution_segment(
4561                    input,
4562                    next_index,
4563                    subst_depth,
4564                )?;
4565                if expecting_redirection_target {
4566                    expecting_redirection_target = false;
4567                } else {
4568                    at_command_start = false;
4569                }
4570            }
4571            '\'' => {
4572                let had_word = !current_word.is_empty();
4573                flush_scanned_command_subst_keyword(
4574                    &mut current_word,
4575                    &mut pending_case_headers,
4576                    &mut case_clause_depths,
4577                    depth,
4578                    &mut current_word_started_at_command_start,
4579                );
4580                if had_word && expecting_redirection_target {
4581                    expecting_redirection_target = false;
4582                }
4583                index = next_index;
4584                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
4585                    index = quoted_next;
4586                    if quoted_ch == '\'' {
4587                        break;
4588                    }
4589                }
4590                if expecting_redirection_target {
4591                    expecting_redirection_target = false;
4592                } else {
4593                    at_command_start = false;
4594                }
4595            }
4596            '`' => {
4597                let had_word = !current_word.is_empty();
4598                flush_scanned_command_subst_keyword(
4599                    &mut current_word,
4600                    &mut pending_case_headers,
4601                    &mut case_clause_depths,
4602                    depth,
4603                    &mut current_word_started_at_command_start,
4604                );
4605                if had_word && expecting_redirection_target {
4606                    expecting_redirection_target = false;
4607                }
4608                index = scan_command_subst_backtick_segment(input, next_index)?;
4609                if expecting_redirection_target {
4610                    expecting_redirection_target = false;
4611                } else {
4612                    at_command_start = false;
4613                }
4614            }
4615            '$' if input[next_index..].starts_with('\'') => {
4616                let had_word = !current_word.is_empty();
4617                flush_scanned_command_subst_keyword(
4618                    &mut current_word,
4619                    &mut pending_case_headers,
4620                    &mut case_clause_depths,
4621                    depth,
4622                    &mut current_word_started_at_command_start,
4623                );
4624                if had_word && expecting_redirection_target {
4625                    expecting_redirection_target = false;
4626                }
4627                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
4628                if expecting_redirection_target {
4629                    expecting_redirection_target = false;
4630                } else {
4631                    at_command_start = false;
4632                }
4633            }
4634            '\\' => {
4635                let had_word = !current_word.is_empty();
4636                flush_scanned_command_subst_keyword(
4637                    &mut current_word,
4638                    &mut pending_case_headers,
4639                    &mut case_clause_depths,
4640                    depth,
4641                    &mut current_word_started_at_command_start,
4642                );
4643                if had_word && expecting_redirection_target {
4644                    expecting_redirection_target = false;
4645                }
4646                index = next_index;
4647                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4648                    index = escaped_next;
4649                }
4650                if expecting_redirection_target {
4651                    expecting_redirection_target = false;
4652                } else {
4653                    at_command_start = false;
4654                }
4655            }
4656            '>' => {
4657                let word_was_redirection_fd = current_word_started_at_command_start
4658                    && !current_word.is_empty()
4659                    && current_word.chars().all(|current| current.is_ascii_digit());
4660                flush_scanned_command_subst_keyword(
4661                    &mut current_word,
4662                    &mut pending_case_headers,
4663                    &mut case_clause_depths,
4664                    depth,
4665                    &mut current_word_started_at_command_start,
4666                );
4667                if word_was_redirection_fd {
4668                    at_command_start = true;
4669                }
4670                index = next_index;
4671                expecting_redirection_target = true;
4672            }
4673            '<' if input[next_index..].starts_with('<') => {
4674                let word_was_redirection_fd = current_word_started_at_command_start
4675                    && !current_word.is_empty()
4676                    && current_word.chars().all(|current| current.is_ascii_digit());
4677                let had_word = !current_word.is_empty();
4678                flush_scanned_command_subst_keyword(
4679                    &mut current_word,
4680                    &mut pending_case_headers,
4681                    &mut case_clause_depths,
4682                    depth,
4683                    &mut current_word_started_at_command_start,
4684                );
4685                if had_word && expecting_redirection_target {
4686                    expecting_redirection_target = false;
4687                }
4688                if word_was_redirection_fd {
4689                    at_command_start = true;
4690                }
4691                if inside_unclosed_double_paren_on_line(input, index) {
4692                    index = next_index + '<'.len_utf8();
4693                    continue;
4694                }
4695
4696                if input[next_index + '<'.len_utf8()..].starts_with('<') {
4697                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
4698                    expecting_redirection_target = true;
4699                    continue;
4700                }
4701
4702                let strip_tabs = input[next_index..].starts_with("<-");
4703                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
4704                if let Some((delimiter_index, delimiter)) =
4705                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
4706                {
4707                    pending_heredocs.push((delimiter, strip_tabs));
4708                    index = delimiter_index;
4709                    expecting_redirection_target = false;
4710                } else {
4711                    index = next_index;
4712                    expecting_redirection_target = true;
4713                }
4714            }
4715            '\n' => {
4716                flush_scanned_command_subst_keyword(
4717                    &mut current_word,
4718                    &mut pending_case_headers,
4719                    &mut case_clause_depths,
4720                    depth,
4721                    &mut current_word_started_at_command_start,
4722                );
4723                index = next_index;
4724                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
4725                    index =
4726                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
4727                }
4728                at_command_start = true;
4729                expecting_redirection_target = false;
4730            }
4731            '$' if input[next_index..].starts_with('{') => {
4732                let had_word = !current_word.is_empty();
4733                flush_scanned_command_subst_keyword(
4734                    &mut current_word,
4735                    &mut pending_case_headers,
4736                    &mut case_clause_depths,
4737                    depth,
4738                    &mut current_word_started_at_command_start,
4739                );
4740                if had_word && expecting_redirection_target {
4741                    expecting_redirection_target = false;
4742                }
4743                let consumed = scan_command_subst_parameter_expansion_len(
4744                    &input[next_index + '{'.len_utf8()..],
4745                    subst_depth,
4746                )?;
4747                index = next_index + '{'.len_utf8() + consumed;
4748                if expecting_redirection_target {
4749                    expecting_redirection_target = false;
4750                } else {
4751                    at_command_start = false;
4752                }
4753            }
4754            '$' if input[next_index..].starts_with('(')
4755                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4756            {
4757                let had_word = !current_word.is_empty();
4758                flush_scanned_command_subst_keyword(
4759                    &mut current_word,
4760                    &mut pending_case_headers,
4761                    &mut case_clause_depths,
4762                    depth,
4763                    &mut current_word_started_at_command_start,
4764                );
4765                if had_word && expecting_redirection_target {
4766                    expecting_redirection_target = false;
4767                }
4768                let consumed = scan_command_substitution_body_len_inner(
4769                    &input[next_index + '('.len_utf8()..],
4770                    subst_depth + 1,
4771                )?;
4772                index = next_index + '('.len_utf8() + consumed;
4773                if expecting_redirection_target {
4774                    expecting_redirection_target = false;
4775                } else {
4776                    at_command_start = false;
4777                }
4778            }
4779            _ => {
4780                if ch.is_ascii_alphanumeric() || ch == '_' {
4781                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
4782                    {
4783                        current_word_started_at_command_start = true;
4784                        at_command_start = false;
4785                    }
4786                    current_word.push(ch);
4787                } else {
4788                    let had_word = !current_word.is_empty();
4789                    flush_scanned_command_subst_keyword(
4790                        &mut current_word,
4791                        &mut pending_case_headers,
4792                        &mut case_clause_depths,
4793                        depth,
4794                        &mut current_word_started_at_command_start,
4795                    );
4796                    if had_word && expecting_redirection_target {
4797                        expecting_redirection_target = false;
4798                    }
4799                    match ch {
4800                        ' ' | '\t' => {}
4801                        ';' | '|' | '&' => {
4802                            at_command_start = true;
4803                            expecting_redirection_target = false;
4804                        }
4805                        _ => {
4806                            if !expecting_redirection_target {
4807                                at_command_start = false;
4808                            }
4809                        }
4810                    }
4811                }
4812                index = next_index;
4813            }
4814        }
4815    }
4816
4817    None
4818}
4819
/// Scans the body of a top-level `$( ... )` command substitution.
///
/// `input` starts at the byte immediately after the opening `$(`. Returns the
/// number of bytes consumed up to and including the matching closing `)`, or
/// `None` when no closing paren is found.
pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
    // Depth 0 marks the outermost substitution; nested `$(...)` bodies recurse
    // through the inner scanner with depth + 1.
    scan_command_substitution_body_len_inner(input, 0)
}
4823
4824#[cfg(test)]
4825mod tests {
4826    use super::*;
4827
4828    fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
4829        match token.kind {
4830            kind if kind.is_word_like() => token.word_string(),
4831            TokenKind::Comment => token
4832                .span
4833                .slice(source)
4834                .strip_prefix('#')
4835                .map(str::to_string),
4836            TokenKind::Error => token
4837                .error_kind()
4838                .map(LexerErrorKind::message)
4839                .map(str::to_string),
4840            _ => None,
4841        }
4842    }
4843
4844    fn assert_next_token(
4845        lexer: &mut Lexer<'_>,
4846        expected_kind: TokenKind,
4847        expected_text: Option<&str>,
4848    ) {
4849        let token = lexer.next_lexed_token().unwrap();
4850        assert_eq!(token.kind, expected_kind);
4851        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4852    }
4853
4854    fn assert_next_token_with_comments(
4855        lexer: &mut Lexer<'_>,
4856        expected_kind: TokenKind,
4857        expected_text: Option<&str>,
4858    ) {
4859        let token = lexer.next_lexed_token_with_comments().unwrap();
4860        assert_eq!(token.kind, expected_kind);
4861        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4862    }
4863
4864    fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
4865        let mut lexer = Lexer::new(input);
4866
4867        while let Some(token) = lexer.next_lexed_token() {
4868            if token.kind == TokenKind::Newline {
4869                continue;
4870            }
4871
4872            assert_eq!(
4873                token.span.start.line, token.span.end.line,
4874                "token should stay on one line: {:?}",
4875                token
4876            );
4877        }
4878    }
4879
    // --- Basic word, quoting, and segment-kind behavior ---

    // Unquoted words lex as individual `Word` tokens.
    #[test]
    fn test_simple_words() {
        let mut lexer = Lexer::new("echo hello world");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
        assert!(lexer.next_lexed_token().is_none());
    }

    // Single quotes yield `LiteralWord` with the quotes stripped.
    #[test]
    fn test_single_quoted_string() {
        let mut lexer = Lexer::new("echo 'hello world'");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        // Single-quoted strings return LiteralWord (no variable expansion)
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("hello world"));
        assert!(lexer.next_lexed_token().is_none());
    }

    // Double quotes yield `QuotedWord` with the quotes stripped.
    #[test]
    fn test_double_quoted_string() {
        let mut lexer = Lexer::new("echo \"hello world\"");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("hello world"));
        assert!(lexer.next_lexed_token().is_none());
    }

    // A `}` inside double quotes must not terminate a brace-expansion word.
    #[test]
    fn test_brace_expansion_token_ignores_quoted_closers() {
        let mut lexer = Lexer::new("echo {\"}\",a}\n");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{"}",a}"#));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }

    // A trailing backslash inside single quotes is literal; the member
    // boundary after it must still split the brace expansion from `next`.
    #[test]
    fn test_brace_expansion_token_preserves_single_quoted_backslash_member_boundary() {
        let mut lexer = Lexer::new("echo {'a\\',b} next\n");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{'a\',b}"#));
        assert_next_token(&mut lexer, TokenKind::Word, Some("next"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }

    // A `$var` inside double quotes keeps a span that slices back into source.
    #[test]
    fn test_double_quoted_expansion_token_keeps_source_backing() {
        let source = r#""$bar""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(token.word_text(), Some("$bar"));

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
        assert_eq!(segment.span().unwrap().slice(source), "$bar");
    }

    // Quotes nested inside `$( ... )` within a quoted word stay in one token.
    #[test]
    fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
        let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(
            token.word_text(),
            Some(r#"$(echo "$line" | cut -d' ' -f2-)"#)
        );
    }

    // `${@}` plus a pipeline inside a quoted substitution stays intact.
    #[test]
    fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
        let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(
            token.word_text(),
            Some(r#"$(echo "${@}" | tr -d '[:space:]')"#)
        );
    }

    // Adjacent plain/double/single segments keep per-segment kinds and spans.
    #[test]
    fn test_mixed_word_keeps_segment_kinds() {
        let source = r#"foo"bar"'baz'"#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();

        assert_eq!(
            segments,
            vec![
                (LexedWordSegmentKind::Plain, "foo".to_string()),
                (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
                (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
            ]
        );
        assert_eq!(word.joined_text(), "foobarbaz");
        assert_eq!(
            word.segments()
                .next()
                .and_then(LexedWordSegment::span)
                .unwrap()
                .slice(source),
            "foo"
        );
    }
5003
    // --- scan_command_substitution_body_len regression cases ---
    // Each test feeds the scanner text ending at the substitution's closing
    // `)` (optionally with trailing context) and checks the consumed body.

    // `<<-EOF` heredoc with tab-stripped delimiter inside the substitution.
    #[test]
    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("field, direction"));
        assert!(body.ends_with(')'));
    }

    // `;#` starts a comment; the `)` inside it must not close the body.
    #[test]
    fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
        let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf '%s' y"));
        assert!(body.ends_with(')'));
    }

    // `(# ...` after a grouping paren is a comment, not glob syntax.
    #[test]
    fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
        let source = " (# comment with )\nprintf %s 1,2\n) )\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // `<<EOF|` with no space before the pipe still starts a heredoc.
    #[test]
    fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
        let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("field, direction"));
        assert!(body.ends_with(')'));
    }

    // A `)` inside `${x//foo/)}` must not close the substitution.
    #[test]
    fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
        let source = "printf %s ${x//foo/)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x//foo/)},1"));
        assert!(body.ends_with(')'));
    }

    // `#` right after a case-pattern `)` starts a comment.
    #[test]
    fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
        let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // `(#b)` in a zsh-style glob is not a comment opener.
    #[test]
    fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
        let source = "[[ \"$buf\" == (#b)(*) ]]";
        let index = source.find('#').expect("expected hash");

        assert!(!hash_starts_comment(source, index));
    }

    // `(#comment` with no space after `#` still counts as a comment.
    #[test]
    fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
        let source = "(#comment with )";
        let index = source.find('#').expect("expected hash");

        assert!(hash_starts_comment(source, index));
    }

    // `#c` inside an unclosed `(( ... ))` arithmetic context is not a comment.
    #[test]
    fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
        let source = "(( #c < 256 ))";
        let index = source.find('#').expect("expected hash");

        assert!(!hash_starts_comment(source, index));
    }

    // A quoted `((` does not open an arithmetic context that would mask `#`.
    #[test]
    fn test_hash_starts_comment_respects_quoted_double_parens() {
        let source = "printf '((' # comment";
        let index = source.find('#').expect("expected hash");

        assert!(hash_starts_comment(source, index));
    }

    // Quoted `((` before a comment must not suppress comment handling.
    #[test]
    fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
        let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // `(#comment` form inside a substitution body.
    #[test]
    fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
        let source = " (#comment with )\nprintf %s 1,2\n) )\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // `<<` in `((x<<2))` is a shift operator, not a heredoc opener.
    #[test]
    fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
        let source = "((x<<2))\nprintf %s 1,2\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // A case-pattern `)` inside a nested group must not close the body.
    #[test]
    fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
        let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with("))"));
    }

    // `case`/`in` as plain command arguments do not open a case clause.
    #[test]
    fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
        let source = "printf %s 1,2; echo case in)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("echo case in"));
        assert!(body.ends_with(')'));
    }

    // `$'...\'...'` ANSI-C quoting keeps escaped single quotes inside.
    #[test]
    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
        let source = "printf %s $'a\\'b'; printf %s 1,2)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("$'a\\'b'"));
        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // A `)` inside backticks must not close the substitution.
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
        let source = "printf %s `echo foo)`; printf %s ok)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("`echo foo)`"));
        assert!(body.contains("printf %s ok"));
        assert!(body.ends_with(')'));
    }

    // Backticks nested inside `${...}` keep their `}` and `)` contained.
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
        let source = "printf %s ${x/`echo }`/foo)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x/`echo }`/foo)},1"));
        assert!(body.ends_with(')'));
    }

    // `<(...)` process substitution nested inside `${...}` stays contained.
    #[test]
    fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
     {
        let source = "printf %s ${x/<(echo })/foo)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x/<(echo })/foo)},1"));
        assert!(body.ends_with(')'));
    }

    // Same as the plain-case-words test, but the `)` is the last byte of input.
    #[test]
    fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
        let source = "printf %s 1,2; echo case in)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // ANSI-C quoting case where the closing `)` ends the input.
    #[test]
    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
        let source = "printf %s $'a\\'b'; printf %s 1,2)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // Backtick case where the closing `)` ends the input.
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
        let source = "printf %s `echo foo)`; printf %s ok)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // Nested quotes in a pipeline where the closing `)` ends the input.
    #[test]
    fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
        let source = "echo \"$line\" | cut -d' ' -f2-)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // `${@}` in a pipeline where the closing `)` ends the input.
    #[test]
    fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
        let source = "echo \"${@}\" | tr -d '[:space:]')";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // Tab-stripped heredoc where the closing `)` ends the input.
    #[test]
    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // Piped heredoc where the closing `)` ends the input.
    #[test]
    fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
        let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }
5273
    // --- Nested substitution scanning through the full lexer ---

    // A quoted `)` produced by `$(printf ')')` inside `$(( ... ))` inside
    // `$( ... )` must not terminate any of the enclosing constructs.
    #[test]
    fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
        let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
        let mut lexer = Lexer::new(source);

        let first = lexer.next_lexed_token().expect("expected first token");
        assert!(first.kind.is_word_like(), "{:?}", first.kind);
        assert_eq!(first.word_string().as_deref(), Some("echo"));

        let second = lexer.next_lexed_token().expect("expected second token");
        assert!(second.kind.is_word_like(), "{:?}", second.kind);
        assert_eq!(
            second.word_string().as_deref(),
            Some("$(echo \"$(( $(printf ')') + 1 ))\")")
        );
    }

    // Escaped double quotes before `$(` must not leave the scanner in a
    // quoted state that swallows the substitution tail.
    #[test]
    fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
        let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
        let start = source.find("$(").expect("expected command substitution") + 2;
        let consumed =
            scan_command_substitution_body_len(&source[start..]).expect("expected match");
        assert_eq!(&source[start..start + consumed], "echo $var | tr A-Z a-z)");
    }

    // The inner `$( ... )` of a doubly-nested substitution stays in the body.
    #[test]
    fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
        let source = "echo $(echo $(basename $filename .fuzz))";
        let start = source.find("$(").expect("expected command substitution") + 2;
        let consumed =
            scan_command_substitution_body_len(&source[start..]).expect("expected match");
        assert_eq!(
            &source[start..start + consumed],
            "echo $(basename $filename .fuzz))"
        );
    }

    // `[[ ... ]] && echo "$(...)"` inside a substitution consumes to the end.
    #[test]
    fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
        let source = "\n       [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n    )";
        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        assert_eq!(consumed, source.len());
    }
5318
    // --- Segment kinds and source backing for mixed words ---

    // A single-quoted prefix followed by bare text keeps two segments with
    // distinct kinds, and the plain continuation keeps its source span.
    #[test]
    fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
        let source = "'foo'bar";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::LiteralWord);

        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();

        assert_eq!(
            segments,
            vec![
                (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
                (LexedWordSegmentKind::Plain, "bar".to_string()),
            ]
        );
        assert_eq!(word.joined_text(), "foobar");
        assert_eq!(
            word.segments()
                .nth(1)
                .and_then(LexedWordSegment::span)
                .unwrap()
                .slice(source),
            "bar"
        );
    }

    // A bare `$( ... )` word is one plain segment backed by the source.
    #[test]
    fn test_unquoted_command_substitution_word_keeps_source_backing() {
        let source = "$(printf hi)";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(segment.as_str(), source);
        assert_eq!(segment.span().unwrap().slice(source), source);
    }

    // A bare `${...}` with a nested `${#...}` stays one source-backed segment.
    #[test]
    fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
        let source = "${arr[$RANDOM % ${#arr[@]}]}";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(segment.as_str(), source);
        assert_eq!(segment.span().unwrap().slice(source), source);
    }

    // A quoted prefix plus an unquoted `$( ... )` continuation keeps the
    // continuation's source span.
    #[test]
    fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
        let source = "\"foo\"$(printf hi)";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let continuation = word.segments().nth(1).unwrap();
        assert_eq!(continuation.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(continuation.as_str(), "$(printf hi)");
        assert_eq!(continuation.span().unwrap().slice(source), "$(printf hi)");
    }

    // The same nested param expansion inside double quotes is one
    // double-quoted segment with a source span (quotes excluded).
    #[test]
    fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
        let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
        assert_eq!(segment.as_str(), "${arr[$RANDOM % ${#arr[@]}]}");
        assert_eq!(
            segment.span().unwrap().slice(source),
            "${arr[$RANDOM % ${#arr[@]}]}"
        );
    }
5413
    // --- ANSI-C escape and parameter-expansion edge cases ---

    // `$'\c'` consumes the following character (here a quote) as part of the
    // control escape; the cooked text is the BEL control character.
    #[test]
    fn test_ansi_c_control_escape_can_consume_quote() {
        let mut lexer = Lexer::new("echo $'\\c''");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("\x07"));
        assert!(lexer.next_lexed_token().is_none());
    }

    // `${var//'"'/'\"'}` inside double quotes must lex as one single-line word.
    #[test]
    fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
        let source = r#"out_line="${out_line//'"'/'\"'}"
"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(r#"out_line=${out_line//'"'/'"'}"#),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5437
    // --- Regressions: literal braces and replacements must not swallow
    // --- following tokens ---

    // After the quote-replacing expansion, subsequent commands (including a
    // quoted heredoc) must still tokenize normally.
    #[test]
    fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
        let source = r#"out_line="${out_line//'"'/'\"'}"
echo "Error: Missing python3!"
cat << 'EOF' > "${pywrapper}"
import os
EOF
"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(r#"out_line=${out_line//'"'/'"'}"#),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("Error: Missing python3!"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("EOF"));
        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("${pywrapper}"));
    }

    // `${crypt//\\/\\\\}` stays a single token; the span covers the raw text
    // while the cooked word string halves the backslash escapes.
    #[test]
    fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
        let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);
        assert_eq!(token.span.slice(source), "crypt=${crypt//\\\\/\\\\\\\\}");
        assert!(token.source_slice(source).is_none());
        assert_eq!(
            token.word_string().as_deref(),
            Some("crypt=${crypt//\\/\\\\}")
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }

    // A literal `{` inside a `${response#*{...}` trim pattern must not pull
    // the rest of the function body into the word.
    #[test]
    fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
        let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n  if true; then\n    txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n  fi\n}\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("true"));
        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(
                "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
            ),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("fi"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }

    // A literal `{` as a case pattern must not open a brace group; following
    // arms and `esac` still tokenize.
    #[test]
    fn test_case_pattern_literal_left_brace_does_not_swallow_following_arms() {
        let source = "case \"$word\" in\n  {) : ;;\n  :) : ;;\nesac\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("case"));
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$word"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("in"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("esac"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5545
5546    #[test]
5547    fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
5548        let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
5549        let mut lexer = Lexer::new(source);
5550
5551        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5552        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5553        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5554        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5555        assert_next_token(&mut lexer, TokenKind::And, None);
5556        assert_next_token(&mut lexer, TokenKind::Word, Some("!"));
5557        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5558        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5559        assert_next_token(&mut lexer, TokenKind::Word, Some("=~"));
5560        assert_next_token(&mut lexer, TokenKind::Word, Some("^{"));
5561        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5562        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5563        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5564        assert_next_token(&mut lexer, TokenKind::Newline, None);
5565        assert!(lexer.next_lexed_token().is_none());
5566    }
5567
5568    #[test]
5569    fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
5570        let source = "echo -{$(echo a),b}-\n";
5571        let mut lexer = Lexer::new(source);
5572
5573        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5574        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$(echo a),b}-"));
5575        assert_next_token(&mut lexer, TokenKind::Newline, None);
5576        assert!(lexer.next_lexed_token().is_none());
5577    }
5578
5579    #[test]
5580    fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
5581        let source = "echo -{$((1 + 2)),b}-\n";
5582        let mut lexer = Lexer::new(source);
5583
5584        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5585        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$((1 + 2)),b}-"));
5586        assert_next_token(&mut lexer, TokenKind::Newline, None);
5587        assert!(lexer.next_lexed_token().is_none());
5588    }
5589
5590    #[test]
5591    fn test_operators() {
5592        let mut lexer = Lexer::new("a |& b | c && d || e; f &");
5593
5594        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5595        assert_next_token(&mut lexer, TokenKind::PipeBoth, None);
5596        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5597        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5598        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5599        assert_next_token(&mut lexer, TokenKind::And, None);
5600        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5601        assert_next_token(&mut lexer, TokenKind::Or, None);
5602        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5603        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5604        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5605        assert_next_token(&mut lexer, TokenKind::Background, None);
5606        assert!(lexer.next_lexed_token().is_none());
5607    }
5608
5609    #[test]
5610    fn test_double_left_bracket_requires_separator() {
5611        let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");
5612
5613        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5614        assert_next_token(&mut lexer, TokenKind::Word, Some("foo"));
5615        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5616        assert_next_token(&mut lexer, TokenKind::Newline, None);
5617        assert_next_token(&mut lexer, TokenKind::Word, Some("[[z]"));
5618        assert_next_token(&mut lexer, TokenKind::Newline, None);
5619        assert!(lexer.next_lexed_token().is_none());
5620    }
5621
    #[test]
    fn test_redirects() {
        // One of each redirect operator; operator tokens carry no text.
        let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");

        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
        assert_next_token(&mut lexer, TokenKind::RedirectOut, None); // >
        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None); // >>
        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None); // >>| lexes like >>
        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
        assert_next_token(&mut lexer, TokenKind::RedirectFdAppend, None); // 2>>|
        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
        // `2>|` lexes as a Clobber token carrying its fd as token data
        // (fd_value), not as token text.
        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Clobber);
        assert_eq!(token.fd_value(), Some(2));
        assert_eq!(token_text(&token, lexer.input), None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
        assert_next_token(&mut lexer, TokenKind::RedirectIn, None); // <
        assert_next_token(&mut lexer, TokenKind::Word, Some("g"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None); // <<
        assert_next_token(&mut lexer, TokenKind::Word, Some("h"));
        assert_next_token(&mut lexer, TokenKind::HereString, None); // <<<
        assert_next_token(&mut lexer, TokenKind::Word, Some("i"));
        assert_next_token(&mut lexer, TokenKind::RedirectBothAppend, None); // &>>
        assert_next_token(&mut lexer, TokenKind::Word, Some("j"));
        assert_next_token(&mut lexer, TokenKind::RedirectReadWrite, None); // <>
        assert_next_token(&mut lexer, TokenKind::Word, Some("k"));
    }
5651
5652    #[test]
5653    fn test_comment() {
5654        let mut lexer = Lexer::new("echo hello # this is a comment\necho world");
5655
5656        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5657        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5658        assert_next_token(&mut lexer, TokenKind::Newline, None);
5659        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5660        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5661    }
5662
5663    #[test]
5664    fn test_comment_token_with_span() {
5665        let mut lexer = Lexer::new("# lead\necho hi # tail");
5666
5667        let comment = lexer.next_lexed_token_with_comments().unwrap();
5668        assert_eq!(comment.kind, TokenKind::Comment);
5669        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
5670        assert_eq!(comment.span.start.line, 1);
5671        assert_eq!(comment.span.start.column, 1);
5672        assert_eq!(comment.span.end.line, 1);
5673        assert_eq!(comment.span.end.column, 7);
5674
5675        assert_next_token(&mut lexer, TokenKind::Newline, None);
5676        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5677        assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));
5678
5679        let inline = lexer.next_lexed_token_with_comments().unwrap();
5680        assert_eq!(inline.kind, TokenKind::Comment);
5681        assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
5682        assert_eq!(inline.span.start.line, 2);
5683        assert_eq!(inline.span.start.column, 9);
5684    }
5685
5686    #[test]
5687    fn test_comment_token_preserves_hash_boundaries() {
5688        let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");
5689
5690        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5691        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
5692        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
5693        assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
5694        assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
5695        assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
5696        assert!(lexer.next_lexed_token_with_comments().is_none());
5697    }
5698
5699    #[test]
5700    fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5701        let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5702
5703        let mut saw_comment = false;
5704        while let Some(token) = lexer.next_lexed_token_with_comments() {
5705            if token.kind == TokenKind::Comment {
5706                saw_comment = true;
5707                break;
5708            }
5709        }
5710
5711        assert!(
5712            !saw_comment,
5713            "zsh inline glob controls inside [[ ]] should not lex as comments"
5714        );
5715    }
5716
5717    #[test]
5718    fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5719        let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5720
5721        let mut saw_comment = false;
5722        while let Some(token) = lexer.next_lexed_token_with_comments() {
5723            if token.kind == TokenKind::Comment {
5724                saw_comment = true;
5725                break;
5726            }
5727        }
5728
5729        assert!(
5730            !saw_comment,
5731            "zsh arithmetic char literals inside (( )) should not lex as comments"
5732        );
5733    }
5734
    #[test]
    fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
        // A zsh `${name//pattern/repl}` replacement whose bracket expression
        // contains embedded double quotes must still lex as one QuotedWord.
        let mut lexer = Lexer::new(
            "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
        );

        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
        // Single-quoted format string comes through as a LiteralWord.
        assert_next_token(
            &mut lexer,
            TokenKind::LiteralWord,
            Some("\\e]133;C;cmdline_url=%s\\a"),
        );
        // The entire replacement expression stays one QuotedWord token.
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
    }
5755
    #[test]
    fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
        // The same quote-heavy replacement word as the previous test, but
        // inside a zsh anonymous function body `() { … } "$1"`; the closing
        // `}` must still arrive as a RightBrace token.
        let mut lexer = Lexer::new(
            "() {\n  builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
        );

        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
        assert_next_token(
            &mut lexer,
            TokenKind::LiteralWord,
            Some("\\e]133;C;cmdline_url=%s\\a"),
        );
        // The replacement expression stays a single QuotedWord.
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        // Function body closes, then the argument word follows.
        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
    }
5783
5784    #[test]
5785    fn test_variable_words() {
5786        let mut lexer = Lexer::new("echo $HOME $USER");
5787
5788        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5789        assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
5790        assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
5791        assert!(lexer.next_lexed_token().is_none());
5792    }
5793
5794    #[test]
5795    fn test_pipeline_tokens() {
5796        let mut lexer = Lexer::new("echo hello | cat");
5797
5798        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5799        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5800        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5801        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5802        assert!(lexer.next_lexed_token().is_none());
5803    }
5804
5805    #[test]
5806    fn test_read_heredoc() {
5807        // Simulate state after reading "cat <<EOF" - positioned at newline before content
5808        let mut lexer = Lexer::new("\nhello\nworld\nEOF");
5809        let content = lexer.read_heredoc("EOF", false);
5810        assert_eq!(content.content, "hello\nworld\n");
5811    }
5812
5813    #[test]
5814    fn test_read_heredoc_single_line() {
5815        let mut lexer = Lexer::new("\ntest\nEOF");
5816        let content = lexer.read_heredoc("EOF", false);
5817        assert_eq!(content.content, "test\n");
5818    }
5819
5820    #[test]
5821    fn test_read_heredoc_full_scenario() {
5822        // Full scenario: "cat <<EOF\nhello\nworld\nEOF"
5823        let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
5824
5825        // Parser would read these tokens
5826        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5827        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5828        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5829
5830        // Now read heredoc content
5831        let content = lexer.read_heredoc("EOF", false);
5832        assert_eq!(content.content, "hello\nworld\n");
5833    }
5834
5835    #[test]
5836    fn test_read_heredoc_with_redirect() {
5837        // Rest-of-line (> file.txt) is re-injected into the lexer buffer
5838        let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
5839        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5840        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5841        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5842        let content = lexer.read_heredoc("EOF", false);
5843        assert_eq!(content.content, "hello\n");
5844        // The redirect tokens are now available from the lexer
5845        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5846        assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
5847    }
5848
5849    #[test]
5850    fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
5851        let source = "cat <<EOF | grep hello \\\n  | sort \\\n  > out.txt\nhello\nEOF\n";
5852        let mut lexer = Lexer::new(source);
5853
5854        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5855        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5856        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5857
5858        let heredoc = lexer.read_heredoc("EOF", false);
5859        assert_eq!(heredoc.content, "hello\n");
5860
5861        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5862        assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
5863        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5864        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5865        assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
5866        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5867        assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
5868    }
5869
5870    #[test]
5871    fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
5872        let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
5873        let mut lexer = Lexer::new(source);
5874
5875        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5876        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5877        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5878
5879        let heredoc = lexer.read_heredoc("EOF", false);
5880        assert_eq!(heredoc.content, "1\n2\n3\n");
5881    }
5882
5883    #[test]
5884    fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
5885        let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
5886        let mut lexer = Lexer::new(source);
5887
5888        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5889        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5890        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5891
5892        let heredoc = lexer.read_heredoc("EOF", false);
5893        assert_eq!(heredoc.content, "body\n");
5894    }
5895
5896    #[test]
5897    fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
5898        let source = "cat <<EOF # note \\\nbody\nEOF\n";
5899        let mut lexer = Lexer::new(source);
5900
5901        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5902        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5903        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5904
5905        let heredoc = lexer.read_heredoc("EOF", false);
5906        assert_eq!(heredoc.content, "body\n");
5907    }
5908
5909    #[test]
5910    fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
5911        let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
5912        let mut lexer = Lexer::new(source);
5913
5914        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5915        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5916        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5917        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5918
5919        let heredoc = lexer.read_heredoc("EOF", false);
5920        assert_eq!(heredoc.content, "body\n");
5921
5922        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5923    }
5924
5925    #[test]
5926    fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
5927        let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
5928        let mut lexer = Lexer::new(source);
5929
5930        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5931        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5932        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5933
5934        let heredoc = lexer.read_heredoc("EOF", false);
5935        assert_eq!(heredoc.content, "1\n");
5936
5937        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5938        assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
5939    }
5940
5941    #[test]
5942    fn test_read_heredoc_with_redirect_preserves_following_spans() {
5943        let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
5944        let mut lexer = Lexer::new(source);
5945
5946        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5947        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5948        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5949
5950        let heredoc = lexer.read_heredoc("EOF", false);
5951        assert_eq!(heredoc.content, "hello\n");
5952
5953        let redirect = lexer.next_lexed_token_with_comments().unwrap();
5954        assert_eq!(redirect.kind, TokenKind::RedirectOut);
5955        assert_eq!(redirect.span.slice(source), ">");
5956
5957        let target = lexer.next_lexed_token_with_comments().unwrap();
5958        assert_eq!(target.kind, TokenKind::Word);
5959        assert_eq!(
5960            token_text(&target, lexer.input).as_deref(),
5961            Some("file.txt")
5962        );
5963        assert_eq!(target.span.slice(source), "file.txt");
5964
5965        let newline = lexer.next_lexed_token_with_comments().unwrap();
5966        assert_eq!(newline.kind, TokenKind::Newline);
5967        assert_eq!(newline.span.slice(source), "\n");
5968
5969        let comment = lexer.next_lexed_token_with_comments().unwrap();
5970        assert_eq!(comment.kind, TokenKind::Comment);
5971        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
5972        assert_eq!(comment.span.slice(source), "# done");
5973    }
5974
5975    #[test]
5976    fn test_comment_with_unicode() {
5977        // Comment containing multi-byte UTF-8 characters
5978        let source = "# café résumé\necho ok";
5979        let mut lexer = Lexer::new(source);
5980
5981        let comment = lexer.next_lexed_token_with_comments().unwrap();
5982        assert_eq!(comment.kind, TokenKind::Comment);
5983        assert_eq!(
5984            token_text(&comment, lexer.input).as_deref(),
5985            Some(" café résumé")
5986        );
5987        // Span should cover exactly the comment bytes (including #)
5988        let start = comment.span.start.offset;
5989        let end = comment.span.end.offset;
5990        assert_eq!(start, 0);
5991        assert_eq!(&source[start..end], "# café résumé");
5992        assert!(source.is_char_boundary(start));
5993        assert!(source.is_char_boundary(end));
5994
5995        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
5996        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5997    }
5998
5999    #[test]
6000    fn test_comment_with_cjk_characters() {
6001        // CJK characters are 3-byte UTF-8; offsets must land on char boundaries
6002        let source = "# 你好世界\necho ok";
6003        let mut lexer = Lexer::new(source);
6004
6005        let comment = lexer.next_lexed_token_with_comments().unwrap();
6006        assert_eq!(comment.kind, TokenKind::Comment);
6007        assert_eq!(
6008            token_text(&comment, lexer.input).as_deref(),
6009            Some(" 你好世界")
6010        );
6011        let start = comment.span.start.offset;
6012        let end = comment.span.end.offset;
6013        assert_eq!(&source[start..end], "# 你好世界");
6014        assert!(source.is_char_boundary(start));
6015        assert!(source.is_char_boundary(end));
6016    }
6017
6018    #[test]
6019    fn test_heredoc_with_comments_inside() {
6020        // Comments inside heredoc body should NOT appear as comment tokens
6021        let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
6022        let mut lexer = Lexer::new(source);
6023
6024        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6025        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6026        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6027
6028        let heredoc = lexer.read_heredoc("EOF", false);
6029        assert_eq!(heredoc.content, "# not a comment\nreal line\n");
6030
6031        // After heredoc, replayed line termination should appear before
6032        // tokens from following source lines.
6033        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6034        let comment = lexer.next_lexed_token_with_comments().unwrap();
6035        assert_eq!(comment.kind, TokenKind::Comment);
6036        assert_eq!(
6037            token_text(&comment, lexer.input).as_deref(),
6038            Some(" real comment")
6039        );
6040    }
6041
6042    #[test]
6043    fn test_heredoc_with_hash_in_variable() {
6044        // ${var#pattern} inside heredoc should not produce comment tokens
6045        let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
6046        let mut lexer = Lexer::new(source);
6047
6048        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6049        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6050        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6051
6052        let heredoc = lexer.read_heredoc("EOF", false);
6053        assert_eq!(heredoc.content, "val=${x#prefix}\n");
6054    }
6055
6056    #[test]
6057    fn test_heredoc_span_does_not_leak() {
6058        // Heredoc content span must be within source bounds and must not
6059        // overlap with content before or after.
6060        let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
6061        let mut lexer = Lexer::new(source);
6062
6063        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6064        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6065        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6066
6067        let heredoc = lexer.read_heredoc("EOF", false);
6068        let start = heredoc.content_span.start.offset;
6069        let end = heredoc.content_span.end.offset;
6070        assert!(
6071            end <= source.len(),
6072            "heredoc span end ({end}) exceeds source length ({})",
6073            source.len()
6074        );
6075        assert_eq!(&source[start..end], "hello\nworld\n");
6076
6077        // Tokens after heredoc should still parse correctly
6078        assert_next_token(&mut lexer, TokenKind::Newline, None);
6079        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6080        assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
6081    }
6082
    #[test]
    fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
        // autoconf-style input: a quoted heredoc (`<<\_ACEOF`) whose body
        // contains a stray backtick, followed by backtick command
        // substitutions on later lines. The heredoc must not desynchronize
        // the spans of the backtick words that come after it.
        let source = "\
cat <<\\_ACEOF
Use these variables to override the choices made by `configure' or to help
it to find libraries and programs with nonstandard names/locations.
_ACEOF
ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
";
        let mut lexer = Lexer::new(source);

        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
        // The escaped delimiter keeps its backslash in the token span.
        let delimiter = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(delimiter.kind, TokenKind::Word);
        assert_eq!(delimiter.span.slice(source), "\\_ACEOF");

        // Body is read verbatim, unmatched backtick included.
        let heredoc = lexer.read_heredoc("_ACEOF", false);
        assert_eq!(
            heredoc.content,
            "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
        );

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);

        // First assignment after the heredoc: span must cover the whole word,
        // and its segments must slice back to the matching source text.
        let first = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(first.kind, TokenKind::Word);
        assert_eq!(
            first.span.slice(source),
            "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
        );
        let first_segments = first
            .word()
            .unwrap()
            .segments()
            .map(|segment| {
                (
                    segment.kind(),
                    segment.as_str().to_string(),
                    segment.span().map(|span| span.slice(source).to_string()),
                )
            })
            .collect::<Vec<_>>();
        assert_eq!(
            first_segments,
            vec![
                (
                    LexedWordSegmentKind::Plain,
                    "ac_dir_suffix=/".to_string(),
                    Some("ac_dir_suffix=/".to_string()),
                ),
                (
                    LexedWordSegmentKind::Plain,
                    "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
                    Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
                ),
            ]
        );

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);

        // Second assignment: same checks one line further on.
        let second = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(second.kind, TokenKind::Word);
        assert_eq!(
            second.span.slice(source),
            "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
        );
        let second_segments = second
            .word()
            .unwrap()
            .segments()
            .map(|segment| {
                (
                    segment.kind(),
                    segment.as_str().to_string(),
                    segment.span().map(|span| span.slice(source).to_string()),
                )
            })
            .collect::<Vec<_>>();
        assert_eq!(
            second_segments,
            vec![
                (
                    LexedWordSegmentKind::Plain,
                    "ac_top_builddir_sub=".to_string(),
                    Some("ac_top_builddir_sub=".to_string()),
                ),
                (
                    LexedWordSegmentKind::Plain,
                    "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
                    Some(
                        "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
                            .to_string(),
                    ),
                ),
            ]
        );
    }
6182
6183    #[test]
6184    fn test_heredoc_with_unicode_content() {
6185        // Heredoc containing multi-byte characters; spans must be on char boundaries
6186        let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
6187        let mut lexer = Lexer::new(source);
6188
6189        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6190        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6191        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6192
6193        let heredoc = lexer.read_heredoc("EOF", false);
6194        assert_eq!(heredoc.content, "# 你好\ncafé\n");
6195        let start = heredoc.content_span.start.offset;
6196        let end = heredoc.content_span.end.offset;
6197        assert!(
6198            source.is_char_boundary(start),
6199            "heredoc span start ({start}) not on char boundary"
6200        );
6201        assert!(
6202            source.is_char_boundary(end),
6203            "heredoc span end ({end}) not on char boundary"
6204        );
6205        assert_eq!(&source[start..end], "# 你好\ncafé\n");
6206    }
6207
6208    #[test]
6209    fn test_assoc_compound_assignment() {
6210        // declare -A m=([foo]="bar" [baz]="qux") should keep the compound
6211        // assignment as a single Word token
6212        let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6213        assert_next_token(
6214            &mut lexer,
6215            TokenKind::Word,
6216            Some(r#"m=([foo]="bar" [baz]="qux")"#),
6217        );
6218        assert!(lexer.next_lexed_token().is_none());
6219    }
6220
6221    #[test]
6222    fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6223        let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6224        let mut lexer = Lexer::new(source);
6225
6226        let token = lexer.next_lexed_token().unwrap();
6227        assert_eq!(token.kind, TokenKind::Word);
6228        assert_eq!(token.span.slice(source), source);
6229        assert!(lexer.next_lexed_token().is_none());
6230    }
6231
6232    #[test]
6233    fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6234        let source = r#"foo\_bar@(baz|qux)"#;
6235        let mut lexer = Lexer::new(source);
6236
6237        let token = lexer.next_lexed_token().unwrap();
6238        assert_eq!(token.kind, TokenKind::Word);
6239        assert_eq!(token.span.slice(source), source);
6240        assert!(lexer.next_lexed_token().is_none());
6241    }
6242
6243    #[test]
6244    fn test_indexed_array_not_collapsed() {
6245        // arr=("hello world") should NOT be collapsed — parser handles
6246        // quoted elements token-by-token via the LeftParen path
6247        let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6248        assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6249        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6250    }
6251
6252    #[test]
6253    fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
6254        let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
6255        let mut lexer = Lexer::new(source);
6256
6257        assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
6258        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6259
6260        let token = lexer.next_lexed_token().unwrap();
6261        assert_eq!(token.kind, TokenKind::Word);
6262        assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);
6263
6264        let word = token.word().unwrap();
6265        let segments: Vec<_> = word
6266            .segments()
6267            .map(|segment| (segment.kind(), segment.as_str().to_string()))
6268            .collect();
6269        assert_eq!(
6270            segments,
6271            vec![
6272                (
6273                    LexedWordSegmentKind::DoubleQuoted,
6274                    "$plugin_dir".to_string()
6275                ),
6276                (LexedWordSegmentKind::Plain, "/*".to_string()),
6277                (LexedWordSegmentKind::Plain, "(:t)".to_string()),
6278            ]
6279        );
6280
6281        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6282        assert!(lexer.next_lexed_token().is_none());
6283    }
6284
6285    #[test]
6286    fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
6287        let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
6288        let mut lexer = Lexer::new(source);
6289
6290        assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
6291        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6292
6293        let token = lexer.next_lexed_token().unwrap();
6294        assert_eq!(token.kind, TokenKind::Word);
6295        assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);
6296
6297        let word = token.word().unwrap();
6298        let segments: Vec<_> = word
6299            .segments()
6300            .map(|segment| (segment.kind(), segment.as_str().to_string()))
6301            .collect();
6302        assert_eq!(
6303            segments,
6304            vec![
6305                (
6306                    LexedWordSegmentKind::DoubleQuoted,
6307                    "$__GREP_CACHE_FILE".to_string()
6308                ),
6309                (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
6310            ]
6311        );
6312
6313        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6314        assert!(lexer.next_lexed_token().is_none());
6315    }
6316
6317    #[test]
6318    fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6319        let source = r#"$dir/${~pats}(N)"#;
6320        let mut lexer = Lexer::new(source);
6321
6322        let token = lexer.next_lexed_token().unwrap();
6323        assert_eq!(token.kind, TokenKind::Word);
6324        assert_eq!(token.span.slice(source), source);
6325        assert!(lexer.next_lexed_token().is_none());
6326    }
6327
6328    #[test]
6329    fn test_dollar_word_does_not_absorb_function_parens() {
6330        let mut lexer = Lexer::new(r#"foo$x()"#);
6331
6332        assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6333        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6334        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6335        assert!(lexer.next_lexed_token().is_none());
6336    }
6337
6338    #[test]
6339    fn test_command_substitution_word_does_not_absorb_function_parens() {
6340        let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6341
6342        assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6343        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6344        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6345        assert!(lexer.next_lexed_token().is_none());
6346    }
6347
6348    /// Regression test for fuzz crash: single digit at EOF should not panic
6349    /// (crash-13c5f6f887a11b2296d67f9857975d63b205ac4b)
6350    #[test]
6351    fn test_digit_at_eof_no_panic() {
6352        // A lone digit with no following redirect operator must not panic
6353        let mut lexer = Lexer::new("2");
6354        let token = lexer.next_lexed_token();
6355        assert!(token.is_some());
6356    }
6357
6358    /// Issue #599: Nested ${...} inside unquoted ${...} must be a single token.
6359    #[test]
6360    fn test_nested_brace_expansion_single_token() {
6361        // ${arr[${#arr[@]} - 1]} should be ONE word token, not split at inner }
6362        let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6363        assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6364        // No more tokens — everything was consumed
6365        assert!(lexer.next_lexed_token().is_none());
6366    }
6367
6368    /// Simple ${var} still works after brace depth change.
6369    #[test]
6370    fn test_simple_brace_expansion_unchanged() {
6371        let mut lexer = Lexer::new("${foo}");
6372        assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6373        assert!(lexer.next_lexed_token().is_none());
6374    }
6375
6376    #[test]
6377    fn test_nvm_fixture_lexes_without_stalling() {
6378        let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6379        let mut lexer = Lexer::new(input);
6380        let mut tokens = 0usize;
6381
6382        while lexer.next_lexed_token().is_some() {
6383            tokens += 1;
6384            assert!(
6385                tokens < 100_000,
6386                "lexer should continue making progress on the nvm fixture"
6387            );
6388        }
6389
6390        assert!(tokens > 0, "nvm fixture should produce at least one token");
6391    }
6392
6393    #[test]
6394    fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
6395        let input = concat!(
6396            "case \"${_input_type:-}\" in\n",
6397            "  html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
6398            "  org)  _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
6399            "esac\n",
6400        );
6401
6402        assert_non_newline_tokens_stay_on_one_line(input);
6403
6404        let mut lexer = Lexer::new(input);
6405        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6406            .map(|token| (token.kind, token_text(&token, input)))
6407            .collect::<Vec<_>>();
6408        assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
6409        assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
6410    }
6411
6412    #[test]
6413    fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6414        let input = concat!(
6415            "case $2 in\n",
6416            "  cygwin*) bin='cygwin32/bin' ;|\n",
6417            "esac\n",
6418        );
6419
6420        let mut lexer = Lexer::new(input);
6421        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6422            .map(|token| (token.kind, token_text(&token, input)))
6423            .collect::<Vec<_>>();
6424
6425        assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6426        assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6427        assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6428    }
6429
6430    #[test]
6431    fn test_inline_if_with_array_append_stays_line_local() {
6432        let input = concat!(
6433            "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
6434            "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
6435        );
6436
6437        assert_non_newline_tokens_stay_on_one_line(input);
6438    }
6439
6440    #[test]
6441    fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6442        let source = "unsetopt interactive_comments\n#literal\n";
6443        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6444        let mut lexer = Lexer::with_profile(source, &profile);
6445
6446        assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6447        assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6448        assert_next_token(&mut lexer, TokenKind::Newline, None);
6449        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6450    }
6451
6452    #[test]
6453    fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
6454        let source = "setopt rc_quotes\nprint 'a''b'\n";
6455        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6456        let mut lexer = Lexer::with_profile(source, &profile);
6457
6458        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6459        assert_next_token(&mut lexer, TokenKind::Word, Some("rc_quotes"));
6460        assert_next_token(&mut lexer, TokenKind::Newline, None);
6461        assert_next_token(&mut lexer, TokenKind::Word, Some("print"));
6462        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("a'b"));
6463    }
6464
6465    #[test]
6466    fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
6467        let source = "setopt ignore_braces\n{ echo }\n";
6468        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6469        let mut lexer = Lexer::with_profile(source, &profile);
6470
6471        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6472        assert_next_token(&mut lexer, TokenKind::Word, Some("ignore_braces"));
6473        assert_next_token(&mut lexer, TokenKind::Newline, None);
6474        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
6475        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6476        assert_next_token(&mut lexer, TokenKind::Word, Some("}"));
6477    }
6478
    #[test]
    fn test_heredoc_in_arithmetic_fuzz_crash() {
        // Regression test: the fuzzer found that heredoc re-injection inside
        // arithmetic context can push self.offset past self.input.len(),
        // causing a panic in read_unquoted_segment's borrowed-slice path.
        //
        // The byte blob below is the minimized fuzzer reproducer, kept
        // verbatim: it mixes `))` closers, `<<E` heredoc openers, NUL bytes,
        // and control characters. Do not "clean it up" — the exact byte
        // sequence is what drives the offset past the end of the input.
        let data: &[u8] = &[
            35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
            40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
            33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
            119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
            0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
            119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
            122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
            49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
            119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
            122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
            61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
            32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
            122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
            39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
            122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
            32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
            0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
        ];
        // The reproducer is valid UTF-8 (NULs and control chars are legal).
        let input = std::str::from_utf8(data).unwrap();
        // Wrap the reproducer in an arithmetic expansion to recreate the
        // crashing context.
        let script = format!("echo $(({input}))\n");
        // Must not panic.
        let _ = crate::parser::Parser::new(&script).parse();
    }
6510}