Skip to main content

shuck_parser/parser/
lexer.rs

1//! Lexer for bash scripts
2//!
3//! Tokenizes input into a stream of tokens with source position tracking.
4
5use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Small bit set recording lexer-side properties of a token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Bit 0: the token's word payload contains owned (cooked) text.
    const COOKED_TEXT: u8 = 0b0000_0001;
    /// Bit 1: the token has been marked synthetic.
    const SYNTHETIC: u8 = 0b0000_0010;

    /// Flag set with no bits raised.
    const fn empty() -> Self {
        Self(0)
    }

    /// Flag set with only the cooked-text bit raised.
    const fn cooked_text() -> Self {
        Self::empty().union(Self::COOKED_TEXT)
    }

    /// Return a copy of `self` with the synthetic bit raised.
    pub(crate) const fn with_synthetic(self) -> Self {
        self.union(Self::SYNTHETIC)
    }

    /// Whether the cooked-text bit is raised.
    pub(crate) const fn has_cooked_text(self) -> bool {
        self.contains(Self::COOKED_TEXT)
    }

    /// Whether the synthetic bit is raised.
    pub(crate) const fn is_synthetic(self) -> bool {
        self.contains(Self::SYNTHETIC)
    }

    /// Bitwise-or `bits` into the set.
    const fn union(self, bits: u8) -> Self {
        Self(self.0 | bits)
    }

    /// Whether any of `bits` is raised.
    const fn contains(self, bits: u8) -> bool {
        (self.0 & bits) != 0
    }
}
40
41#[derive(Debug, Clone, PartialEq, Eq)]
42pub(crate) enum TokenText<'a> {
43    Borrowed(&'a str),
44    Shared {
45        source: Arc<str>,
46        range: Range<usize>,
47    },
48    Owned(String),
49}
50
51impl TokenText<'_> {
52    pub(crate) fn as_str(&self) -> &str {
53        match self {
54            Self::Borrowed(text) => text,
55            Self::Shared { source, range } => &source[range.clone()],
56            Self::Owned(text) => text,
57        }
58    }
59
60    fn into_owned<'a>(self) -> TokenText<'a> {
61        match self {
62            Self::Borrowed(text) => TokenText::Owned(text.to_string()),
63            Self::Shared { source, range } => TokenText::Shared { source, range },
64            Self::Owned(text) => TokenText::Owned(text),
65        }
66    }
67
68    fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
69        match self {
70            Self::Borrowed(text) => span
71                .filter(|span| span.end.offset <= source.len())
72                .map_or_else(
73                    || TokenText::Owned(text.to_string()),
74                    |span| TokenText::Shared {
75                        source: Arc::clone(source),
76                        range: span.start.offset..span.end.offset,
77                    },
78                ),
79            Self::Shared { source, range } => TokenText::Shared { source, range },
80            Self::Owned(text) => TokenText::Owned(text),
81        }
82    }
83}
84
/// Lexical form that produced one segment of a lexed shell word.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LexedWordSegmentKind {
    /// Plain text, i.e. unquoted or not otherwise classified.
    Plain,
    /// Contents of a single-quoted string (`'...'`).
    SingleQuoted,
    /// Contents of a dollar-single-quoted string (`$'...'`).
    DollarSingleQuoted,
    /// Contents of a double-quoted string (`"..."`).
    DoubleQuoted,
    /// Contents of a dollar-double-quoted string (`$"..."`).
    DollarDoubleQuoted,
    /// Text assembled from more than one lexical form.
    Composite,
}
101
102/// One segment of a lexed shell word, optionally backed by source text.
103#[derive(Debug, Clone, PartialEq, Eq)]
104pub(crate) struct LexedWordSegment<'a> {
105    kind: LexedWordSegmentKind,
106    text: TokenText<'a>,
107    span: Option<Span>,
108    wrapper_span: Option<Span>,
109}
110
111impl<'a> LexedWordSegment<'a> {
112    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
113        Self {
114            kind,
115            text: TokenText::Borrowed(text),
116            span,
117            wrapper_span: span,
118        }
119    }
120
121    fn borrowed_with_spans(
122        kind: LexedWordSegmentKind,
123        text: &'a str,
124        span: Option<Span>,
125        wrapper_span: Option<Span>,
126    ) -> Self {
127        Self {
128            kind,
129            text: TokenText::Borrowed(text),
130            span,
131            wrapper_span,
132        }
133    }
134
135    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
136        Self {
137            kind,
138            text: TokenText::Owned(text),
139            span: None,
140            wrapper_span: None,
141        }
142    }
143
144    fn owned_with_spans(
145        kind: LexedWordSegmentKind,
146        text: String,
147        span: Option<Span>,
148        wrapper_span: Option<Span>,
149    ) -> Self {
150        Self {
151            kind,
152            text: TokenText::Owned(text),
153            span,
154            wrapper_span,
155        }
156    }
157
158    /// Borrow this segment's cooked text.
159    pub(crate) fn as_str(&self) -> &str {
160        self.text.as_str()
161    }
162
163    pub(crate) const fn text_is_source_backed(&self) -> bool {
164        matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
165    }
166
167    /// Return the lexical classification of this segment.
168    pub(crate) const fn kind(&self) -> LexedWordSegmentKind {
169        self.kind
170    }
171
172    /// Return the span of the inner text, if it is tracked.
173    pub(crate) const fn span(&self) -> Option<Span> {
174        self.span
175    }
176
177    /// Return the span including surrounding quoting syntax when available.
178    pub(crate) fn wrapper_span(&self) -> Option<Span> {
179        self.wrapper_span.or(self.span)
180    }
181
182    fn rebased(mut self, base: Position) -> Self {
183        self.span = self.span.map(|span| span.rebased(base));
184        self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
185        self
186    }
187
188    fn into_owned<'b>(self) -> LexedWordSegment<'b> {
189        LexedWordSegment {
190            kind: self.kind,
191            text: self.text.into_owned(),
192            span: self.span,
193            wrapper_span: self.wrapper_span,
194        }
195    }
196
197    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
198        LexedWordSegment {
199            kind: self.kind,
200            text: self.text.into_shared(source, self.span),
201            span: self.span,
202            wrapper_span: self.wrapper_span,
203        }
204    }
205}
206
207/// Source-backed representation of a shell word produced by the lexer.
208#[derive(Debug, Clone, PartialEq, Eq)]
209pub(crate) struct LexedWord<'a> {
210    primary_segment: LexedWordSegment<'a>,
211    trailing_segments: Vec<LexedWordSegment<'a>>,
212}
213
214impl<'a> LexedWord<'a> {
215    fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
216        Self {
217            primary_segment,
218            trailing_segments: Vec::new(),
219        }
220    }
221
222    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
223        Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
224    }
225
226    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
227        Self::from_segment(LexedWordSegment::owned(kind, text))
228    }
229
230    fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
231        self.trailing_segments.push(segment);
232    }
233
234    /// Iterate over the segments that make up this word.
235    pub(crate) fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
236        std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
237    }
238
239    /// Return the word text when it is represented by a single segment.
240    pub(crate) fn text(&self) -> Option<&str> {
241        self.single_segment().map(LexedWordSegment::as_str)
242    }
243
244    /// Join all segments into an owned string.
245    pub(crate) fn joined_text(&self) -> String {
246        let mut text = String::new();
247        for segment in self.segments() {
248            text.push_str(segment.as_str());
249        }
250        text
251    }
252
253    /// Return the only segment when this word is not segmented.
254    pub(crate) fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
255        self.trailing_segments
256            .is_empty()
257            .then_some(&self.primary_segment)
258    }
259
260    fn has_cooked_text(&self) -> bool {
261        self.segments()
262            .any(|segment| matches!(segment.text, TokenText::Owned(_)))
263    }
264
265    fn rebased(mut self, base: Position) -> Self {
266        self.primary_segment = self.primary_segment.rebased(base);
267        self.trailing_segments = self
268            .trailing_segments
269            .into_iter()
270            .map(|segment| segment.rebased(base))
271            .collect();
272        self
273    }
274
275    fn into_owned<'b>(self) -> LexedWord<'b> {
276        LexedWord {
277            primary_segment: self.primary_segment.into_owned(),
278            trailing_segments: self
279                .trailing_segments
280                .into_iter()
281                .map(LexedWordSegment::into_owned)
282                .collect(),
283        }
284    }
285
286    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
287        LexedWord {
288            primary_segment: self.primary_segment.into_shared(source),
289            trailing_segments: self
290                .trailing_segments
291                .into_iter()
292                .map(|segment| segment.into_shared(source))
293                .collect(),
294        }
295    }
296}
297
/// Reason attached to a `TokenKind::Error` token by the lexer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LexerErrorKind {
    /// A `$()` command substitution was never closed.
    CommandSubstitution,
    /// A backtick command substitution was never closed.
    BacktickSubstitution,
    /// A single-quoted string was never closed.
    SingleQuote,
    /// A double-quoted string was never closed.
    DoubleQuote,
}

impl LexerErrorKind {
    /// Static, human-readable description of the error.
    pub(crate) const fn message(self) -> &'static str {
        match self {
            LexerErrorKind::CommandSubstitution => "unterminated command substitution",
            LexerErrorKind::BacktickSubstitution => "unterminated backtick substitution",
            LexerErrorKind::SingleQuote => "unterminated single quote",
            LexerErrorKind::DoubleQuote => "unterminated double quote",
        }
    }
}
322
323#[derive(Debug, Clone, PartialEq, Eq)]
324pub(crate) enum TokenPayload<'a> {
325    None,
326    Word(LexedWord<'a>),
327    Fd(i32),
328    FdPair(i32, i32),
329    Error(LexerErrorKind),
330}
331
332/// Token produced by the shell lexer.
333///
334/// Public consumers can inspect the token kind and source span. Word payloads,
335/// descriptor payloads, and lexer recovery details are currently parser-internal
336/// so the lexer can evolve without expanding the public API.
337#[derive(Debug, Clone, PartialEq, Eq)]
338pub struct LexedToken<'a> {
339    /// Token kind used by the parser.
340    pub kind: TokenKind,
341    /// Source span covered by the token.
342    pub span: Span,
343    pub(crate) flags: TokenFlags,
344    payload: TokenPayload<'a>,
345}
346
347impl<'a> LexedToken<'a> {
348    fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
349        match kind {
350            TokenKind::Word => LexedWordSegmentKind::Plain,
351            TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
352            TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
353            _ => LexedWordSegmentKind::Composite,
354        }
355    }
356
357    pub(crate) fn punctuation(kind: TokenKind) -> Self {
358        Self {
359            kind,
360            span: Span::new(),
361            flags: TokenFlags::empty(),
362            payload: TokenPayload::None,
363        }
364    }
365
366    fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
367        let flags = if word.has_cooked_text() {
368            TokenFlags::cooked_text()
369        } else {
370            TokenFlags::empty()
371        };
372
373        Self {
374            kind,
375            span: Span::new(),
376            flags,
377            payload: TokenPayload::Word(word),
378        }
379    }
380
381    fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
382        Self::with_word_payload(
383            kind,
384            LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
385        )
386    }
387
388    fn owned_word(kind: TokenKind, text: String) -> Self {
389        Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
390    }
391
392    fn comment() -> Self {
393        Self {
394            kind: TokenKind::Comment,
395            span: Span::new(),
396            flags: TokenFlags::empty(),
397            payload: TokenPayload::None,
398        }
399    }
400
401    fn fd(kind: TokenKind, fd: i32) -> Self {
402        Self {
403            kind,
404            span: Span::new(),
405            flags: TokenFlags::empty(),
406            payload: TokenPayload::Fd(fd),
407        }
408    }
409
410    fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
411        Self {
412            kind,
413            span: Span::new(),
414            flags: TokenFlags::empty(),
415            payload: TokenPayload::FdPair(src_fd, dst_fd),
416        }
417    }
418
419    fn error(kind: LexerErrorKind) -> Self {
420        Self {
421            kind: TokenKind::Error,
422            span: Span::new(),
423            flags: TokenFlags::empty(),
424            payload: TokenPayload::Error(kind),
425        }
426    }
427
428    pub(crate) fn with_span(mut self, span: Span) -> Self {
429        self.span = span;
430        self
431    }
432
433    pub(crate) fn rebased(mut self, base: Position) -> Self {
434        self.span = self.span.rebased(base);
435        self.payload = match self.payload {
436            TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
437            payload => payload,
438        };
439        self
440    }
441
442    pub(crate) fn with_synthetic_flag(mut self) -> Self {
443        self.flags = self.flags.with_synthetic();
444        self
445    }
446
447    pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
448        let payload = match self.payload {
449            TokenPayload::None => TokenPayload::None,
450            TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
451            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
452            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
453            TokenPayload::Error(kind) => TokenPayload::Error(kind),
454        };
455
456        LexedToken {
457            kind: self.kind,
458            span: self.span,
459            flags: self.flags,
460            payload,
461        }
462    }
463
464    pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
465        let payload = match self.payload {
466            TokenPayload::None => TokenPayload::None,
467            TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
468            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
469            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
470            TokenPayload::Error(kind) => TokenPayload::Error(kind),
471        };
472
473        LexedToken {
474            kind: self.kind,
475            span: self.span,
476            flags: self.flags,
477            payload,
478        }
479    }
480
481    /// Borrow the token text when it is a single-segment word token.
482    pub(crate) fn word_text(&self) -> Option<&str> {
483        self.kind
484            .is_word_like()
485            .then_some(())
486            .and_then(|_| match &self.payload {
487                TokenPayload::Word(word) => word.text(),
488                _ => None,
489            })
490    }
491
492    /// Return an owned string containing the token's word text.
493    pub(crate) fn word_string(&self) -> Option<String> {
494        self.kind
495            .is_word_like()
496            .then_some(())
497            .and_then(|_| match &self.payload {
498                TokenPayload::Word(word) => Some(word.joined_text()),
499                _ => None,
500            })
501    }
502
503    /// Borrow the structured word payload for word-like tokens.
504    pub(crate) fn word(&self) -> Option<&LexedWord<'a>> {
505        match &self.payload {
506            TokenPayload::Word(word) => Some(word),
507            _ => None,
508        }
509    }
510
511    /// Borrow the original source slice when the token is source-backed and uncooked.
512    pub(crate) fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
513        if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
514            return None;
515        }
516
517        (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
518            .then(|| &source[self.span.start.offset..self.span.end.offset])
519    }
520
521    /// Return the file-descriptor payload for redirection tokens that carry one.
522    pub(crate) fn fd_value(&self) -> Option<i32> {
523        match self.payload {
524            TokenPayload::Fd(fd) => Some(fd),
525            _ => None,
526        }
527    }
528
529    /// Return the `(source_fd, target_fd)` payload for descriptor-pair redirections.
530    pub(crate) fn fd_pair_value(&self) -> Option<(i32, i32)> {
531        match self.payload {
532            TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
533            _ => None,
534        }
535    }
536
537    /// Return the lexer error payload when this token represents `TokenKind::Error`.
538    pub(crate) fn error_kind(&self) -> Option<LexerErrorKind> {
539        match self.payload {
540            TokenPayload::Error(kind) => Some(kind),
541            _ => None,
542        }
543    }
544}
545
546/// Result of reading a heredoc body from the source.
547#[derive(Debug, Clone, PartialEq)]
548pub(crate) struct HeredocRead {
549    /// Decoded heredoc content.
550    pub content: String,
551    /// Source span covering the heredoc body content.
552    pub content_span: Span,
553}
554
/// Default limit on how deeply command substitutions may nest in the lexer.
/// Guards against stack overflow from deeply nested `$()` patterns.
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
/// Depth limit for parameter-expansion scanning.
// NOTE(review): not referenced in the visible part of this file; confirm the
// exact scan this bounds at its use site.
const MAX_PARAMETER_EXPANSION_SCAN_DEPTH: usize = 4;
559
560#[derive(Clone, Debug)]
561struct Cursor<'a> {
562    rest: &'a str,
563}
564
565impl<'a> Cursor<'a> {
566    fn new(source: &'a str) -> Self {
567        Self { rest: source }
568    }
569
570    fn first(&self) -> Option<char> {
571        self.rest.chars().next()
572    }
573
574    fn second(&self) -> Option<char> {
575        let mut chars = self.rest.chars();
576        chars.next()?;
577        chars.next()
578    }
579
580    fn third(&self) -> Option<char> {
581        let mut chars = self.rest.chars();
582        chars.next()?;
583        chars.next()?;
584        chars.next()
585    }
586
587    fn bump(&mut self) -> Option<char> {
588        let ch = self.first()?;
589        self.rest = &self.rest[ch.len_utf8()..];
590        Some(ch)
591    }
592
593    fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
594        let start = self.rest;
595        let mut end = 0;
596
597        for ch in start.chars() {
598            if !predicate(ch) {
599                break;
600            }
601            end += ch.len_utf8();
602        }
603
604        self.rest = &start[end..];
605        &start[..end]
606    }
607
608    fn rest(&self) -> &'a str {
609        self.rest
610    }
611
612    fn skip_bytes(&mut self, count: usize) {
613        self.rest = &self.rest[count..];
614    }
615
616    fn find_byte(&self, byte: u8) -> Option<usize> {
617        memchr(byte, self.rest.as_bytes())
618    }
619}
620
621#[derive(Clone, Debug)]
622struct PositionMap<'a> {
623    source: &'a str,
624    line_starts: Arc<[usize]>,
625    cached: Position,
626}
627
/// Counters collected by the lexer when benchmark instrumentation is enabled.
#[cfg(feature = "benchmarking")]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct LexerBenchmarkCounters {
    /// Number of recorded `current_position` calls.
    pub(crate) current_position_calls: u64,
}
633
634impl<'a> PositionMap<'a> {
635    fn new(source: &'a str) -> Self {
636        let mut line_starts =
637            Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
638        line_starts.push(0);
639        line_starts.extend(
640            source
641                .bytes()
642                .enumerate()
643                .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
644        );
645
646        Self {
647            source,
648            line_starts: line_starts.into(),
649            cached: Position::new(),
650        }
651    }
652
653    fn position(&mut self, offset: usize) -> Position {
654        if offset == self.cached.offset {
655            return self.cached;
656        }
657
658        let position = if offset > self.cached.offset && offset <= self.source.len() {
659            Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
660        } else {
661            self.position_uncached(offset)
662        };
663        self.cached = position;
664        position
665    }
666
667    fn position_uncached(&self, offset: usize) -> Position {
668        let offset = offset.min(self.source.len());
669        let line_index = self
670            .line_starts
671            .partition_point(|start| *start <= offset)
672            .saturating_sub(1);
673        let line_start = self.line_starts[line_index];
674        let line_text = &self.source[line_start..offset];
675        let column = if line_text.is_ascii() {
676            line_text.len() + 1
677        } else {
678            line_text.chars().count() + 1
679        };
680
681        Position {
682            line: line_index + 1,
683            column,
684            offset,
685        }
686    }
687
688    fn advance_from(mut position: Position, text: &str) -> Position {
689        position.offset += text.len();
690        let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
691        if newline_count == 0 {
692            position.column += if text.is_ascii() {
693                text.len()
694            } else {
695                text.chars().count()
696            };
697            return position;
698        }
699
700        position.line += newline_count;
701        let tail_start = memrchr(b'\n', text.as_bytes())
702            .map(|index| index + 1)
703            .unwrap_or_default();
704        let tail = &text[tail_start..];
705        position.column = if tail.is_ascii() {
706            tail.len() + 1
707        } else {
708            tail.chars().count() + 1
709        };
710        position
711    }
712}
713
714/// Source-backed lexer for shell scripts.
715///
716/// The public lexer surface is intended for lower-level tooling and
717/// benchmarks. It tokenizes using the default bash profile; use the parser
718/// constructors when dialect or zsh option state matters.
719#[derive(Clone)]
720pub struct Lexer<'a> {
721    input: &'a str,
722    /// Current byte offset in the input/reinjected stream.
723    offset: usize,
724    cursor: Cursor<'a>,
725    position_map: PositionMap<'a>,
726    /// Buffer for re-injected characters (e.g., rest-of-line after heredoc delimiter).
727    /// Consumed before `cursor`.
728    reinject_buf: VecDeque<char>,
729    /// Cursor byte offset to restore once a heredoc replay buffer is exhausted.
730    reinject_resume_offset: Option<usize>,
731    /// Maximum allowed nesting depth for command substitution
732    max_subst_depth: usize,
733    initial_zsh_options: Option<ZshOptionState>,
734    zsh_timeline: Option<Arc<ZshOptionTimeline>>,
735    zsh_timeline_index: usize,
736    #[cfg(feature = "benchmarking")]
737    benchmark_counters: Option<LexerBenchmarkCounters>,
738}
739
740impl<'a> Lexer<'a> {
741    /// Create a new bash-profile lexer for the given input.
742    pub fn new(input: &'a str) -> Self {
743        Self::with_max_subst_depth_and_profile(
744            input,
745            DEFAULT_MAX_SUBST_DEPTH,
746            &ShellProfile::native(super::ShellDialect::Bash),
747            None,
748        )
749    }
750
751    /// Create a new lexer with a custom max substitution nesting depth.
752    /// Limits recursion in read_command_subst_into().
753    pub(super) fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
754        Self::with_max_subst_depth_and_profile(
755            input,
756            max_depth,
757            &ShellProfile::native(super::ShellDialect::Bash),
758            None,
759        )
760    }
761
762    /// Create a new lexer using the provided shell profile.
763    #[cfg(test)]
764    fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
765        let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
766            .then(|| ZshOptionTimeline::build(input, shell_profile))
767            .flatten()
768            .map(Arc::new);
769        Self::with_max_subst_depth_and_profile(
770            input,
771            DEFAULT_MAX_SUBST_DEPTH,
772            shell_profile,
773            zsh_timeline,
774        )
775    }
776
777    pub(crate) fn with_max_subst_depth_and_profile(
778        input: &'a str,
779        max_depth: usize,
780        shell_profile: &ShellProfile,
781        zsh_timeline: Option<Arc<ZshOptionTimeline>>,
782    ) -> Self {
783        Self {
784            input,
785            offset: 0,
786            cursor: Cursor::new(input),
787            position_map: PositionMap::new(input),
788            reinject_buf: VecDeque::new(),
789            reinject_resume_offset: None,
790            max_subst_depth: max_depth,
791            initial_zsh_options: shell_profile.zsh_options().cloned(),
792            zsh_timeline,
793            zsh_timeline_index: 0,
794            #[cfg(feature = "benchmarking")]
795            benchmark_counters: None,
796        }
797    }
798
799    pub(super) fn position_at_offset(&self, offset: usize) -> Position {
800        self.position_map.position_uncached(offset)
801    }
802
803    fn current_position(&mut self) -> Position {
804        #[cfg(feature = "benchmarking")]
805        self.maybe_record_current_position_call();
806        self.position_map.position(self.offset)
807    }
808
809    #[cfg(feature = "benchmarking")]
810    pub(crate) fn enable_benchmark_counters(&mut self) {
811        self.benchmark_counters = Some(LexerBenchmarkCounters::default());
812    }
813
814    #[cfg(feature = "benchmarking")]
815    pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
816        self.benchmark_counters.unwrap_or_default()
817    }
818
819    #[cfg(feature = "benchmarking")]
820    fn maybe_record_current_position_call(&mut self) {
821        if let Some(counters) = &mut self.benchmark_counters {
822            counters.current_position_calls += 1;
823        }
824    }
825
826    fn sync_offset_to_cursor(&mut self) {
827        if self.reinject_buf.is_empty()
828            && let Some(offset) = self.reinject_resume_offset.take()
829        {
830            self.offset = offset;
831        }
832    }
833
834    /// Get the next token kind from the input.
835    ///
836    /// This skips whitespace and line comments, matching
837    /// [`Lexer::next_lexed_token`]. It is useful for callers that only need the
838    /// token stream shape.
839    pub fn next_token_kind(&mut self) -> Option<TokenKind> {
840        self.next_lexed_token().map(|token| token.kind)
841    }
842
843    fn peek_char(&mut self) -> Option<char> {
844        self.sync_offset_to_cursor();
845        if let Some(&ch) = self.reinject_buf.front() {
846            Some(ch)
847        } else {
848            self.cursor.first()
849        }
850    }
851
852    fn advance(&mut self) -> Option<char> {
853        self.sync_offset_to_cursor();
854        let ch = if !self.reinject_buf.is_empty() {
855            self.reinject_buf.pop_front()
856        } else {
857            self.cursor.bump()
858        };
859        if let Some(c) = ch {
860            self.offset += c.len_utf8();
861        }
862        ch
863    }
864
865    fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
866        self.reinject_buf
867            .iter()
868            .copied()
869            .chain(self.cursor.rest().chars())
870    }
871
872    fn second_char(&self) -> Option<char> {
873        match self.reinject_buf.len() {
874            0 => self.cursor.second(),
875            1 => self.cursor.first(),
876            _ => self.reinject_buf.get(1).copied(),
877        }
878    }
879
880    fn third_char(&self) -> Option<char> {
881        match self.reinject_buf.len() {
882            0 => self.cursor.third(),
883            1 => self.cursor.second(),
884            2 => self.cursor.first(),
885            _ => self.reinject_buf.get(2).copied(),
886        }
887    }
888
889    fn fourth_char(&self) -> Option<char> {
890        match self.reinject_buf.len() {
891            0 => self.cursor.rest().chars().nth(3),
892            1 => self.cursor.third(),
893            2 => self.cursor.second(),
894            3 => self.cursor.first(),
895            _ => self.reinject_buf.get(3).copied(),
896        }
897    }
898
899    fn consume_source_bytes(&mut self, byte_len: usize) {
900        debug_assert!(self.reinject_buf.is_empty());
901        self.sync_offset_to_cursor();
902        self.offset += byte_len;
903        self.cursor.skip_bytes(byte_len);
904    }
905
906    fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
907        debug_assert!(self.reinject_buf.is_empty());
908        self.offset += byte_len;
909    }
910
911    fn consume_ascii_chars(&mut self, count: usize) {
912        if self.reinject_buf.is_empty() {
913            self.consume_source_bytes(count);
914            return;
915        }
916
917        for _ in 0..count {
918            self.advance();
919        }
920    }
921
922    fn source_horizontal_whitespace_len(&self) -> usize {
923        self.cursor
924            .rest()
925            .as_bytes()
926            .iter()
927            .take_while(|byte| matches!(**byte, b' ' | b'\t'))
928            .count()
929    }
930
931    fn source_ascii_plain_word_len(&self) -> usize {
932        self.cursor
933            .rest()
934            .as_bytes()
935            .iter()
936            .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
937            .count()
938    }
939
940    fn find_double_quote_special(source: &str) -> Option<usize> {
941        source
942            .as_bytes()
943            .iter()
944            .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
945    }
946
947    fn ensure_capture_from_source(
948        &self,
949        capture: &mut Option<String>,
950        start: Position,
951        end: Position,
952    ) {
953        if capture.is_none() {
954            *capture = Some(self.input[start.offset..end.offset].to_string());
955        }
956    }
957
958    fn push_capture_char(capture: &mut Option<String>, ch: char) {
959        if let Some(text) = capture.as_mut() {
960            text.push(ch);
961        }
962    }
963
964    fn push_capture_str(capture: &mut Option<String>, text: &str) {
965        if let Some(current) = capture.as_mut() {
966            current.push_str(text);
967        }
968    }
969
970    fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
971        if let Some(timeline) = self.zsh_timeline.as_ref() {
972            while self.zsh_timeline_index < timeline.entries.len()
973                && timeline.entries[self.zsh_timeline_index].offset <= self.offset
974            {
975                self.zsh_timeline_index += 1;
976            }
977            return if self.zsh_timeline_index == 0 {
978                self.initial_zsh_options.as_ref()
979            } else {
980                Some(&timeline.entries[self.zsh_timeline_index - 1].state)
981            };
982        }
983
984        self.initial_zsh_options.as_ref()
985    }
986
987    fn comments_enabled(&mut self) -> bool {
988        !self
989            .current_zsh_options()
990            .is_some_and(|options| options.interactive_comments.is_definitely_off())
991    }
992
993    fn rc_quotes_enabled(&mut self) -> bool {
994        self.current_zsh_options()
995            .is_some_and(|options| options.rc_quotes.is_definitely_on())
996    }
997
998    fn ignore_braces_enabled(&mut self) -> bool {
999        self.current_zsh_options()
1000            .is_some_and(|options| options.ignore_braces.is_definitely_on())
1001    }
1002
1003    fn ignore_close_braces_enabled(&mut self) -> bool {
1004        self.current_zsh_options().is_some_and(|options| {
1005            options.ignore_braces.is_definitely_on()
1006                || options.ignore_close_braces.is_definitely_on()
1007        })
1008    }
1009
1010    fn brace_ccl_enabled(&mut self) -> bool {
1011        self.current_zsh_options()
1012            .is_some_and(|options| options.brace_ccl.is_definitely_on())
1013    }
1014
1015    fn should_treat_hash_as_word_char(&mut self) -> bool {
1016        if !self.comments_enabled() {
1017            return true;
1018        }
1019        self.reinject_buf.is_empty()
1020            && (self
1021                .input
1022                .get(..self.offset)
1023                .and_then(|prefix| prefix.chars().next_back())
1024                .is_some_and(|prev| {
1025                    !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
1026                })
1027                || self.is_inside_unclosed_double_paren_on_line())
1028    }
1029
1030    fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
1031        capture
1032            .as_deref()
1033            .unwrap_or(&self.input[start.offset..self.offset])
1034    }
1035
1036    fn current_word_surface_is_single_char(
1037        &self,
1038        start: Position,
1039        capture: &Option<String>,
1040        target: char,
1041    ) -> bool {
1042        let text = self.current_word_text(start, capture);
1043        if !text.contains('\x00') {
1044            let mut encoded = [0; 4];
1045            return text == target.encode_utf8(&mut encoded);
1046        }
1047
1048        let mut chars = text.chars().filter(|&ch| ch != '\x00');
1049        matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
1050    }
1051
1052    fn current_word_surface_last_char<'b>(
1053        &'b self,
1054        start: Position,
1055        capture: &'b Option<String>,
1056    ) -> Option<char> {
1057        self.current_word_text(start, capture)
1058            .chars()
1059            .rev()
1060            .find(|&ch| ch != '\x00')
1061    }
1062
1063    fn current_word_surface_ends_with_char(
1064        &self,
1065        start: Position,
1066        capture: &Option<String>,
1067        target: char,
1068    ) -> bool {
1069        self.current_word_surface_last_char(start, capture) == Some(target)
1070    }
1071
1072    fn current_word_surface_ends_with_extglob_prefix(
1073        &self,
1074        start: Position,
1075        capture: &Option<String>,
1076    ) -> bool {
1077        self.current_word_surface_last_char(start, capture)
1078            .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
1079    }
1080
1081    fn current_word_surface_can_take_zsh_glob_modifier_suffix(
1082        &mut self,
1083        start: Position,
1084        capture: &Option<String>,
1085    ) -> bool {
1086        if self.current_zsh_options().is_none() || self.peek_char() != Some('(') {
1087            return false;
1088        }
1089
1090        let text = self.current_word_text(start, capture);
1091        if !text.contains('/') {
1092            return false;
1093        }
1094
1095        let mut chars = self.lookahead_chars();
1096        matches!((chars.next(), chars.next()), (Some('('), Some(':')))
1097    }
1098
1099    /// Get the next source-backed token from the input, skipping line comments.
1100    ///
1101    /// Returned tokens expose their [`TokenKind`] and source [`Span`]. Comments
1102    /// are omitted from this public stream; the parser uses an internal variant
1103    /// when it needs to preserve them for AST attachment.
1104    pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
1105        self.skip_whitespace();
1106        let start = self.current_position();
1107        let token = self.next_lexed_token_inner(false)?;
1108        let end = self.current_position();
1109        Some(token.with_span(Span::from_positions(start, end)))
1110    }
1111
1112    /// Get the next source-backed token from the input, preserving line comments.
1113    pub(super) fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
1114        self.skip_whitespace();
1115        let start = self.current_position();
1116        let token = self.next_lexed_token_inner(true)?;
1117        let end = self.current_position();
1118        Some(token.with_span(Span::from_positions(start, end)))
1119    }
1120
1121    /// Internal: get next token without recording position (called after whitespace skip)
1122    fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
1123        let ch = self.peek_char()?;
1124
1125        match ch {
1126            '\n' => {
1127                self.consume_ascii_chars(1);
1128                Some(LexedToken::punctuation(TokenKind::Newline))
1129            }
1130            ';' => {
1131                if self.second_char() == Some(';') {
1132                    if self.third_char() == Some('&') {
1133                        self.consume_ascii_chars(3);
1134                        Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp)) // ;;&
1135                    } else {
1136                        self.consume_ascii_chars(2);
1137                        Some(LexedToken::punctuation(TokenKind::DoubleSemicolon)) // ;;
1138                    }
1139                } else if self.second_char() == Some('|') {
1140                    self.consume_ascii_chars(2);
1141                    Some(LexedToken::punctuation(TokenKind::SemiPipe)) // ;|
1142                } else if self.second_char() == Some('&') {
1143                    self.consume_ascii_chars(2);
1144                    Some(LexedToken::punctuation(TokenKind::SemiAmp)) // ;&
1145                } else {
1146                    self.consume_ascii_chars(1);
1147                    Some(LexedToken::punctuation(TokenKind::Semicolon))
1148                }
1149            }
1150            '|' => {
1151                if self.second_char() == Some('|') {
1152                    self.consume_ascii_chars(2);
1153                    Some(LexedToken::punctuation(TokenKind::Or))
1154                } else if self.second_char() == Some('&') {
1155                    self.consume_ascii_chars(2);
1156                    Some(LexedToken::punctuation(TokenKind::PipeBoth))
1157                } else {
1158                    self.consume_ascii_chars(1);
1159                    Some(LexedToken::punctuation(TokenKind::Pipe))
1160                }
1161            }
1162            '&' => {
1163                if self.second_char() == Some('&') {
1164                    self.consume_ascii_chars(2);
1165                    Some(LexedToken::punctuation(TokenKind::And))
1166                } else if self.second_char() == Some('>') {
1167                    if self.third_char() == Some('>') {
1168                        self.consume_ascii_chars(3);
1169                        Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
1170                    } else {
1171                        self.consume_ascii_chars(2);
1172                        Some(LexedToken::punctuation(TokenKind::RedirectBoth))
1173                    }
1174                } else if self.second_char() == Some('|') {
1175                    self.consume_ascii_chars(2);
1176                    Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
1177                } else if self.second_char() == Some('!') {
1178                    self.consume_ascii_chars(2);
1179                    Some(LexedToken::punctuation(TokenKind::BackgroundBang))
1180                } else {
1181                    self.consume_ascii_chars(1);
1182                    Some(LexedToken::punctuation(TokenKind::Background))
1183                }
1184            }
1185            '>' => {
1186                if self.second_char() == Some('>') {
1187                    if self.third_char() == Some('|') {
1188                        self.consume_ascii_chars(3);
1189                    } else {
1190                        self.consume_ascii_chars(2);
1191                    }
1192                    Some(LexedToken::punctuation(TokenKind::RedirectAppend))
1193                } else if self.second_char() == Some('|') {
1194                    self.consume_ascii_chars(2);
1195                    Some(LexedToken::punctuation(TokenKind::Clobber))
1196                } else if self.second_char() == Some('(') {
1197                    self.consume_ascii_chars(2);
1198                    Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
1199                } else if self.second_char() == Some('&') {
1200                    self.consume_ascii_chars(2);
1201                    Some(LexedToken::punctuation(TokenKind::DupOutput))
1202                } else {
1203                    self.consume_ascii_chars(1);
1204                    Some(LexedToken::punctuation(TokenKind::RedirectOut))
1205                }
1206            }
1207            '<' => {
1208                if self.second_char() == Some('<') {
1209                    if self.third_char() == Some('<') {
1210                        self.consume_ascii_chars(3);
1211                        Some(LexedToken::punctuation(TokenKind::HereString))
1212                    } else if self.third_char() == Some('-') {
1213                        self.consume_ascii_chars(3);
1214                        Some(LexedToken::punctuation(TokenKind::HereDocStrip))
1215                    } else {
1216                        self.consume_ascii_chars(2);
1217                        Some(LexedToken::punctuation(TokenKind::HereDoc))
1218                    }
1219                } else if self.second_char() == Some('>') {
1220                    self.consume_ascii_chars(2);
1221                    Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
1222                } else if self.second_char() == Some('(') {
1223                    self.consume_ascii_chars(2);
1224                    Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
1225                } else if self.second_char() == Some('&') {
1226                    self.consume_ascii_chars(2);
1227                    Some(LexedToken::punctuation(TokenKind::DupInput))
1228                } else {
1229                    self.consume_ascii_chars(1);
1230                    Some(LexedToken::punctuation(TokenKind::RedirectIn))
1231                }
1232            }
1233            '(' => {
1234                if self.second_char() == Some('(') {
1235                    self.consume_ascii_chars(2);
1236                    Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
1237                } else {
1238                    self.consume_ascii_chars(1);
1239                    Some(LexedToken::punctuation(TokenKind::LeftParen))
1240                }
1241            }
1242            ')' => {
1243                if self.second_char() == Some(')') {
1244                    self.consume_ascii_chars(2);
1245                    Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
1246                } else {
1247                    self.consume_ascii_chars(1);
1248                    Some(LexedToken::punctuation(TokenKind::RightParen))
1249                }
1250            }
1251            '{' => {
1252                let start = self.current_position();
1253                if self.ignore_braces_enabled() {
1254                    self.consume_ascii_chars(1);
1255                    match self.peek_char() {
1256                        Some(' ') | Some('\t') | Some('\n') | None => {
1257                            Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
1258                        }
1259                        _ => self.read_word_starting_with("{", start),
1260                    }
1261                } else if self.looks_like_brace_expansion() {
1262                    // Look ahead to see if this is a brace expansion like {a,b,c} or {1..5}
1263                    // vs a brace group like { cmd; }
1264                    // Note: { must be followed by space/newline to be a brace group
1265                    self.read_brace_expansion_word()
1266                } else if self.is_brace_group_start() {
1267                    self.advance();
1268                    Some(LexedToken::punctuation(TokenKind::LeftBrace))
1269                } else if self.brace_literal_starts_case_pattern_delimiter() {
1270                    self.read_word_starting_with("{", start)
1271                } else {
1272                    self.read_brace_literal_word()
1273                }
1274            }
1275            '}' => {
1276                self.consume_ascii_chars(1);
1277                if self.ignore_close_braces_enabled() {
1278                    Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
1279                } else {
1280                    Some(LexedToken::punctuation(TokenKind::RightBrace))
1281                }
1282            }
1283            '[' => {
1284                let start = self.current_position();
1285                self.consume_ascii_chars(1);
1286                if self.peek_char() == Some('[')
1287                    && matches!(
1288                        self.second_char(),
1289                        Some(' ') | Some('\t') | Some('\n') | None
1290                    )
1291                {
1292                    self.consume_ascii_chars(1);
1293                    Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
1294                } else {
1295                    // `[` can start the test command when followed by whitespace, or it can be
1296                    // ordinary word text such as a glob bracket expression.
1297                    //
1298                    // Read the whole token with the normal word scanner so forms like `[[z]`,
1299                    // `[hello"]"`, and `[+(])` stay attached to one word instead of producing
1300                    // structural tokens mid-word.
1301                    match self.peek_char() {
1302                        Some(' ') | Some('\t') | Some('\n') | None => {
1303                            Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
1304                        }
1305                        _ => self.read_word_starting_with("[", start),
1306                    }
1307                }
1308            }
1309            ']' => {
1310                if self.second_char() == Some(']') {
1311                    self.consume_ascii_chars(2);
1312                    Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
1313                } else {
1314                    self.consume_ascii_chars(1);
1315                    Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
1316                }
1317            }
1318            '\'' => self.read_single_quoted_string(),
1319            '"' => self.read_double_quoted_string(),
1320            '#' => {
1321                if self.should_treat_hash_as_word_char() {
1322                    let start = self.current_position();
1323                    return self.read_word_starting_with("#", start);
1324                }
1325                if preserve_comments {
1326                    self.read_comment();
1327                    Some(LexedToken::comment())
1328                } else {
1329                    self.skip_comment();
1330                    self.next_lexed_token_inner(false)
1331                }
1332            }
1333            // Handle file descriptor redirects like 2> or 2>&1
1334            '0'..='9' => self.read_word_or_fd_redirect(),
1335            _ => self.read_word(),
1336        }
1337    }
1338
1339    fn skip_whitespace(&mut self) {
1340        while let Some(ch) = self.peek_char() {
1341            if self.reinject_buf.is_empty() {
1342                let whitespace_len = self.source_horizontal_whitespace_len();
1343                if whitespace_len > 0 {
1344                    self.consume_source_bytes(whitespace_len);
1345                    continue;
1346                }
1347
1348                if self.cursor.rest().starts_with("\\\n") {
1349                    self.consume_source_bytes(2);
1350                    continue;
1351                }
1352            }
1353
1354            if ch == ' ' || ch == '\t' {
1355                self.consume_ascii_chars(1);
1356            } else if ch == '\\' {
1357                // Check for backslash-newline (line continuation) between tokens
1358                if self.second_char() == Some('\n') {
1359                    self.consume_ascii_chars(2);
1360                } else {
1361                    break;
1362                }
1363            } else {
1364                break;
1365            }
1366        }
1367    }
1368
1369    fn skip_comment(&mut self) {
1370        if self.reinject_buf.is_empty() {
1371            let end = self
1372                .cursor
1373                .find_byte(b'\n')
1374                .unwrap_or(self.cursor.rest().len());
1375            self.consume_source_bytes(end);
1376            return;
1377        }
1378
1379        while let Some(ch) = self.peek_char() {
1380            if ch == '\n' {
1381                break;
1382            }
1383            self.advance();
1384        }
1385    }
1386
1387    fn read_comment(&mut self) {
1388        debug_assert_eq!(self.peek_char(), Some('#'));
1389
1390        if self.reinject_buf.is_empty() {
1391            let rest = self.cursor.rest();
1392            let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1393            self.consume_source_bytes(end);
1394            return;
1395        }
1396
1397        self.advance(); // consume '#'
1398
1399        while let Some(ch) = self.peek_char() {
1400            if ch == '\n' {
1401                break;
1402            }
1403            self.advance();
1404        }
1405    }
1406
1407    fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1408        if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1409            return false;
1410        }
1411
1412        let line_start = self.input[..self.offset]
1413            .rfind('\n')
1414            .map_or(0, |index| index + 1);
1415        let prefix = &self.input[line_start..self.offset];
1416        line_has_unclosed_double_paren(prefix)
1417    }
1418
1419    /// Check if this is a file descriptor redirect (e.g., 2>, 2>>, 2>&1)
1420    /// or just a regular word starting with a digit
1421    fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
1422        if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
1423            let Some(fd) = first_digit.to_digit(10) else {
1424                unreachable!("peeked ASCII digit should convert to a base-10 digit");
1425            };
1426            let fd = fd as i32;
1427
1428            match (self.second_char(), self.third_char()) {
1429                (Some('>'), Some('>')) => {
1430                    if self.fourth_char() == Some('|') {
1431                        self.consume_ascii_chars(4);
1432                    } else {
1433                        self.consume_ascii_chars(3);
1434                    }
1435                    return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
1436                }
1437                (Some('>'), Some('|')) => {
1438                    self.consume_ascii_chars(3);
1439                    return Some(LexedToken::fd(TokenKind::Clobber, fd));
1440                }
1441                (Some('>'), Some('&')) => {
1442                    self.consume_ascii_chars(3);
1443
1444                    let mut target_str = String::with_capacity(4);
1445                    while let Some(c) = self.peek_char() {
1446                        if c.is_ascii_digit() {
1447                            target_str.push(c);
1448                            self.advance();
1449                        } else {
1450                            break;
1451                        }
1452                    }
1453
1454                    if target_str.is_empty() {
1455                        return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1456                    }
1457
1458                    let target_fd: i32 = target_str.parse().unwrap_or(1);
1459                    return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
1460                }
1461                (Some('>'), _) => {
1462                    self.consume_ascii_chars(2);
1463                    return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1464                }
1465                (Some('<'), Some('&')) => {
1466                    self.consume_ascii_chars(3);
1467
1468                    let mut target_str = String::with_capacity(4);
1469                    while let Some(c) = self.peek_char() {
1470                        if c.is_ascii_digit() || c == '-' {
1471                            target_str.push(c);
1472                            self.advance();
1473                            if c == '-' {
1474                                break;
1475                            }
1476                        } else {
1477                            break;
1478                        }
1479                    }
1480
1481                    if target_str == "-" {
1482                        return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
1483                    }
1484                    let target_fd: i32 = target_str.parse().unwrap_or(0);
1485                    return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
1486                }
1487                (Some('<'), Some('>')) => {
1488                    self.consume_ascii_chars(3);
1489                    return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
1490                }
1491                (Some('<'), Some('<')) => {}
1492                (Some('<'), _) => {
1493                    self.consume_ascii_chars(2);
1494                    return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
1495                }
1496                _ => {}
1497            }
1498        }
1499
1500        // Not a fd redirect pattern, read as regular word
1501        self.read_word()
1502    }
1503
1504    fn read_word_starting_with(
1505        &mut self,
1506        _prefix: &str,
1507        start: Position,
1508    ) -> Option<LexedToken<'a>> {
1509        let segment = match self.read_unquoted_segment(start) {
1510            Ok(segment) => segment,
1511            Err(kind) => return Some(LexedToken::error(kind)),
1512        };
1513        if segment.as_str().is_empty() {
1514            return None;
1515        }
1516        let mut lexed_word = LexedWord::from_segment(segment);
1517        if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1518            return Some(LexedToken::error(kind));
1519        }
1520        Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1521    }
1522
1523    fn read_word(&mut self) -> Option<LexedToken<'a>> {
1524        let start = self.current_position();
1525
1526        if self.reinject_buf.is_empty() {
1527            let ascii_len = self.source_ascii_plain_word_len();
1528            let chunk = if ascii_len > 0
1529                && self
1530                    .cursor
1531                    .rest()
1532                    .as_bytes()
1533                    .get(ascii_len)
1534                    .is_none_or(|byte| byte.is_ascii())
1535            {
1536                self.consume_source_bytes(ascii_len);
1537                &self.input[start.offset..self.offset]
1538            } else {
1539                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1540                self.advance_scanned_source_bytes(chunk.len());
1541                chunk
1542            };
1543            if !chunk.is_empty() {
1544                let continues = matches!(
1545                    self.peek_char(),
1546                    Some(next)
1547                        if Self::is_word_char(next)
1548                            || next == '$'
1549                            || matches!(next, '\'' | '"')
1550                            || next == '{'
1551                            || (next == '\\' && self.second_char() == Some('\n'))
1552                            || (next == '('
1553                                && (chunk.ends_with('=')
1554                                    || Self::word_can_take_parenthesized_suffix(chunk)))
1555                );
1556                let continues = continues
1557                    || (self.peek_char() == Some('(')
1558                        && (self.looks_like_zsh_alternative_glob_suffix(chunk)
1559                            || self.looks_like_zsh_glob_modifier_suffix(chunk)));
1560
1561                if !continues {
1562                    let end = self.current_position();
1563                    return Some(LexedToken::borrowed_word(
1564                        TokenKind::Word,
1565                        &self.input[start.offset..self.offset],
1566                        Some(Span::from_positions(start, end)),
1567                    ));
1568                }
1569
1570                if self.peek_char() == Some('(')
1571                    && (chunk.ends_with('=')
1572                        || Self::word_can_take_parenthesized_suffix(chunk)
1573                        || self.looks_like_zsh_alternative_glob_suffix(chunk)
1574                        || self.looks_like_zsh_glob_modifier_suffix(chunk))
1575                {
1576                    return self.read_complex_word(start);
1577                }
1578
1579                let end = self.current_position();
1580                return self.finish_segmented_word(LexedWord::borrowed(
1581                    LexedWordSegmentKind::Plain,
1582                    &self.input[start.offset..self.offset],
1583                    Some(Span::from_positions(start, end)),
1584                ));
1585            }
1586        }
1587
1588        self.read_complex_word(start)
1589    }
1590
1591    fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1592        if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1593            return Some(LexedToken::error(kind));
1594        }
1595
1596        Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1597    }
1598
1599    fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1600        if self.peek_char() == Some('$') {
1601            match self.second_char() {
1602                Some('\'') => return self.read_dollar_single_quoted_string(),
1603                Some('"') => return self.read_dollar_double_quoted_string(),
1604                _ => {}
1605            }
1606        }
1607
1608        let segment = match self.read_unquoted_segment(start) {
1609            Ok(segment) => segment,
1610            Err(kind) => return Some(LexedToken::error(kind)),
1611        };
1612
1613        if segment.as_str().is_empty() {
1614            return None;
1615        }
1616
1617        self.finish_segmented_word(LexedWord::from_segment(segment))
1618    }
1619
1620    fn read_unquoted_segment(
1621        &mut self,
1622        start: Position,
1623    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1624        let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
1625        while let Some(ch) = self.peek_char() {
1626            if ch == '"' || ch == '\'' {
1627                break;
1628            } else if ch == '$' {
1629                if matches!(self.second_char(), Some('\'') | Some('"'))
1630                    && (self.current_position().offset > start.offset
1631                        || word.as_ref().is_some_and(|word| !word.is_empty()))
1632                {
1633                    break;
1634                }
1635
1636                // Handle variable references and command substitution
1637                self.advance();
1638
1639                Self::push_capture_char(&mut word, ch); // push the '$'
1640
1641                // Check for $[ / $( / ${ forms before falling back to variable text.
1642                if self.peek_char() == Some('[') {
1643                    Self::push_capture_char(&mut word, '[');
1644                    self.advance();
1645                    if !self.read_legacy_arithmetic_into(&mut word, start) {
1646                        return Err(LexerErrorKind::CommandSubstitution);
1647                    }
1648                } else if self.peek_char() == Some('(') {
1649                    if self.second_char() == Some('(') {
1650                        if !self.read_arithmetic_expansion_into(&mut word) {
1651                            return Err(LexerErrorKind::CommandSubstitution);
1652                        }
1653                    } else {
1654                        Self::push_capture_char(&mut word, '(');
1655                        self.advance();
1656                        if !self.read_command_subst_into(&mut word) {
1657                            return Err(LexerErrorKind::CommandSubstitution);
1658                        }
1659                    }
1660                } else if self.peek_char() == Some('{') {
1661                    // ${VAR} format — track nested braces so ${a[${#b[@]}]}
1662                    // doesn't stop at the inner }.
1663                    Self::push_capture_char(&mut word, '{');
1664                    self.advance();
1665                    let _ = self.read_param_expansion_into(&mut word, start);
1666                } else {
1667                    // Check for special single-character variables ($?, $#, $@, $*, $!, $$, $-, $0-$9)
1668                    if let Some(c) = self.peek_char() {
1669                        if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
1670                            || c.is_ascii_digit()
1671                        {
1672                            Self::push_capture_char(&mut word, c);
1673                            self.advance();
1674                        } else {
1675                            // Read variable name (alphanumeric + _)
1676                            while let Some(c) = self.peek_char() {
1677                                if c.is_ascii_alphanumeric() || c == '_' {
1678                                    Self::push_capture_char(&mut word, c);
1679                                    self.advance();
1680                                } else {
1681                                    break;
1682                                }
1683                            }
1684                        }
1685                    }
1686                }
1687            } else if ch == '{' {
1688                if self.looks_like_mid_word_brace_segment() {
1689                    // Keep balanced {...} forms attached to the current word so
1690                    // plain literals like foo{bar} and brace expansions stay intact.
1691                    Self::push_capture_char(&mut word, ch);
1692                    self.advance();
1693                    self.consume_mid_word_brace_segment(&mut word);
1694                } else {
1695                    // Unmatched literal braces in regexes like ^{ should not swallow
1696                    // trailing delimiters such as ]] or then.
1697                    Self::push_capture_char(&mut word, ch);
1698                    self.advance();
1699                }
1700            } else if ch == '`' {
1701                // Preserve legacy backticks verbatim so the parser can keep the
1702                // original syntax form.
1703                let capture_end = self.current_position();
1704                self.ensure_capture_from_source(&mut word, start, capture_end);
1705                Self::push_capture_char(&mut word, ch);
1706                self.advance(); // consume opening `
1707                let mut closed = false;
1708                while let Some(c) = self.peek_char() {
1709                    Self::push_capture_char(&mut word, c);
1710                    self.advance();
1711                    if c == '`' {
1712                        closed = true;
1713                        break;
1714                    }
1715                    if c == '\\'
1716                        && let Some(next) = self.peek_char()
1717                    {
1718                        Self::push_capture_char(&mut word, next);
1719                        self.advance();
1720                    }
1721                }
1722                if !closed {
1723                    return Err(LexerErrorKind::BacktickSubstitution);
1724                }
1725            } else if ch == '\\' {
1726                let capture_end = self.current_position();
1727                self.ensure_capture_from_source(&mut word, start, capture_end);
1728                self.advance();
1729                if let Some(next) = self.peek_char() {
1730                    if next == '\n' {
1731                        // Line continuation: skip backslash + newline
1732                        self.advance();
1733                    } else {
1734                        // Escaped character: backslash quotes the next char
1735                        // (quote removal — only the literal char survives).
1736                        // Preserve source/decoded alignment with a sentinel so
1737                        // downstream word decoding keeps later spans anchored.
1738                        Self::push_capture_char(&mut word, '\x00');
1739                        Self::push_capture_char(&mut word, next);
1740                        self.advance();
1741                        if next == '{'
1742                            && self.current_word_surface_is_single_char(start, &word, '{')
1743                            && self.escaped_brace_sequence_looks_like_brace_expansion()
1744                        {
1745                            let mut depth = 1;
1746                            while let Some(c) = self.peek_char() {
1747                                Self::push_capture_char(&mut word, c);
1748                                self.advance();
1749                                match c {
1750                                    '{' => depth += 1,
1751                                    '}' => {
1752                                        depth -= 1;
1753                                        if depth == 0 {
1754                                            break;
1755                                        }
1756                                    }
1757                                    _ => {}
1758                                }
1759                            }
1760                        }
1761                    }
1762                } else {
1763                    Self::push_capture_char(&mut word, '\\');
1764                }
1765            } else if ch == '('
1766                && self.current_word_surface_ends_with_char(start, &word, '=')
1767                && self.looks_like_assoc_assign()
1768            {
1769                // Associative compound assignment: var=([k]="v" ...) — keep entire
1770                // (...) as part of word so declare -A m=([k]="v") stays one token.
1771                Self::push_capture_char(&mut word, ch);
1772                self.advance();
1773                let mut depth = 1;
1774                while let Some(c) = self.peek_char() {
1775                    Self::push_capture_char(&mut word, c);
1776                    self.advance();
1777                    match c {
1778                        '(' => depth += 1,
1779                        ')' => {
1780                            depth -= 1;
1781                            if depth == 0 {
1782                                break;
1783                            }
1784                        }
1785                        '"' => {
1786                            while let Some(qc) = self.peek_char() {
1787                                Self::push_capture_char(&mut word, qc);
1788                                self.advance();
1789                                if qc == '"' {
1790                                    break;
1791                                }
1792                                if qc == '\\'
1793                                    && let Some(esc) = self.peek_char()
1794                                {
1795                                    Self::push_capture_char(&mut word, esc);
1796                                    self.advance();
1797                                }
1798                            }
1799                        }
1800                        '\'' => {
1801                            while let Some(qc) = self.peek_char() {
1802                                Self::push_capture_char(&mut word, qc);
1803                                self.advance();
1804                                if qc == '\'' {
1805                                    break;
1806                                }
1807                            }
1808                        }
1809                        '\\' => {
1810                            if let Some(esc) = self.peek_char() {
1811                                Self::push_capture_char(&mut word, esc);
1812                                self.advance();
1813                            }
1814                        }
1815                        _ => {}
1816                    }
1817                }
1818            } else if ch == '('
1819                && (self.current_word_surface_ends_with_extglob_prefix(start, &word)
1820                    || self.current_word_surface_can_take_zsh_glob_modifier_suffix(start, &word))
1821            {
1822                // Extglob and zsh glob modifiers consume through matching )
1823                // including nested parens.
1824                Self::push_capture_char(&mut word, ch);
1825                self.advance();
1826                let mut depth = 1;
1827                while let Some(c) = self.peek_char() {
1828                    Self::push_capture_char(&mut word, c);
1829                    self.advance();
1830                    match c {
1831                        '(' => depth += 1,
1832                        ')' => {
1833                            depth -= 1;
1834                            if depth == 0 {
1835                                break;
1836                            }
1837                        }
1838                        '\\' => {
1839                            if let Some(esc) = self.peek_char() {
1840                                Self::push_capture_char(&mut word, esc);
1841                                self.advance();
1842                            }
1843                        }
1844                        _ => {}
1845                    }
1846                }
1847            } else if Self::is_plain_word_char(ch) {
1848                if self.reinject_buf.is_empty() {
1849                    let ascii_len = self.source_ascii_plain_word_len();
1850                    let chunk = if ascii_len > 0
1851                        && self
1852                            .cursor
1853                            .rest()
1854                            .as_bytes()
1855                            .get(ascii_len)
1856                            .is_none_or(|byte| byte.is_ascii())
1857                    {
1858                        self.consume_source_bytes(ascii_len);
1859                        &self.input[self.offset - ascii_len..self.offset]
1860                    } else {
1861                        let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1862                        self.advance_scanned_source_bytes(chunk.len());
1863                        chunk
1864                    };
1865                    Self::push_capture_str(&mut word, chunk);
1866                } else {
1867                    Self::push_capture_char(&mut word, ch);
1868                    self.advance();
1869                }
1870            } else {
1871                break;
1872            }
1873        }
1874
1875        if let Some(word) = word {
1876            let span = Some(Span::from_positions(start, self.current_position()));
1877            Ok(LexedWordSegment::owned_with_spans(
1878                LexedWordSegmentKind::Plain,
1879                word,
1880                span,
1881                span,
1882            ))
1883        } else {
1884            let end = self.current_position();
1885            Ok(LexedWordSegment::borrowed(
1886                LexedWordSegmentKind::Plain,
1887                &self.input[start.offset..self.offset],
1888                Some(Span::from_positions(start, end)),
1889            ))
1890        }
1891    }
1892
1893    fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1894        let segment = match self.read_single_quoted_segment() {
1895            Ok(segment) => segment,
1896            Err(kind) => return Some(LexedToken::error(kind)),
1897        };
1898        let mut word = LexedWord::from_segment(segment);
1899        if let Err(kind) = self.append_segmented_continuation(&mut word) {
1900            return Some(LexedToken::error(kind));
1901        }
1902
1903        Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1904    }
1905
1906    fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1907        debug_assert_eq!(self.peek_char(), Some('\''));
1908
1909        let wrapper_start = self.current_position();
1910        self.consume_ascii_chars(1); // consume opening '
1911        let content_start = self.current_position();
1912        let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
1913        let mut content_end = content_start;
1914        let mut content = String::with_capacity(16);
1915        let mut closed = false;
1916
1917        if can_borrow {
1918            let rest = self.cursor.rest();
1919            if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
1920                self.consume_source_bytes(quote_index);
1921                content_end = self.current_position();
1922                self.consume_ascii_chars(1); // consume closing '
1923                closed = true;
1924            } else {
1925                self.consume_source_bytes(rest.len());
1926            }
1927        }
1928
1929        while let Some(ch) = self.peek_char() {
1930            if closed {
1931                break;
1932            }
1933            if ch == '\'' {
1934                if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
1935                    if !can_borrow {
1936                        content.push('\'');
1937                    }
1938                    self.advance();
1939                    self.advance();
1940                    continue;
1941                }
1942                content_end = self.current_position();
1943                self.consume_ascii_chars(1); // consume closing '
1944                closed = true;
1945                break;
1946            }
1947            if !can_borrow {
1948                content.push(ch);
1949            }
1950            self.advance();
1951        }
1952
1953        if !closed {
1954            return Err(LexerErrorKind::SingleQuote);
1955        }
1956
1957        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
1958        let content_span = Some(Span::from_positions(content_start, content_end));
1959
1960        if can_borrow {
1961            Ok(LexedWordSegment::borrowed_with_spans(
1962                LexedWordSegmentKind::SingleQuoted,
1963                &self.input[content_start.offset..content_end.offset],
1964                content_span,
1965                wrapper_span,
1966            ))
1967        } else {
1968            Ok(LexedWordSegment::owned_with_spans(
1969                LexedWordSegmentKind::SingleQuoted,
1970                content,
1971                content_span,
1972                wrapper_span,
1973            ))
1974        }
1975    }
1976
1977    fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1978        let segment = match self.read_dollar_single_quoted_segment() {
1979            Ok(segment) => segment,
1980            Err(kind) => return Some(LexedToken::error(kind)),
1981        };
1982        let mut word = LexedWord::from_segment(segment);
1983        if let Err(kind) = self.append_segmented_continuation(&mut word) {
1984            return Some(LexedToken::error(kind));
1985        }
1986
1987        let kind = if word.single_segment().is_some() {
1988            TokenKind::LiteralWord
1989        } else {
1990            TokenKind::Word
1991        };
1992
1993        Some(LexedToken::with_word_payload(kind, word))
1994    }
1995
1996    fn read_dollar_single_quoted_segment(
1997        &mut self,
1998    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1999        debug_assert_eq!(self.peek_char(), Some('$'));
2000        debug_assert_eq!(self.second_char(), Some('\''));
2001
2002        let wrapper_start = self.current_position();
2003        self.consume_ascii_chars(2); // consume $'
2004        let content_start = self.current_position();
2005        let mut out = String::with_capacity(16);
2006
2007        while let Some(ch) = self.peek_char() {
2008            if ch == '\'' {
2009                let content_end = self.current_position();
2010                self.advance();
2011                let wrapper_span =
2012                    Some(Span::from_positions(wrapper_start, self.current_position()));
2013                let content_span = Some(Span::from_positions(content_start, content_end));
2014                return Ok(LexedWordSegment::owned_with_spans(
2015                    LexedWordSegmentKind::DollarSingleQuoted,
2016                    out,
2017                    content_span,
2018                    wrapper_span,
2019                ));
2020            }
2021
2022            if ch == '\\' {
2023                self.advance();
2024                if let Some(esc) = self.peek_char() {
2025                    self.advance();
2026                    match esc {
2027                        'n' => out.push('\n'),
2028                        't' => out.push('\t'),
2029                        'r' => out.push('\r'),
2030                        'a' => out.push('\x07'),
2031                        'b' => out.push('\x08'),
2032                        'f' => out.push('\x0C'),
2033                        'v' => out.push('\x0B'),
2034                        'e' | 'E' => out.push('\x1B'),
2035                        '\\' => out.push('\\'),
2036                        '\'' => out.push('\''),
2037                        '"' => out.push('"'),
2038                        '?' => out.push('?'),
2039                        'c' => {
2040                            if let Some(control) = self.peek_char() {
2041                                self.advance();
2042                                out.push(((control as u32 & 0x1F) as u8) as char);
2043                            } else {
2044                                out.push('\\');
2045                                out.push('c');
2046                            }
2047                        }
2048                        'x' => {
2049                            let mut hex = String::new();
2050                            for _ in 0..2 {
2051                                if let Some(h) = self.peek_char() {
2052                                    if h.is_ascii_hexdigit() {
2053                                        hex.push(h);
2054                                        self.advance();
2055                                    } else {
2056                                        break;
2057                                    }
2058                                }
2059                            }
2060                            if let Ok(val) = u8::from_str_radix(&hex, 16) {
2061                                out.push(val as char);
2062                            }
2063                        }
2064                        'u' => {
2065                            let mut hex = String::new();
2066                            for _ in 0..4 {
2067                                if let Some(h) = self.peek_char() {
2068                                    if h.is_ascii_hexdigit() {
2069                                        hex.push(h);
2070                                        self.advance();
2071                                    } else {
2072                                        break;
2073                                    }
2074                                }
2075                            }
2076                            if let Ok(val) = u32::from_str_radix(&hex, 16)
2077                                && let Some(c) = char::from_u32(val)
2078                            {
2079                                out.push(c);
2080                            }
2081                        }
2082                        'U' => {
2083                            let mut hex = String::new();
2084                            for _ in 0..8 {
2085                                if let Some(h) = self.peek_char() {
2086                                    if h.is_ascii_hexdigit() {
2087                                        hex.push(h);
2088                                        self.advance();
2089                                    } else {
2090                                        break;
2091                                    }
2092                                }
2093                            }
2094                            if let Ok(val) = u32::from_str_radix(&hex, 16)
2095                                && let Some(c) = char::from_u32(val)
2096                            {
2097                                out.push(c);
2098                            }
2099                        }
2100                        '0'..='7' => {
2101                            let mut oct = String::new();
2102                            oct.push(esc);
2103                            for _ in 0..2 {
2104                                if let Some(o) = self.peek_char() {
2105                                    if o.is_ascii_digit() && o < '8' {
2106                                        oct.push(o);
2107                                        self.advance();
2108                                    } else {
2109                                        break;
2110                                    }
2111                                }
2112                            }
2113                            if let Ok(val) = u8::from_str_radix(&oct, 8) {
2114                                out.push(val as char);
2115                            }
2116                        }
2117                        _ => {
2118                            out.push('\\');
2119                            out.push(esc);
2120                        }
2121                    }
2122                } else {
2123                    out.push('\\');
2124                }
2125                continue;
2126            }
2127
2128            out.push(ch);
2129            self.advance();
2130        }
2131
2132        Err(LexerErrorKind::SingleQuote)
2133    }
2134
2135    fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2136        let start = self.current_position();
2137
2138        if self.reinject_buf.is_empty() {
2139            let ascii_len = self.source_ascii_plain_word_len();
2140            let chunk = if ascii_len > 0
2141                && self
2142                    .cursor
2143                    .rest()
2144                    .as_bytes()
2145                    .get(ascii_len)
2146                    .is_none_or(|byte| byte.is_ascii())
2147            {
2148                self.consume_source_bytes(ascii_len);
2149                &self.input[start.offset..self.offset]
2150            } else {
2151                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
2152                self.advance_scanned_source_bytes(chunk.len());
2153                chunk
2154            };
2155            if chunk.is_empty() {
2156                return None;
2157            }
2158
2159            let end = self.current_position();
2160            return Some(LexedWordSegment::borrowed(
2161                LexedWordSegmentKind::Plain,
2162                &self.input[start.offset..self.offset],
2163                Some(Span::from_positions(start, end)),
2164            ));
2165        }
2166
2167        let ch = self.peek_char()?;
2168        if !Self::is_plain_word_char(ch) {
2169            return None;
2170        }
2171
2172        let mut text = String::with_capacity(16);
2173        while let Some(ch) = self.peek_char() {
2174            if !Self::is_plain_word_char(ch) {
2175                break;
2176            }
2177            text.push(ch);
2178            self.advance();
2179        }
2180
2181        Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
2182    }
2183
2184    /// After a closing quote, read any adjacent quoted or unquoted word chars
2185    /// into `word`. Handles concatenation like `'foo'"bar"baz`.
2186    fn append_segmented_continuation(
2187        &mut self,
2188        word: &mut LexedWord<'a>,
2189    ) -> Result<(), LexerErrorKind> {
2190        loop {
2191            match self.peek_char() {
2192                Some('\\') if self.second_char() == Some('\n') => {
2193                    self.advance();
2194                    self.advance();
2195                    continue;
2196                }
2197                Some('\'') => {
2198                    word.push_segment(self.read_single_quoted_segment()?);
2199                }
2200                Some('"') => {
2201                    word.push_segment(self.read_double_quoted_segment()?);
2202                }
2203                Some('$') if self.second_char() == Some('\'') => {
2204                    word.push_segment(self.read_dollar_single_quoted_segment()?);
2205                }
2206                Some('$') if self.second_char() == Some('"') => {
2207                    word.push_segment(self.read_dollar_double_quoted_segment()?);
2208                }
2209                Some('(')
2210                    if Self::lexed_word_can_take_parenthesized_suffix(word)
2211                        || self.looks_like_zsh_alternative_glob_suffix(&word.joined_text())
2212                        || self.looks_like_zsh_glob_modifier_suffix(&word.joined_text()) =>
2213                {
2214                    let Some(segment) = self.read_parenthesized_word_suffix_segment() else {
2215                        unreachable!("peeked '(' should produce a suffix segment");
2216                    };
2217                    word.push_segment(segment);
2218                }
2219                _ => {
2220                    if let Some(segment) = self.read_plain_continuation_segment() {
2221                        word.push_segment(segment);
2222                        continue;
2223                    }
2224
2225                    let start = self.current_position();
2226                    let plain = self.read_unquoted_segment(start)?;
2227                    if plain.as_str().is_empty() {
2228                        break;
2229                    }
2230                    word.push_segment(plain);
2231                }
2232            }
2233        }
2234
2235        Ok(())
2236    }
2237
2238    fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2239        debug_assert_eq!(self.peek_char(), Some('('));
2240
2241        let start = self.current_position();
2242        let mut depth = 0usize;
2243        let mut escaped = false;
2244        let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2245
2246        while let Some(ch) = self.peek_char() {
2247            if let Some(text) = text.as_mut() {
2248                text.push(ch);
2249            }
2250            self.advance();
2251
2252            if escaped {
2253                escaped = false;
2254                continue;
2255            }
2256
2257            match ch {
2258                '\\' => escaped = true,
2259                '(' => depth += 1,
2260                ')' => {
2261                    depth = depth.saturating_sub(1);
2262                    if depth == 0 {
2263                        break;
2264                    }
2265                }
2266                _ => {}
2267            }
2268        }
2269
2270        let end = self.current_position();
2271        let span = Some(Span::from_positions(start, end));
2272        if let Some(text) = text {
2273            Some(LexedWordSegment::owned_with_spans(
2274                LexedWordSegmentKind::Plain,
2275                text,
2276                span,
2277                span,
2278            ))
2279        } else {
2280            Some(LexedWordSegment::borrowed_with_spans(
2281                LexedWordSegmentKind::Plain,
2282                &self.input[start.offset..end.offset],
2283                span,
2284                span,
2285            ))
2286        }
2287    }
2288
2289    fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2290        self.read_double_quoted_word(false)
2291    }
2292
2293    fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2294        self.read_double_quoted_word(true)
2295    }
2296
2297    fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2298        let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2299            Ok(segment) => segment,
2300            Err(kind) => return Some(LexedToken::error(kind)),
2301        };
2302        let mut word = LexedWord::from_segment(segment);
2303        if let Err(kind) = self.append_segmented_continuation(&mut word) {
2304            return Some(LexedToken::error(kind));
2305        }
2306
2307        let kind = if word.single_segment().is_some() {
2308            TokenKind::QuotedWord
2309        } else {
2310            TokenKind::Word
2311        };
2312
2313        Some(LexedToken::with_word_payload(kind, word))
2314    }
2315
2316    fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2317        self.read_double_quoted_segment_with_dollar(false)
2318    }
2319
2320    fn read_dollar_double_quoted_segment(
2321        &mut self,
2322    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2323        self.read_double_quoted_segment_with_dollar(true)
2324    }
2325
2326    fn read_double_quoted_segment_with_dollar(
2327        &mut self,
2328        dollar: bool,
2329    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2330        if dollar {
2331            debug_assert_eq!(self.peek_char(), Some('$'));
2332            debug_assert_eq!(self.second_char(), Some('"'));
2333        } else {
2334            debug_assert_eq!(self.peek_char(), Some('"'));
2335        }
2336
2337        let wrapper_start = self.current_position();
2338        if dollar {
2339            self.consume_ascii_chars(2); // consume $"
2340        } else {
2341            self.consume_ascii_chars(1); // consume opening "
2342        }
2343        let content_start = self.current_position();
2344        let mut content_end = content_start;
2345        let mut simple = self.reinject_buf.is_empty();
2346        let mut borrowable = self.reinject_buf.is_empty();
2347        let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2348        let mut closed = false;
2349
2350        while let Some(ch) = self.peek_char() {
2351            if simple {
2352                if self.reinject_buf.is_empty() {
2353                    let rest = self.cursor.rest();
2354                    match Self::find_double_quote_special(rest) {
2355                        Some(index) if index > 0 => {
2356                            self.consume_source_bytes(index);
2357                            continue;
2358                        }
2359                        None => {
2360                            self.consume_source_bytes(rest.len());
2361                            return Err(LexerErrorKind::DoubleQuote);
2362                        }
2363                        _ => {}
2364                    }
2365                }
2366
2367                match ch {
2368                    '"' => {
2369                        content_end = self.current_position();
2370                        self.consume_ascii_chars(1); // consume closing "
2371                        closed = true;
2372                        break;
2373                    }
2374                    '\\' | '$' | '`' => {
2375                        simple = false;
2376                        if ch == '`' {
2377                            borrowable = false;
2378                            let capture_end = self.current_position();
2379                            self.ensure_capture_from_source(
2380                                &mut content,
2381                                content_start,
2382                                capture_end,
2383                            );
2384                        }
2385                    }
2386                    _ => {
2387                        self.advance();
2388                    }
2389                }
2390                if simple {
2391                    continue;
2392                }
2393            }
2394
2395            match ch {
2396                '"' => {
2397                    if borrowable {
2398                        content_end = self.current_position();
2399                    }
2400                    self.consume_ascii_chars(1); // consume closing "
2401                    closed = true;
2402                    break;
2403                }
2404                '\\' => {
2405                    let escape_start = self.current_position();
2406                    self.advance();
2407                    if let Some(next) = self.peek_char() {
2408                        match next {
2409                            '\n' => {
2410                                borrowable = false;
2411                                self.ensure_capture_from_source(
2412                                    &mut content,
2413                                    content_start,
2414                                    escape_start,
2415                                );
2416                                self.advance();
2417                            }
2418                            '$' => {
2419                                borrowable = false;
2420                                self.ensure_capture_from_source(
2421                                    &mut content,
2422                                    content_start,
2423                                    escape_start,
2424                                );
2425                                Self::push_capture_char(&mut content, '\x00');
2426                                Self::push_capture_char(&mut content, '$');
2427                                self.advance();
2428                            }
2429                            '"' | '\\' | '`' => {
2430                                borrowable = false;
2431                                self.ensure_capture_from_source(
2432                                    &mut content,
2433                                    content_start,
2434                                    escape_start,
2435                                );
2436                                if next == '\\' {
2437                                    Self::push_capture_char(&mut content, '\x00');
2438                                }
2439                                if next == '`' {
2440                                    Self::push_capture_char(&mut content, '\x00');
2441                                }
2442                                Self::push_capture_char(&mut content, next);
2443                                self.advance();
2444                                content_end = self.current_position();
2445                            }
2446                            _ => {
2447                                Self::push_capture_char(&mut content, '\\');
2448                                Self::push_capture_char(&mut content, next);
2449                                self.advance();
2450                                content_end = self.current_position();
2451                            }
2452                        }
2453                    }
2454                }
2455                '$' => {
2456                    Self::push_capture_char(&mut content, '$');
2457                    self.advance();
2458                    if self.peek_char() == Some('(') {
2459                        if self.second_char() == Some('(') {
2460                            self.read_arithmetic_expansion_into(&mut content);
2461                        } else {
2462                            Self::push_capture_char(&mut content, '(');
2463                            self.advance();
2464                            self.read_command_subst_into(&mut content);
2465                        }
2466                    } else if self.peek_char() == Some('{') {
2467                        Self::push_capture_char(&mut content, '{');
2468                        self.advance();
2469                        borrowable &= self.read_param_expansion_into(&mut content, content_start);
2470                    }
2471                    content_end = self.current_position();
2472                }
2473                '`' => {
2474                    borrowable = false;
2475                    let capture_end = self.current_position();
2476                    self.ensure_capture_from_source(&mut content, content_start, capture_end);
2477                    Self::push_capture_char(&mut content, '`');
2478                    self.advance(); // consume opening `
2479                    while let Some(c) = self.peek_char() {
2480                        Self::push_capture_char(&mut content, c);
2481                        self.advance();
2482                        if c == '`' {
2483                            break;
2484                        }
2485                        if c == '\\'
2486                            && let Some(next) = self.peek_char()
2487                        {
2488                            Self::push_capture_char(&mut content, next);
2489                            self.advance();
2490                        }
2491                    }
2492                    content_end = self.current_position();
2493                }
2494                _ => {
2495                    Self::push_capture_char(&mut content, ch);
2496                    self.advance();
2497                    content_end = self.current_position();
2498                }
2499            }
2500        }
2501
2502        if !closed {
2503            return Err(LexerErrorKind::DoubleQuote);
2504        }
2505
2506        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
2507        let content_span = Some(Span::from_positions(content_start, content_end));
2508
2509        if borrowable {
2510            Ok(LexedWordSegment::borrowed_with_spans(
2511                if dollar {
2512                    LexedWordSegmentKind::DollarDoubleQuoted
2513                } else {
2514                    LexedWordSegmentKind::DoubleQuoted
2515                },
2516                &self.input[content_start.offset..content_end.offset],
2517                content_span,
2518                wrapper_span,
2519            ))
2520        } else {
2521            Ok(LexedWordSegment::owned_with_spans(
2522                if dollar {
2523                    LexedWordSegmentKind::DollarDoubleQuoted
2524                } else {
2525                    LexedWordSegmentKind::DoubleQuoted
2526                },
2527                content.unwrap_or_default(),
2528                content_span,
2529                wrapper_span,
2530            ))
2531        }
2532    }
2533
2534    fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
2535        debug_assert_eq!(self.peek_char(), Some('('));
2536        debug_assert_eq!(self.second_char(), Some('('));
2537
2538        Self::push_capture_char(content, '(');
2539        self.advance();
2540        Self::push_capture_char(content, '(');
2541        self.advance();
2542
2543        let mut depth = 2;
2544        while let Some(c) = self.peek_char() {
2545            match c {
2546                '\\' => {
2547                    Self::push_capture_char(content, c);
2548                    self.advance();
2549                    if let Some(next) = self.peek_char() {
2550                        Self::push_capture_char(content, next);
2551                        self.advance();
2552                    }
2553                }
2554                '\'' => {
2555                    Self::push_capture_char(content, c);
2556                    self.advance();
2557                    while let Some(quoted) = self.peek_char() {
2558                        Self::push_capture_char(content, quoted);
2559                        self.advance();
2560                        if quoted == '\'' {
2561                            break;
2562                        }
2563                    }
2564                }
2565                '"' => {
2566                    let mut escaped = false;
2567                    Self::push_capture_char(content, c);
2568                    self.advance();
2569                    while let Some(quoted) = self.peek_char() {
2570                        Self::push_capture_char(content, quoted);
2571                        self.advance();
2572                        if escaped {
2573                            escaped = false;
2574                            continue;
2575                        }
2576                        match quoted {
2577                            '\\' => escaped = true,
2578                            '"' => break,
2579                            _ => {}
2580                        }
2581                    }
2582                }
2583                '`' => {
2584                    let mut escaped = false;
2585                    Self::push_capture_char(content, c);
2586                    self.advance();
2587                    while let Some(quoted) = self.peek_char() {
2588                        Self::push_capture_char(content, quoted);
2589                        self.advance();
2590                        if escaped {
2591                            escaped = false;
2592                            continue;
2593                        }
2594                        match quoted {
2595                            '\\' => escaped = true,
2596                            '`' => break,
2597                            _ => {}
2598                        }
2599                    }
2600                }
2601                '(' => {
2602                    Self::push_capture_char(content, c);
2603                    self.advance();
2604                    depth += 1;
2605                }
2606                ')' => {
2607                    Self::push_capture_char(content, c);
2608                    self.advance();
2609                    depth -= 1;
2610                    if depth == 0 {
2611                        return true;
2612                    }
2613                }
2614                _ => {
2615                    Self::push_capture_char(content, c);
2616                    self.advance();
2617                }
2618            }
2619        }
2620
2621        false
2622    }
2623
2624    fn read_legacy_arithmetic_into(
2625        &mut self,
2626        content: &mut Option<String>,
2627        segment_start: Position,
2628    ) -> bool {
2629        let mut bracket_depth = 1;
2630
2631        while let Some(c) = self.peek_char() {
2632            match c {
2633                '\\' => {
2634                    Self::push_capture_char(content, c);
2635                    self.advance();
2636                    if let Some(next) = self.peek_char() {
2637                        Self::push_capture_char(content, next);
2638                        self.advance();
2639                    }
2640                }
2641                '\'' => {
2642                    Self::push_capture_char(content, c);
2643                    self.advance();
2644                    while let Some(quoted) = self.peek_char() {
2645                        Self::push_capture_char(content, quoted);
2646                        self.advance();
2647                        if quoted == '\'' {
2648                            break;
2649                        }
2650                    }
2651                }
2652                '"' => {
2653                    let mut escaped = false;
2654                    Self::push_capture_char(content, c);
2655                    self.advance();
2656                    while let Some(quoted) = self.peek_char() {
2657                        Self::push_capture_char(content, quoted);
2658                        self.advance();
2659                        if escaped {
2660                            escaped = false;
2661                            continue;
2662                        }
2663                        match quoted {
2664                            '\\' => escaped = true,
2665                            '"' => break,
2666                            _ => {}
2667                        }
2668                    }
2669                }
2670                '`' => {
2671                    let mut escaped = false;
2672                    Self::push_capture_char(content, c);
2673                    self.advance();
2674                    while let Some(quoted) = self.peek_char() {
2675                        Self::push_capture_char(content, quoted);
2676                        self.advance();
2677                        if escaped {
2678                            escaped = false;
2679                            continue;
2680                        }
2681                        match quoted {
2682                            '\\' => escaped = true,
2683                            '`' => break,
2684                            _ => {}
2685                        }
2686                    }
2687                }
2688                '[' => {
2689                    Self::push_capture_char(content, c);
2690                    self.advance();
2691                    bracket_depth += 1;
2692                }
2693                ']' => {
2694                    Self::push_capture_char(content, c);
2695                    self.advance();
2696                    bracket_depth -= 1;
2697                    if bracket_depth == 0 {
2698                        return true;
2699                    }
2700                }
2701                '$' => {
2702                    Self::push_capture_char(content, c);
2703                    self.advance();
2704                    if self.peek_char() == Some('(') {
2705                        if self.second_char() == Some('(') {
2706                            if !self.read_arithmetic_expansion_into(content) {
2707                                return false;
2708                            }
2709                        } else {
2710                            Self::push_capture_char(content, '(');
2711                            self.advance();
2712                            if !self.read_command_subst_into(content) {
2713                                return false;
2714                            }
2715                        }
2716                    } else if self.peek_char() == Some('{') {
2717                        Self::push_capture_char(content, '{');
2718                        self.advance();
2719                        if !self.read_param_expansion_into(content, segment_start) {
2720                            return false;
2721                        }
2722                    } else if self.peek_char() == Some('[') {
2723                        Self::push_capture_char(content, '[');
2724                        self.advance();
2725                        if !self.read_legacy_arithmetic_into(content, segment_start) {
2726                            return false;
2727                        }
2728                    }
2729                }
2730                _ => {
2731                    Self::push_capture_char(content, c);
2732                    self.advance();
2733                }
2734            }
2735        }
2736
2737        false
2738    }
2739
2740    /// Read command substitution content after `$(`, handling nested parens and quotes.
2741    /// Appends chars to `content` and adds the closing `)`.
2742    /// `subst_depth` tracks nesting to prevent stack overflow.
2743    fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
2744        self.read_command_subst_into_depth(content, 0)
2745    }
2746
2747    fn flush_command_subst_keyword(
2748        current_word: &mut String,
2749        pending_case_headers: &mut usize,
2750        case_clause_depths: &mut SmallVec<[usize; 4]>,
2751        depth: usize,
2752        word_started_at_command_start: &mut bool,
2753    ) {
2754        if current_word.is_empty() {
2755            *word_started_at_command_start = false;
2756            return;
2757        }
2758
2759        match current_word.as_str() {
2760            "case" if *word_started_at_command_start => *pending_case_headers += 1,
2761            "in" if *pending_case_headers > 0 => {
2762                *pending_case_headers -= 1;
2763                case_clause_depths.push(depth);
2764            }
2765            "esac" if *word_started_at_command_start => {
2766                case_clause_depths.pop();
2767            }
2768            _ => {}
2769        }
2770
2771        current_word.clear();
2772        *word_started_at_command_start = false;
2773    }
2774
2775    fn read_command_subst_heredoc_delimiter_into(
2776        &mut self,
2777        content: &mut Option<String>,
2778    ) -> Option<String> {
2779        while let Some(ch) = self.peek_char() {
2780            if !matches!(ch, ' ' | '\t') {
2781                break;
2782            }
2783            Self::push_capture_char(content, ch);
2784            self.advance();
2785        }
2786
2787        let mut cooked = String::new();
2788        let mut in_single = false;
2789        let mut in_double = false;
2790        let mut escaped = false;
2791        let mut saw_any = false;
2792
2793        while let Some(ch) = self.peek_char() {
2794            if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
2795                break;
2796            }
2797
2798            saw_any = true;
2799            Self::push_capture_char(content, ch);
2800            self.advance();
2801
2802            if escaped {
2803                cooked.push(ch);
2804                escaped = false;
2805                continue;
2806            }
2807
2808            match ch {
2809                '\\' if !in_single => escaped = true,
2810                '\'' if !in_double => in_single = !in_single,
2811                '"' if !in_single => in_double = !in_double,
2812                _ => cooked.push(ch),
2813            }
2814        }
2815
2816        saw_any.then_some(cooked)
2817    }
2818
2819    fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2820        Self::push_capture_char(content, '`');
2821        self.advance();
2822        while let Some(ch) = self.peek_char() {
2823            Self::push_capture_char(content, ch);
2824            self.advance();
2825            if ch == '\\' {
2826                if let Some(esc) = self.peek_char() {
2827                    Self::push_capture_char(content, esc);
2828                    self.advance();
2829                }
2830                continue;
2831            }
2832            if ch == '`' {
2833                break;
2834            }
2835        }
2836    }
2837
2838    fn read_command_subst_pending_heredoc_into(
2839        &mut self,
2840        content: &mut Option<String>,
2841        delimiter: &str,
2842        strip_tabs: bool,
2843    ) -> bool {
2844        loop {
2845            let mut line = String::new();
2846            let mut saw_newline = false;
2847
2848            while let Some(ch) = self.peek_char() {
2849                self.advance();
2850                if ch == '\n' {
2851                    saw_newline = true;
2852                    break;
2853                }
2854                line.push(ch);
2855            }
2856
2857            Self::push_capture_str(content, &line);
2858            if saw_newline {
2859                Self::push_capture_char(content, '\n');
2860            }
2861
2862            if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2863                return true;
2864            }
2865        }
2866    }
2867
2868    fn read_command_subst_into_depth(
2869        &mut self,
2870        content: &mut Option<String>,
2871        subst_depth: usize,
2872    ) -> bool {
2873        if subst_depth >= self.max_subst_depth {
2874            // Depth limit exceeded — consume until matching ')' and emit error token
2875            let mut depth = 1;
2876            while let Some(c) = self.peek_char() {
2877                self.advance();
2878                match c {
2879                    '(' => depth += 1,
2880                    ')' => {
2881                        depth -= 1;
2882                        if depth == 0 {
2883                            Self::push_capture_char(content, ')');
2884                            return true;
2885                        }
2886                    }
2887                    _ => {}
2888                }
2889            }
2890            return false;
2891        }
2892
2893        let mut depth = 1;
2894        let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
2895        let mut pending_case_headers = 0usize;
2896        let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
2897        let mut current_word = String::with_capacity(16);
2898        let mut at_command_start = true;
2899        let mut expecting_redirection_target = false;
2900        let mut current_word_started_at_command_start = false;
2901        while let Some(c) = self.peek_char() {
2902            match c {
2903                '#' if !self.should_treat_hash_as_word_char() => {
2904                    let had_word = !current_word.is_empty();
2905                    Self::flush_command_subst_keyword(
2906                        &mut current_word,
2907                        &mut pending_case_headers,
2908                        &mut case_clause_depths,
2909                        depth,
2910                        &mut current_word_started_at_command_start,
2911                    );
2912                    if had_word && expecting_redirection_target {
2913                        expecting_redirection_target = false;
2914                    }
2915                    Self::push_capture_char(content, '#');
2916                    self.advance();
2917                    while let Some(comment_ch) = self.peek_char() {
2918                        Self::push_capture_char(content, comment_ch);
2919                        self.advance();
2920                        if comment_ch == '\n' {
2921                            for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
2922                                if !self.read_command_subst_pending_heredoc_into(
2923                                    content, &delimiter, strip_tabs,
2924                                ) {
2925                                    return false;
2926                                }
2927                            }
2928                            at_command_start = true;
2929                            expecting_redirection_target = false;
2930                            break;
2931                        }
2932                    }
2933                }
2934                '(' => {
2935                    Self::flush_command_subst_keyword(
2936                        &mut current_word,
2937                        &mut pending_case_headers,
2938                        &mut case_clause_depths,
2939                        depth,
2940                        &mut current_word_started_at_command_start,
2941                    );
2942                    depth += 1;
2943                    Self::push_capture_char(content, c);
2944                    self.advance();
2945                    at_command_start = true;
2946                    expecting_redirection_target = false;
2947                }
2948                ')' => {
2949                    Self::flush_command_subst_keyword(
2950                        &mut current_word,
2951                        &mut pending_case_headers,
2952                        &mut case_clause_depths,
2953                        depth,
2954                        &mut current_word_started_at_command_start,
2955                    );
2956                    if case_clause_depths
2957                        .last()
2958                        .is_some_and(|case_depth| *case_depth == depth)
2959                    {
2960                        Self::push_capture_char(content, ')');
2961                        self.advance();
2962                        at_command_start = true;
2963                        expecting_redirection_target = false;
2964                        continue;
2965                    }
2966                    depth -= 1;
2967                    self.advance();
2968                    if depth == 0 {
2969                        Self::push_capture_char(content, ')');
2970                        return true;
2971                    }
2972                    Self::push_capture_char(content, c);
2973                    at_command_start = false;
2974                    expecting_redirection_target = false;
2975                }
2976                '"' => {
2977                    let had_word = !current_word.is_empty();
2978                    Self::flush_command_subst_keyword(
2979                        &mut current_word,
2980                        &mut pending_case_headers,
2981                        &mut case_clause_depths,
2982                        depth,
2983                        &mut current_word_started_at_command_start,
2984                    );
2985                    if had_word && expecting_redirection_target {
2986                        expecting_redirection_target = false;
2987                    }
2988                    // Nested double-quoted string inside $()
2989                    Self::push_capture_char(content, '"');
2990                    self.advance();
2991                    while let Some(qc) = self.peek_char() {
2992                        match qc {
2993                            '"' => {
2994                                Self::push_capture_char(content, '"');
2995                                self.advance();
2996                                break;
2997                            }
2998                            '\\' => {
2999                                Self::push_capture_char(content, '\\');
3000                                self.advance();
3001                                if let Some(esc) = self.peek_char() {
3002                                    Self::push_capture_char(content, esc);
3003                                    self.advance();
3004                                }
3005                            }
3006                            '$' => {
3007                                Self::push_capture_char(content, '$');
3008                                self.advance();
3009                                if self.peek_char() == Some('(') {
3010                                    if self.second_char() == Some('(') {
3011                                        if !self.read_arithmetic_expansion_into(content) {
3012                                            return false;
3013                                        }
3014                                    } else {
3015                                        Self::push_capture_char(content, '(');
3016                                        self.advance();
3017                                        if !self
3018                                            .read_command_subst_into_depth(content, subst_depth + 1)
3019                                        {
3020                                            return false;
3021                                        }
3022                                    }
3023                                }
3024                            }
3025                            _ => {
3026                                Self::push_capture_char(content, qc);
3027                                self.advance();
3028                            }
3029                        }
3030                    }
3031                    if expecting_redirection_target {
3032                        expecting_redirection_target = false;
3033                    } else {
3034                        at_command_start = false;
3035                    }
3036                }
3037                '\'' => {
3038                    let had_word = !current_word.is_empty();
3039                    Self::flush_command_subst_keyword(
3040                        &mut current_word,
3041                        &mut pending_case_headers,
3042                        &mut case_clause_depths,
3043                        depth,
3044                        &mut current_word_started_at_command_start,
3045                    );
3046                    if had_word && expecting_redirection_target {
3047                        expecting_redirection_target = false;
3048                    }
3049                    // Single-quoted string inside $()
3050                    Self::push_capture_char(content, '\'');
3051                    self.advance();
3052                    while let Some(qc) = self.peek_char() {
3053                        Self::push_capture_char(content, qc);
3054                        self.advance();
3055                        if qc == '\'' {
3056                            break;
3057                        }
3058                    }
3059                    if expecting_redirection_target {
3060                        expecting_redirection_target = false;
3061                    } else {
3062                        at_command_start = false;
3063                    }
3064                }
3065                '`' => {
3066                    let had_word = !current_word.is_empty();
3067                    Self::flush_command_subst_keyword(
3068                        &mut current_word,
3069                        &mut pending_case_headers,
3070                        &mut case_clause_depths,
3071                        depth,
3072                        &mut current_word_started_at_command_start,
3073                    );
3074                    if had_word && expecting_redirection_target {
3075                        expecting_redirection_target = false;
3076                    }
3077                    self.read_command_subst_backtick_segment_into(content);
3078                    if expecting_redirection_target {
3079                        expecting_redirection_target = false;
3080                    } else {
3081                        at_command_start = false;
3082                    }
3083                }
3084                '$' if self.second_char() == Some('\'') => {
3085                    let had_word = !current_word.is_empty();
3086                    Self::flush_command_subst_keyword(
3087                        &mut current_word,
3088                        &mut pending_case_headers,
3089                        &mut case_clause_depths,
3090                        depth,
3091                        &mut current_word_started_at_command_start,
3092                    );
3093                    if had_word && expecting_redirection_target {
3094                        expecting_redirection_target = false;
3095                    }
3096                    Self::push_capture_char(content, '$');
3097                    self.advance();
3098                    Self::push_capture_char(content, '\'');
3099                    self.advance();
3100                    while let Some(qc) = self.peek_char() {
3101                        Self::push_capture_char(content, qc);
3102                        self.advance();
3103                        if qc == '\\' {
3104                            if let Some(esc) = self.peek_char() {
3105                                Self::push_capture_char(content, esc);
3106                                self.advance();
3107                            }
3108                            continue;
3109                        }
3110                        if qc == '\'' {
3111                            break;
3112                        }
3113                    }
3114                    if expecting_redirection_target {
3115                        expecting_redirection_target = false;
3116                    } else {
3117                        at_command_start = false;
3118                    }
3119                }
3120                '\\' => {
3121                    let had_word = !current_word.is_empty();
3122                    Self::flush_command_subst_keyword(
3123                        &mut current_word,
3124                        &mut pending_case_headers,
3125                        &mut case_clause_depths,
3126                        depth,
3127                        &mut current_word_started_at_command_start,
3128                    );
3129                    if had_word && expecting_redirection_target {
3130                        expecting_redirection_target = false;
3131                    }
3132                    Self::push_capture_char(content, '\\');
3133                    self.advance();
3134                    if let Some(esc) = self.peek_char() {
3135                        Self::push_capture_char(content, esc);
3136                        self.advance();
3137                    }
3138                    if expecting_redirection_target {
3139                        expecting_redirection_target = false;
3140                    } else {
3141                        at_command_start = false;
3142                    }
3143                }
3144                '<' if self.second_char() == Some('<') => {
3145                    let word_was_redirection_fd = current_word_started_at_command_start
3146                        && !current_word.is_empty()
3147                        && current_word.chars().all(|current| current.is_ascii_digit());
3148                    Self::flush_command_subst_keyword(
3149                        &mut current_word,
3150                        &mut pending_case_headers,
3151                        &mut case_clause_depths,
3152                        depth,
3153                        &mut current_word_started_at_command_start,
3154                    );
3155                    if word_was_redirection_fd {
3156                        at_command_start = true;
3157                    }
3158
3159                    Self::push_capture_char(content, '<');
3160                    self.advance();
3161                    Self::push_capture_char(content, '<');
3162                    self.advance();
3163
3164                    if self.peek_char() == Some('<') {
3165                        Self::push_capture_char(content, '<');
3166                        self.advance();
3167                        expecting_redirection_target = true;
3168                        continue;
3169                    }
3170
3171                    let strip_tabs = if self.peek_char() == Some('-') {
3172                        Self::push_capture_char(content, '-');
3173                        self.advance();
3174                        true
3175                    } else {
3176                        false
3177                    };
3178
3179                    if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
3180                    {
3181                        pending_heredocs.push((delimiter, strip_tabs));
3182                        expecting_redirection_target = false;
3183                    } else {
3184                        expecting_redirection_target = true;
3185                    }
3186                }
3187                '>' | '<' => {
3188                    let word_was_redirection_fd = current_word_started_at_command_start
3189                        && !current_word.is_empty()
3190                        && current_word.chars().all(|current| current.is_ascii_digit());
3191                    Self::flush_command_subst_keyword(
3192                        &mut current_word,
3193                        &mut pending_case_headers,
3194                        &mut case_clause_depths,
3195                        depth,
3196                        &mut current_word_started_at_command_start,
3197                    );
3198                    if word_was_redirection_fd {
3199                        at_command_start = true;
3200                    }
3201                    Self::push_capture_char(content, c);
3202                    self.advance();
3203                    expecting_redirection_target = true;
3204                }
3205                '\n' => {
3206                    Self::flush_command_subst_keyword(
3207                        &mut current_word,
3208                        &mut pending_case_headers,
3209                        &mut case_clause_depths,
3210                        depth,
3211                        &mut current_word_started_at_command_start,
3212                    );
3213                    Self::push_capture_char(content, '\n');
3214                    self.advance();
3215                    for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
3216                        if !self.read_command_subst_pending_heredoc_into(
3217                            content, &delimiter, strip_tabs,
3218                        ) {
3219                            return false;
3220                        }
3221                    }
3222                    at_command_start = true;
3223                    expecting_redirection_target = false;
3224                }
3225                _ => {
3226                    if c.is_ascii_alphanumeric() || c == '_' {
3227                        if current_word.is_empty()
3228                            && !expecting_redirection_target
3229                            && at_command_start
3230                        {
3231                            current_word_started_at_command_start = true;
3232                            at_command_start = false;
3233                        }
3234                        current_word.push(c);
3235                    } else {
3236                        let had_word = !current_word.is_empty();
3237                        Self::flush_command_subst_keyword(
3238                            &mut current_word,
3239                            &mut pending_case_headers,
3240                            &mut case_clause_depths,
3241                            depth,
3242                            &mut current_word_started_at_command_start,
3243                        );
3244                        if had_word && expecting_redirection_target {
3245                            expecting_redirection_target = false;
3246                        }
3247                        match c {
3248                            ' ' | '\t' => {}
3249                            ';' | '|' | '&' => {
3250                                at_command_start = true;
3251                                expecting_redirection_target = false;
3252                            }
3253                            _ => {
3254                                if !expecting_redirection_target {
3255                                    at_command_start = false;
3256                                }
3257                            }
3258                        }
3259                    }
3260                    Self::push_capture_char(content, c);
3261                    self.advance();
3262                }
3263            }
3264        }
3265
3266        false
3267    }
3268
3269    /// Read parameter expansion content after `${`, handling nested braces and quotes.
3270    /// In bash, quotes inside `${...}` (e.g. `${arr["key"]}`) don't terminate the
3271    /// outer double-quoted string. Appends chars including closing `}` to `content`.
3272    fn read_param_expansion_into(
3273        &mut self,
3274        content: &mut Option<String>,
3275        segment_start: Position,
3276    ) -> bool {
3277        let mut borrowable = true;
3278        let mut depth = 1;
3279        let mut literal_brace_depth = 0usize;
3280        let mut in_single = false;
3281        let mut in_double = false;
3282        let mut double_quote_depth = 0usize;
3283        while let Some(c) = self.peek_char() {
3284            if in_single {
3285                match c {
3286                    '\\' => {
3287                        let escape_start = self.current_position();
3288                        if self.second_char() == Some('"') {
3289                            self.advance();
3290                            borrowable = false;
3291                            self.ensure_capture_from_source(content, segment_start, escape_start);
3292                            Self::push_capture_char(content, '"');
3293                            self.advance();
3294                        } else {
3295                            Self::push_capture_char(content, '\\');
3296                            self.advance();
3297                        }
3298                    }
3299                    '\'' => {
3300                        Self::push_capture_char(content, c);
3301                        self.advance();
3302                        in_single = false;
3303                    }
3304                    _ => {
3305                        Self::push_capture_char(content, c);
3306                        self.advance();
3307                    }
3308                }
3309                continue;
3310            }
3311
3312            match c {
3313                '}' if !in_single && (!in_double || depth > double_quote_depth) => {
3314                    self.advance();
3315                    Self::push_capture_char(content, '}');
3316                    if depth == 1
3317                        && literal_brace_depth > 0
3318                        && self.has_later_top_level_param_expansion_closer(depth)
3319                    {
3320                        literal_brace_depth -= 1;
3321                        continue;
3322                    }
3323                    depth -= 1;
3324                    if depth == 0 {
3325                        break;
3326                    }
3327                }
3328                '{' if !in_single && !in_double => {
3329                    literal_brace_depth += 1;
3330                    Self::push_capture_char(content, '{');
3331                    self.advance();
3332                }
3333                '"' => {
3334                    // Quotes inside ${...} are part of the expansion, not string delimiters
3335                    Self::push_capture_char(content, '"');
3336                    self.advance();
3337                    in_double = !in_double;
3338                    double_quote_depth = if in_double { depth } else { 0 };
3339                }
3340                '\'' => {
3341                    Self::push_capture_char(content, '\'');
3342                    self.advance();
3343                    if !in_double {
3344                        in_single = true;
3345                    }
3346                }
3347                '\\' => {
3348                    // Inside ${...} within double quotes, same escape rules apply:
3349                    // \", \\, \$, \` produce the escaped char; others keep backslash
3350                    let escape_start = self.current_position();
3351                    self.advance();
3352                    if let Some(esc) = self.peek_char() {
3353                        match esc {
3354                            '$' => {
3355                                borrowable = false;
3356                                self.ensure_capture_from_source(
3357                                    content,
3358                                    segment_start,
3359                                    escape_start,
3360                                );
3361                                Self::push_capture_char(content, '\x00');
3362                                Self::push_capture_char(content, '$');
3363                                self.advance();
3364                            }
3365                            '"' | '\\' | '`' => {
3366                                borrowable = false;
3367                                self.ensure_capture_from_source(
3368                                    content,
3369                                    segment_start,
3370                                    escape_start,
3371                                );
3372                                Self::push_capture_char(content, esc);
3373                                self.advance();
3374                            }
3375                            '}' => {
3376                                // \} should be a literal } without closing the expansion
3377                                Self::push_capture_char(content, '\\');
3378                                Self::push_capture_char(content, '}');
3379                                self.advance();
3380                                literal_brace_depth = literal_brace_depth.saturating_sub(1);
3381                            }
3382                            _ => {
3383                                Self::push_capture_char(content, '\\');
3384                                Self::push_capture_char(content, esc);
3385                                self.advance();
3386                            }
3387                        }
3388                    } else {
3389                        Self::push_capture_char(content, '\\');
3390                    }
3391                }
3392                '$' => {
3393                    Self::push_capture_char(content, '$');
3394                    self.advance();
3395                    if self.peek_char() == Some('(') {
3396                        if self.second_char() == Some('(') {
3397                            if !self.read_arithmetic_expansion_into(content) {
3398                                borrowable = false;
3399                            }
3400                        } else {
3401                            Self::push_capture_char(content, '(');
3402                            self.advance();
3403                            self.read_command_subst_into(content);
3404                        }
3405                    } else if self.peek_char() == Some('{') {
3406                        Self::push_capture_char(content, '{');
3407                        self.advance();
3408                        borrowable &= self.read_param_expansion_into(content, segment_start);
3409                    }
3410                }
3411                _ => {
3412                    Self::push_capture_char(content, c);
3413                    self.advance();
3414                }
3415            }
3416        }
3417        borrowable
3418    }
3419
3420    fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
3421        let mut chars = self.lookahead_chars().peekable();
3422        let mut depth = target_depth;
3423        let mut in_single = false;
3424        let mut in_double = false;
3425        let mut double_quote_depth = 0usize;
3426
3427        while let Some(ch) = chars.next() {
3428            if in_single {
3429                match ch {
3430                    '\'' => in_single = false,
3431                    '\\' if chars.peek() == Some(&'"') => {
3432                        chars.next();
3433                    }
3434                    '\\' => {}
3435                    _ => {}
3436                }
3437                continue;
3438            }
3439
3440            if in_double {
3441                match ch {
3442                    '"' => {
3443                        in_double = false;
3444                        double_quote_depth = 0;
3445                    }
3446                    '\\' => {
3447                        chars.next();
3448                    }
3449                    '$' if chars.peek() == Some(&'{') => {
3450                        chars.next();
3451                        depth += 1;
3452                    }
3453                    '}' if depth > double_quote_depth => {
3454                        depth -= 1;
3455                    }
3456                    _ => {}
3457                }
3458                continue;
3459            }
3460
3461            match ch {
3462                '\n' if depth == target_depth => return false,
3463                '\'' => in_single = true,
3464                '"' => {
3465                    in_double = true;
3466                    double_quote_depth = depth;
3467                }
3468                '\\' => {
3469                    chars.next();
3470                }
3471                '$' if chars.peek() == Some(&'{') => {
3472                    chars.next();
3473                    depth += 1;
3474                }
3475                '}' => {
3476                    if depth == target_depth {
3477                        return true;
3478                    }
3479                    depth -= 1;
3480                }
3481                _ => {}
3482            }
3483        }
3484
3485        false
3486    }
3487
3488    /// Check if the content starting with { looks like a brace expansion
3489    /// Brace expansion: {a,b,c} or {1..5} (contains , or ..)
3490    /// Brace group: { cmd; } (contains spaces, semicolons, newlines)
3491    /// Caps lookahead to prevent O(n^2) scanning when input
3492    /// contains many unmatched `{` characters (issue #997).
3493    fn looks_like_brace_expansion(&mut self) -> bool {
3494        const MAX_LOOKAHEAD: usize = 10_000;
3495        let brace_ccl_enabled = self.brace_ccl_enabled();
3496
3497        let mut chars = self.lookahead_chars();
3498
3499        // Skip the opening {
3500        if chars.next() != Some('{') {
3501            return false;
3502        }
3503
3504        let mut depth = 1;
3505        let mut paren_depth = 0usize;
3506        let mut has_comma = false;
3507        let mut has_dot_dot = false;
3508        let mut escaped = false;
3509        let mut in_single = false;
3510        let mut in_double = false;
3511        let mut in_backtick = false;
3512        let mut prev_char = None;
3513        let mut scanned = 0usize;
3514
3515        for ch in chars {
3516            scanned += 1;
3517            if scanned > MAX_LOOKAHEAD {
3518                return false;
3519            }
3520
3521            let brace_surface_active = !in_single && !in_double && !in_backtick;
3522            let at_top_level = depth == 1 && paren_depth == 0 && brace_surface_active;
3523
3524            match ch {
3525                _ if escaped => {
3526                    escaped = false;
3527                }
3528                '\\' if !in_single => escaped = true,
3529                '\'' if !in_double && !in_backtick => in_single = !in_single,
3530                '"' if !in_single && !in_backtick => in_double = !in_double,
3531                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3532                '(' if brace_surface_active && (paren_depth > 0 || prev_char == Some('$')) => {
3533                    paren_depth += 1
3534                }
3535                ')' if brace_surface_active && paren_depth > 0 => paren_depth -= 1,
3536                '{' if !in_single && !in_double && !in_backtick => depth += 1,
3537                '}' if !in_single && !in_double && !in_backtick => {
3538                    depth -= 1;
3539                    if depth == 0 {
3540                        // Found matching }, check if we have brace expansion markers
3541                        return has_comma || has_dot_dot || (brace_ccl_enabled && scanned > 1);
3542                    }
3543                }
3544                ',' if at_top_level => has_comma = true,
3545                '.' if at_top_level && prev_char == Some('.') => has_dot_dot = true,
3546                // Brace groups have whitespace/newlines/semicolons at depth 1
3547                ' ' | '\t' | '\n' | ';' if at_top_level => return false,
3548                _ => {}
3549            }
3550            prev_char = Some(ch);
3551        }
3552
3553        false
3554    }
3555
3556    fn consume_mid_word_brace_segment(&mut self, word: &mut Option<String>) {
3557        let mut brace_depth = 1usize;
3558        let mut paren_depth = 0usize;
3559        let mut escaped = false;
3560        let mut in_single = false;
3561        let mut in_double = false;
3562        let mut in_backtick = false;
3563        let mut prev_char = None;
3564
3565        while let Some(ch) = self.peek_char() {
3566            Self::push_capture_char(word, ch);
3567            self.advance();
3568
3569            if escaped {
3570                escaped = false;
3571                prev_char = Some(ch);
3572                continue;
3573            }
3574
3575            match ch {
3576                '\\' if !in_single => escaped = true,
3577                '\'' if !in_double && !in_backtick => in_single = !in_single,
3578                '"' if !in_single && !in_backtick => in_double = !in_double,
3579                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3580                '(' if !in_single
3581                    && !in_double
3582                    && !in_backtick
3583                    && (paren_depth > 0 || prev_char == Some('$')) =>
3584                {
3585                    paren_depth += 1
3586                }
3587                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3588                    paren_depth -= 1
3589                }
3590                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3591                '}' if !in_single && !in_double && !in_backtick => {
3592                    brace_depth -= 1;
3593                    if brace_depth == 0 {
3594                        break;
3595                    }
3596                }
3597                _ => {}
3598            }
3599
3600            prev_char = Some(ch);
3601        }
3602    }
3603
3604    fn consume_brace_word_body(&mut self, word: &mut String) {
3605        let mut brace_depth = 1usize;
3606        let mut paren_depth = 0usize;
3607        let mut escaped = false;
3608        let mut in_single = false;
3609        let mut in_double = false;
3610        let mut in_backtick = false;
3611        let mut prev_char = None;
3612
3613        while let Some(ch) = self.peek_char() {
3614            word.push(ch);
3615            self.advance();
3616
3617            if escaped {
3618                escaped = false;
3619                prev_char = Some(ch);
3620                continue;
3621            }
3622
3623            match ch {
3624                '\\' if !in_single => escaped = true,
3625                '\'' if !in_double && !in_backtick => in_single = !in_single,
3626                '"' if !in_single && !in_backtick => in_double = !in_double,
3627                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3628                '(' if !in_single
3629                    && !in_double
3630                    && !in_backtick
3631                    && (paren_depth > 0 || prev_char == Some('$')) =>
3632                {
3633                    paren_depth += 1
3634                }
3635                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3636                    paren_depth -= 1
3637                }
3638                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3639                '}' if !in_single && !in_double && !in_backtick => {
3640                    brace_depth -= 1;
3641                    if brace_depth == 0 {
3642                        break;
3643                    }
3644                }
3645                _ => {}
3646            }
3647
3648            prev_char = Some(ch);
3649        }
3650    }
3651
3652    /// Check whether a mid-word `{...}` segment can stay attached to the current
3653    /// word without crossing a top-level word boundary.
3654    fn looks_like_mid_word_brace_segment(&self) -> bool {
3655        const MAX_LOOKAHEAD: usize = 10_000;
3656
3657        let mut chars = self.lookahead_chars();
3658        if chars.next() != Some('{') {
3659            return false;
3660        }
3661
3662        let mut brace_depth = 1;
3663        let mut paren_depth = 0usize;
3664        let mut escaped = false;
3665        let mut in_single = false;
3666        let mut in_double = false;
3667        let mut in_backtick = false;
3668        let mut prev_char = None;
3669        let mut scanned = 0usize;
3670
3671        for ch in chars {
3672            scanned += 1;
3673            if scanned > MAX_LOOKAHEAD {
3674                return false;
3675            }
3676
3677            if !in_single
3678                && !in_double
3679                && !in_backtick
3680                && !escaped
3681                && brace_depth == 1
3682                && paren_depth == 0
3683                && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
3684            {
3685                return false;
3686            }
3687
3688            if escaped {
3689                escaped = false;
3690                prev_char = Some(ch);
3691                continue;
3692            }
3693
3694            match ch {
3695                '\\' => escaped = true,
3696                '\'' if !in_double && !in_backtick => in_single = !in_single,
3697                '"' if !in_single && !in_backtick => in_double = !in_double,
3698                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3699                '(' if !in_single
3700                    && !in_double
3701                    && !in_backtick
3702                    && (paren_depth > 0 || prev_char == Some('$')) =>
3703                {
3704                    paren_depth += 1
3705                }
3706                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3707                    paren_depth -= 1
3708                }
3709                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3710                '}' if !in_single && !in_double && !in_backtick => {
3711                    brace_depth -= 1;
3712                    if brace_depth == 0 {
3713                        return true;
3714                    }
3715                }
3716                _ => {}
3717            }
3718
3719            prev_char = Some(ch);
3720        }
3721
3722        false
3723    }
3724
3725    /// Check if { is followed by whitespace (brace group start)
3726    fn is_brace_group_start(&self) -> bool {
3727        let mut chars = self.lookahead_chars();
3728        // Skip the opening {
3729        if chars.next() != Some('{') {
3730            return false;
3731        }
3732        // If next char is whitespace or newline, it's a brace group
3733        matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3734    }
3735
3736    /// Check whether the text after an escaped `{` looks like a brace-expansion
3737    /// surface that should stay attached to the current word, e.g. `\{a,b}`.
3738    fn escaped_brace_sequence_looks_like_brace_expansion(&mut self) -> bool {
3739        const MAX_LOOKAHEAD: usize = 10_000;
3740        let brace_ccl_enabled = self.brace_ccl_enabled();
3741
3742        let mut chars = self.lookahead_chars();
3743        let mut depth = 1;
3744        let mut has_comma = false;
3745        let mut has_dot_dot = false;
3746        let mut prev_char = None;
3747        let mut scanned = 0usize;
3748
3749        for ch in chars.by_ref() {
3750            scanned += 1;
3751            if scanned > MAX_LOOKAHEAD {
3752                return false;
3753            }
3754            match ch {
3755                '{' => depth += 1,
3756                '}' => {
3757                    depth -= 1;
3758                    if depth == 0 {
3759                        return has_comma || has_dot_dot || (brace_ccl_enabled && scanned > 1);
3760                    }
3761                }
3762                ',' if depth == 1 => has_comma = true,
3763                '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3764                ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3765                _ => {}
3766            }
3767            prev_char = Some(ch);
3768        }
3769
3770        false
3771    }
3772
3773    fn brace_literal_starts_case_pattern_delimiter(&self) -> bool {
3774        let mut chars = self.lookahead_chars();
3775        if chars.next() != Some('{') {
3776            return false;
3777        }
3778        chars.next() == Some(')')
3779    }
3780
3781    /// Read a {literal} pattern without comma/dot-dot as a word
3782    fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
3783        let mut word = String::with_capacity(16);
3784
3785        if let Some('{') = self.peek_char() {
3786            word.push('{');
3787            self.advance();
3788        } else {
3789            return None;
3790        }
3791
3792        self.consume_brace_word_body(&mut word);
3793
3794        while let Some(ch) = self.peek_char() {
3795            if Self::is_word_char(ch) {
3796                if self.reinject_buf.is_empty() {
3797                    let chunk = self.cursor.eat_while(Self::is_word_char);
3798                    word.push_str(chunk);
3799                    self.advance_scanned_source_bytes(chunk.len());
3800                } else {
3801                    word.push(ch);
3802                    self.advance();
3803                }
3804            } else {
3805                break;
3806            }
3807        }
3808
3809        Some(LexedToken::owned_word(TokenKind::Word, word))
3810    }
3811
3812    /// Read a brace expansion pattern as a word
3813    fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3814        let mut word = String::with_capacity(16);
3815
3816        // Read the opening {
3817        if let Some('{') = self.peek_char() {
3818            word.push('{');
3819            self.advance();
3820        } else {
3821            return None;
3822        }
3823
3824        // Read until matching }
3825        self.consume_brace_word_body(&mut word);
3826
3827        // Continue reading any suffix after the brace pattern
3828        while let Some(ch) = self.peek_char() {
3829            if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3830                if ch == '{' {
3831                    // Another brace pattern - include it
3832                    word.push(ch);
3833                    self.advance();
3834                    self.consume_brace_word_body(&mut word);
3835                } else {
3836                    word.push(ch);
3837                    self.advance();
3838                }
3839            } else {
3840                break;
3841            }
3842        }
3843
3844        Some(LexedToken::owned_word(TokenKind::Word, word))
3845    }
3846
3847    /// Peek ahead (without consuming) to see if `=(` starts an associative
3848    /// compound assignment like `([key]=val ...)`.  Returns true when the
3849    /// first non-whitespace char after `(` is `[`.
3850    fn looks_like_assoc_assign(&self) -> bool {
3851        let mut chars = self.lookahead_chars();
3852        // Skip the `(` we haven't consumed yet
3853        if chars.next() != Some('(') {
3854            return false;
3855        }
3856        // Skip optional whitespace
3857        for ch in chars {
3858            match ch {
3859                ' ' | '\t' => continue,
3860                '[' => return true,
3861                _ => return false,
3862            }
3863        }
3864        false
3865    }
3866
3867    fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3868        text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3869    }
3870
3871    fn looks_like_zsh_alternative_glob_suffix(&mut self, prefix: &str) -> bool {
3872        if self.current_zsh_options().is_none()
3873            || self.peek_char() != Some('(')
3874            || !prefix.ends_with('.')
3875        {
3876            return false;
3877        }
3878
3879        let mut chars = self.lookahead_chars();
3880        if chars.next() != Some('(') {
3881            return false;
3882        }
3883
3884        let mut depth = 1usize;
3885        let mut escaped = false;
3886        let mut saw_glob_marker = false;
3887
3888        for ch in chars {
3889            if escaped {
3890                escaped = false;
3891                continue;
3892            }
3893
3894            match ch {
3895                '\\' => escaped = true,
3896                '(' => depth += 1,
3897                ')' => {
3898                    depth = depth.saturating_sub(1);
3899                    if depth == 0 {
3900                        return saw_glob_marker;
3901                    }
3902                }
3903                '|' if depth == 1 => {
3904                    saw_glob_marker = true;
3905                }
3906                _ => {}
3907            }
3908        }
3909
3910        false
3911    }
3912
3913    fn looks_like_zsh_glob_modifier_suffix(&mut self, prefix: &str) -> bool {
3914        if self.current_zsh_options().is_none()
3915            || self.peek_char() != Some('(')
3916            || !prefix.contains('/')
3917        {
3918            return false;
3919        }
3920
3921        let mut chars = self.lookahead_chars();
3922        matches!((chars.next(), chars.next()), (Some('('), Some(':')))
3923    }
3924
3925    fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3926        word.segments().any(|segment| {
3927            matches!(
3928                segment.kind(),
3929                LexedWordSegmentKind::SingleQuoted
3930                    | LexedWordSegmentKind::DollarSingleQuoted
3931                    | LexedWordSegmentKind::DoubleQuoted
3932                    | LexedWordSegmentKind::DollarDoubleQuoted
3933            )
3934        }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3935    }
3936
3937    fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3938        text.contains(['*', '?'])
3939            || text.ends_with('}') && text.contains("${")
3940            || text.ends_with(']')
3941                && text
3942                    .rfind('[')
3943                    .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3944    }
3945
3946    fn is_word_char(ch: char) -> bool {
3947        !matches!(
3948            ch,
3949            ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3950        )
3951    }
3952
3953    const fn is_ascii_word_byte(byte: u8) -> bool {
3954        !matches!(
3955            byte,
3956            b' ' | b'\t'
3957                | b'\n'
3958                | b';'
3959                | b'|'
3960                | b'&'
3961                | b'>'
3962                | b'<'
3963                | b'('
3964                | b')'
3965                | b'{'
3966                | b'}'
3967                | b'\''
3968                | b'"'
3969        )
3970    }
3971
3972    const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3973        Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3974    }
3975
3976    fn is_plain_word_char(ch: char) -> bool {
3977        Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3978    }
3979
3980    /// Read here document content until the delimiter line is found
3981    pub(super) fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
3982        let mut content = String::with_capacity(64);
3983        let mut current_line = String::with_capacity(64);
3984
3985        // Save rest of current line (after the delimiter token on the command line).
3986        // For `cat <<EOF | sort`, this captures ` | sort` so the parser can
3987        // tokenize the pipe and subsequent command after the heredoc body.
3988        //
3989        // Quoted strings may span multiple lines (e.g., `cat <<EOF; echo "two\nthree"`),
3990        // so we track quoting state and continue across newlines until quotes close.
3991        let mut rest_of_line = String::with_capacity(32);
3992        let rest_of_line_start = self.current_position();
3993        let mut in_double_quote = false;
3994        let mut in_single_quote = false;
3995        let mut in_comment = false;
3996        let mut saw_non_whitespace_tail = false;
3997        let mut consecutive_backslashes = 0usize;
3998        let mut previous_tail_char = None;
3999        while let Some(ch) = self.peek_char() {
4000            self.advance();
4001            if in_comment {
4002                if ch == '\n' {
4003                    break;
4004                }
4005                rest_of_line.push(ch);
4006                previous_tail_char = Some(ch);
4007                continue;
4008            }
4009            if ch == '#'
4010                && !in_single_quote
4011                && !in_double_quote
4012                && self.comments_enabled()
4013                && heredoc_tail_hash_starts_comment(previous_tail_char)
4014            {
4015                in_comment = true;
4016                rest_of_line.push(ch);
4017                previous_tail_char = Some(ch);
4018                consecutive_backslashes = 0;
4019                continue;
4020            }
4021            let backslash_continues_line = ch == '\\'
4022                && !in_single_quote
4023                && self.peek_char() == Some('\n')
4024                && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
4025                && consecutive_backslashes.is_multiple_of(2);
4026            if backslash_continues_line {
4027                rest_of_line.push(ch);
4028                rest_of_line.push('\n');
4029                self.advance();
4030                consecutive_backslashes = 0;
4031                continue;
4032            }
4033            if ch == '\n' && !in_double_quote && !in_single_quote {
4034                break;
4035            }
4036            if ch == '"' && !in_single_quote {
4037                in_double_quote = !in_double_quote;
4038            } else if ch == '\'' && !in_double_quote {
4039                in_single_quote = !in_single_quote;
4040            } else if ch == '\\' && in_double_quote {
4041                // Escaped char inside double quotes — skip the next char too
4042                rest_of_line.push(ch);
4043                if let Some(next) = self.peek_char() {
4044                    rest_of_line.push(next);
4045                    self.advance();
4046                }
4047                continue;
4048            }
4049            rest_of_line.push(ch);
4050            if !ch.is_whitespace() {
4051                saw_non_whitespace_tail = true;
4052            }
4053            if ch == '\\' && !in_single_quote {
4054                consecutive_backslashes += 1;
4055            } else {
4056                consecutive_backslashes = 0;
4057            }
4058            previous_tail_char = Some(ch);
4059        }
4060
4061        // If we just drained a heredoc replay buffer (for example when multiple
4062        // heredocs share one command line), resume tracking from the true cursor
4063        // position before we measure the body span.
4064        self.sync_offset_to_cursor();
4065        let content_start = self.current_position();
4066        let mut current_line_start = content_start;
4067        let content_end;
4068
4069        // Read lines until we find the delimiter
4070        loop {
4071            if self.reinject_buf.is_empty() {
4072                // When the body reading drains a reinject buffer (from a
4073                // previous heredoc on the same command line), the virtual
4074                // offset drifts away from the cursor. Snap it back before
4075                // any source-based work so spans and `post_heredoc_offset`
4076                // stay within bounds.
4077                self.sync_offset_to_cursor();
4078                let rest = self.cursor.rest();
4079                if rest.is_empty() {
4080                    content_end = self.current_position();
4081                    break;
4082                }
4083
4084                let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
4085                let line = &rest[..line_len];
4086                let has_newline = line_len < rest.len();
4087
4088                if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
4089                    content_end = current_line_start;
4090                    self.consume_source_bytes(line_len);
4091                    if has_newline {
4092                        self.consume_ascii_chars(1);
4093                    }
4094                    break;
4095                }
4096
4097                content.push_str(line);
4098                self.consume_source_bytes(line_len);
4099
4100                if has_newline {
4101                    self.consume_ascii_chars(1);
4102                    content.push('\n');
4103                    current_line_start = self.current_position();
4104                    continue;
4105                }
4106
4107                content_end = self.current_position();
4108                break;
4109            }
4110
4111            match self.peek_char() {
4112                Some('\n') => {
4113                    self.advance();
4114                    // Check if current line matches delimiter
4115                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
4116                        content_end = current_line_start;
4117                        break;
4118                    }
4119                    content.push_str(&current_line);
4120                    content.push('\n');
4121                    current_line.clear();
4122                    current_line_start = self.current_position();
4123                }
4124                Some(ch) => {
4125                    current_line.push(ch);
4126                    self.advance();
4127                }
4128                None => {
4129                    // End of input - check last line
4130                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
4131                        content_end = current_line_start;
4132                        break;
4133                    }
4134                    if !current_line.is_empty() {
4135                        content.push_str(&current_line);
4136                    }
4137                    content_end = self.current_position();
4138                    break;
4139                }
4140            }
4141        }
4142
4143        // Re-inject the command-line tail so subsequent same-line tokens (pipes,
4144        // redirects, command words, additional heredocs) stay visible to the
4145        // parser. Always replay a terminating newline so parsing stops before
4146        // tokens that originally lived on later source lines, like `}` or `do`.
4147        let post_heredoc_offset = self.offset;
4148        self.offset = rest_of_line_start.offset;
4149        for ch in rest_of_line.chars() {
4150            self.reinject_buf.push_back(ch);
4151        }
4152        self.reinject_buf.push_back('\n');
4153        self.reinject_resume_offset = Some(post_heredoc_offset);
4154
4155        HeredocRead {
4156            content,
4157            content_span: Span::from_positions(content_start, content_end),
4158        }
4159    }
4160
4161    fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
4162        let mut chars = self.cursor.rest().chars();
4163        if chars.next() != Some('\n') {
4164            return false;
4165        }
4166
4167        for ch in chars {
4168            if matches!(ch, ' ' | '\t') {
4169                continue;
4170            }
4171            if ch == '\n' {
4172                return false;
4173            }
4174            return matches!(ch, '|' | '&' | ';' | '<' | '>')
4175                || (ch == '#' && self.comments_enabled());
4176        }
4177
4178        false
4179    }
4180}
4181
/// Decides whether `line` terminates a heredoc whose delimiter is `delimiter`.
///
/// With `strip_tabs`, leading tab characters are ignored first. The line
/// matches when it starts with the delimiter and anything after it consists
/// solely of spaces and tabs (which includes the exact-match case).
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    };

    candidate
        .strip_prefix(delimiter)
        .is_some_and(|trailing| trailing.bytes().all(|byte| byte == b' ' || byte == b'\t'))
}
4199
/// A `#` in a heredoc command-line tail opens a comment when nothing precedes
/// it, or when the preceding character is whitespace or one of
/// `;`, `|`, `&`, `<`, `>`, `)`.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(';' | '|' | '&' | '<' | '>' | ')') => true,
        Some(prev) => prev.is_whitespace(),
    }
}
4205
/// Decodes the character that starts at byte `index` of `input`.
///
/// Returns the character together with the byte index just past it, or
/// `None` when `index` is out of range, not on a character boundary, or at
/// the end of the string.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    input
        .get(index..)
        .and_then(|tail| tail.chars().next())
        .map(|ch| (ch, index + ch.len_utf8()))
}
4210
4211fn line_has_unclosed_double_paren(prefix: &str) -> bool {
4212    let mut index = 0usize;
4213    let mut depth = 0usize;
4214    let mut in_single = false;
4215    let mut in_double = false;
4216    let mut in_backtick = false;
4217    let mut escaped = false;
4218
4219    while let Some((ch, next_index)) = next_char_boundary(prefix, index) {
4220        let was_escaped = escaped;
4221        if ch == '\\' && !in_single {
4222            escaped = !escaped;
4223            index = next_index;
4224            continue;
4225        }
4226        escaped = false;
4227
4228        match ch {
4229            '\'' if !in_double && !in_backtick && !was_escaped => in_single = !in_single,
4230            '"' if !in_single && !in_backtick && !was_escaped => in_double = !in_double,
4231            '`' if !in_single && !in_double && !was_escaped => in_backtick = !in_backtick,
4232            '(' if !in_single
4233                && !in_double
4234                && !in_backtick
4235                && !was_escaped
4236                && prefix[next_index..].starts_with('(') =>
4237            {
4238                depth += 1;
4239                index = next_index + '('.len_utf8();
4240                continue;
4241            }
4242            ')' if !in_single
4243                && !in_double
4244                && !in_backtick
4245                && !was_escaped
4246                && prefix[next_index..].starts_with(')') =>
4247            {
4248                depth = depth.saturating_sub(1);
4249                index = next_index + ')'.len_utf8();
4250                continue;
4251            }
4252            _ => {}
4253        }
4254
4255        index = next_index;
4256    }
4257
4258    depth > 0
4259}
4260
4261fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4262    let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4263    let prefix = &input[line_start..index];
4264    line_has_unclosed_double_paren(prefix)
4265}
4266
4267fn hash_starts_comment(input: &str, index: usize) -> bool {
4268    if inside_unclosed_double_paren_on_line(input, index) {
4269        return false;
4270    }
4271
4272    let next = &input[index + '#'.len_utf8()..];
4273    input[..index]
4274        .chars()
4275        .next_back()
4276        .is_none_or(|prev| match prev {
4277            '(' => {
4278                let whitespace_index = next.find(char::is_whitespace);
4279                let close_index = next.find(')');
4280
4281                match (whitespace_index, close_index) {
4282                    (Some(whitespace), Some(close)) => whitespace < close,
4283                    (Some(_), None) | (None, None) => true,
4284                    (None, Some(_)) => false,
4285                }
4286            }
4287            _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4288        })
4289}
4290
/// Reports whether `ch` ends a heredoc delimiter word: it must be unquoted and
/// unescaped, and be either whitespace or one of `|`, `&`, `;`, `<`, `>`,
/// `(`, `)`.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }

    match ch {
        '|' | '&' | ';' | '<' | '>' | '(' | ')' => true,
        other => other.is_whitespace(),
    }
}
4302
4303fn scan_double_quoted_command_substitution_segment(
4304    input: &str,
4305    mut index: usize,
4306    subst_depth: usize,
4307) -> Option<usize> {
4308    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4309        match ch {
4310            '"' => return Some(next_index),
4311            '\\' => {
4312                index = next_index;
4313                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4314                    index = escaped_next;
4315                }
4316            }
4317            '$' if input[next_index..].starts_with('{') => {
4318                let consumed = scan_command_subst_parameter_expansion_len(
4319                    &input[next_index + '{'.len_utf8()..],
4320                    subst_depth,
4321                    0,
4322                )?;
4323                index = next_index + '{'.len_utf8() + consumed;
4324            }
4325            '$' if input[next_index..].starts_with('(')
4326                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4327            {
4328                let consumed = scan_command_substitution_body_len_inner(
4329                    &input[next_index + '('.len_utf8()..],
4330                    subst_depth + 1,
4331                )?;
4332                index = next_index + '('.len_utf8() + consumed;
4333            }
4334            _ => index = next_index,
4335        }
4336    }
4337
4338    None
4339}
4340
4341fn scan_command_subst_parameter_expansion_len(
4342    input: &str,
4343    subst_depth: usize,
4344    parameter_depth: usize,
4345) -> Option<usize> {
4346    if parameter_depth >= MAX_PARAMETER_EXPANSION_SCAN_DEPTH {
4347        return scan_command_subst_parameter_expansion_len_balanced(input, subst_depth);
4348    }
4349
4350    let mut index = 0usize;
4351    let mut in_single = false;
4352    let mut in_double = false;
4353    let mut in_ansi_c_single = false;
4354    let mut in_backtick = false;
4355    let mut escaped = false;
4356    let mut ansi_c_quote_pending = false;
4357
4358    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4359        let was_escaped = escaped;
4360        if ch == '\\' && !in_single {
4361            escaped = !escaped;
4362            index = next_index;
4363            ansi_c_quote_pending = false;
4364            continue;
4365        }
4366        escaped = false;
4367
4368        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
4369            if input[next_index..].starts_with('{')
4370                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
4371                    &input[next_index + '{'.len_utf8()..],
4372                    subst_depth,
4373                    parameter_depth + 1,
4374                )
4375            {
4376                index = next_index + '{'.len_utf8() + consumed;
4377                ansi_c_quote_pending = false;
4378                continue;
4379            }
4380
4381            if input[next_index..].starts_with('(')
4382                && !input[next_index + '('.len_utf8()..].starts_with('(')
4383                && let Some(consumed) = scan_command_substitution_body_len_inner(
4384                    &input[next_index + '('.len_utf8()..],
4385                    subst_depth + 1,
4386                )
4387            {
4388                index = next_index + '('.len_utf8() + consumed;
4389                ansi_c_quote_pending = false;
4390                continue;
4391            }
4392        }
4393
4394        if !in_single
4395            && !in_ansi_c_single
4396            && !in_double
4397            && !in_backtick
4398            && !was_escaped
4399            && matches!(ch, '<' | '>')
4400            && input[next_index..].starts_with('(')
4401            && let Some(consumed) = scan_command_substitution_body_len_inner(
4402                &input[next_index + '('.len_utf8()..],
4403                subst_depth + 1,
4404            )
4405        {
4406            index = next_index + '('.len_utf8() + consumed;
4407            ansi_c_quote_pending = false;
4408            continue;
4409        }
4410
4411        match ch {
4412            '\'' if !in_double && !in_backtick && !was_escaped => {
4413                if in_ansi_c_single {
4414                    in_ansi_c_single = false;
4415                } else if !in_single && ansi_c_quote_pending {
4416                    in_ansi_c_single = true;
4417                } else {
4418                    in_single = !in_single;
4419                }
4420            }
4421            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
4422                in_double = !in_double
4423            }
4424            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
4425                in_backtick = !in_backtick
4426            }
4427            '}' if !in_single
4428                && !in_ansi_c_single
4429                && !in_double
4430                && !in_backtick
4431                && !was_escaped =>
4432            {
4433                return Some(next_index);
4434            }
4435            _ => {}
4436        }
4437
4438        ansi_c_quote_pending = ch == '$'
4439            && !in_single
4440            && !in_ansi_c_single
4441            && !in_double
4442            && !in_backtick
4443            && !was_escaped;
4444        index = next_index;
4445    }
4446
4447    None
4448}
4449
4450fn scan_command_subst_parameter_expansion_len_balanced(
4451    input: &str,
4452    subst_depth: usize,
4453) -> Option<usize> {
4454    let mut index = 0usize;
4455    let mut brace_depth = 1usize;
4456    let mut in_single = false;
4457    let mut in_double = false;
4458    let mut in_ansi_c_single = false;
4459    let mut in_backtick = false;
4460    let mut escaped = false;
4461    let mut ansi_c_quote_pending = false;
4462
4463    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4464        let was_escaped = escaped;
4465        if ch == '\\' && !in_single {
4466            escaped = !escaped;
4467            index = next_index;
4468            ansi_c_quote_pending = false;
4469            continue;
4470        }
4471        escaped = false;
4472
4473        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
4474            if input[next_index..].starts_with('{') {
4475                brace_depth = brace_depth.saturating_add(1);
4476                index = next_index + '{'.len_utf8();
4477                ansi_c_quote_pending = false;
4478                continue;
4479            }
4480
4481            if input[next_index..].starts_with('(')
4482                && !input[next_index + '('.len_utf8()..].starts_with('(')
4483                && let Some(consumed) = scan_command_substitution_body_len_inner(
4484                    &input[next_index + '('.len_utf8()..],
4485                    subst_depth + 1,
4486                )
4487            {
4488                index = next_index + '('.len_utf8() + consumed;
4489                ansi_c_quote_pending = false;
4490                continue;
4491            }
4492        }
4493
4494        if !in_single
4495            && !in_ansi_c_single
4496            && !in_double
4497            && !in_backtick
4498            && !was_escaped
4499            && matches!(ch, '<' | '>')
4500            && input[next_index..].starts_with('(')
4501            && let Some(consumed) = scan_command_substitution_body_len_inner(
4502                &input[next_index + '('.len_utf8()..],
4503                subst_depth + 1,
4504            )
4505        {
4506            index = next_index + '('.len_utf8() + consumed;
4507            ansi_c_quote_pending = false;
4508            continue;
4509        }
4510
4511        match ch {
4512            '\'' if !in_double && !in_backtick && !was_escaped => {
4513                if in_ansi_c_single {
4514                    in_ansi_c_single = false;
4515                } else if !in_single && ansi_c_quote_pending {
4516                    in_ansi_c_single = true;
4517                } else {
4518                    in_single = !in_single;
4519                }
4520            }
4521            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
4522                in_double = !in_double
4523            }
4524            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
4525                in_backtick = !in_backtick
4526            }
4527            '}' if !in_single
4528                && !in_ansi_c_single
4529                && !in_double
4530                && !in_backtick
4531                && !was_escaped =>
4532            {
4533                brace_depth = brace_depth.saturating_sub(1);
4534                if brace_depth == 0 {
4535                    return Some(next_index);
4536                }
4537            }
4538            _ => {}
4539        }
4540
4541        ansi_c_quote_pending = ch == '$'
4542            && !in_single
4543            && !in_ansi_c_single
4544            && !in_double
4545            && !in_backtick
4546            && !was_escaped;
4547        index = next_index;
4548    }
4549
4550    None
4551}
4552
4553fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
4554    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4555        if !matches!(ch, ' ' | '\t') {
4556            break;
4557        }
4558        index = next_index;
4559    }
4560
4561    let start = index;
4562    let mut cooked = String::new();
4563    let mut in_single = false;
4564    let mut in_double = false;
4565    let mut escaped = false;
4566
4567    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4568        if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
4569            break;
4570        }
4571
4572        index = next_index;
4573        if escaped {
4574            cooked.push(ch);
4575            escaped = false;
4576            continue;
4577        }
4578
4579        match ch {
4580            '\\' if !in_single => escaped = true,
4581            '\'' if !in_double => in_single = !in_single,
4582            '"' if !in_single => in_double = !in_double,
4583            _ => cooked.push(ch),
4584        }
4585    }
4586
4587    (index > start).then_some((index, cooked))
4588}
4589
4590fn skip_command_subst_pending_heredoc(
4591    input: &str,
4592    mut index: usize,
4593    delimiter: &str,
4594    strip_tabs: bool,
4595) -> usize {
4596    while index <= input.len() {
4597        let rest = &input[index..];
4598        let line_len = rest.find('\n').unwrap_or(rest.len());
4599        let line = &rest[..line_len];
4600        let has_newline = line_len < rest.len();
4601
4602        index += line_len;
4603        if has_newline {
4604            index += '\n'.len_utf8();
4605        }
4606
4607        if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4608            return index;
4609        }
4610    }
4611
4612    index
4613}
4614
4615fn scan_command_subst_ansi_c_single_quoted_segment(
4616    input: &str,
4617    quote_index: usize,
4618) -> Option<usize> {
4619    let mut index = quote_index + '\''.len_utf8();
4620
4621    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4622        index = next_index;
4623        if ch == '\\' {
4624            if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4625                index = escaped_next;
4626            }
4627            continue;
4628        }
4629
4630        if ch == '\'' {
4631            return Some(index);
4632        }
4633    }
4634
4635    None
4636}
4637
4638fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
4639    let mut index = start;
4640
4641    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4642        index = next_index;
4643        if ch == '\\' {
4644            if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4645                index = escaped_next;
4646            }
4647            continue;
4648        }
4649
4650        if ch == '`' {
4651            return Some(index);
4652        }
4653    }
4654
4655    None
4656}
4657
4658fn flush_scanned_command_subst_keyword(
4659    current_word: &mut String,
4660    pending_case_headers: &mut usize,
4661    case_clause_depths: &mut SmallVec<[usize; 4]>,
4662    depth: usize,
4663    word_started_at_command_start: &mut bool,
4664) {
4665    if current_word.is_empty() {
4666        *word_started_at_command_start = false;
4667        return;
4668    }
4669
4670    match current_word.as_str() {
4671        "case" if *word_started_at_command_start => *pending_case_headers += 1,
4672        "in" if *pending_case_headers > 0 => {
4673            *pending_case_headers -= 1;
4674            case_clause_depths.push(depth);
4675        }
4676        "esac" if *word_started_at_command_start => {
4677            case_clause_depths.pop();
4678        }
4679        _ => {}
4680    }
4681
4682    current_word.clear();
4683    *word_started_at_command_start = false;
4684}
4685
4686pub(super) fn scan_command_substitution_body_len_inner(
4687    input: &str,
4688    subst_depth: usize,
4689) -> Option<usize> {
4690    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
4691        return None;
4692    }
4693
4694    let mut index = 0usize;
4695    let mut depth = 1;
4696    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
4697    let mut pending_case_headers = 0usize;
4698    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
4699    let mut current_word = String::with_capacity(16);
4700    let mut at_command_start = true;
4701    let mut expecting_redirection_target = false;
4702    let mut current_word_started_at_command_start = false;
4703
4704    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4705        match ch {
4706            '#' if hash_starts_comment(input, index) => {
4707                let had_word = !current_word.is_empty();
4708                flush_scanned_command_subst_keyword(
4709                    &mut current_word,
4710                    &mut pending_case_headers,
4711                    &mut case_clause_depths,
4712                    depth,
4713                    &mut current_word_started_at_command_start,
4714                );
4715                if had_word && expecting_redirection_target {
4716                    expecting_redirection_target = false;
4717                }
4718                index = next_index;
4719                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
4720                    index = comment_next;
4721                    if comment_ch == '\n' {
4722                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
4723                            index = skip_command_subst_pending_heredoc(
4724                                input, index, &delimiter, strip_tabs,
4725                            );
4726                        }
4727                        at_command_start = true;
4728                        expecting_redirection_target = false;
4729                        break;
4730                    }
4731                }
4732            }
4733            '(' => {
4734                flush_scanned_command_subst_keyword(
4735                    &mut current_word,
4736                    &mut pending_case_headers,
4737                    &mut case_clause_depths,
4738                    depth,
4739                    &mut current_word_started_at_command_start,
4740                );
4741                depth += 1;
4742                index = next_index;
4743                at_command_start = true;
4744                expecting_redirection_target = false;
4745            }
4746            ')' => {
4747                flush_scanned_command_subst_keyword(
4748                    &mut current_word,
4749                    &mut pending_case_headers,
4750                    &mut case_clause_depths,
4751                    depth,
4752                    &mut current_word_started_at_command_start,
4753                );
4754                if case_clause_depths
4755                    .last()
4756                    .is_some_and(|case_depth| *case_depth == depth)
4757                {
4758                    index = next_index;
4759                    at_command_start = true;
4760                    expecting_redirection_target = false;
4761                    continue;
4762                }
4763                depth -= 1;
4764                index = next_index;
4765                if depth == 0 {
4766                    return Some(index);
4767                }
4768                at_command_start = false;
4769                expecting_redirection_target = false;
4770            }
4771            '"' => {
4772                let had_word = !current_word.is_empty();
4773                flush_scanned_command_subst_keyword(
4774                    &mut current_word,
4775                    &mut pending_case_headers,
4776                    &mut case_clause_depths,
4777                    depth,
4778                    &mut current_word_started_at_command_start,
4779                );
4780                if had_word && expecting_redirection_target {
4781                    expecting_redirection_target = false;
4782                }
4783                index = scan_double_quoted_command_substitution_segment(
4784                    input,
4785                    next_index,
4786                    subst_depth,
4787                )?;
4788                if expecting_redirection_target {
4789                    expecting_redirection_target = false;
4790                } else {
4791                    at_command_start = false;
4792                }
4793            }
4794            '\'' => {
4795                let had_word = !current_word.is_empty();
4796                flush_scanned_command_subst_keyword(
4797                    &mut current_word,
4798                    &mut pending_case_headers,
4799                    &mut case_clause_depths,
4800                    depth,
4801                    &mut current_word_started_at_command_start,
4802                );
4803                if had_word && expecting_redirection_target {
4804                    expecting_redirection_target = false;
4805                }
4806                index = next_index;
4807                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
4808                    index = quoted_next;
4809                    if quoted_ch == '\'' {
4810                        break;
4811                    }
4812                }
4813                if expecting_redirection_target {
4814                    expecting_redirection_target = false;
4815                } else {
4816                    at_command_start = false;
4817                }
4818            }
4819            '`' => {
4820                let had_word = !current_word.is_empty();
4821                flush_scanned_command_subst_keyword(
4822                    &mut current_word,
4823                    &mut pending_case_headers,
4824                    &mut case_clause_depths,
4825                    depth,
4826                    &mut current_word_started_at_command_start,
4827                );
4828                if had_word && expecting_redirection_target {
4829                    expecting_redirection_target = false;
4830                }
4831                index = scan_command_subst_backtick_segment(input, next_index)?;
4832                if expecting_redirection_target {
4833                    expecting_redirection_target = false;
4834                } else {
4835                    at_command_start = false;
4836                }
4837            }
4838            '$' if input[next_index..].starts_with('\'') => {
4839                let had_word = !current_word.is_empty();
4840                flush_scanned_command_subst_keyword(
4841                    &mut current_word,
4842                    &mut pending_case_headers,
4843                    &mut case_clause_depths,
4844                    depth,
4845                    &mut current_word_started_at_command_start,
4846                );
4847                if had_word && expecting_redirection_target {
4848                    expecting_redirection_target = false;
4849                }
4850                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
4851                if expecting_redirection_target {
4852                    expecting_redirection_target = false;
4853                } else {
4854                    at_command_start = false;
4855                }
4856            }
4857            '\\' => {
4858                let had_word = !current_word.is_empty();
4859                flush_scanned_command_subst_keyword(
4860                    &mut current_word,
4861                    &mut pending_case_headers,
4862                    &mut case_clause_depths,
4863                    depth,
4864                    &mut current_word_started_at_command_start,
4865                );
4866                if had_word && expecting_redirection_target {
4867                    expecting_redirection_target = false;
4868                }
4869                index = next_index;
4870                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4871                    index = escaped_next;
4872                }
4873                if expecting_redirection_target {
4874                    expecting_redirection_target = false;
4875                } else {
4876                    at_command_start = false;
4877                }
4878            }
4879            '>' => {
4880                let word_was_redirection_fd = current_word_started_at_command_start
4881                    && !current_word.is_empty()
4882                    && current_word.chars().all(|current| current.is_ascii_digit());
4883                flush_scanned_command_subst_keyword(
4884                    &mut current_word,
4885                    &mut pending_case_headers,
4886                    &mut case_clause_depths,
4887                    depth,
4888                    &mut current_word_started_at_command_start,
4889                );
4890                if word_was_redirection_fd {
4891                    at_command_start = true;
4892                }
4893                index = next_index;
4894                expecting_redirection_target = true;
4895            }
4896            '<' if input[next_index..].starts_with('<') => {
4897                let word_was_redirection_fd = current_word_started_at_command_start
4898                    && !current_word.is_empty()
4899                    && current_word.chars().all(|current| current.is_ascii_digit());
4900                let had_word = !current_word.is_empty();
4901                flush_scanned_command_subst_keyword(
4902                    &mut current_word,
4903                    &mut pending_case_headers,
4904                    &mut case_clause_depths,
4905                    depth,
4906                    &mut current_word_started_at_command_start,
4907                );
4908                if had_word && expecting_redirection_target {
4909                    expecting_redirection_target = false;
4910                }
4911                if word_was_redirection_fd {
4912                    at_command_start = true;
4913                }
4914                if inside_unclosed_double_paren_on_line(input, index) {
4915                    index = next_index + '<'.len_utf8();
4916                    continue;
4917                }
4918
4919                if input[next_index + '<'.len_utf8()..].starts_with('<') {
4920                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
4921                    expecting_redirection_target = true;
4922                    continue;
4923                }
4924
4925                let strip_tabs = input[next_index..].starts_with("<-");
4926                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
4927                if let Some((delimiter_index, delimiter)) =
4928                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
4929                {
4930                    pending_heredocs.push((delimiter, strip_tabs));
4931                    index = delimiter_index;
4932                    expecting_redirection_target = false;
4933                } else {
4934                    index = next_index;
4935                    expecting_redirection_target = true;
4936                }
4937            }
4938            '\n' => {
4939                flush_scanned_command_subst_keyword(
4940                    &mut current_word,
4941                    &mut pending_case_headers,
4942                    &mut case_clause_depths,
4943                    depth,
4944                    &mut current_word_started_at_command_start,
4945                );
4946                index = next_index;
4947                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
4948                    index =
4949                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
4950                }
4951                at_command_start = true;
4952                expecting_redirection_target = false;
4953            }
4954            '$' if input[next_index..].starts_with('{') => {
4955                let had_word = !current_word.is_empty();
4956                flush_scanned_command_subst_keyword(
4957                    &mut current_word,
4958                    &mut pending_case_headers,
4959                    &mut case_clause_depths,
4960                    depth,
4961                    &mut current_word_started_at_command_start,
4962                );
4963                if had_word && expecting_redirection_target {
4964                    expecting_redirection_target = false;
4965                }
4966                let consumed = scan_command_subst_parameter_expansion_len(
4967                    &input[next_index + '{'.len_utf8()..],
4968                    subst_depth,
4969                    0,
4970                )?;
4971                index = next_index + '{'.len_utf8() + consumed;
4972                if expecting_redirection_target {
4973                    expecting_redirection_target = false;
4974                } else {
4975                    at_command_start = false;
4976                }
4977            }
4978            '$' if input[next_index..].starts_with('(')
4979                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4980            {
4981                let had_word = !current_word.is_empty();
4982                flush_scanned_command_subst_keyword(
4983                    &mut current_word,
4984                    &mut pending_case_headers,
4985                    &mut case_clause_depths,
4986                    depth,
4987                    &mut current_word_started_at_command_start,
4988                );
4989                if had_word && expecting_redirection_target {
4990                    expecting_redirection_target = false;
4991                }
4992                let consumed = scan_command_substitution_body_len_inner(
4993                    &input[next_index + '('.len_utf8()..],
4994                    subst_depth + 1,
4995                )?;
4996                index = next_index + '('.len_utf8() + consumed;
4997                if expecting_redirection_target {
4998                    expecting_redirection_target = false;
4999                } else {
5000                    at_command_start = false;
5001                }
5002            }
5003            _ => {
5004                if ch.is_ascii_alphanumeric() || ch == '_' {
5005                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
5006                    {
5007                        current_word_started_at_command_start = true;
5008                        at_command_start = false;
5009                    }
5010                    current_word.push(ch);
5011                } else {
5012                    let had_word = !current_word.is_empty();
5013                    flush_scanned_command_subst_keyword(
5014                        &mut current_word,
5015                        &mut pending_case_headers,
5016                        &mut case_clause_depths,
5017                        depth,
5018                        &mut current_word_started_at_command_start,
5019                    );
5020                    if had_word && expecting_redirection_target {
5021                        expecting_redirection_target = false;
5022                    }
5023                    match ch {
5024                        ' ' | '\t' => {}
5025                        ';' | '|' | '&' => {
5026                            at_command_start = true;
5027                            expecting_redirection_target = false;
5028                        }
5029                        _ => {
5030                            if !expecting_redirection_target {
5031                                at_command_start = false;
5032                            }
5033                        }
5034                    }
5035                }
5036                index = next_index;
5037            }
5038        }
5039    }
5040
5041    None
5042}
5043
5044pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
5045    scan_command_substitution_body_len_inner(input, 0)
5046}
5047
5048#[cfg(test)]
5049mod tests {
5050    use super::*;
5051
5052    fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
5053        match token.kind {
5054            kind if kind.is_word_like() => token.word_string(),
5055            TokenKind::Comment => token
5056                .span
5057                .slice(source)
5058                .strip_prefix('#')
5059                .map(str::to_string),
5060            TokenKind::Error => token
5061                .error_kind()
5062                .map(LexerErrorKind::message)
5063                .map(str::to_string),
5064            _ => None,
5065        }
5066    }
5067
5068    fn assert_next_token(
5069        lexer: &mut Lexer<'_>,
5070        expected_kind: TokenKind,
5071        expected_text: Option<&str>,
5072    ) {
5073        let token = lexer.next_lexed_token().unwrap();
5074        assert_eq!(token.kind, expected_kind);
5075        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
5076    }
5077
5078    fn assert_next_token_with_comments(
5079        lexer: &mut Lexer<'_>,
5080        expected_kind: TokenKind,
5081        expected_text: Option<&str>,
5082    ) {
5083        let token = lexer.next_lexed_token_with_comments().unwrap();
5084        assert_eq!(token.kind, expected_kind);
5085        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
5086    }
5087
5088    fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
5089        let mut lexer = Lexer::new(input);
5090
5091        while let Some(token) = lexer.next_lexed_token() {
5092            if token.kind == TokenKind::Newline {
5093                continue;
5094            }
5095
5096            assert_eq!(
5097                token.span.start.line, token.span.end.line,
5098                "token should stay on one line: {:?}",
5099                token
5100            );
5101        }
5102    }
5103
5104    #[test]
5105    fn test_simple_words() {
5106        let mut lexer = Lexer::new("echo hello world");
5107
5108        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5109        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5110        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5111        assert!(lexer.next_lexed_token().is_none());
5112    }
5113
5114    #[test]
5115    fn test_single_quoted_string() {
5116        let mut lexer = Lexer::new("echo 'hello world'");
5117
5118        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5119        // Single-quoted strings return LiteralWord (no variable expansion)
5120        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("hello world"));
5121        assert!(lexer.next_lexed_token().is_none());
5122    }
5123
5124    #[test]
5125    fn test_double_quoted_string() {
5126        let mut lexer = Lexer::new("echo \"hello world\"");
5127
5128        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5129        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("hello world"));
5130        assert!(lexer.next_lexed_token().is_none());
5131    }
5132
5133    #[test]
5134    fn test_brace_expansion_token_ignores_quoted_closers() {
5135        let mut lexer = Lexer::new("echo {\"}\",a}\n");
5136
5137        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5138        assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{"}",a}"#));
5139        assert_next_token(&mut lexer, TokenKind::Newline, None);
5140        assert!(lexer.next_lexed_token().is_none());
5141    }
5142
5143    #[test]
5144    fn test_brace_expansion_token_preserves_single_quoted_backslash_member_boundary() {
5145        let mut lexer = Lexer::new("echo {'a\\',b} next\n");
5146
5147        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5148        assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{'a\',b}"#));
5149        assert_next_token(&mut lexer, TokenKind::Word, Some("next"));
5150        assert_next_token(&mut lexer, TokenKind::Newline, None);
5151        assert!(lexer.next_lexed_token().is_none());
5152    }
5153
5154    #[test]
5155    fn test_double_quoted_expansion_token_keeps_source_backing() {
5156        let source = r#""$bar""#;
5157        let mut lexer = Lexer::new(source);
5158
5159        let token = lexer.next_lexed_token().unwrap();
5160        assert_eq!(token.kind, TokenKind::QuotedWord);
5161        assert_eq!(token.word_text(), Some("$bar"));
5162
5163        let word = token.word().unwrap();
5164        let segment = word.single_segment().unwrap();
5165        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
5166        assert_eq!(segment.span().unwrap().slice(source), "$bar");
5167    }
5168
5169    #[test]
5170    fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
5171        let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
5172        let mut lexer = Lexer::new(source);
5173
5174        let token = lexer.next_lexed_token().unwrap();
5175        assert_eq!(token.kind, TokenKind::QuotedWord);
5176        assert_eq!(
5177            token.word_text(),
5178            Some(r#"$(echo "$line" | cut -d' ' -f2-)"#)
5179        );
5180    }
5181
5182    #[test]
5183    fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
5184        let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
5185        let mut lexer = Lexer::new(source);
5186
5187        let token = lexer.next_lexed_token().unwrap();
5188        assert_eq!(token.kind, TokenKind::QuotedWord);
5189        assert_eq!(
5190            token.word_text(),
5191            Some(r#"$(echo "${@}" | tr -d '[:space:]')"#)
5192        );
5193    }
5194
5195    #[test]
5196    fn test_deep_command_substitution_preserves_simple_parameter_expansion() {
5197        let source = r#""$(echo "$(echo "$(echo "$(echo "${name}")")")")""#;
5198        let mut lexer = Lexer::new(source);
5199
5200        let token = lexer.next_lexed_token().unwrap();
5201        assert_eq!(token.kind, TokenKind::QuotedWord);
5202        assert_eq!(
5203            token.word_text(),
5204            Some(r#"$(echo "$(echo "$(echo "$(echo "${name}")")")")"#)
5205        );
5206    }
5207
5208    #[test]
5209    fn test_command_substitution_preserves_deep_parameter_operand_paren() {
5210        let source = r#""$(echo "${a:-${b:-${c:-${d:-${e:-x})}}}}")""#;
5211        let mut lexer = Lexer::new(source);
5212
5213        let token = lexer.next_lexed_token().unwrap();
5214        assert_eq!(token.kind, TokenKind::QuotedWord);
5215        assert_eq!(
5216            token.word_text(),
5217            Some(r#"$(echo "${a:-${b:-${c:-${d:-${e:-x})}}}}")"#)
5218        );
5219    }
5220
5221    #[test]
5222    fn test_mixed_word_keeps_segment_kinds() {
5223        let source = r#"foo"bar"'baz'"#;
5224        let mut lexer = Lexer::new(source);
5225
5226        let token = lexer.next_lexed_token().unwrap();
5227        assert_eq!(token.kind, TokenKind::Word);
5228
5229        let word = token.word().unwrap();
5230        let segments: Vec<_> = word
5231            .segments()
5232            .map(|segment| (segment.kind(), segment.as_str().to_string()))
5233            .collect();
5234
5235        assert_eq!(
5236            segments,
5237            vec![
5238                (LexedWordSegmentKind::Plain, "foo".to_string()),
5239                (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
5240                (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
5241            ]
5242        );
5243        assert_eq!(word.joined_text(), "foobarbaz");
5244        assert_eq!(
5245            word.segments()
5246                .next()
5247                .and_then(LexedWordSegment::span)
5248                .unwrap()
5249                .slice(source),
5250            "foo"
5251        );
5252    }
5253
5254    #[test]
5255    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
5256        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";
5257
5258        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5259        let body = &source[..consumed];
5260
5261        assert!(body.contains("field, direction"));
5262        assert!(body.ends_with(')'));
5263    }
5264
5265    #[test]
5266    fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
5267        let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";
5268
5269        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5270        let body = &source[..consumed];
5271
5272        assert!(body.contains("printf '%s' y"));
5273        assert!(body.ends_with(')'));
5274    }
5275
5276    #[test]
5277    fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
5278        let source = " (# comment with )\nprintf %s 1,2\n) )\"";
5279
5280        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5281        let body = &source[..consumed];
5282
5283        assert!(body.contains("printf %s 1,2"));
5284        assert!(body.ends_with(')'));
5285    }
5286
5287    #[test]
5288    fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
5289        let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";
5290
5291        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5292        let body = &source[..consumed];
5293
5294        assert!(body.contains("field, direction"));
5295        assert!(body.ends_with(')'));
5296    }
5297
5298    #[test]
5299    fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
5300        let source = "printf %s ${x//foo/)},1)\"";
5301
5302        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5303        let body = &source[..consumed];
5304
5305        assert!(body.contains("${x//foo/)},1"));
5306        assert!(body.ends_with(')'));
5307    }
5308
5309    #[test]
5310    fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
5311        let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";
5312
5313        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5314        let body = &source[..consumed];
5315
5316        assert!(body.contains("printf %s 1,2"));
5317        assert!(body.ends_with(')'));
5318    }
5319
5320    #[test]
5321    fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
5322        let source = "[[ \"$buf\" == (#b)(*) ]]";
5323        let index = source.find('#').expect("expected hash");
5324
5325        assert!(!hash_starts_comment(source, index));
5326    }
5327
5328    #[test]
5329    fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
5330        let source = "(#comment with )";
5331        let index = source.find('#').expect("expected hash");
5332
5333        assert!(hash_starts_comment(source, index));
5334    }
5335
5336    #[test]
5337    fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
5338        let source = "(( #c < 256 ))";
5339        let index = source.find('#').expect("expected hash");
5340
5341        assert!(!hash_starts_comment(source, index));
5342    }
5343
5344    #[test]
5345    fn test_hash_starts_comment_respects_quoted_double_parens() {
5346        let source = "printf '((' # comment";
5347        let index = source.find('#').expect("expected hash");
5348
5349        assert!(hash_starts_comment(source, index));
5350    }
5351
5352    #[test]
5353    fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
5354        let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";
5355
5356        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5357        let body = &source[..consumed];
5358
5359        assert!(body.contains("printf %s 1,2"));
5360        assert!(body.ends_with(')'));
5361    }
5362
5363    #[test]
5364    fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
5365        let source = " (#comment with )\nprintf %s 1,2\n) )\"";
5366
5367        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5368        let body = &source[..consumed];
5369
5370        assert!(body.contains("printf %s 1,2"));
5371        assert!(body.ends_with(')'));
5372    }
5373
5374    #[test]
5375    fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
5376        let source = "((x<<2))\nprintf %s 1,2\n)\"";
5377
5378        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5379        let body = &source[..consumed];
5380
5381        assert!(body.contains("printf %s 1,2"));
5382        assert!(body.ends_with(')'));
5383    }
5384
5385    #[test]
5386    fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
5387        let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";
5388
5389        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5390        let body = &source[..consumed];
5391
5392        assert!(body.contains("printf %s 1,2"));
5393        assert!(body.ends_with("))"));
5394    }
5395
5396    #[test]
5397    fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
5398        let source = "printf %s 1,2; echo case in)\"";
5399
5400        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5401        let body = &source[..consumed];
5402
5403        assert!(body.contains("echo case in"));
5404        assert!(body.ends_with(')'));
5405    }
5406
5407    #[test]
5408    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
5409        let source = "printf %s $'a\\'b'; printf %s 1,2)\"";
5410
5411        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5412        let body = &source[..consumed];
5413
5414        assert!(body.contains("$'a\\'b'"));
5415        assert!(body.contains("printf %s 1,2"));
5416        assert!(body.ends_with(')'));
5417    }
5418
5419    #[test]
5420    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
5421        let source = "printf %s `echo foo)`; printf %s ok)\"";
5422
5423        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5424        let body = &source[..consumed];
5425
5426        assert!(body.contains("`echo foo)`"));
5427        assert!(body.contains("printf %s ok"));
5428        assert!(body.ends_with(')'));
5429    }
5430
5431    #[test]
5432    fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
5433        let source = "printf %s ${x/`echo }`/foo)},1)\"";
5434
5435        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5436        let body = &source[..consumed];
5437
5438        assert!(body.contains("${x/`echo }`/foo)},1"));
5439        assert!(body.ends_with(')'));
5440    }
5441
5442    #[test]
5443    fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
5444     {
5445        let source = "printf %s ${x/<(echo })/foo)},1)\"";
5446
5447        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5448        let body = &source[..consumed];
5449
5450        assert!(body.contains("${x/<(echo })/foo)},1"));
5451        assert!(body.ends_with(')'));
5452    }
5453
5454    #[test]
5455    fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
5456        let source = "printf %s 1,2; echo case in)";
5457
5458        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5459        let body = &source[..consumed];
5460
5461        assert_eq!(body, source);
5462    }
5463
5464    #[test]
5465    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
5466        let source = "printf %s $'a\\'b'; printf %s 1,2)";
5467
5468        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5469        let body = &source[..consumed];
5470
5471        assert_eq!(body, source);
5472    }
5473
5474    #[test]
5475    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
5476        let source = "printf %s `echo foo)`; printf %s ok)";
5477
5478        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5479        let body = &source[..consumed];
5480
5481        assert_eq!(body, source);
5482    }
5483
5484    #[test]
5485    fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
5486        let source = "echo \"$line\" | cut -d' ' -f2-)";
5487
5488        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5489        let body = &source[..consumed];
5490
5491        assert_eq!(body, source);
5492    }
5493
5494    #[test]
5495    fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
5496        let source = "echo \"${@}\" | tr -d '[:space:]')";
5497
5498        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5499        let body = &source[..consumed];
5500
5501        assert_eq!(body, source);
5502    }
5503
5504    #[test]
5505    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
5506        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";
5507
5508        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5509        let body = &source[..consumed];
5510
5511        assert_eq!(body, source);
5512    }
5513
5514    #[test]
5515    fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
5516        let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";
5517
5518        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5519        let body = &source[..consumed];
5520
5521        assert_eq!(body, source);
5522    }
5523
5524    #[test]
5525    fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
5526        let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
5527        let mut lexer = Lexer::new(source);
5528
5529        let first = lexer.next_lexed_token().expect("expected first token");
5530        assert!(first.kind.is_word_like(), "{:?}", first.kind);
5531        assert_eq!(first.word_string().as_deref(), Some("echo"));
5532
5533        let second = lexer.next_lexed_token().expect("expected second token");
5534        assert!(second.kind.is_word_like(), "{:?}", second.kind);
5535        assert_eq!(
5536            second.word_string().as_deref(),
5537            Some("$(echo \"$(( $(printf ')') + 1 ))\")")
5538        );
5539    }
5540
5541    #[test]
5542    fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
5543        let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
5544        let start = source.find("$(").expect("expected command substitution") + 2;
5545        let consumed =
5546            scan_command_substitution_body_len(&source[start..]).expect("expected match");
5547        assert_eq!(&source[start..start + consumed], "echo $var | tr A-Z a-z)");
5548    }
5549
5550    #[test]
5551    fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
5552        let source = "echo $(echo $(basename $filename .fuzz))";
5553        let start = source.find("$(").expect("expected command substitution") + 2;
5554        let consumed =
5555            scan_command_substitution_body_len(&source[start..]).expect("expected match");
5556        assert_eq!(
5557            &source[start..start + consumed],
5558            "echo $(basename $filename .fuzz))"
5559        );
5560    }
5561
5562    #[test]
5563    fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
5564        let source = "\n       [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n    )";
5565        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5566        assert_eq!(consumed, source.len());
5567    }
5568
5569    #[test]
5570    fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
5571        let source = "'foo'bar";
5572        let mut lexer = Lexer::new(source);
5573
5574        let token = lexer.next_lexed_token().unwrap();
5575        assert_eq!(token.kind, TokenKind::LiteralWord);
5576
5577        let word = token.word().unwrap();
5578        let segments: Vec<_> = word
5579            .segments()
5580            .map(|segment| (segment.kind(), segment.as_str().to_string()))
5581            .collect();
5582
5583        assert_eq!(
5584            segments,
5585            vec![
5586                (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
5587                (LexedWordSegmentKind::Plain, "bar".to_string()),
5588            ]
5589        );
5590        assert_eq!(word.joined_text(), "foobar");
5591        assert_eq!(
5592            word.segments()
5593                .nth(1)
5594                .and_then(LexedWordSegment::span)
5595                .unwrap()
5596                .slice(source),
5597            "bar"
5598        );
5599    }
5600
5601    #[test]
5602    fn test_unquoted_command_substitution_word_keeps_source_backing() {
5603        let source = "$(printf hi)";
5604        let mut lexer = Lexer::new(source);
5605
5606        let token = lexer.next_lexed_token().unwrap();
5607        assert_eq!(token.kind, TokenKind::Word);
5608
5609        let word = token.word().unwrap();
5610        let segment = word.single_segment().unwrap();
5611        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5612        assert_eq!(segment.as_str(), source);
5613        assert_eq!(segment.span().unwrap().slice(source), source);
5614    }
5615
5616    #[test]
5617    fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
5618        let source = "${arr[$RANDOM % ${#arr[@]}]}";
5619        let mut lexer = Lexer::new(source);
5620
5621        let token = lexer.next_lexed_token().unwrap();
5622        assert_eq!(token.kind, TokenKind::Word);
5623
5624        let word = token.word().unwrap();
5625        let segment = word.single_segment().unwrap();
5626        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5627        assert_eq!(segment.as_str(), source);
5628        assert_eq!(segment.span().unwrap().slice(source), source);
5629    }
5630
5631    #[test]
5632    fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
5633        let source = "\"foo\"$(printf hi)";
5634        let mut lexer = Lexer::new(source);
5635
5636        let token = lexer.next_lexed_token().unwrap();
5637        assert_eq!(token.kind, TokenKind::Word);
5638
5639        let word = token.word().unwrap();
5640        let continuation = word.segments().nth(1).unwrap();
5641        assert_eq!(continuation.kind(), LexedWordSegmentKind::Plain);
5642        assert_eq!(continuation.as_str(), "$(printf hi)");
5643        assert_eq!(continuation.span().unwrap().slice(source), "$(printf hi)");
5644    }
5645
5646    #[test]
5647    fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
5648        let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
5649        let mut lexer = Lexer::new(source);
5650
5651        let token = lexer.next_lexed_token().unwrap();
5652        assert_eq!(token.kind, TokenKind::QuotedWord);
5653
5654        let word = token.word().unwrap();
5655        let segment = word.single_segment().unwrap();
5656        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
5657        assert_eq!(segment.as_str(), "${arr[$RANDOM % ${#arr[@]}]}");
5658        assert_eq!(
5659            segment.span().unwrap().slice(source),
5660            "${arr[$RANDOM % ${#arr[@]}]}"
5661        );
5662    }
5663
5664    #[test]
5665    fn test_ansi_c_control_escape_can_consume_quote() {
5666        let mut lexer = Lexer::new("echo $'\\c''");
5667
5668        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5669        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("\x07"));
5670        assert!(lexer.next_lexed_token().is_none());
5671    }
5672
5673    #[test]
5674    fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
5675        let source = r#"out_line="${out_line//'"'/'\"'}"
5676"#;
5677        let mut lexer = Lexer::new(source);
5678
5679        assert_next_token(
5680            &mut lexer,
5681            TokenKind::Word,
5682            Some(r#"out_line=${out_line//'"'/'"'}"#),
5683        );
5684        assert_next_token(&mut lexer, TokenKind::Newline, None);
5685        assert!(lexer.next_lexed_token().is_none());
5686    }
5687
5688    #[test]
5689    fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
5690        let source = r#"out_line="${out_line//'"'/'\"'}"
5691echo "Error: Missing python3!"
5692cat << 'EOF' > "${pywrapper}"
5693import os
5694EOF
5695"#;
5696        let mut lexer = Lexer::new(source);
5697
5698        assert_next_token(
5699            &mut lexer,
5700            TokenKind::Word,
5701            Some(r#"out_line=${out_line//'"'/'"'}"#),
5702        );
5703        assert_next_token(&mut lexer, TokenKind::Newline, None);
5704        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5705        assert_next_token(
5706            &mut lexer,
5707            TokenKind::QuotedWord,
5708            Some("Error: Missing python3!"),
5709        );
5710        assert_next_token(&mut lexer, TokenKind::Newline, None);
5711        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5712        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5713        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("EOF"));
5714        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5715        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("${pywrapper}"));
5716    }
5717
5718    #[test]
5719    fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
5720        let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
5721        let mut lexer = Lexer::new(source);
5722
5723        let token = lexer.next_lexed_token().unwrap();
5724        assert_eq!(token.kind, TokenKind::Word);
5725        assert_eq!(token.span.slice(source), "crypt=${crypt//\\\\/\\\\\\\\}");
5726        assert!(token.source_slice(source).is_none());
5727        assert_eq!(
5728            token.word_string().as_deref(),
5729            Some("crypt=${crypt//\\/\\\\}")
5730        );
5731        assert_next_token(&mut lexer, TokenKind::Newline, None);
5732        assert!(lexer.next_lexed_token().is_none());
5733    }
5734
5735    #[test]
5736    fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
5737        let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n  if true; then\n    txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n  fi\n}\n";
5738        let mut lexer = Lexer::new(source);
5739
5740        assert_next_token(
5741            &mut lexer,
5742            TokenKind::Word,
5743            Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
5744        );
5745        assert_next_token(&mut lexer, TokenKind::Newline, None);
5746        assert_next_token(&mut lexer, TokenKind::Newline, None);
5747        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5748        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5749        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5750        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
5751        assert_next_token(&mut lexer, TokenKind::Newline, None);
5752        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5753        assert_next_token(&mut lexer, TokenKind::Word, Some("true"));
5754        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5755        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5756        assert_next_token(&mut lexer, TokenKind::Newline, None);
5757        assert_next_token(
5758            &mut lexer,
5759            TokenKind::Word,
5760            Some(
5761                "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
5762            ),
5763        );
5764        assert_next_token(&mut lexer, TokenKind::Newline, None);
5765        assert_next_token(&mut lexer, TokenKind::Word, Some("fi"));
5766        assert_next_token(&mut lexer, TokenKind::Newline, None);
5767        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
5768        assert_next_token(&mut lexer, TokenKind::Newline, None);
5769        assert!(lexer.next_lexed_token().is_none());
5770    }
5771
5772    #[test]
5773    fn test_case_pattern_literal_left_brace_does_not_swallow_following_arms() {
5774        let source = "case \"$word\" in\n  {) : ;;\n  :) : ;;\nesac\n";
5775        let mut lexer = Lexer::new(source);
5776
5777        assert_next_token(&mut lexer, TokenKind::Word, Some("case"));
5778        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$word"));
5779        assert_next_token(&mut lexer, TokenKind::Word, Some("in"));
5780        assert_next_token(&mut lexer, TokenKind::Newline, None);
5781        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
5782        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5783        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5784        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
5785        assert_next_token(&mut lexer, TokenKind::Newline, None);
5786        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5787        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5788        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5789        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
5790        assert_next_token(&mut lexer, TokenKind::Newline, None);
5791        assert_next_token(&mut lexer, TokenKind::Word, Some("esac"));
5792        assert_next_token(&mut lexer, TokenKind::Newline, None);
5793        assert!(lexer.next_lexed_token().is_none());
5794    }
5795
5796    #[test]
5797    fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
5798        let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
5799        let mut lexer = Lexer::new(source);
5800
5801        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5802        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5803        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5804        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5805        assert_next_token(&mut lexer, TokenKind::And, None);
5806        assert_next_token(&mut lexer, TokenKind::Word, Some("!"));
5807        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5808        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5809        assert_next_token(&mut lexer, TokenKind::Word, Some("=~"));
5810        assert_next_token(&mut lexer, TokenKind::Word, Some("^{"));
5811        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5812        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5813        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5814        assert_next_token(&mut lexer, TokenKind::Newline, None);
5815        assert!(lexer.next_lexed_token().is_none());
5816    }
5817
5818    #[test]
5819    fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
5820        let source = "echo -{$(echo a),b}-\n";
5821        let mut lexer = Lexer::new(source);
5822
5823        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5824        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$(echo a),b}-"));
5825        assert_next_token(&mut lexer, TokenKind::Newline, None);
5826        assert!(lexer.next_lexed_token().is_none());
5827    }
5828
5829    #[test]
5830    fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
5831        let source = "echo -{$((1 + 2)),b}-\n";
5832        let mut lexer = Lexer::new(source);
5833
5834        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5835        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$((1 + 2)),b}-"));
5836        assert_next_token(&mut lexer, TokenKind::Newline, None);
5837        assert!(lexer.next_lexed_token().is_none());
5838    }
5839
5840    #[test]
5841    fn test_operators() {
5842        let mut lexer = Lexer::new("a |& b | c && d || e; f &");
5843
5844        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5845        assert_next_token(&mut lexer, TokenKind::PipeBoth, None);
5846        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5847        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5848        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5849        assert_next_token(&mut lexer, TokenKind::And, None);
5850        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5851        assert_next_token(&mut lexer, TokenKind::Or, None);
5852        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5853        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5854        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5855        assert_next_token(&mut lexer, TokenKind::Background, None);
5856        assert!(lexer.next_lexed_token().is_none());
5857    }
5858
5859    #[test]
5860    fn test_double_left_bracket_requires_separator() {
5861        let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");
5862
5863        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5864        assert_next_token(&mut lexer, TokenKind::Word, Some("foo"));
5865        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5866        assert_next_token(&mut lexer, TokenKind::Newline, None);
5867        assert_next_token(&mut lexer, TokenKind::Word, Some("[[z]"));
5868        assert_next_token(&mut lexer, TokenKind::Newline, None);
5869        assert!(lexer.next_lexed_token().is_none());
5870    }
5871
5872    #[test]
5873    fn test_redirects() {
5874        let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");
5875
5876        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5877        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5878        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5879        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5880        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5881        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5882        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5883        assert_next_token(&mut lexer, TokenKind::RedirectFdAppend, None);
5884        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5885        let token = lexer.next_lexed_token().unwrap();
5886        assert_eq!(token.kind, TokenKind::Clobber);
5887        assert_eq!(token.fd_value(), Some(2));
5888        assert_eq!(token_text(&token, lexer.input), None);
5889        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5890        assert_next_token(&mut lexer, TokenKind::RedirectIn, None);
5891        assert_next_token(&mut lexer, TokenKind::Word, Some("g"));
5892        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5893        assert_next_token(&mut lexer, TokenKind::Word, Some("h"));
5894        assert_next_token(&mut lexer, TokenKind::HereString, None);
5895        assert_next_token(&mut lexer, TokenKind::Word, Some("i"));
5896        assert_next_token(&mut lexer, TokenKind::RedirectBothAppend, None);
5897        assert_next_token(&mut lexer, TokenKind::Word, Some("j"));
5898        assert_next_token(&mut lexer, TokenKind::RedirectReadWrite, None);
5899        assert_next_token(&mut lexer, TokenKind::Word, Some("k"));
5900    }
5901
5902    #[test]
5903    fn test_comment() {
5904        let mut lexer = Lexer::new("echo hello # this is a comment\necho world");
5905
5906        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5907        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5908        assert_next_token(&mut lexer, TokenKind::Newline, None);
5909        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5910        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5911    }
5912
5913    #[test]
5914    fn test_comment_token_with_span() {
5915        let mut lexer = Lexer::new("# lead\necho hi # tail");
5916
5917        let comment = lexer.next_lexed_token_with_comments().unwrap();
5918        assert_eq!(comment.kind, TokenKind::Comment);
5919        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
5920        assert_eq!(comment.span.start.line, 1);
5921        assert_eq!(comment.span.start.column, 1);
5922        assert_eq!(comment.span.end.line, 1);
5923        assert_eq!(comment.span.end.column, 7);
5924
5925        assert_next_token(&mut lexer, TokenKind::Newline, None);
5926        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5927        assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));
5928
5929        let inline = lexer.next_lexed_token_with_comments().unwrap();
5930        assert_eq!(inline.kind, TokenKind::Comment);
5931        assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
5932        assert_eq!(inline.span.start.line, 2);
5933        assert_eq!(inline.span.start.column, 9);
5934    }
5935
5936    #[test]
5937    fn test_comment_token_preserves_hash_boundaries() {
5938        let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");
5939
5940        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5941        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
5942        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
5943        assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
5944        assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
5945        assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
5946        assert!(lexer.next_lexed_token_with_comments().is_none());
5947    }
5948
5949    #[test]
5950    fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5951        let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5952
5953        let mut saw_comment = false;
5954        while let Some(token) = lexer.next_lexed_token_with_comments() {
5955            if token.kind == TokenKind::Comment {
5956                saw_comment = true;
5957                break;
5958            }
5959        }
5960
5961        assert!(
5962            !saw_comment,
5963            "zsh inline glob controls inside [[ ]] should not lex as comments"
5964        );
5965    }
5966
5967    #[test]
5968    fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5969        let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5970
5971        let mut saw_comment = false;
5972        while let Some(token) = lexer.next_lexed_token_with_comments() {
5973            if token.kind == TokenKind::Comment {
5974                saw_comment = true;
5975                break;
5976            }
5977        }
5978
5979        assert!(
5980            !saw_comment,
5981            "zsh arithmetic char literals inside (( )) should not lex as comments"
5982        );
5983    }
5984
5985    #[test]
5986    fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
5987        let mut lexer = Lexer::new(
5988            "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
5989        );
5990
5991        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5992        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5993        assert_next_token(
5994            &mut lexer,
5995            TokenKind::LiteralWord,
5996            Some("\\e]133;C;cmdline_url=%s\\a"),
5997        );
5998        assert_next_token(
5999            &mut lexer,
6000            TokenKind::QuotedWord,
6001            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
6002        );
6003        assert_next_token(&mut lexer, TokenKind::Newline, None);
6004    }
6005
6006    #[test]
6007    fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
6008        let mut lexer = Lexer::new(
6009            "() {\n  builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
6010        );
6011
6012        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6013        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6014        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
6015        assert_next_token(&mut lexer, TokenKind::Newline, None);
6016        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
6017        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
6018        assert_next_token(
6019            &mut lexer,
6020            TokenKind::LiteralWord,
6021            Some("\\e]133;C;cmdline_url=%s\\a"),
6022        );
6023        assert_next_token(
6024            &mut lexer,
6025            TokenKind::QuotedWord,
6026            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
6027        );
6028        assert_next_token(&mut lexer, TokenKind::Newline, None);
6029        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
6030        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
6031        assert_next_token(&mut lexer, TokenKind::Newline, None);
6032    }
6033
6034    #[test]
6035    fn test_variable_words() {
6036        let mut lexer = Lexer::new("echo $HOME $USER");
6037
6038        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6039        assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
6040        assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
6041        assert!(lexer.next_lexed_token().is_none());
6042    }
6043
6044    #[test]
6045    fn test_pipeline_tokens() {
6046        let mut lexer = Lexer::new("echo hello | cat");
6047
6048        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6049        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
6050        assert_next_token(&mut lexer, TokenKind::Pipe, None);
6051        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6052        assert!(lexer.next_lexed_token().is_none());
6053    }
6054
6055    #[test]
6056    fn test_read_heredoc() {
6057        // Simulate state after reading "cat <<EOF" - positioned at newline before content
6058        let mut lexer = Lexer::new("\nhello\nworld\nEOF");
6059        let content = lexer.read_heredoc("EOF", false);
6060        assert_eq!(content.content, "hello\nworld\n");
6061    }
6062
6063    #[test]
6064    fn test_read_heredoc_single_line() {
6065        let mut lexer = Lexer::new("\ntest\nEOF");
6066        let content = lexer.read_heredoc("EOF", false);
6067        assert_eq!(content.content, "test\n");
6068    }
6069
6070    #[test]
6071    fn test_read_heredoc_full_scenario() {
6072        // Full scenario: "cat <<EOF\nhello\nworld\nEOF"
6073        let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
6074
6075        // Parser would read these tokens
6076        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6077        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6078        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6079
6080        // Now read heredoc content
6081        let content = lexer.read_heredoc("EOF", false);
6082        assert_eq!(content.content, "hello\nworld\n");
6083    }
6084
6085    #[test]
6086    fn test_read_heredoc_with_redirect() {
6087        // Rest-of-line (> file.txt) is re-injected into the lexer buffer
6088        let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
6089        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6090        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6091        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6092        let content = lexer.read_heredoc("EOF", false);
6093        assert_eq!(content.content, "hello\n");
6094        // The redirect tokens are now available from the lexer
6095        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
6096        assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
6097    }
6098
6099    #[test]
6100    fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
6101        let source = "cat <<EOF | grep hello \\\n  | sort \\\n  > out.txt\nhello\nEOF\n";
6102        let mut lexer = Lexer::new(source);
6103
6104        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6105        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6106        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6107
6108        let heredoc = lexer.read_heredoc("EOF", false);
6109        assert_eq!(heredoc.content, "hello\n");
6110
6111        assert_next_token(&mut lexer, TokenKind::Pipe, None);
6112        assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
6113        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
6114        assert_next_token(&mut lexer, TokenKind::Pipe, None);
6115        assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
6116        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
6117        assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
6118    }
6119
6120    #[test]
6121    fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
6122        let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
6123        let mut lexer = Lexer::new(source);
6124
6125        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6126        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6127        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6128
6129        let heredoc = lexer.read_heredoc("EOF", false);
6130        assert_eq!(heredoc.content, "1\n2\n3\n");
6131    }
6132
6133    #[test]
6134    fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
6135        let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
6136        let mut lexer = Lexer::new(source);
6137
6138        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6139        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6140        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6141
6142        let heredoc = lexer.read_heredoc("EOF", false);
6143        assert_eq!(heredoc.content, "body\n");
6144    }
6145
6146    #[test]
6147    fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
6148        let source = "cat <<EOF # note \\\nbody\nEOF\n";
6149        let mut lexer = Lexer::new(source);
6150
6151        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6152        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6153        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6154
6155        let heredoc = lexer.read_heredoc("EOF", false);
6156        assert_eq!(heredoc.content, "body\n");
6157    }
6158
6159    #[test]
6160    fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
6161        let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
6162        let mut lexer = Lexer::new(source);
6163
6164        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6165        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6166        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6167        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6168
6169        let heredoc = lexer.read_heredoc("EOF", false);
6170        assert_eq!(heredoc.content, "body\n");
6171
6172        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6173    }
6174
6175    #[test]
6176    fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
6177        let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
6178        let mut lexer = Lexer::new(source);
6179
6180        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6181        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6182        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6183
6184        let heredoc = lexer.read_heredoc("EOF", false);
6185        assert_eq!(heredoc.content, "1\n");
6186
6187        assert_next_token(&mut lexer, TokenKind::Pipe, None);
6188        assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
6189    }
6190
6191    #[test]
6192    fn test_read_heredoc_with_redirect_preserves_following_spans() {
6193        let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
6194        let mut lexer = Lexer::new(source);
6195
6196        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6197        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6198        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6199
6200        let heredoc = lexer.read_heredoc("EOF", false);
6201        assert_eq!(heredoc.content, "hello\n");
6202
6203        let redirect = lexer.next_lexed_token_with_comments().unwrap();
6204        assert_eq!(redirect.kind, TokenKind::RedirectOut);
6205        assert_eq!(redirect.span.slice(source), ">");
6206
6207        let target = lexer.next_lexed_token_with_comments().unwrap();
6208        assert_eq!(target.kind, TokenKind::Word);
6209        assert_eq!(
6210            token_text(&target, lexer.input).as_deref(),
6211            Some("file.txt")
6212        );
6213        assert_eq!(target.span.slice(source), "file.txt");
6214
6215        let newline = lexer.next_lexed_token_with_comments().unwrap();
6216        assert_eq!(newline.kind, TokenKind::Newline);
6217        assert_eq!(newline.span.slice(source), "\n");
6218
6219        let comment = lexer.next_lexed_token_with_comments().unwrap();
6220        assert_eq!(comment.kind, TokenKind::Comment);
6221        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
6222        assert_eq!(comment.span.slice(source), "# done");
6223    }
6224
6225    #[test]
6226    fn test_comment_with_unicode() {
6227        // Comment containing multi-byte UTF-8 characters
6228        let source = "# café résumé\necho ok";
6229        let mut lexer = Lexer::new(source);
6230
6231        let comment = lexer.next_lexed_token_with_comments().unwrap();
6232        assert_eq!(comment.kind, TokenKind::Comment);
6233        assert_eq!(
6234            token_text(&comment, lexer.input).as_deref(),
6235            Some(" café résumé")
6236        );
6237        // Span should cover exactly the comment bytes (including #)
6238        let start = comment.span.start.offset;
6239        let end = comment.span.end.offset;
6240        assert_eq!(start, 0);
6241        assert_eq!(&source[start..end], "# café résumé");
6242        assert!(source.is_char_boundary(start));
6243        assert!(source.is_char_boundary(end));
6244
6245        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6246        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
6247    }
6248
6249    #[test]
6250    fn test_comment_with_cjk_characters() {
6251        // CJK characters are 3-byte UTF-8; offsets must land on char boundaries
6252        let source = "# 你好世界\necho ok";
6253        let mut lexer = Lexer::new(source);
6254
6255        let comment = lexer.next_lexed_token_with_comments().unwrap();
6256        assert_eq!(comment.kind, TokenKind::Comment);
6257        assert_eq!(
6258            token_text(&comment, lexer.input).as_deref(),
6259            Some(" 你好世界")
6260        );
6261        let start = comment.span.start.offset;
6262        let end = comment.span.end.offset;
6263        assert_eq!(&source[start..end], "# 你好世界");
6264        assert!(source.is_char_boundary(start));
6265        assert!(source.is_char_boundary(end));
6266    }
6267
6268    #[test]
6269    fn test_heredoc_with_comments_inside() {
6270        // Comments inside heredoc body should NOT appear as comment tokens
6271        let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
6272        let mut lexer = Lexer::new(source);
6273
6274        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6275        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6276        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6277
6278        let heredoc = lexer.read_heredoc("EOF", false);
6279        assert_eq!(heredoc.content, "# not a comment\nreal line\n");
6280
6281        // After heredoc, replayed line termination should appear before
6282        // tokens from following source lines.
6283        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6284        let comment = lexer.next_lexed_token_with_comments().unwrap();
6285        assert_eq!(comment.kind, TokenKind::Comment);
6286        assert_eq!(
6287            token_text(&comment, lexer.input).as_deref(),
6288            Some(" real comment")
6289        );
6290    }
6291
6292    #[test]
6293    fn test_heredoc_with_hash_in_variable() {
6294        // ${var#pattern} inside heredoc should not produce comment tokens
6295        let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
6296        let mut lexer = Lexer::new(source);
6297
6298        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6299        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6300        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6301
6302        let heredoc = lexer.read_heredoc("EOF", false);
6303        assert_eq!(heredoc.content, "val=${x#prefix}\n");
6304    }
6305
6306    #[test]
6307    fn test_heredoc_span_does_not_leak() {
6308        // Heredoc content span must be within source bounds and must not
6309        // overlap with content before or after.
6310        let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
6311        let mut lexer = Lexer::new(source);
6312
6313        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6314        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6315        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6316
6317        let heredoc = lexer.read_heredoc("EOF", false);
6318        let start = heredoc.content_span.start.offset;
6319        let end = heredoc.content_span.end.offset;
6320        assert!(
6321            end <= source.len(),
6322            "heredoc span end ({end}) exceeds source length ({})",
6323            source.len()
6324        );
6325        assert_eq!(&source[start..end], "hello\nworld\n");
6326
6327        // Tokens after heredoc should still parse correctly
6328        assert_next_token(&mut lexer, TokenKind::Newline, None);
6329        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6330        assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
6331    }
6332
6333    #[test]
6334    fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
6335        let source = "\
6336cat <<\\_ACEOF
6337Use these variables to override the choices made by `configure' or to help
6338it to find libraries and programs with nonstandard names/locations.
6339_ACEOF
6340ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
6341ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
6342";
6343        let mut lexer = Lexer::new(source);
6344
6345        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6346        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6347        let delimiter = lexer.next_lexed_token_with_comments().unwrap();
6348        assert_eq!(delimiter.kind, TokenKind::Word);
6349        assert_eq!(delimiter.span.slice(source), "\\_ACEOF");
6350
6351        let heredoc = lexer.read_heredoc("_ACEOF", false);
6352        assert_eq!(
6353            heredoc.content,
6354            "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
6355        );
6356
6357        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6358
6359        let first = lexer.next_lexed_token_with_comments().unwrap();
6360        assert_eq!(first.kind, TokenKind::Word);
6361        assert_eq!(
6362            first.span.slice(source),
6363            "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
6364        );
6365        let first_segments = first
6366            .word()
6367            .unwrap()
6368            .segments()
6369            .map(|segment| {
6370                (
6371                    segment.kind(),
6372                    segment.as_str().to_string(),
6373                    segment.span().map(|span| span.slice(source).to_string()),
6374                )
6375            })
6376            .collect::<Vec<_>>();
6377        assert_eq!(
6378            first_segments,
6379            vec![
6380                (
6381                    LexedWordSegmentKind::Plain,
6382                    "ac_dir_suffix=/".to_string(),
6383                    Some("ac_dir_suffix=/".to_string()),
6384                ),
6385                (
6386                    LexedWordSegmentKind::Plain,
6387                    "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
6388                    Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
6389                ),
6390            ]
6391        );
6392
6393        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6394
6395        let second = lexer.next_lexed_token_with_comments().unwrap();
6396        assert_eq!(second.kind, TokenKind::Word);
6397        assert_eq!(
6398            second.span.slice(source),
6399            "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6400        );
6401        let second_segments = second
6402            .word()
6403            .unwrap()
6404            .segments()
6405            .map(|segment| {
6406                (
6407                    segment.kind(),
6408                    segment.as_str().to_string(),
6409                    segment.span().map(|span| span.slice(source).to_string()),
6410                )
6411            })
6412            .collect::<Vec<_>>();
6413        assert_eq!(
6414            second_segments,
6415            vec![
6416                (
6417                    LexedWordSegmentKind::Plain,
6418                    "ac_top_builddir_sub=".to_string(),
6419                    Some("ac_top_builddir_sub=".to_string()),
6420                ),
6421                (
6422                    LexedWordSegmentKind::Plain,
6423                    "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
6424                    Some(
6425                        "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6426                            .to_string(),
6427                    ),
6428                ),
6429            ]
6430        );
6431    }
6432
6433    #[test]
6434    fn test_heredoc_with_unicode_content() {
6435        // Heredoc containing multi-byte characters; spans must be on char boundaries
6436        let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
6437        let mut lexer = Lexer::new(source);
6438
6439        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6440        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6441        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6442
6443        let heredoc = lexer.read_heredoc("EOF", false);
6444        assert_eq!(heredoc.content, "# 你好\ncafé\n");
6445        let start = heredoc.content_span.start.offset;
6446        let end = heredoc.content_span.end.offset;
6447        assert!(
6448            source.is_char_boundary(start),
6449            "heredoc span start ({start}) not on char boundary"
6450        );
6451        assert!(
6452            source.is_char_boundary(end),
6453            "heredoc span end ({end}) not on char boundary"
6454        );
6455        assert_eq!(&source[start..end], "# 你好\ncafé\n");
6456    }
6457
6458    #[test]
6459    fn test_assoc_compound_assignment() {
6460        // declare -A m=([foo]="bar" [baz]="qux") should keep the compound
6461        // assignment as a single Word token
6462        let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6463        assert_next_token(
6464            &mut lexer,
6465            TokenKind::Word,
6466            Some(r#"m=([foo]="bar" [baz]="qux")"#),
6467        );
6468        assert!(lexer.next_lexed_token().is_none());
6469    }
6470
6471    #[test]
6472    fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6473        let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6474        let mut lexer = Lexer::new(source);
6475
6476        let token = lexer.next_lexed_token().unwrap();
6477        assert_eq!(token.kind, TokenKind::Word);
6478        assert_eq!(token.span.slice(source), source);
6479        assert!(lexer.next_lexed_token().is_none());
6480    }
6481
6482    #[test]
6483    fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6484        let source = r#"foo\_bar@(baz|qux)"#;
6485        let mut lexer = Lexer::new(source);
6486
6487        let token = lexer.next_lexed_token().unwrap();
6488        assert_eq!(token.kind, TokenKind::Word);
6489        assert_eq!(token.span.slice(source), source);
6490        assert!(lexer.next_lexed_token().is_none());
6491    }
6492
6493    #[test]
6494    fn test_zsh_alternative_glob_after_dot_keeps_suffix_group() {
6495        let source = "file.(txt|doc|pdf)";
6496        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6497        let mut lexer = Lexer::with_profile(source, &profile);
6498
6499        let token = lexer.next_lexed_token().unwrap();
6500        assert_eq!(token.kind, TokenKind::Word);
6501        assert_eq!(token.span.slice(source), source);
6502        assert!(lexer.next_lexed_token().is_none());
6503    }
6504
6505    #[test]
6506    fn test_zsh_path_glob_modifier_keeps_suffix_group() {
6507        let source = "/path/file(:h)";
6508        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6509        let mut lexer = Lexer::with_profile(source, &profile);
6510
6511        let token = lexer.next_lexed_token().unwrap();
6512        assert_eq!(token.kind, TokenKind::Word);
6513        assert_eq!(token.span.slice(source), source);
6514        assert!(lexer.next_lexed_token().is_none());
6515
6516        let mut default_lexer = Lexer::new(source);
6517        let token = default_lexer.next_lexed_token().unwrap();
6518        assert_eq!(token.kind, TokenKind::Word);
6519        assert_eq!(token.span.slice(source), "/path/file");
6520    }
6521
6522    #[test]
6523    fn test_indexed_array_not_collapsed() {
6524        // arr=("hello world") should NOT be collapsed — parser handles
6525        // quoted elements token-by-token via the LeftParen path
6526        let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6527        assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6528        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6529    }
6530
6531    #[test]
6532    fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
6533        let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
6534        let mut lexer = Lexer::new(source);
6535
6536        assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
6537        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6538
6539        let token = lexer.next_lexed_token().unwrap();
6540        assert_eq!(token.kind, TokenKind::Word);
6541        assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);
6542
6543        let word = token.word().unwrap();
6544        let segments: Vec<_> = word
6545            .segments()
6546            .map(|segment| (segment.kind(), segment.as_str().to_string()))
6547            .collect();
6548        assert_eq!(
6549            segments,
6550            vec![
6551                (
6552                    LexedWordSegmentKind::DoubleQuoted,
6553                    "$plugin_dir".to_string()
6554                ),
6555                (LexedWordSegmentKind::Plain, "/*".to_string()),
6556                (LexedWordSegmentKind::Plain, "(:t)".to_string()),
6557            ]
6558        );
6559
6560        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6561        assert!(lexer.next_lexed_token().is_none());
6562    }
6563
6564    #[test]
6565    fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
6566        let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
6567        let mut lexer = Lexer::new(source);
6568
6569        assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
6570        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6571
6572        let token = lexer.next_lexed_token().unwrap();
6573        assert_eq!(token.kind, TokenKind::Word);
6574        assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);
6575
6576        let word = token.word().unwrap();
6577        let segments: Vec<_> = word
6578            .segments()
6579            .map(|segment| (segment.kind(), segment.as_str().to_string()))
6580            .collect();
6581        assert_eq!(
6582            segments,
6583            vec![
6584                (
6585                    LexedWordSegmentKind::DoubleQuoted,
6586                    "$__GREP_CACHE_FILE".to_string()
6587                ),
6588                (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
6589            ]
6590        );
6591
6592        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6593        assert!(lexer.next_lexed_token().is_none());
6594    }
6595
6596    #[test]
6597    fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6598        let source = r#"$dir/${~pats}(N)"#;
6599        let mut lexer = Lexer::new(source);
6600
6601        let token = lexer.next_lexed_token().unwrap();
6602        assert_eq!(token.kind, TokenKind::Word);
6603        assert_eq!(token.span.slice(source), source);
6604        assert!(lexer.next_lexed_token().is_none());
6605    }
6606
6607    #[test]
6608    fn test_dollar_word_does_not_absorb_function_parens() {
6609        let mut lexer = Lexer::new(r#"foo$x()"#);
6610
6611        assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6612        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6613        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6614        assert!(lexer.next_lexed_token().is_none());
6615    }
6616
6617    #[test]
6618    fn test_command_substitution_word_does_not_absorb_function_parens() {
6619        let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6620
6621        assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6622        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6623        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6624        assert!(lexer.next_lexed_token().is_none());
6625    }
6626
6627    /// Regression test for fuzz crash: single digit at EOF should not panic
6628    /// (crash-13c5f6f887a11b2296d67f9857975d63b205ac4b)
6629    #[test]
6630    fn test_digit_at_eof_no_panic() {
6631        // A lone digit with no following redirect operator must not panic
6632        let mut lexer = Lexer::new("2");
6633        let token = lexer.next_lexed_token();
6634        assert!(token.is_some());
6635    }
6636
6637    /// Issue #599: Nested ${...} inside unquoted ${...} must be a single token.
6638    #[test]
6639    fn test_nested_brace_expansion_single_token() {
6640        // ${arr[${#arr[@]} - 1]} should be ONE word token, not split at inner }
6641        let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6642        assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6643        // No more tokens — everything was consumed
6644        assert!(lexer.next_lexed_token().is_none());
6645    }
6646
6647    /// Simple ${var} still works after brace depth change.
6648    #[test]
6649    fn test_simple_brace_expansion_unchanged() {
6650        let mut lexer = Lexer::new("${foo}");
6651        assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6652        assert!(lexer.next_lexed_token().is_none());
6653    }
6654
6655    #[test]
6656    fn test_nvm_fixture_lexes_without_stalling() {
6657        let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6658        let mut lexer = Lexer::new(input);
6659        let mut tokens = 0usize;
6660
6661        while lexer.next_lexed_token().is_some() {
6662            tokens += 1;
6663            assert!(
6664                tokens < 100_000,
6665                "lexer should continue making progress on the nvm fixture"
6666            );
6667        }
6668
6669        assert!(tokens > 0, "nvm fixture should produce at least one token");
6670    }
6671
6672    #[test]
6673    fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
6674        let input = concat!(
6675            "case \"${_input_type:-}\" in\n",
6676            "  html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
6677            "  org)  _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
6678            "esac\n",
6679        );
6680
6681        assert_non_newline_tokens_stay_on_one_line(input);
6682
6683        let mut lexer = Lexer::new(input);
6684        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6685            .map(|token| (token.kind, token_text(&token, input)))
6686            .collect::<Vec<_>>();
6687        assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
6688        assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
6689    }
6690
6691    #[test]
6692    fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6693        let input = concat!(
6694            "case $2 in\n",
6695            "  cygwin*) bin='cygwin32/bin' ;|\n",
6696            "esac\n",
6697        );
6698
6699        let mut lexer = Lexer::new(input);
6700        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6701            .map(|token| (token.kind, token_text(&token, input)))
6702            .collect::<Vec<_>>();
6703
6704        assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6705        assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6706        assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6707    }
6708
6709    #[test]
6710    fn test_inline_if_with_array_append_stays_line_local() {
6711        let input = concat!(
6712            "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
6713            "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
6714        );
6715
6716        assert_non_newline_tokens_stay_on_one_line(input);
6717    }
6718
6719    #[test]
6720    fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6721        let source = "unsetopt interactive_comments\n#literal\n";
6722        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6723        let mut lexer = Lexer::with_profile(source, &profile);
6724
6725        assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6726        assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6727        assert_next_token(&mut lexer, TokenKind::Newline, None);
6728        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6729    }
6730
6731    #[test]
6732    fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
6733        let source = "setopt rc_quotes\nprint 'a''b'\n";
6734        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6735        let mut lexer = Lexer::with_profile(source, &profile);
6736
6737        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6738        assert_next_token(&mut lexer, TokenKind::Word, Some("rc_quotes"));
6739        assert_next_token(&mut lexer, TokenKind::Newline, None);
6740        assert_next_token(&mut lexer, TokenKind::Word, Some("print"));
6741        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("a'b"));
6742    }
6743
6744    #[test]
6745    fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
6746        let source = "setopt ignore_braces\n{ echo }\n";
6747        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6748        let mut lexer = Lexer::with_profile(source, &profile);
6749
6750        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6751        assert_next_token(&mut lexer, TokenKind::Word, Some("ignore_braces"));
6752        assert_next_token(&mut lexer, TokenKind::Newline, None);
6753        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
6754        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6755        assert_next_token(&mut lexer, TokenKind::Word, Some("}"));
6756    }
6757
6758    #[test]
6759    fn test_zsh_midfile_setopt_brace_ccl_keeps_adjacent_brace_expansions_in_one_word() {
6760        let source = "setopt brace_ccl\n{ab}{0-2}\n";
6761        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6762        let mut lexer = Lexer::with_profile(source, &profile);
6763
6764        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6765        assert_next_token(&mut lexer, TokenKind::Word, Some("brace_ccl"));
6766        assert_next_token(&mut lexer, TokenKind::Newline, None);
6767        assert_next_token(&mut lexer, TokenKind::Word, Some("{ab}{0-2}"));
6768    }
6769
6770    #[test]
6771    fn test_heredoc_in_arithmetic_fuzz_crash() {
6772        // Regression test: the fuzzer found that heredoc re-injection inside
6773        // arithmetic context can push self.offset past self.input.len(),
6774        // causing a panic in read_unquoted_segment's borrowed-slice path.
6775        let data: &[u8] = &[
6776            35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
6777            40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
6778            33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
6779            119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
6780            0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
6781            119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
6782            122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
6783            122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
6784            122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
6785            49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
6786            119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
6787            122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
6788            61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
6789            32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
6790            122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
6791            39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
6792            122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
6793            32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
6794            0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
6795        ];
6796        let input = std::str::from_utf8(data).unwrap();
6797        let script = format!("echo $(({input}))\n");
6798        // Must not panic.
6799        let _ = crate::parser::Parser::new(&script).parse();
6800    }
6801}