shuck_parser/parser/
lexer.rs

1//! Lexer for bash scripts
2//!
3//! Tokenizes input into a stream of tokens with source position tracking.
4
5use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Compact bit set of lexer-level properties attached to a token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Bit recorded for tokens built from a word that has cooked text.
    const COOKED_TEXT: u8 = 0b01;
    /// Bit recorded for tokens marked as synthetic.
    const SYNTHETIC: u8 = 0b10;

    /// Flag set with no bits raised.
    const fn empty() -> Self {
        Self(0)
    }

    /// Flag set with only the cooked-text bit raised.
    const fn cooked_text() -> Self {
        Self(Self::COOKED_TEXT)
    }

    /// Report whether `bit` is raised in this set.
    const fn has(self, bit: u8) -> bool {
        let Self(bits) = self;
        bits & bit != 0
    }

    /// Return a copy of these flags with the synthetic bit raised.
    pub(crate) const fn with_synthetic(self) -> Self {
        let Self(bits) = self;
        Self(bits | Self::SYNTHETIC)
    }

    /// Whether the cooked-text bit is raised.
    pub(crate) const fn has_cooked_text(self) -> bool {
        self.has(Self::COOKED_TEXT)
    }

    /// Whether the synthetic bit is raised.
    pub(crate) const fn is_synthetic(self) -> bool {
        self.has(Self::SYNTHETIC)
    }
}
40
41#[derive(Debug, Clone, PartialEq, Eq)]
42pub(crate) enum TokenText<'a> {
43    Borrowed(&'a str),
44    Shared {
45        source: Arc<str>,
46        range: Range<usize>,
47    },
48    Owned(String),
49}
50
51impl TokenText<'_> {
52    pub(crate) fn as_str(&self) -> &str {
53        match self {
54            Self::Borrowed(text) => text,
55            Self::Shared { source, range } => &source[range.clone()],
56            Self::Owned(text) => text,
57        }
58    }
59
60    fn into_owned<'a>(self) -> TokenText<'a> {
61        match self {
62            Self::Borrowed(text) => TokenText::Owned(text.to_string()),
63            Self::Shared { source, range } => TokenText::Shared { source, range },
64            Self::Owned(text) => TokenText::Owned(text),
65        }
66    }
67
68    fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
69        match self {
70            Self::Borrowed(text) => span
71                .filter(|span| span.end.offset <= source.len())
72                .map_or_else(
73                    || TokenText::Owned(text.to_string()),
74                    |span| TokenText::Shared {
75                        source: Arc::clone(source),
76                        range: span.start.offset..span.end.offset,
77                    },
78                ),
79            Self::Shared { source, range } => TokenText::Shared { source, range },
80            Self::Owned(text) => TokenText::Owned(text),
81        }
82    }
83}
84
/// Classification of one segment inside a lexed shell word.
///
/// Single-segment word tokens derive their kind from the token kind via
/// `LexedToken::word_segment_kind`: `TokenKind::Word` maps to `Plain`,
/// `TokenKind::LiteralWord` to `SingleQuoted`, `TokenKind::QuotedWord` to
/// `DoubleQuoted`, and every other token kind to `Composite`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LexedWordSegmentKind {
    /// Unquoted or otherwise plain text.
    Plain,
    /// Text from a single-quoted string.
    SingleQuoted,
    /// Text from a `$'...'` string.
    DollarSingleQuoted,
    /// Text from a double-quoted string.
    DoubleQuoted,
    /// Text from a `$"..."` string.
    DollarDoubleQuoted,
    /// Text composed from multiple lexical forms.
    Composite,
}
101
102/// One segment of a lexed shell word, optionally backed by source text.
103#[derive(Debug, Clone, PartialEq, Eq)]
104pub(crate) struct LexedWordSegment<'a> {
105    kind: LexedWordSegmentKind,
106    text: TokenText<'a>,
107    span: Option<Span>,
108    wrapper_span: Option<Span>,
109}
110
111impl<'a> LexedWordSegment<'a> {
112    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
113        Self {
114            kind,
115            text: TokenText::Borrowed(text),
116            span,
117            wrapper_span: span,
118        }
119    }
120
121    fn borrowed_with_spans(
122        kind: LexedWordSegmentKind,
123        text: &'a str,
124        span: Option<Span>,
125        wrapper_span: Option<Span>,
126    ) -> Self {
127        Self {
128            kind,
129            text: TokenText::Borrowed(text),
130            span,
131            wrapper_span,
132        }
133    }
134
135    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
136        Self {
137            kind,
138            text: TokenText::Owned(text),
139            span: None,
140            wrapper_span: None,
141        }
142    }
143
144    fn owned_with_spans(
145        kind: LexedWordSegmentKind,
146        text: String,
147        span: Option<Span>,
148        wrapper_span: Option<Span>,
149    ) -> Self {
150        Self {
151            kind,
152            text: TokenText::Owned(text),
153            span,
154            wrapper_span,
155        }
156    }
157
158    /// Borrow this segment's cooked text.
159    pub(crate) fn as_str(&self) -> &str {
160        self.text.as_str()
161    }
162
163    pub(crate) const fn text_is_source_backed(&self) -> bool {
164        matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
165    }
166
167    /// Return the lexical classification of this segment.
168    pub(crate) const fn kind(&self) -> LexedWordSegmentKind {
169        self.kind
170    }
171
172    /// Return the span of the inner text, if it is tracked.
173    pub(crate) const fn span(&self) -> Option<Span> {
174        self.span
175    }
176
177    /// Return the span including surrounding quoting syntax when available.
178    pub(crate) fn wrapper_span(&self) -> Option<Span> {
179        self.wrapper_span.or(self.span)
180    }
181
182    fn rebased(mut self, base: Position) -> Self {
183        self.span = self.span.map(|span| span.rebased(base));
184        self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
185        self
186    }
187
188    fn into_owned<'b>(self) -> LexedWordSegment<'b> {
189        LexedWordSegment {
190            kind: self.kind,
191            text: self.text.into_owned(),
192            span: self.span,
193            wrapper_span: self.wrapper_span,
194        }
195    }
196
197    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
198        LexedWordSegment {
199            kind: self.kind,
200            text: self.text.into_shared(source, self.span),
201            span: self.span,
202            wrapper_span: self.wrapper_span,
203        }
204    }
205}
206
207/// Source-backed representation of a shell word produced by the lexer.
208#[derive(Debug, Clone, PartialEq, Eq)]
209pub(crate) struct LexedWord<'a> {
210    primary_segment: LexedWordSegment<'a>,
211    trailing_segments: Vec<LexedWordSegment<'a>>,
212}
213
214impl<'a> LexedWord<'a> {
215    fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
216        Self {
217            primary_segment,
218            trailing_segments: Vec::new(),
219        }
220    }
221
222    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
223        Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
224    }
225
226    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
227        Self::from_segment(LexedWordSegment::owned(kind, text))
228    }
229
230    fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
231        self.trailing_segments.push(segment);
232    }
233
234    /// Iterate over the segments that make up this word.
235    pub(crate) fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
236        std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
237    }
238
239    /// Return the word text when it is represented by a single segment.
240    pub(crate) fn text(&self) -> Option<&str> {
241        self.single_segment().map(LexedWordSegment::as_str)
242    }
243
244    /// Join all segments into an owned string.
245    pub(crate) fn joined_text(&self) -> String {
246        let mut text = String::new();
247        for segment in self.segments() {
248            text.push_str(segment.as_str());
249        }
250        text
251    }
252
253    /// Return the only segment when this word is not segmented.
254    pub(crate) fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
255        self.trailing_segments
256            .is_empty()
257            .then_some(&self.primary_segment)
258    }
259
260    fn has_cooked_text(&self) -> bool {
261        self.segments()
262            .any(|segment| matches!(segment.text, TokenText::Owned(_)))
263    }
264
265    fn rebased(mut self, base: Position) -> Self {
266        self.primary_segment = self.primary_segment.rebased(base);
267        self.trailing_segments = self
268            .trailing_segments
269            .into_iter()
270            .map(|segment| segment.rebased(base))
271            .collect();
272        self
273    }
274
275    fn into_owned<'b>(self) -> LexedWord<'b> {
276        LexedWord {
277            primary_segment: self.primary_segment.into_owned(),
278            trailing_segments: self
279                .trailing_segments
280                .into_iter()
281                .map(LexedWordSegment::into_owned)
282                .collect(),
283        }
284    }
285
286    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
287        LexedWord {
288            primary_segment: self.primary_segment.into_shared(source),
289            trailing_segments: self
290                .trailing_segments
291                .into_iter()
292                .map(|segment| segment.into_shared(source))
293                .collect(),
294        }
295    }
296}
297
/// Reasons the lexer can attach to a `TokenKind::Error` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LexerErrorKind {
    /// A `$()` command substitution that was never closed.
    CommandSubstitution,
    /// A backtick command substitution that was never closed.
    BacktickSubstitution,
    /// A single-quoted string that was never closed.
    SingleQuote,
    /// A double-quoted string that was never closed.
    DoubleQuote,
}

impl LexerErrorKind {
    /// Fixed, human-readable description of this error kind.
    pub(crate) const fn message(self) -> &'static str {
        use LexerErrorKind::{BacktickSubstitution, CommandSubstitution, DoubleQuote, SingleQuote};

        let description = match self {
            CommandSubstitution => "unterminated command substitution",
            BacktickSubstitution => "unterminated backtick substitution",
            SingleQuote => "unterminated single quote",
            DoubleQuote => "unterminated double quote",
        };
        description
    }
}
322
/// Kind-specific data stored alongside a `LexedToken`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenPayload<'a> {
    /// The token carries no extra data.
    None,
    /// Structured word segments; exposed through `LexedToken::word`.
    Word(LexedWord<'a>),
    /// A single file descriptor; exposed through `LexedToken::fd_value`.
    Fd(i32),
    /// A `(source_fd, target_fd)` pair; exposed through
    /// `LexedToken::fd_pair_value`.
    FdPair(i32, i32),
    /// The lexer error kind; exposed through `LexedToken::error_kind`.
    Error(LexerErrorKind),
}
331
332/// Token produced by the shell lexer.
333///
334/// Public consumers can inspect the token kind and source span. Word payloads,
335/// descriptor payloads, and lexer recovery details are currently parser-internal
336/// so the lexer can evolve without expanding the public API.
337#[derive(Debug, Clone, PartialEq, Eq)]
338pub struct LexedToken<'a> {
339    /// Token kind used by the parser.
340    pub kind: TokenKind,
341    /// Source span covered by the token.
342    pub span: Span,
343    pub(crate) flags: TokenFlags,
344    payload: TokenPayload<'a>,
345}
346
347impl<'a> LexedToken<'a> {
348    fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
349        match kind {
350            TokenKind::Word => LexedWordSegmentKind::Plain,
351            TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
352            TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
353            _ => LexedWordSegmentKind::Composite,
354        }
355    }
356
357    pub(crate) fn punctuation(kind: TokenKind) -> Self {
358        Self {
359            kind,
360            span: Span::new(),
361            flags: TokenFlags::empty(),
362            payload: TokenPayload::None,
363        }
364    }
365
366    fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
367        let flags = if word.has_cooked_text() {
368            TokenFlags::cooked_text()
369        } else {
370            TokenFlags::empty()
371        };
372
373        Self {
374            kind,
375            span: Span::new(),
376            flags,
377            payload: TokenPayload::Word(word),
378        }
379    }
380
381    fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
382        Self::with_word_payload(
383            kind,
384            LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
385        )
386    }
387
388    fn owned_word(kind: TokenKind, text: String) -> Self {
389        Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
390    }
391
392    fn comment() -> Self {
393        Self {
394            kind: TokenKind::Comment,
395            span: Span::new(),
396            flags: TokenFlags::empty(),
397            payload: TokenPayload::None,
398        }
399    }
400
401    fn fd(kind: TokenKind, fd: i32) -> Self {
402        Self {
403            kind,
404            span: Span::new(),
405            flags: TokenFlags::empty(),
406            payload: TokenPayload::Fd(fd),
407        }
408    }
409
410    fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
411        Self {
412            kind,
413            span: Span::new(),
414            flags: TokenFlags::empty(),
415            payload: TokenPayload::FdPair(src_fd, dst_fd),
416        }
417    }
418
419    fn error(kind: LexerErrorKind) -> Self {
420        Self {
421            kind: TokenKind::Error,
422            span: Span::new(),
423            flags: TokenFlags::empty(),
424            payload: TokenPayload::Error(kind),
425        }
426    }
427
428    pub(crate) fn with_span(mut self, span: Span) -> Self {
429        self.span = span;
430        self
431    }
432
433    pub(crate) fn rebased(mut self, base: Position) -> Self {
434        self.span = self.span.rebased(base);
435        self.payload = match self.payload {
436            TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
437            payload => payload,
438        };
439        self
440    }
441
442    pub(crate) fn with_synthetic_flag(mut self) -> Self {
443        self.flags = self.flags.with_synthetic();
444        self
445    }
446
447    pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
448        let payload = match self.payload {
449            TokenPayload::None => TokenPayload::None,
450            TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
451            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
452            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
453            TokenPayload::Error(kind) => TokenPayload::Error(kind),
454        };
455
456        LexedToken {
457            kind: self.kind,
458            span: self.span,
459            flags: self.flags,
460            payload,
461        }
462    }
463
464    pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
465        let payload = match self.payload {
466            TokenPayload::None => TokenPayload::None,
467            TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
468            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
469            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
470            TokenPayload::Error(kind) => TokenPayload::Error(kind),
471        };
472
473        LexedToken {
474            kind: self.kind,
475            span: self.span,
476            flags: self.flags,
477            payload,
478        }
479    }
480
481    /// Borrow the token text when it is a single-segment word token.
482    pub(crate) fn word_text(&self) -> Option<&str> {
483        self.kind
484            .is_word_like()
485            .then_some(())
486            .and_then(|_| match &self.payload {
487                TokenPayload::Word(word) => word.text(),
488                _ => None,
489            })
490    }
491
492    /// Return an owned string containing the token's word text.
493    pub(crate) fn word_string(&self) -> Option<String> {
494        self.kind
495            .is_word_like()
496            .then_some(())
497            .and_then(|_| match &self.payload {
498                TokenPayload::Word(word) => Some(word.joined_text()),
499                _ => None,
500            })
501    }
502
503    /// Borrow the structured word payload for word-like tokens.
504    pub(crate) fn word(&self) -> Option<&LexedWord<'a>> {
505        match &self.payload {
506            TokenPayload::Word(word) => Some(word),
507            _ => None,
508        }
509    }
510
511    /// Borrow the original source slice when the token is source-backed and uncooked.
512    pub(crate) fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
513        if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
514            return None;
515        }
516
517        (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
518            .then(|| &source[self.span.start.offset..self.span.end.offset])
519    }
520
521    /// Return the file-descriptor payload for redirection tokens that carry one.
522    pub(crate) fn fd_value(&self) -> Option<i32> {
523        match self.payload {
524            TokenPayload::Fd(fd) => Some(fd),
525            _ => None,
526        }
527    }
528
529    /// Return the `(source_fd, target_fd)` payload for descriptor-pair redirections.
530    pub(crate) fn fd_pair_value(&self) -> Option<(i32, i32)> {
531        match self.payload {
532            TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
533            _ => None,
534        }
535    }
536
537    /// Return the lexer error payload when this token represents `TokenKind::Error`.
538    pub(crate) fn error_kind(&self) -> Option<LexerErrorKind> {
539        match self.payload {
540            TokenPayload::Error(kind) => Some(kind),
541            _ => None,
542        }
543    }
544}
545
546/// Result of reading a heredoc body from the source.
547#[derive(Debug, Clone, PartialEq)]
548pub(crate) struct HeredocRead {
549    /// Decoded heredoc content.
550    pub content: String,
551    /// Source span covering the heredoc body content.
552    pub content_span: Span,
553}
554
/// Maximum nesting depth for command substitution in the lexer.
/// Prevents stack overflow from deeply nested $() patterns.
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
/// Depth limit associated with scanning parameter expansions.
/// NOTE(review): the code that consumes this constant is outside this part of
/// the file — confirm exactly which scan it bounds.
const MAX_PARAMETER_EXPANSION_SCAN_DEPTH: usize = 4;
559
560#[derive(Clone, Debug)]
561struct Cursor<'a> {
562    rest: &'a str,
563}
564
565impl<'a> Cursor<'a> {
566    fn new(source: &'a str) -> Self {
567        Self { rest: source }
568    }
569
570    fn first(&self) -> Option<char> {
571        self.rest.chars().next()
572    }
573
574    fn second(&self) -> Option<char> {
575        let mut chars = self.rest.chars();
576        chars.next()?;
577        chars.next()
578    }
579
580    fn third(&self) -> Option<char> {
581        let mut chars = self.rest.chars();
582        chars.next()?;
583        chars.next()?;
584        chars.next()
585    }
586
587    fn bump(&mut self) -> Option<char> {
588        let ch = self.first()?;
589        self.rest = &self.rest[ch.len_utf8()..];
590        Some(ch)
591    }
592
593    fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
594        let start = self.rest;
595        let mut end = 0;
596
597        for ch in start.chars() {
598            if !predicate(ch) {
599                break;
600            }
601            end += ch.len_utf8();
602        }
603
604        self.rest = &start[end..];
605        &start[..end]
606    }
607
608    fn rest(&self) -> &'a str {
609        self.rest
610    }
611
612    fn skip_bytes(&mut self, count: usize) {
613        self.rest = &self.rest[count..];
614    }
615
616    fn find_byte(&self, byte: u8) -> Option<usize> {
617        memchr(byte, self.rest.as_bytes())
618    }
619}
620
621#[derive(Clone, Debug)]
622struct PositionMap<'a> {
623    source: &'a str,
624    line_starts: Arc<[usize]>,
625    cached: Position,
626}
627
628#[cfg(feature = "benchmarking")]
629#[derive(Clone, Copy, Debug, Default)]
630pub(crate) struct LexerBenchmarkCounters {
631    pub(crate) current_position_calls: u64,
632}
633
634impl<'a> PositionMap<'a> {
635    fn new(source: &'a str) -> Self {
636        let mut line_starts =
637            Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
638        line_starts.push(0);
639        line_starts.extend(
640            source
641                .bytes()
642                .enumerate()
643                .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
644        );
645
646        Self {
647            source,
648            line_starts: line_starts.into(),
649            cached: Position::new(),
650        }
651    }
652
653    fn position(&mut self, offset: usize) -> Position {
654        if offset == self.cached.offset {
655            return self.cached;
656        }
657
658        let position = if offset > self.cached.offset && offset <= self.source.len() {
659            Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
660        } else {
661            self.position_uncached(offset)
662        };
663        self.cached = position;
664        position
665    }
666
667    fn position_uncached(&self, offset: usize) -> Position {
668        let offset = offset.min(self.source.len());
669        let line_index = self
670            .line_starts
671            .partition_point(|start| *start <= offset)
672            .saturating_sub(1);
673        let line_start = self.line_starts[line_index];
674        let line_text = &self.source[line_start..offset];
675        let column = if line_text.is_ascii() {
676            line_text.len() + 1
677        } else {
678            line_text.chars().count() + 1
679        };
680
681        Position {
682            line: line_index + 1,
683            column,
684            offset,
685        }
686    }
687
688    fn advance_from(mut position: Position, text: &str) -> Position {
689        position.offset += text.len();
690        let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
691        if newline_count == 0 {
692            position.column += if text.is_ascii() {
693                text.len()
694            } else {
695                text.chars().count()
696            };
697            return position;
698        }
699
700        position.line += newline_count;
701        let tail_start = memrchr(b'\n', text.as_bytes())
702            .map(|index| index + 1)
703            .unwrap_or_default();
704        let tail = &text[tail_start..];
705        position.column = if tail.is_ascii() {
706            tail.len() + 1
707        } else {
708            tail.chars().count() + 1
709        };
710        position
711    }
712}
713
/// Source-backed lexer for shell scripts.
///
/// The public lexer surface is intended for lower-level tooling and
/// benchmarks. It tokenizes using the default bash profile; use the parser
/// constructors when dialect or zsh option state matters.
#[derive(Clone)]
pub struct Lexer<'a> {
    /// Complete source text handed to the lexer at construction.
    input: &'a str,
    /// Current byte offset in the input/reinjected stream.
    offset: usize,
    /// Cursor over the remaining source text; starts at the beginning of `input`.
    cursor: Cursor<'a>,
    /// Converts byte offsets in `input` into line/column positions.
    position_map: PositionMap<'a>,
    /// Buffer for re-injected characters (e.g., rest-of-line after heredoc delimiter).
    /// Consumed before `cursor`.
    reinject_buf: VecDeque<char>,
    /// Cursor byte offset to restore once a heredoc replay buffer is exhausted.
    reinject_resume_offset: Option<usize>,
    /// Maximum allowed nesting depth for command substitution
    max_subst_depth: usize,
    /// Zsh option state cloned from the shell profile at construction; it is
    /// the answer of `current_zsh_options` until a timeline entry applies.
    initial_zsh_options: Option<ZshOptionState>,
    /// Optional timeline of zsh option states, each tagged with a source offset.
    zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    /// Number of leading timeline entries whose offset has been reached;
    /// advanced lazily by `current_zsh_options`.
    zsh_timeline_index: usize,
    /// Benchmark counters; `None` until `enable_benchmark_counters` is called.
    #[cfg(feature = "benchmarking")]
    benchmark_counters: Option<LexerBenchmarkCounters>,
}
739
740impl<'a> Lexer<'a> {
741    /// Create a new bash-profile lexer for the given input.
742    pub fn new(input: &'a str) -> Self {
743        Self::with_max_subst_depth_and_profile(
744            input,
745            DEFAULT_MAX_SUBST_DEPTH,
746            &ShellProfile::native(super::ShellDialect::Bash),
747            None,
748        )
749    }
750
751    /// Create a new lexer with a custom max substitution nesting depth.
752    /// Limits recursion in read_command_subst_into().
753    pub(super) fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
754        Self::with_max_subst_depth_and_profile(
755            input,
756            max_depth,
757            &ShellProfile::native(super::ShellDialect::Bash),
758            None,
759        )
760    }
761
762    /// Create a new lexer using the provided shell profile.
763    #[cfg(test)]
764    fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
765        let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
766            .then(|| ZshOptionTimeline::build(input, shell_profile))
767            .flatten()
768            .map(Arc::new);
769        Self::with_max_subst_depth_and_profile(
770            input,
771            DEFAULT_MAX_SUBST_DEPTH,
772            shell_profile,
773            zsh_timeline,
774        )
775    }
776
777    pub(crate) fn with_max_subst_depth_and_profile(
778        input: &'a str,
779        max_depth: usize,
780        shell_profile: &ShellProfile,
781        zsh_timeline: Option<Arc<ZshOptionTimeline>>,
782    ) -> Self {
783        Self {
784            input,
785            offset: 0,
786            cursor: Cursor::new(input),
787            position_map: PositionMap::new(input),
788            reinject_buf: VecDeque::new(),
789            reinject_resume_offset: None,
790            max_subst_depth: max_depth,
791            initial_zsh_options: shell_profile.zsh_options().cloned(),
792            zsh_timeline,
793            zsh_timeline_index: 0,
794            #[cfg(feature = "benchmarking")]
795            benchmark_counters: None,
796        }
797    }
798
799    pub(super) fn position_at_offset(&self, offset: usize) -> Position {
800        self.position_map.position_uncached(offset)
801    }
802
803    fn current_position(&mut self) -> Position {
804        #[cfg(feature = "benchmarking")]
805        self.maybe_record_current_position_call();
806        self.position_map.position(self.offset)
807    }
808
809    #[cfg(feature = "benchmarking")]
810    pub(crate) fn enable_benchmark_counters(&mut self) {
811        self.benchmark_counters = Some(LexerBenchmarkCounters::default());
812    }
813
814    #[cfg(feature = "benchmarking")]
815    pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
816        self.benchmark_counters.unwrap_or_default()
817    }
818
819    #[cfg(feature = "benchmarking")]
820    fn maybe_record_current_position_call(&mut self) {
821        if let Some(counters) = &mut self.benchmark_counters {
822            counters.current_position_calls += 1;
823        }
824    }
825
826    fn sync_offset_to_cursor(&mut self) {
827        if self.reinject_buf.is_empty()
828            && let Some(offset) = self.reinject_resume_offset.take()
829        {
830            self.offset = offset;
831        }
832    }
833
834    /// Get the next token kind from the input.
835    ///
836    /// This skips whitespace and line comments, matching
837    /// [`Lexer::next_lexed_token`]. It is useful for callers that only need the
838    /// token stream shape.
839    pub fn next_token_kind(&mut self) -> Option<TokenKind> {
840        self.next_lexed_token().map(|token| token.kind)
841    }
842
843    fn peek_char(&mut self) -> Option<char> {
844        self.sync_offset_to_cursor();
845        if let Some(&ch) = self.reinject_buf.front() {
846            Some(ch)
847        } else {
848            self.cursor.first()
849        }
850    }
851
852    fn advance(&mut self) -> Option<char> {
853        self.sync_offset_to_cursor();
854        let ch = if !self.reinject_buf.is_empty() {
855            self.reinject_buf.pop_front()
856        } else {
857            self.cursor.bump()
858        };
859        if let Some(c) = ch {
860            self.offset += c.len_utf8();
861        }
862        ch
863    }
864
865    fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
866        self.reinject_buf
867            .iter()
868            .copied()
869            .chain(self.cursor.rest().chars())
870    }
871
872    fn second_char(&self) -> Option<char> {
873        match self.reinject_buf.len() {
874            0 => self.cursor.second(),
875            1 => self.cursor.first(),
876            _ => self.reinject_buf.get(1).copied(),
877        }
878    }
879
880    fn third_char(&self) -> Option<char> {
881        match self.reinject_buf.len() {
882            0 => self.cursor.third(),
883            1 => self.cursor.second(),
884            2 => self.cursor.first(),
885            _ => self.reinject_buf.get(2).copied(),
886        }
887    }
888
889    fn fourth_char(&self) -> Option<char> {
890        match self.reinject_buf.len() {
891            0 => self.cursor.rest().chars().nth(3),
892            1 => self.cursor.third(),
893            2 => self.cursor.second(),
894            3 => self.cursor.first(),
895            _ => self.reinject_buf.get(3).copied(),
896        }
897    }
898
899    fn consume_source_bytes(&mut self, byte_len: usize) {
900        debug_assert!(self.reinject_buf.is_empty());
901        self.sync_offset_to_cursor();
902        self.offset += byte_len;
903        self.cursor.skip_bytes(byte_len);
904    }
905
906    fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
907        debug_assert!(self.reinject_buf.is_empty());
908        self.offset += byte_len;
909    }
910
911    fn consume_ascii_chars(&mut self, count: usize) {
912        if self.reinject_buf.is_empty() {
913            self.consume_source_bytes(count);
914            return;
915        }
916
917        for _ in 0..count {
918            self.advance();
919        }
920    }
921
922    fn source_horizontal_whitespace_len(&self) -> usize {
923        self.cursor
924            .rest()
925            .as_bytes()
926            .iter()
927            .take_while(|byte| matches!(**byte, b' ' | b'\t'))
928            .count()
929    }
930
931    fn source_ascii_plain_word_len(&self) -> usize {
932        self.cursor
933            .rest()
934            .as_bytes()
935            .iter()
936            .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
937            .count()
938    }
939
940    fn find_double_quote_special(source: &str) -> Option<usize> {
941        source
942            .as_bytes()
943            .iter()
944            .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
945    }
946
947    fn ensure_capture_from_source(
948        &self,
949        capture: &mut Option<String>,
950        start: Position,
951        end: Position,
952    ) {
953        if capture.is_none() {
954            *capture = Some(self.input[start.offset..end.offset].to_string());
955        }
956    }
957
958    fn push_capture_char(capture: &mut Option<String>, ch: char) {
959        if let Some(text) = capture.as_mut() {
960            text.push(ch);
961        }
962    }
963
964    fn push_capture_str(capture: &mut Option<String>, text: &str) {
965        if let Some(current) = capture.as_mut() {
966            current.push_str(text);
967        }
968    }
969
970    fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
971        if let Some(timeline) = self.zsh_timeline.as_ref() {
972            while self.zsh_timeline_index < timeline.entries.len()
973                && timeline.entries[self.zsh_timeline_index].offset <= self.offset
974            {
975                self.zsh_timeline_index += 1;
976            }
977            return if self.zsh_timeline_index == 0 {
978                self.initial_zsh_options.as_ref()
979            } else {
980                Some(&timeline.entries[self.zsh_timeline_index - 1].state)
981            };
982        }
983
984        self.initial_zsh_options.as_ref()
985    }
986
987    fn comments_enabled(&mut self) -> bool {
988        !self
989            .current_zsh_options()
990            .is_some_and(|options| options.interactive_comments.is_definitely_off())
991    }
992
993    fn rc_quotes_enabled(&mut self) -> bool {
994        self.current_zsh_options()
995            .is_some_and(|options| options.rc_quotes.is_definitely_on())
996    }
997
998    fn ignore_braces_enabled(&mut self) -> bool {
999        self.current_zsh_options()
1000            .is_some_and(|options| options.ignore_braces.is_definitely_on())
1001    }
1002
1003    fn ignore_close_braces_enabled(&mut self) -> bool {
1004        self.current_zsh_options().is_some_and(|options| {
1005            options.ignore_braces.is_definitely_on()
1006                || options.ignore_close_braces.is_definitely_on()
1007        })
1008    }
1009
1010    fn should_treat_hash_as_word_char(&mut self) -> bool {
1011        if !self.comments_enabled() {
1012            return true;
1013        }
1014        self.reinject_buf.is_empty()
1015            && (self
1016                .input
1017                .get(..self.offset)
1018                .and_then(|prefix| prefix.chars().next_back())
1019                .is_some_and(|prev| {
1020                    !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
1021                })
1022                || self.is_inside_unclosed_double_paren_on_line())
1023    }
1024
1025    fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
1026        capture
1027            .as_deref()
1028            .unwrap_or(&self.input[start.offset..self.offset])
1029    }
1030
1031    fn current_word_surface_is_single_char(
1032        &self,
1033        start: Position,
1034        capture: &Option<String>,
1035        target: char,
1036    ) -> bool {
1037        let text = self.current_word_text(start, capture);
1038        if !text.contains('\x00') {
1039            let mut encoded = [0; 4];
1040            return text == target.encode_utf8(&mut encoded);
1041        }
1042
1043        let mut chars = text.chars().filter(|&ch| ch != '\x00');
1044        matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
1045    }
1046
1047    fn current_word_surface_last_char<'b>(
1048        &'b self,
1049        start: Position,
1050        capture: &'b Option<String>,
1051    ) -> Option<char> {
1052        self.current_word_text(start, capture)
1053            .chars()
1054            .rev()
1055            .find(|&ch| ch != '\x00')
1056    }
1057
1058    fn current_word_surface_ends_with_char(
1059        &self,
1060        start: Position,
1061        capture: &Option<String>,
1062        target: char,
1063    ) -> bool {
1064        self.current_word_surface_last_char(start, capture) == Some(target)
1065    }
1066
1067    fn current_word_surface_ends_with_extglob_prefix(
1068        &self,
1069        start: Position,
1070        capture: &Option<String>,
1071    ) -> bool {
1072        self.current_word_surface_last_char(start, capture)
1073            .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
1074    }
1075
1076    /// Get the next source-backed token from the input, skipping line comments.
1077    ///
1078    /// Returned tokens expose their [`TokenKind`] and source [`Span`]. Comments
1079    /// are omitted from this public stream; the parser uses an internal variant
1080    /// when it needs to preserve them for AST attachment.
1081    pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
1082        self.skip_whitespace();
1083        let start = self.current_position();
1084        let token = self.next_lexed_token_inner(false)?;
1085        let end = self.current_position();
1086        Some(token.with_span(Span::from_positions(start, end)))
1087    }
1088
1089    /// Get the next source-backed token from the input, preserving line comments.
1090    pub(super) fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
1091        self.skip_whitespace();
1092        let start = self.current_position();
1093        let token = self.next_lexed_token_inner(true)?;
1094        let end = self.current_position();
1095        Some(token.with_span(Span::from_positions(start, end)))
1096    }
1097
1098    /// Internal: get next token without recording position (called after whitespace skip)
1099    fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
1100        let ch = self.peek_char()?;
1101
1102        match ch {
1103            '\n' => {
1104                self.consume_ascii_chars(1);
1105                Some(LexedToken::punctuation(TokenKind::Newline))
1106            }
1107            ';' => {
1108                if self.second_char() == Some(';') {
1109                    if self.third_char() == Some('&') {
1110                        self.consume_ascii_chars(3);
1111                        Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp)) // ;;&
1112                    } else {
1113                        self.consume_ascii_chars(2);
1114                        Some(LexedToken::punctuation(TokenKind::DoubleSemicolon)) // ;;
1115                    }
1116                } else if self.second_char() == Some('|') {
1117                    self.consume_ascii_chars(2);
1118                    Some(LexedToken::punctuation(TokenKind::SemiPipe)) // ;|
1119                } else if self.second_char() == Some('&') {
1120                    self.consume_ascii_chars(2);
1121                    Some(LexedToken::punctuation(TokenKind::SemiAmp)) // ;&
1122                } else {
1123                    self.consume_ascii_chars(1);
1124                    Some(LexedToken::punctuation(TokenKind::Semicolon))
1125                }
1126            }
1127            '|' => {
1128                if self.second_char() == Some('|') {
1129                    self.consume_ascii_chars(2);
1130                    Some(LexedToken::punctuation(TokenKind::Or))
1131                } else if self.second_char() == Some('&') {
1132                    self.consume_ascii_chars(2);
1133                    Some(LexedToken::punctuation(TokenKind::PipeBoth))
1134                } else {
1135                    self.consume_ascii_chars(1);
1136                    Some(LexedToken::punctuation(TokenKind::Pipe))
1137                }
1138            }
1139            '&' => {
1140                if self.second_char() == Some('&') {
1141                    self.consume_ascii_chars(2);
1142                    Some(LexedToken::punctuation(TokenKind::And))
1143                } else if self.second_char() == Some('>') {
1144                    if self.third_char() == Some('>') {
1145                        self.consume_ascii_chars(3);
1146                        Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
1147                    } else {
1148                        self.consume_ascii_chars(2);
1149                        Some(LexedToken::punctuation(TokenKind::RedirectBoth))
1150                    }
1151                } else if self.second_char() == Some('|') {
1152                    self.consume_ascii_chars(2);
1153                    Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
1154                } else if self.second_char() == Some('!') {
1155                    self.consume_ascii_chars(2);
1156                    Some(LexedToken::punctuation(TokenKind::BackgroundBang))
1157                } else {
1158                    self.consume_ascii_chars(1);
1159                    Some(LexedToken::punctuation(TokenKind::Background))
1160                }
1161            }
1162            '>' => {
1163                if self.second_char() == Some('>') {
1164                    if self.third_char() == Some('|') {
1165                        self.consume_ascii_chars(3);
1166                    } else {
1167                        self.consume_ascii_chars(2);
1168                    }
1169                    Some(LexedToken::punctuation(TokenKind::RedirectAppend))
1170                } else if self.second_char() == Some('|') {
1171                    self.consume_ascii_chars(2);
1172                    Some(LexedToken::punctuation(TokenKind::Clobber))
1173                } else if self.second_char() == Some('(') {
1174                    self.consume_ascii_chars(2);
1175                    Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
1176                } else if self.second_char() == Some('&') {
1177                    self.consume_ascii_chars(2);
1178                    Some(LexedToken::punctuation(TokenKind::DupOutput))
1179                } else {
1180                    self.consume_ascii_chars(1);
1181                    Some(LexedToken::punctuation(TokenKind::RedirectOut))
1182                }
1183            }
1184            '<' => {
1185                if self.second_char() == Some('<') {
1186                    if self.third_char() == Some('<') {
1187                        self.consume_ascii_chars(3);
1188                        Some(LexedToken::punctuation(TokenKind::HereString))
1189                    } else if self.third_char() == Some('-') {
1190                        self.consume_ascii_chars(3);
1191                        Some(LexedToken::punctuation(TokenKind::HereDocStrip))
1192                    } else {
1193                        self.consume_ascii_chars(2);
1194                        Some(LexedToken::punctuation(TokenKind::HereDoc))
1195                    }
1196                } else if self.second_char() == Some('>') {
1197                    self.consume_ascii_chars(2);
1198                    Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
1199                } else if self.second_char() == Some('(') {
1200                    self.consume_ascii_chars(2);
1201                    Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
1202                } else if self.second_char() == Some('&') {
1203                    self.consume_ascii_chars(2);
1204                    Some(LexedToken::punctuation(TokenKind::DupInput))
1205                } else {
1206                    self.consume_ascii_chars(1);
1207                    Some(LexedToken::punctuation(TokenKind::RedirectIn))
1208                }
1209            }
1210            '(' => {
1211                if self.second_char() == Some('(') {
1212                    self.consume_ascii_chars(2);
1213                    Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
1214                } else {
1215                    self.consume_ascii_chars(1);
1216                    Some(LexedToken::punctuation(TokenKind::LeftParen))
1217                }
1218            }
1219            ')' => {
1220                if self.second_char() == Some(')') {
1221                    self.consume_ascii_chars(2);
1222                    Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
1223                } else {
1224                    self.consume_ascii_chars(1);
1225                    Some(LexedToken::punctuation(TokenKind::RightParen))
1226                }
1227            }
1228            '{' => {
1229                let start = self.current_position();
1230                if self.ignore_braces_enabled() {
1231                    self.consume_ascii_chars(1);
1232                    match self.peek_char() {
1233                        Some(' ') | Some('\t') | Some('\n') | None => {
1234                            Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
1235                        }
1236                        _ => self.read_word_starting_with("{", start),
1237                    }
1238                } else if self.looks_like_brace_expansion() {
1239                    // Look ahead to see if this is a brace expansion like {a,b,c} or {1..5}
1240                    // vs a brace group like { cmd; }
1241                    // Note: { must be followed by space/newline to be a brace group
1242                    self.read_brace_expansion_word()
1243                } else if self.is_brace_group_start() {
1244                    self.advance();
1245                    Some(LexedToken::punctuation(TokenKind::LeftBrace))
1246                } else if self.brace_literal_starts_case_pattern_delimiter() {
1247                    self.read_word_starting_with("{", start)
1248                } else {
1249                    self.read_brace_literal_word()
1250                }
1251            }
1252            '}' => {
1253                self.consume_ascii_chars(1);
1254                if self.ignore_close_braces_enabled() {
1255                    Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
1256                } else {
1257                    Some(LexedToken::punctuation(TokenKind::RightBrace))
1258                }
1259            }
1260            '[' => {
1261                let start = self.current_position();
1262                self.consume_ascii_chars(1);
1263                if self.peek_char() == Some('[')
1264                    && matches!(
1265                        self.second_char(),
1266                        Some(' ') | Some('\t') | Some('\n') | None
1267                    )
1268                {
1269                    self.consume_ascii_chars(1);
1270                    Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
1271                } else {
1272                    // `[` can start the test command when followed by whitespace, or it can be
1273                    // ordinary word text such as a glob bracket expression.
1274                    //
1275                    // Read the whole token with the normal word scanner so forms like `[[z]`,
1276                    // `[hello"]"`, and `[+(])` stay attached to one word instead of producing
1277                    // structural tokens mid-word.
1278                    match self.peek_char() {
1279                        Some(' ') | Some('\t') | Some('\n') | None => {
1280                            Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
1281                        }
1282                        _ => self.read_word_starting_with("[", start),
1283                    }
1284                }
1285            }
1286            ']' => {
1287                if self.second_char() == Some(']') {
1288                    self.consume_ascii_chars(2);
1289                    Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
1290                } else {
1291                    self.consume_ascii_chars(1);
1292                    Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
1293                }
1294            }
1295            '\'' => self.read_single_quoted_string(),
1296            '"' => self.read_double_quoted_string(),
1297            '#' => {
1298                if self.should_treat_hash_as_word_char() {
1299                    let start = self.current_position();
1300                    return self.read_word_starting_with("#", start);
1301                }
1302                if preserve_comments {
1303                    self.read_comment();
1304                    Some(LexedToken::comment())
1305                } else {
1306                    self.skip_comment();
1307                    self.next_lexed_token_inner(false)
1308                }
1309            }
1310            // Handle file descriptor redirects like 2> or 2>&1
1311            '0'..='9' => self.read_word_or_fd_redirect(),
1312            _ => self.read_word(),
1313        }
1314    }
1315
1316    fn skip_whitespace(&mut self) {
1317        while let Some(ch) = self.peek_char() {
1318            if self.reinject_buf.is_empty() {
1319                let whitespace_len = self.source_horizontal_whitespace_len();
1320                if whitespace_len > 0 {
1321                    self.consume_source_bytes(whitespace_len);
1322                    continue;
1323                }
1324
1325                if self.cursor.rest().starts_with("\\\n") {
1326                    self.consume_source_bytes(2);
1327                    continue;
1328                }
1329            }
1330
1331            if ch == ' ' || ch == '\t' {
1332                self.consume_ascii_chars(1);
1333            } else if ch == '\\' {
1334                // Check for backslash-newline (line continuation) between tokens
1335                if self.second_char() == Some('\n') {
1336                    self.consume_ascii_chars(2);
1337                } else {
1338                    break;
1339                }
1340            } else {
1341                break;
1342            }
1343        }
1344    }
1345
1346    fn skip_comment(&mut self) {
1347        if self.reinject_buf.is_empty() {
1348            let end = self
1349                .cursor
1350                .find_byte(b'\n')
1351                .unwrap_or(self.cursor.rest().len());
1352            self.consume_source_bytes(end);
1353            return;
1354        }
1355
1356        while let Some(ch) = self.peek_char() {
1357            if ch == '\n' {
1358                break;
1359            }
1360            self.advance();
1361        }
1362    }
1363
1364    fn read_comment(&mut self) {
1365        debug_assert_eq!(self.peek_char(), Some('#'));
1366
1367        if self.reinject_buf.is_empty() {
1368            let rest = self.cursor.rest();
1369            let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1370            self.consume_source_bytes(end);
1371            return;
1372        }
1373
1374        self.advance(); // consume '#'
1375
1376        while let Some(ch) = self.peek_char() {
1377            if ch == '\n' {
1378                break;
1379            }
1380            self.advance();
1381        }
1382    }
1383
1384    fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1385        if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1386            return false;
1387        }
1388
1389        let line_start = self.input[..self.offset]
1390            .rfind('\n')
1391            .map_or(0, |index| index + 1);
1392        let prefix = &self.input[line_start..self.offset];
1393        line_has_unclosed_double_paren(prefix)
1394    }
1395
1396    /// Check if this is a file descriptor redirect (e.g., 2>, 2>>, 2>&1)
1397    /// or just a regular word starting with a digit
1398    fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
1399        if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
1400            let Some(fd) = first_digit.to_digit(10) else {
1401                unreachable!("peeked ASCII digit should convert to a base-10 digit");
1402            };
1403            let fd = fd as i32;
1404
1405            match (self.second_char(), self.third_char()) {
1406                (Some('>'), Some('>')) => {
1407                    if self.fourth_char() == Some('|') {
1408                        self.consume_ascii_chars(4);
1409                    } else {
1410                        self.consume_ascii_chars(3);
1411                    }
1412                    return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
1413                }
1414                (Some('>'), Some('|')) => {
1415                    self.consume_ascii_chars(3);
1416                    return Some(LexedToken::fd(TokenKind::Clobber, fd));
1417                }
1418                (Some('>'), Some('&')) => {
1419                    self.consume_ascii_chars(3);
1420
1421                    let mut target_str = String::with_capacity(4);
1422                    while let Some(c) = self.peek_char() {
1423                        if c.is_ascii_digit() {
1424                            target_str.push(c);
1425                            self.advance();
1426                        } else {
1427                            break;
1428                        }
1429                    }
1430
1431                    if target_str.is_empty() {
1432                        return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1433                    }
1434
1435                    let target_fd: i32 = target_str.parse().unwrap_or(1);
1436                    return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
1437                }
1438                (Some('>'), _) => {
1439                    self.consume_ascii_chars(2);
1440                    return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1441                }
1442                (Some('<'), Some('&')) => {
1443                    self.consume_ascii_chars(3);
1444
1445                    let mut target_str = String::with_capacity(4);
1446                    while let Some(c) = self.peek_char() {
1447                        if c.is_ascii_digit() || c == '-' {
1448                            target_str.push(c);
1449                            self.advance();
1450                            if c == '-' {
1451                                break;
1452                            }
1453                        } else {
1454                            break;
1455                        }
1456                    }
1457
1458                    if target_str == "-" {
1459                        return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
1460                    }
1461                    let target_fd: i32 = target_str.parse().unwrap_or(0);
1462                    return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
1463                }
1464                (Some('<'), Some('>')) => {
1465                    self.consume_ascii_chars(3);
1466                    return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
1467                }
1468                (Some('<'), Some('<')) => {}
1469                (Some('<'), _) => {
1470                    self.consume_ascii_chars(2);
1471                    return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
1472                }
1473                _ => {}
1474            }
1475        }
1476
1477        // Not a fd redirect pattern, read as regular word
1478        self.read_word()
1479    }
1480
1481    fn read_word_starting_with(
1482        &mut self,
1483        _prefix: &str,
1484        start: Position,
1485    ) -> Option<LexedToken<'a>> {
1486        let segment = match self.read_unquoted_segment(start) {
1487            Ok(segment) => segment,
1488            Err(kind) => return Some(LexedToken::error(kind)),
1489        };
1490        if segment.as_str().is_empty() {
1491            return None;
1492        }
1493        let mut lexed_word = LexedWord::from_segment(segment);
1494        if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1495            return Some(LexedToken::error(kind));
1496        }
1497        Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1498    }
1499
1500    fn read_word(&mut self) -> Option<LexedToken<'a>> {
1501        let start = self.current_position();
1502
1503        if self.reinject_buf.is_empty() {
1504            let ascii_len = self.source_ascii_plain_word_len();
1505            let chunk = if ascii_len > 0
1506                && self
1507                    .cursor
1508                    .rest()
1509                    .as_bytes()
1510                    .get(ascii_len)
1511                    .is_none_or(|byte| byte.is_ascii())
1512            {
1513                self.consume_source_bytes(ascii_len);
1514                &self.input[start.offset..self.offset]
1515            } else {
1516                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1517                self.advance_scanned_source_bytes(chunk.len());
1518                chunk
1519            };
1520            if !chunk.is_empty() {
1521                let continues = matches!(
1522                    self.peek_char(),
1523                    Some(next)
1524                        if Self::is_word_char(next)
1525                            || next == '$'
1526                            || matches!(next, '\'' | '"')
1527                            || next == '{'
1528                            || (next == '\\' && self.second_char() == Some('\n'))
1529                            || (next == '('
1530                                && (chunk.ends_with('=')
1531                                    || Self::word_can_take_parenthesized_suffix(chunk)))
1532                );
1533
1534                if !continues {
1535                    let end = self.current_position();
1536                    return Some(LexedToken::borrowed_word(
1537                        TokenKind::Word,
1538                        &self.input[start.offset..self.offset],
1539                        Some(Span::from_positions(start, end)),
1540                    ));
1541                }
1542
1543                if self.peek_char() == Some('(')
1544                    && (chunk.ends_with('=') || Self::word_can_take_parenthesized_suffix(chunk))
1545                {
1546                    return self.read_complex_word(start);
1547                }
1548
1549                let end = self.current_position();
1550                return self.finish_segmented_word(LexedWord::borrowed(
1551                    LexedWordSegmentKind::Plain,
1552                    &self.input[start.offset..self.offset],
1553                    Some(Span::from_positions(start, end)),
1554                ));
1555            }
1556        }
1557
1558        self.read_complex_word(start)
1559    }
1560
1561    fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1562        if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1563            return Some(LexedToken::error(kind));
1564        }
1565
1566        Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1567    }
1568
1569    fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1570        if self.peek_char() == Some('$') {
1571            match self.second_char() {
1572                Some('\'') => return self.read_dollar_single_quoted_string(),
1573                Some('"') => return self.read_dollar_double_quoted_string(),
1574                _ => {}
1575            }
1576        }
1577
1578        let segment = match self.read_unquoted_segment(start) {
1579            Ok(segment) => segment,
1580            Err(kind) => return Some(LexedToken::error(kind)),
1581        };
1582
1583        if segment.as_str().is_empty() {
1584            return None;
1585        }
1586
1587        self.finish_segmented_word(LexedWord::from_segment(segment))
1588    }
1589
1590    fn read_unquoted_segment(
1591        &mut self,
1592        start: Position,
1593    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1594        let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
1595        while let Some(ch) = self.peek_char() {
1596            if ch == '"' || ch == '\'' {
1597                break;
1598            } else if ch == '$' {
1599                if matches!(self.second_char(), Some('\'') | Some('"'))
1600                    && (self.current_position().offset > start.offset
1601                        || word.as_ref().is_some_and(|word| !word.is_empty()))
1602                {
1603                    break;
1604                }
1605
1606                // Handle variable references and command substitution
1607                self.advance();
1608
1609                Self::push_capture_char(&mut word, ch); // push the '$'
1610
1611                // Check for $[ / $( / ${ forms before falling back to variable text.
1612                if self.peek_char() == Some('[') {
1613                    Self::push_capture_char(&mut word, '[');
1614                    self.advance();
1615                    if !self.read_legacy_arithmetic_into(&mut word, start) {
1616                        return Err(LexerErrorKind::CommandSubstitution);
1617                    }
1618                } else if self.peek_char() == Some('(') {
1619                    if self.second_char() == Some('(') {
1620                        if !self.read_arithmetic_expansion_into(&mut word) {
1621                            return Err(LexerErrorKind::CommandSubstitution);
1622                        }
1623                    } else {
1624                        Self::push_capture_char(&mut word, '(');
1625                        self.advance();
1626                        if !self.read_command_subst_into(&mut word) {
1627                            return Err(LexerErrorKind::CommandSubstitution);
1628                        }
1629                    }
1630                } else if self.peek_char() == Some('{') {
1631                    // ${VAR} format — track nested braces so ${a[${#b[@]}]}
1632                    // doesn't stop at the inner }.
1633                    Self::push_capture_char(&mut word, '{');
1634                    self.advance();
1635                    let _ = self.read_param_expansion_into(&mut word, start);
1636                } else {
1637                    // Check for special single-character variables ($?, $#, $@, $*, $!, $$, $-, $0-$9)
1638                    if let Some(c) = self.peek_char() {
1639                        if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
1640                            || c.is_ascii_digit()
1641                        {
1642                            Self::push_capture_char(&mut word, c);
1643                            self.advance();
1644                        } else {
1645                            // Read variable name (alphanumeric + _)
1646                            while let Some(c) = self.peek_char() {
1647                                if c.is_ascii_alphanumeric() || c == '_' {
1648                                    Self::push_capture_char(&mut word, c);
1649                                    self.advance();
1650                                } else {
1651                                    break;
1652                                }
1653                            }
1654                        }
1655                    }
1656                }
1657            } else if ch == '{' {
1658                if self.looks_like_mid_word_brace_segment() {
1659                    // Keep balanced {...} forms attached to the current word so
1660                    // plain literals like foo{bar} and brace expansions stay intact.
1661                    Self::push_capture_char(&mut word, ch);
1662                    self.advance();
1663                    self.consume_mid_word_brace_segment(&mut word);
1664                } else {
1665                    // Unmatched literal braces in regexes like ^{ should not swallow
1666                    // trailing delimiters such as ]] or then.
1667                    Self::push_capture_char(&mut word, ch);
1668                    self.advance();
1669                }
1670            } else if ch == '`' {
1671                // Preserve legacy backticks verbatim so the parser can keep the
1672                // original syntax form.
1673                let capture_end = self.current_position();
1674                self.ensure_capture_from_source(&mut word, start, capture_end);
1675                Self::push_capture_char(&mut word, ch);
1676                self.advance(); // consume opening `
1677                let mut closed = false;
1678                while let Some(c) = self.peek_char() {
1679                    Self::push_capture_char(&mut word, c);
1680                    self.advance();
1681                    if c == '`' {
1682                        closed = true;
1683                        break;
1684                    }
1685                    if c == '\\'
1686                        && let Some(next) = self.peek_char()
1687                    {
1688                        Self::push_capture_char(&mut word, next);
1689                        self.advance();
1690                    }
1691                }
1692                if !closed {
1693                    return Err(LexerErrorKind::BacktickSubstitution);
1694                }
1695            } else if ch == '\\' {
1696                let capture_end = self.current_position();
1697                self.ensure_capture_from_source(&mut word, start, capture_end);
1698                self.advance();
1699                if let Some(next) = self.peek_char() {
1700                    if next == '\n' {
1701                        // Line continuation: skip backslash + newline
1702                        self.advance();
1703                    } else {
1704                        // Escaped character: backslash quotes the next char
1705                        // (quote removal — only the literal char survives).
1706                        // Preserve source/decoded alignment with a sentinel so
1707                        // downstream word decoding keeps later spans anchored.
1708                        Self::push_capture_char(&mut word, '\x00');
1709                        Self::push_capture_char(&mut word, next);
1710                        self.advance();
1711                        if next == '{'
1712                            && self.current_word_surface_is_single_char(start, &word, '{')
1713                            && self.escaped_brace_sequence_looks_like_brace_expansion()
1714                        {
1715                            let mut depth = 1;
1716                            while let Some(c) = self.peek_char() {
1717                                Self::push_capture_char(&mut word, c);
1718                                self.advance();
1719                                match c {
1720                                    '{' => depth += 1,
1721                                    '}' => {
1722                                        depth -= 1;
1723                                        if depth == 0 {
1724                                            break;
1725                                        }
1726                                    }
1727                                    _ => {}
1728                                }
1729                            }
1730                        }
1731                    }
1732                } else {
1733                    Self::push_capture_char(&mut word, '\\');
1734                }
1735            } else if ch == '('
1736                && self.current_word_surface_ends_with_char(start, &word, '=')
1737                && self.looks_like_assoc_assign()
1738            {
1739                // Associative compound assignment: var=([k]="v" ...) — keep entire
1740                // (...) as part of word so declare -A m=([k]="v") stays one token.
1741                Self::push_capture_char(&mut word, ch);
1742                self.advance();
1743                let mut depth = 1;
1744                while let Some(c) = self.peek_char() {
1745                    Self::push_capture_char(&mut word, c);
1746                    self.advance();
1747                    match c {
1748                        '(' => depth += 1,
1749                        ')' => {
1750                            depth -= 1;
1751                            if depth == 0 {
1752                                break;
1753                            }
1754                        }
1755                        '"' => {
1756                            while let Some(qc) = self.peek_char() {
1757                                Self::push_capture_char(&mut word, qc);
1758                                self.advance();
1759                                if qc == '"' {
1760                                    break;
1761                                }
1762                                if qc == '\\'
1763                                    && let Some(esc) = self.peek_char()
1764                                {
1765                                    Self::push_capture_char(&mut word, esc);
1766                                    self.advance();
1767                                }
1768                            }
1769                        }
1770                        '\'' => {
1771                            while let Some(qc) = self.peek_char() {
1772                                Self::push_capture_char(&mut word, qc);
1773                                self.advance();
1774                                if qc == '\'' {
1775                                    break;
1776                                }
1777                            }
1778                        }
1779                        '\\' => {
1780                            if let Some(esc) = self.peek_char() {
1781                                Self::push_capture_char(&mut word, esc);
1782                                self.advance();
1783                            }
1784                        }
1785                        _ => {}
1786                    }
1787                }
1788            } else if ch == '(' && self.current_word_surface_ends_with_extglob_prefix(start, &word)
1789            {
1790                // Extglob: @(...), ?(...), *(...), +(...), !(...)
1791                // Consume through matching ) including nested parens
1792                Self::push_capture_char(&mut word, ch);
1793                self.advance();
1794                let mut depth = 1;
1795                while let Some(c) = self.peek_char() {
1796                    Self::push_capture_char(&mut word, c);
1797                    self.advance();
1798                    match c {
1799                        '(' => depth += 1,
1800                        ')' => {
1801                            depth -= 1;
1802                            if depth == 0 {
1803                                break;
1804                            }
1805                        }
1806                        '\\' => {
1807                            if let Some(esc) = self.peek_char() {
1808                                Self::push_capture_char(&mut word, esc);
1809                                self.advance();
1810                            }
1811                        }
1812                        _ => {}
1813                    }
1814                }
1815            } else if Self::is_plain_word_char(ch) {
1816                if self.reinject_buf.is_empty() {
1817                    let ascii_len = self.source_ascii_plain_word_len();
1818                    let chunk = if ascii_len > 0
1819                        && self
1820                            .cursor
1821                            .rest()
1822                            .as_bytes()
1823                            .get(ascii_len)
1824                            .is_none_or(|byte| byte.is_ascii())
1825                    {
1826                        self.consume_source_bytes(ascii_len);
1827                        &self.input[self.offset - ascii_len..self.offset]
1828                    } else {
1829                        let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1830                        self.advance_scanned_source_bytes(chunk.len());
1831                        chunk
1832                    };
1833                    Self::push_capture_str(&mut word, chunk);
1834                } else {
1835                    Self::push_capture_char(&mut word, ch);
1836                    self.advance();
1837                }
1838            } else {
1839                break;
1840            }
1841        }
1842
1843        if let Some(word) = word {
1844            let span = Some(Span::from_positions(start, self.current_position()));
1845            Ok(LexedWordSegment::owned_with_spans(
1846                LexedWordSegmentKind::Plain,
1847                word,
1848                span,
1849                span,
1850            ))
1851        } else {
1852            let end = self.current_position();
1853            Ok(LexedWordSegment::borrowed(
1854                LexedWordSegmentKind::Plain,
1855                &self.input[start.offset..self.offset],
1856                Some(Span::from_positions(start, end)),
1857            ))
1858        }
1859    }
1860
1861    fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1862        let segment = match self.read_single_quoted_segment() {
1863            Ok(segment) => segment,
1864            Err(kind) => return Some(LexedToken::error(kind)),
1865        };
1866        let mut word = LexedWord::from_segment(segment);
1867        if let Err(kind) = self.append_segmented_continuation(&mut word) {
1868            return Some(LexedToken::error(kind));
1869        }
1870
1871        Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1872    }
1873
1874    fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1875        debug_assert_eq!(self.peek_char(), Some('\''));
1876
1877        let wrapper_start = self.current_position();
1878        self.consume_ascii_chars(1); // consume opening '
1879        let content_start = self.current_position();
1880        let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
1881        let mut content_end = content_start;
1882        let mut content = String::with_capacity(16);
1883        let mut closed = false;
1884
1885        if can_borrow {
1886            let rest = self.cursor.rest();
1887            if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
1888                self.consume_source_bytes(quote_index);
1889                content_end = self.current_position();
1890                self.consume_ascii_chars(1); // consume closing '
1891                closed = true;
1892            } else {
1893                self.consume_source_bytes(rest.len());
1894            }
1895        }
1896
1897        while let Some(ch) = self.peek_char() {
1898            if closed {
1899                break;
1900            }
1901            if ch == '\'' {
1902                if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
1903                    if !can_borrow {
1904                        content.push('\'');
1905                    }
1906                    self.advance();
1907                    self.advance();
1908                    continue;
1909                }
1910                content_end = self.current_position();
1911                self.consume_ascii_chars(1); // consume closing '
1912                closed = true;
1913                break;
1914            }
1915            if !can_borrow {
1916                content.push(ch);
1917            }
1918            self.advance();
1919        }
1920
1921        if !closed {
1922            return Err(LexerErrorKind::SingleQuote);
1923        }
1924
1925        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
1926        let content_span = Some(Span::from_positions(content_start, content_end));
1927
1928        if can_borrow {
1929            Ok(LexedWordSegment::borrowed_with_spans(
1930                LexedWordSegmentKind::SingleQuoted,
1931                &self.input[content_start.offset..content_end.offset],
1932                content_span,
1933                wrapper_span,
1934            ))
1935        } else {
1936            Ok(LexedWordSegment::owned_with_spans(
1937                LexedWordSegmentKind::SingleQuoted,
1938                content,
1939                content_span,
1940                wrapper_span,
1941            ))
1942        }
1943    }
1944
1945    fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1946        let segment = match self.read_dollar_single_quoted_segment() {
1947            Ok(segment) => segment,
1948            Err(kind) => return Some(LexedToken::error(kind)),
1949        };
1950        let mut word = LexedWord::from_segment(segment);
1951        if let Err(kind) = self.append_segmented_continuation(&mut word) {
1952            return Some(LexedToken::error(kind));
1953        }
1954
1955        let kind = if word.single_segment().is_some() {
1956            TokenKind::LiteralWord
1957        } else {
1958            TokenKind::Word
1959        };
1960
1961        Some(LexedToken::with_word_payload(kind, word))
1962    }
1963
1964    fn read_dollar_single_quoted_segment(
1965        &mut self,
1966    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1967        debug_assert_eq!(self.peek_char(), Some('$'));
1968        debug_assert_eq!(self.second_char(), Some('\''));
1969
1970        let wrapper_start = self.current_position();
1971        self.consume_ascii_chars(2); // consume $'
1972        let content_start = self.current_position();
1973        let mut out = String::with_capacity(16);
1974
1975        while let Some(ch) = self.peek_char() {
1976            if ch == '\'' {
1977                let content_end = self.current_position();
1978                self.advance();
1979                let wrapper_span =
1980                    Some(Span::from_positions(wrapper_start, self.current_position()));
1981                let content_span = Some(Span::from_positions(content_start, content_end));
1982                return Ok(LexedWordSegment::owned_with_spans(
1983                    LexedWordSegmentKind::DollarSingleQuoted,
1984                    out,
1985                    content_span,
1986                    wrapper_span,
1987                ));
1988            }
1989
1990            if ch == '\\' {
1991                self.advance();
1992                if let Some(esc) = self.peek_char() {
1993                    self.advance();
1994                    match esc {
1995                        'n' => out.push('\n'),
1996                        't' => out.push('\t'),
1997                        'r' => out.push('\r'),
1998                        'a' => out.push('\x07'),
1999                        'b' => out.push('\x08'),
2000                        'f' => out.push('\x0C'),
2001                        'v' => out.push('\x0B'),
2002                        'e' | 'E' => out.push('\x1B'),
2003                        '\\' => out.push('\\'),
2004                        '\'' => out.push('\''),
2005                        '"' => out.push('"'),
2006                        '?' => out.push('?'),
2007                        'c' => {
2008                            if let Some(control) = self.peek_char() {
2009                                self.advance();
2010                                out.push(((control as u32 & 0x1F) as u8) as char);
2011                            } else {
2012                                out.push('\\');
2013                                out.push('c');
2014                            }
2015                        }
2016                        'x' => {
2017                            let mut hex = String::new();
2018                            for _ in 0..2 {
2019                                if let Some(h) = self.peek_char() {
2020                                    if h.is_ascii_hexdigit() {
2021                                        hex.push(h);
2022                                        self.advance();
2023                                    } else {
2024                                        break;
2025                                    }
2026                                }
2027                            }
2028                            if let Ok(val) = u8::from_str_radix(&hex, 16) {
2029                                out.push(val as char);
2030                            }
2031                        }
2032                        'u' => {
2033                            let mut hex = String::new();
2034                            for _ in 0..4 {
2035                                if let Some(h) = self.peek_char() {
2036                                    if h.is_ascii_hexdigit() {
2037                                        hex.push(h);
2038                                        self.advance();
2039                                    } else {
2040                                        break;
2041                                    }
2042                                }
2043                            }
2044                            if let Ok(val) = u32::from_str_radix(&hex, 16)
2045                                && let Some(c) = char::from_u32(val)
2046                            {
2047                                out.push(c);
2048                            }
2049                        }
2050                        'U' => {
2051                            let mut hex = String::new();
2052                            for _ in 0..8 {
2053                                if let Some(h) = self.peek_char() {
2054                                    if h.is_ascii_hexdigit() {
2055                                        hex.push(h);
2056                                        self.advance();
2057                                    } else {
2058                                        break;
2059                                    }
2060                                }
2061                            }
2062                            if let Ok(val) = u32::from_str_radix(&hex, 16)
2063                                && let Some(c) = char::from_u32(val)
2064                            {
2065                                out.push(c);
2066                            }
2067                        }
2068                        '0'..='7' => {
2069                            let mut oct = String::new();
2070                            oct.push(esc);
2071                            for _ in 0..2 {
2072                                if let Some(o) = self.peek_char() {
2073                                    if o.is_ascii_digit() && o < '8' {
2074                                        oct.push(o);
2075                                        self.advance();
2076                                    } else {
2077                                        break;
2078                                    }
2079                                }
2080                            }
2081                            if let Ok(val) = u8::from_str_radix(&oct, 8) {
2082                                out.push(val as char);
2083                            }
2084                        }
2085                        _ => {
2086                            out.push('\\');
2087                            out.push(esc);
2088                        }
2089                    }
2090                } else {
2091                    out.push('\\');
2092                }
2093                continue;
2094            }
2095
2096            out.push(ch);
2097            self.advance();
2098        }
2099
2100        Err(LexerErrorKind::SingleQuote)
2101    }
2102
2103    fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2104        let start = self.current_position();
2105
2106        if self.reinject_buf.is_empty() {
2107            let ascii_len = self.source_ascii_plain_word_len();
2108            let chunk = if ascii_len > 0
2109                && self
2110                    .cursor
2111                    .rest()
2112                    .as_bytes()
2113                    .get(ascii_len)
2114                    .is_none_or(|byte| byte.is_ascii())
2115            {
2116                self.consume_source_bytes(ascii_len);
2117                &self.input[start.offset..self.offset]
2118            } else {
2119                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
2120                self.advance_scanned_source_bytes(chunk.len());
2121                chunk
2122            };
2123            if chunk.is_empty() {
2124                return None;
2125            }
2126
2127            let end = self.current_position();
2128            return Some(LexedWordSegment::borrowed(
2129                LexedWordSegmentKind::Plain,
2130                &self.input[start.offset..self.offset],
2131                Some(Span::from_positions(start, end)),
2132            ));
2133        }
2134
2135        let ch = self.peek_char()?;
2136        if !Self::is_plain_word_char(ch) {
2137            return None;
2138        }
2139
2140        let mut text = String::with_capacity(16);
2141        while let Some(ch) = self.peek_char() {
2142            if !Self::is_plain_word_char(ch) {
2143                break;
2144            }
2145            text.push(ch);
2146            self.advance();
2147        }
2148
2149        Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
2150    }
2151
2152    /// After a closing quote, read any adjacent quoted or unquoted word chars
2153    /// into `word`. Handles concatenation like `'foo'"bar"baz`.
2154    fn append_segmented_continuation(
2155        &mut self,
2156        word: &mut LexedWord<'a>,
2157    ) -> Result<(), LexerErrorKind> {
2158        loop {
2159            match self.peek_char() {
2160                Some('\\') if self.second_char() == Some('\n') => {
2161                    self.advance();
2162                    self.advance();
2163                    continue;
2164                }
2165                Some('\'') => {
2166                    word.push_segment(self.read_single_quoted_segment()?);
2167                }
2168                Some('"') => {
2169                    word.push_segment(self.read_double_quoted_segment()?);
2170                }
2171                Some('$') if self.second_char() == Some('\'') => {
2172                    word.push_segment(self.read_dollar_single_quoted_segment()?);
2173                }
2174                Some('$') if self.second_char() == Some('"') => {
2175                    word.push_segment(self.read_dollar_double_quoted_segment()?);
2176                }
2177                Some('(') if Self::lexed_word_can_take_parenthesized_suffix(word) => {
2178                    let Some(segment) = self.read_parenthesized_word_suffix_segment() else {
2179                        unreachable!("peeked '(' should produce a suffix segment");
2180                    };
2181                    word.push_segment(segment);
2182                }
2183                _ => {
2184                    if let Some(segment) = self.read_plain_continuation_segment() {
2185                        word.push_segment(segment);
2186                        continue;
2187                    }
2188
2189                    let start = self.current_position();
2190                    let plain = self.read_unquoted_segment(start)?;
2191                    if plain.as_str().is_empty() {
2192                        break;
2193                    }
2194                    word.push_segment(plain);
2195                }
2196            }
2197        }
2198
2199        Ok(())
2200    }
2201
2202    fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2203        debug_assert_eq!(self.peek_char(), Some('('));
2204
2205        let start = self.current_position();
2206        let mut depth = 0usize;
2207        let mut escaped = false;
2208        let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2209
2210        while let Some(ch) = self.peek_char() {
2211            if let Some(text) = text.as_mut() {
2212                text.push(ch);
2213            }
2214            self.advance();
2215
2216            if escaped {
2217                escaped = false;
2218                continue;
2219            }
2220
2221            match ch {
2222                '\\' => escaped = true,
2223                '(' => depth += 1,
2224                ')' => {
2225                    depth = depth.saturating_sub(1);
2226                    if depth == 0 {
2227                        break;
2228                    }
2229                }
2230                _ => {}
2231            }
2232        }
2233
2234        let end = self.current_position();
2235        let span = Some(Span::from_positions(start, end));
2236        if let Some(text) = text {
2237            Some(LexedWordSegment::owned_with_spans(
2238                LexedWordSegmentKind::Plain,
2239                text,
2240                span,
2241                span,
2242            ))
2243        } else {
2244            Some(LexedWordSegment::borrowed_with_spans(
2245                LexedWordSegmentKind::Plain,
2246                &self.input[start.offset..end.offset],
2247                span,
2248                span,
2249            ))
2250        }
2251    }
2252
2253    fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2254        self.read_double_quoted_word(false)
2255    }
2256
2257    fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2258        self.read_double_quoted_word(true)
2259    }
2260
2261    fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2262        let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2263            Ok(segment) => segment,
2264            Err(kind) => return Some(LexedToken::error(kind)),
2265        };
2266        let mut word = LexedWord::from_segment(segment);
2267        if let Err(kind) = self.append_segmented_continuation(&mut word) {
2268            return Some(LexedToken::error(kind));
2269        }
2270
2271        let kind = if word.single_segment().is_some() {
2272            TokenKind::QuotedWord
2273        } else {
2274            TokenKind::Word
2275        };
2276
2277        Some(LexedToken::with_word_payload(kind, word))
2278    }
2279
2280    fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2281        self.read_double_quoted_segment_with_dollar(false)
2282    }
2283
2284    fn read_dollar_double_quoted_segment(
2285        &mut self,
2286    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2287        self.read_double_quoted_segment_with_dollar(true)
2288    }
2289
2290    fn read_double_quoted_segment_with_dollar(
2291        &mut self,
2292        dollar: bool,
2293    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2294        if dollar {
2295            debug_assert_eq!(self.peek_char(), Some('$'));
2296            debug_assert_eq!(self.second_char(), Some('"'));
2297        } else {
2298            debug_assert_eq!(self.peek_char(), Some('"'));
2299        }
2300
2301        let wrapper_start = self.current_position();
2302        if dollar {
2303            self.consume_ascii_chars(2); // consume $"
2304        } else {
2305            self.consume_ascii_chars(1); // consume opening "
2306        }
2307        let content_start = self.current_position();
2308        let mut content_end = content_start;
2309        let mut simple = self.reinject_buf.is_empty();
2310        let mut borrowable = self.reinject_buf.is_empty();
2311        let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2312        let mut closed = false;
2313
2314        while let Some(ch) = self.peek_char() {
2315            if simple {
2316                if self.reinject_buf.is_empty() {
2317                    let rest = self.cursor.rest();
2318                    match Self::find_double_quote_special(rest) {
2319                        Some(index) if index > 0 => {
2320                            self.consume_source_bytes(index);
2321                            continue;
2322                        }
2323                        None => {
2324                            self.consume_source_bytes(rest.len());
2325                            return Err(LexerErrorKind::DoubleQuote);
2326                        }
2327                        _ => {}
2328                    }
2329                }
2330
2331                match ch {
2332                    '"' => {
2333                        content_end = self.current_position();
2334                        self.consume_ascii_chars(1); // consume closing "
2335                        closed = true;
2336                        break;
2337                    }
2338                    '\\' | '$' | '`' => {
2339                        simple = false;
2340                        if ch == '`' {
2341                            borrowable = false;
2342                            let capture_end = self.current_position();
2343                            self.ensure_capture_from_source(
2344                                &mut content,
2345                                content_start,
2346                                capture_end,
2347                            );
2348                        }
2349                    }
2350                    _ => {
2351                        self.advance();
2352                    }
2353                }
2354                if simple {
2355                    continue;
2356                }
2357            }
2358
2359            match ch {
2360                '"' => {
2361                    if borrowable {
2362                        content_end = self.current_position();
2363                    }
2364                    self.consume_ascii_chars(1); // consume closing "
2365                    closed = true;
2366                    break;
2367                }
2368                '\\' => {
2369                    let escape_start = self.current_position();
2370                    self.advance();
2371                    if let Some(next) = self.peek_char() {
2372                        match next {
2373                            '\n' => {
2374                                borrowable = false;
2375                                self.ensure_capture_from_source(
2376                                    &mut content,
2377                                    content_start,
2378                                    escape_start,
2379                                );
2380                                self.advance();
2381                            }
2382                            '$' => {
2383                                borrowable = false;
2384                                self.ensure_capture_from_source(
2385                                    &mut content,
2386                                    content_start,
2387                                    escape_start,
2388                                );
2389                                Self::push_capture_char(&mut content, '\x00');
2390                                Self::push_capture_char(&mut content, '$');
2391                                self.advance();
2392                            }
2393                            '"' | '\\' | '`' => {
2394                                borrowable = false;
2395                                self.ensure_capture_from_source(
2396                                    &mut content,
2397                                    content_start,
2398                                    escape_start,
2399                                );
2400                                if next == '\\' {
2401                                    Self::push_capture_char(&mut content, '\x00');
2402                                }
2403                                if next == '`' {
2404                                    Self::push_capture_char(&mut content, '\x00');
2405                                }
2406                                Self::push_capture_char(&mut content, next);
2407                                self.advance();
2408                                content_end = self.current_position();
2409                            }
2410                            _ => {
2411                                Self::push_capture_char(&mut content, '\\');
2412                                Self::push_capture_char(&mut content, next);
2413                                self.advance();
2414                                content_end = self.current_position();
2415                            }
2416                        }
2417                    }
2418                }
2419                '$' => {
2420                    Self::push_capture_char(&mut content, '$');
2421                    self.advance();
2422                    if self.peek_char() == Some('(') {
2423                        if self.second_char() == Some('(') {
2424                            self.read_arithmetic_expansion_into(&mut content);
2425                        } else {
2426                            Self::push_capture_char(&mut content, '(');
2427                            self.advance();
2428                            self.read_command_subst_into(&mut content);
2429                        }
2430                    } else if self.peek_char() == Some('{') {
2431                        Self::push_capture_char(&mut content, '{');
2432                        self.advance();
2433                        borrowable &= self.read_param_expansion_into(&mut content, content_start);
2434                    }
2435                    content_end = self.current_position();
2436                }
2437                '`' => {
2438                    borrowable = false;
2439                    let capture_end = self.current_position();
2440                    self.ensure_capture_from_source(&mut content, content_start, capture_end);
2441                    Self::push_capture_char(&mut content, '`');
2442                    self.advance(); // consume opening `
2443                    while let Some(c) = self.peek_char() {
2444                        Self::push_capture_char(&mut content, c);
2445                        self.advance();
2446                        if c == '`' {
2447                            break;
2448                        }
2449                        if c == '\\'
2450                            && let Some(next) = self.peek_char()
2451                        {
2452                            Self::push_capture_char(&mut content, next);
2453                            self.advance();
2454                        }
2455                    }
2456                    content_end = self.current_position();
2457                }
2458                _ => {
2459                    Self::push_capture_char(&mut content, ch);
2460                    self.advance();
2461                    content_end = self.current_position();
2462                }
2463            }
2464        }
2465
2466        if !closed {
2467            return Err(LexerErrorKind::DoubleQuote);
2468        }
2469
2470        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
2471        let content_span = Some(Span::from_positions(content_start, content_end));
2472
2473        if borrowable {
2474            Ok(LexedWordSegment::borrowed_with_spans(
2475                if dollar {
2476                    LexedWordSegmentKind::DollarDoubleQuoted
2477                } else {
2478                    LexedWordSegmentKind::DoubleQuoted
2479                },
2480                &self.input[content_start.offset..content_end.offset],
2481                content_span,
2482                wrapper_span,
2483            ))
2484        } else {
2485            Ok(LexedWordSegment::owned_with_spans(
2486                if dollar {
2487                    LexedWordSegmentKind::DollarDoubleQuoted
2488                } else {
2489                    LexedWordSegmentKind::DoubleQuoted
2490                },
2491                content.unwrap_or_default(),
2492                content_span,
2493                wrapper_span,
2494            ))
2495        }
2496    }
2497
2498    fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
2499        debug_assert_eq!(self.peek_char(), Some('('));
2500        debug_assert_eq!(self.second_char(), Some('('));
2501
2502        Self::push_capture_char(content, '(');
2503        self.advance();
2504        Self::push_capture_char(content, '(');
2505        self.advance();
2506
2507        let mut depth = 2;
2508        while let Some(c) = self.peek_char() {
2509            match c {
2510                '\\' => {
2511                    Self::push_capture_char(content, c);
2512                    self.advance();
2513                    if let Some(next) = self.peek_char() {
2514                        Self::push_capture_char(content, next);
2515                        self.advance();
2516                    }
2517                }
2518                '\'' => {
2519                    Self::push_capture_char(content, c);
2520                    self.advance();
2521                    while let Some(quoted) = self.peek_char() {
2522                        Self::push_capture_char(content, quoted);
2523                        self.advance();
2524                        if quoted == '\'' {
2525                            break;
2526                        }
2527                    }
2528                }
2529                '"' => {
2530                    let mut escaped = false;
2531                    Self::push_capture_char(content, c);
2532                    self.advance();
2533                    while let Some(quoted) = self.peek_char() {
2534                        Self::push_capture_char(content, quoted);
2535                        self.advance();
2536                        if escaped {
2537                            escaped = false;
2538                            continue;
2539                        }
2540                        match quoted {
2541                            '\\' => escaped = true,
2542                            '"' => break,
2543                            _ => {}
2544                        }
2545                    }
2546                }
2547                '`' => {
2548                    let mut escaped = false;
2549                    Self::push_capture_char(content, c);
2550                    self.advance();
2551                    while let Some(quoted) = self.peek_char() {
2552                        Self::push_capture_char(content, quoted);
2553                        self.advance();
2554                        if escaped {
2555                            escaped = false;
2556                            continue;
2557                        }
2558                        match quoted {
2559                            '\\' => escaped = true,
2560                            '`' => break,
2561                            _ => {}
2562                        }
2563                    }
2564                }
2565                '(' => {
2566                    Self::push_capture_char(content, c);
2567                    self.advance();
2568                    depth += 1;
2569                }
2570                ')' => {
2571                    Self::push_capture_char(content, c);
2572                    self.advance();
2573                    depth -= 1;
2574                    if depth == 0 {
2575                        return true;
2576                    }
2577                }
2578                _ => {
2579                    Self::push_capture_char(content, c);
2580                    self.advance();
2581                }
2582            }
2583        }
2584
2585        false
2586    }
2587
2588    fn read_legacy_arithmetic_into(
2589        &mut self,
2590        content: &mut Option<String>,
2591        segment_start: Position,
2592    ) -> bool {
2593        let mut bracket_depth = 1;
2594
2595        while let Some(c) = self.peek_char() {
2596            match c {
2597                '\\' => {
2598                    Self::push_capture_char(content, c);
2599                    self.advance();
2600                    if let Some(next) = self.peek_char() {
2601                        Self::push_capture_char(content, next);
2602                        self.advance();
2603                    }
2604                }
2605                '\'' => {
2606                    Self::push_capture_char(content, c);
2607                    self.advance();
2608                    while let Some(quoted) = self.peek_char() {
2609                        Self::push_capture_char(content, quoted);
2610                        self.advance();
2611                        if quoted == '\'' {
2612                            break;
2613                        }
2614                    }
2615                }
2616                '"' => {
2617                    let mut escaped = false;
2618                    Self::push_capture_char(content, c);
2619                    self.advance();
2620                    while let Some(quoted) = self.peek_char() {
2621                        Self::push_capture_char(content, quoted);
2622                        self.advance();
2623                        if escaped {
2624                            escaped = false;
2625                            continue;
2626                        }
2627                        match quoted {
2628                            '\\' => escaped = true,
2629                            '"' => break,
2630                            _ => {}
2631                        }
2632                    }
2633                }
2634                '`' => {
2635                    let mut escaped = false;
2636                    Self::push_capture_char(content, c);
2637                    self.advance();
2638                    while let Some(quoted) = self.peek_char() {
2639                        Self::push_capture_char(content, quoted);
2640                        self.advance();
2641                        if escaped {
2642                            escaped = false;
2643                            continue;
2644                        }
2645                        match quoted {
2646                            '\\' => escaped = true,
2647                            '`' => break,
2648                            _ => {}
2649                        }
2650                    }
2651                }
2652                '[' => {
2653                    Self::push_capture_char(content, c);
2654                    self.advance();
2655                    bracket_depth += 1;
2656                }
2657                ']' => {
2658                    Self::push_capture_char(content, c);
2659                    self.advance();
2660                    bracket_depth -= 1;
2661                    if bracket_depth == 0 {
2662                        return true;
2663                    }
2664                }
2665                '$' => {
2666                    Self::push_capture_char(content, c);
2667                    self.advance();
2668                    if self.peek_char() == Some('(') {
2669                        if self.second_char() == Some('(') {
2670                            if !self.read_arithmetic_expansion_into(content) {
2671                                return false;
2672                            }
2673                        } else {
2674                            Self::push_capture_char(content, '(');
2675                            self.advance();
2676                            if !self.read_command_subst_into(content) {
2677                                return false;
2678                            }
2679                        }
2680                    } else if self.peek_char() == Some('{') {
2681                        Self::push_capture_char(content, '{');
2682                        self.advance();
2683                        if !self.read_param_expansion_into(content, segment_start) {
2684                            return false;
2685                        }
2686                    } else if self.peek_char() == Some('[') {
2687                        Self::push_capture_char(content, '[');
2688                        self.advance();
2689                        if !self.read_legacy_arithmetic_into(content, segment_start) {
2690                            return false;
2691                        }
2692                    }
2693                }
2694                _ => {
2695                    Self::push_capture_char(content, c);
2696                    self.advance();
2697                }
2698            }
2699        }
2700
2701        false
2702    }
2703
    /// Read command substitution content after `$(`, handling nested parens and quotes.
    ///
    /// Appends the consumed characters to `content`, including the closing `)`.
    /// This is the entry point for an outermost substitution: it delegates to
    /// `read_command_subst_into_depth` with a starting nesting depth of `0`.
    /// That depth value is what the delegate compares against
    /// `max_subst_depth` to bound recursion; this function takes no depth
    /// argument of its own.
    ///
    /// Returns the delegate's result unchanged.
    fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
        self.read_command_subst_into_depth(content, 0)
    }
2710
2711    fn flush_command_subst_keyword(
2712        current_word: &mut String,
2713        pending_case_headers: &mut usize,
2714        case_clause_depths: &mut SmallVec<[usize; 4]>,
2715        depth: usize,
2716        word_started_at_command_start: &mut bool,
2717    ) {
2718        if current_word.is_empty() {
2719            *word_started_at_command_start = false;
2720            return;
2721        }
2722
2723        match current_word.as_str() {
2724            "case" if *word_started_at_command_start => *pending_case_headers += 1,
2725            "in" if *pending_case_headers > 0 => {
2726                *pending_case_headers -= 1;
2727                case_clause_depths.push(depth);
2728            }
2729            "esac" if *word_started_at_command_start => {
2730                case_clause_depths.pop();
2731            }
2732            _ => {}
2733        }
2734
2735        current_word.clear();
2736        *word_started_at_command_start = false;
2737    }
2738
2739    fn read_command_subst_heredoc_delimiter_into(
2740        &mut self,
2741        content: &mut Option<String>,
2742    ) -> Option<String> {
2743        while let Some(ch) = self.peek_char() {
2744            if !matches!(ch, ' ' | '\t') {
2745                break;
2746            }
2747            Self::push_capture_char(content, ch);
2748            self.advance();
2749        }
2750
2751        let mut cooked = String::new();
2752        let mut in_single = false;
2753        let mut in_double = false;
2754        let mut escaped = false;
2755        let mut saw_any = false;
2756
2757        while let Some(ch) = self.peek_char() {
2758            if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
2759                break;
2760            }
2761
2762            saw_any = true;
2763            Self::push_capture_char(content, ch);
2764            self.advance();
2765
2766            if escaped {
2767                cooked.push(ch);
2768                escaped = false;
2769                continue;
2770            }
2771
2772            match ch {
2773                '\\' if !in_single => escaped = true,
2774                '\'' if !in_double => in_single = !in_single,
2775                '"' if !in_single => in_double = !in_double,
2776                _ => cooked.push(ch),
2777            }
2778        }
2779
2780        saw_any.then_some(cooked)
2781    }
2782
2783    fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2784        Self::push_capture_char(content, '`');
2785        self.advance();
2786        while let Some(ch) = self.peek_char() {
2787            Self::push_capture_char(content, ch);
2788            self.advance();
2789            if ch == '\\' {
2790                if let Some(esc) = self.peek_char() {
2791                    Self::push_capture_char(content, esc);
2792                    self.advance();
2793                }
2794                continue;
2795            }
2796            if ch == '`' {
2797                break;
2798            }
2799        }
2800    }
2801
2802    fn read_command_subst_pending_heredoc_into(
2803        &mut self,
2804        content: &mut Option<String>,
2805        delimiter: &str,
2806        strip_tabs: bool,
2807    ) -> bool {
2808        loop {
2809            let mut line = String::new();
2810            let mut saw_newline = false;
2811
2812            while let Some(ch) = self.peek_char() {
2813                self.advance();
2814                if ch == '\n' {
2815                    saw_newline = true;
2816                    break;
2817                }
2818                line.push(ch);
2819            }
2820
2821            Self::push_capture_str(content, &line);
2822            if saw_newline {
2823                Self::push_capture_char(content, '\n');
2824            }
2825
2826            if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2827                return true;
2828            }
2829        }
2830    }
2831
2832    fn read_command_subst_into_depth(
2833        &mut self,
2834        content: &mut Option<String>,
2835        subst_depth: usize,
2836    ) -> bool {
2837        if subst_depth >= self.max_subst_depth {
2838            // Depth limit exceeded — consume until matching ')' and emit error token
2839            let mut depth = 1;
2840            while let Some(c) = self.peek_char() {
2841                self.advance();
2842                match c {
2843                    '(' => depth += 1,
2844                    ')' => {
2845                        depth -= 1;
2846                        if depth == 0 {
2847                            Self::push_capture_char(content, ')');
2848                            return true;
2849                        }
2850                    }
2851                    _ => {}
2852                }
2853            }
2854            return false;
2855        }
2856
2857        let mut depth = 1;
2858        let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
2859        let mut pending_case_headers = 0usize;
2860        let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
2861        let mut current_word = String::with_capacity(16);
2862        let mut at_command_start = true;
2863        let mut expecting_redirection_target = false;
2864        let mut current_word_started_at_command_start = false;
2865        while let Some(c) = self.peek_char() {
2866            match c {
2867                '#' if !self.should_treat_hash_as_word_char() => {
2868                    let had_word = !current_word.is_empty();
2869                    Self::flush_command_subst_keyword(
2870                        &mut current_word,
2871                        &mut pending_case_headers,
2872                        &mut case_clause_depths,
2873                        depth,
2874                        &mut current_word_started_at_command_start,
2875                    );
2876                    if had_word && expecting_redirection_target {
2877                        expecting_redirection_target = false;
2878                    }
2879                    Self::push_capture_char(content, '#');
2880                    self.advance();
2881                    while let Some(comment_ch) = self.peek_char() {
2882                        Self::push_capture_char(content, comment_ch);
2883                        self.advance();
2884                        if comment_ch == '\n' {
2885                            for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
2886                                if !self.read_command_subst_pending_heredoc_into(
2887                                    content, &delimiter, strip_tabs,
2888                                ) {
2889                                    return false;
2890                                }
2891                            }
2892                            at_command_start = true;
2893                            expecting_redirection_target = false;
2894                            break;
2895                        }
2896                    }
2897                }
2898                '(' => {
2899                    Self::flush_command_subst_keyword(
2900                        &mut current_word,
2901                        &mut pending_case_headers,
2902                        &mut case_clause_depths,
2903                        depth,
2904                        &mut current_word_started_at_command_start,
2905                    );
2906                    depth += 1;
2907                    Self::push_capture_char(content, c);
2908                    self.advance();
2909                    at_command_start = true;
2910                    expecting_redirection_target = false;
2911                }
2912                ')' => {
2913                    Self::flush_command_subst_keyword(
2914                        &mut current_word,
2915                        &mut pending_case_headers,
2916                        &mut case_clause_depths,
2917                        depth,
2918                        &mut current_word_started_at_command_start,
2919                    );
2920                    if case_clause_depths
2921                        .last()
2922                        .is_some_and(|case_depth| *case_depth == depth)
2923                    {
2924                        Self::push_capture_char(content, ')');
2925                        self.advance();
2926                        at_command_start = true;
2927                        expecting_redirection_target = false;
2928                        continue;
2929                    }
2930                    depth -= 1;
2931                    self.advance();
2932                    if depth == 0 {
2933                        Self::push_capture_char(content, ')');
2934                        return true;
2935                    }
2936                    Self::push_capture_char(content, c);
2937                    at_command_start = false;
2938                    expecting_redirection_target = false;
2939                }
2940                '"' => {
2941                    let had_word = !current_word.is_empty();
2942                    Self::flush_command_subst_keyword(
2943                        &mut current_word,
2944                        &mut pending_case_headers,
2945                        &mut case_clause_depths,
2946                        depth,
2947                        &mut current_word_started_at_command_start,
2948                    );
2949                    if had_word && expecting_redirection_target {
2950                        expecting_redirection_target = false;
2951                    }
2952                    // Nested double-quoted string inside $()
2953                    Self::push_capture_char(content, '"');
2954                    self.advance();
2955                    while let Some(qc) = self.peek_char() {
2956                        match qc {
2957                            '"' => {
2958                                Self::push_capture_char(content, '"');
2959                                self.advance();
2960                                break;
2961                            }
2962                            '\\' => {
2963                                Self::push_capture_char(content, '\\');
2964                                self.advance();
2965                                if let Some(esc) = self.peek_char() {
2966                                    Self::push_capture_char(content, esc);
2967                                    self.advance();
2968                                }
2969                            }
2970                            '$' => {
2971                                Self::push_capture_char(content, '$');
2972                                self.advance();
2973                                if self.peek_char() == Some('(') {
2974                                    if self.second_char() == Some('(') {
2975                                        if !self.read_arithmetic_expansion_into(content) {
2976                                            return false;
2977                                        }
2978                                    } else {
2979                                        Self::push_capture_char(content, '(');
2980                                        self.advance();
2981                                        if !self
2982                                            .read_command_subst_into_depth(content, subst_depth + 1)
2983                                        {
2984                                            return false;
2985                                        }
2986                                    }
2987                                }
2988                            }
2989                            _ => {
2990                                Self::push_capture_char(content, qc);
2991                                self.advance();
2992                            }
2993                        }
2994                    }
2995                    if expecting_redirection_target {
2996                        expecting_redirection_target = false;
2997                    } else {
2998                        at_command_start = false;
2999                    }
3000                }
3001                '\'' => {
3002                    let had_word = !current_word.is_empty();
3003                    Self::flush_command_subst_keyword(
3004                        &mut current_word,
3005                        &mut pending_case_headers,
3006                        &mut case_clause_depths,
3007                        depth,
3008                        &mut current_word_started_at_command_start,
3009                    );
3010                    if had_word && expecting_redirection_target {
3011                        expecting_redirection_target = false;
3012                    }
3013                    // Single-quoted string inside $()
3014                    Self::push_capture_char(content, '\'');
3015                    self.advance();
3016                    while let Some(qc) = self.peek_char() {
3017                        Self::push_capture_char(content, qc);
3018                        self.advance();
3019                        if qc == '\'' {
3020                            break;
3021                        }
3022                    }
3023                    if expecting_redirection_target {
3024                        expecting_redirection_target = false;
3025                    } else {
3026                        at_command_start = false;
3027                    }
3028                }
3029                '`' => {
3030                    let had_word = !current_word.is_empty();
3031                    Self::flush_command_subst_keyword(
3032                        &mut current_word,
3033                        &mut pending_case_headers,
3034                        &mut case_clause_depths,
3035                        depth,
3036                        &mut current_word_started_at_command_start,
3037                    );
3038                    if had_word && expecting_redirection_target {
3039                        expecting_redirection_target = false;
3040                    }
3041                    self.read_command_subst_backtick_segment_into(content);
3042                    if expecting_redirection_target {
3043                        expecting_redirection_target = false;
3044                    } else {
3045                        at_command_start = false;
3046                    }
3047                }
3048                '$' if self.second_char() == Some('\'') => {
3049                    let had_word = !current_word.is_empty();
3050                    Self::flush_command_subst_keyword(
3051                        &mut current_word,
3052                        &mut pending_case_headers,
3053                        &mut case_clause_depths,
3054                        depth,
3055                        &mut current_word_started_at_command_start,
3056                    );
3057                    if had_word && expecting_redirection_target {
3058                        expecting_redirection_target = false;
3059                    }
3060                    Self::push_capture_char(content, '$');
3061                    self.advance();
3062                    Self::push_capture_char(content, '\'');
3063                    self.advance();
3064                    while let Some(qc) = self.peek_char() {
3065                        Self::push_capture_char(content, qc);
3066                        self.advance();
3067                        if qc == '\\' {
3068                            if let Some(esc) = self.peek_char() {
3069                                Self::push_capture_char(content, esc);
3070                                self.advance();
3071                            }
3072                            continue;
3073                        }
3074                        if qc == '\'' {
3075                            break;
3076                        }
3077                    }
3078                    if expecting_redirection_target {
3079                        expecting_redirection_target = false;
3080                    } else {
3081                        at_command_start = false;
3082                    }
3083                }
3084                '\\' => {
3085                    let had_word = !current_word.is_empty();
3086                    Self::flush_command_subst_keyword(
3087                        &mut current_word,
3088                        &mut pending_case_headers,
3089                        &mut case_clause_depths,
3090                        depth,
3091                        &mut current_word_started_at_command_start,
3092                    );
3093                    if had_word && expecting_redirection_target {
3094                        expecting_redirection_target = false;
3095                    }
3096                    Self::push_capture_char(content, '\\');
3097                    self.advance();
3098                    if let Some(esc) = self.peek_char() {
3099                        Self::push_capture_char(content, esc);
3100                        self.advance();
3101                    }
3102                    if expecting_redirection_target {
3103                        expecting_redirection_target = false;
3104                    } else {
3105                        at_command_start = false;
3106                    }
3107                }
3108                '<' if self.second_char() == Some('<') => {
3109                    let word_was_redirection_fd = current_word_started_at_command_start
3110                        && !current_word.is_empty()
3111                        && current_word.chars().all(|current| current.is_ascii_digit());
3112                    Self::flush_command_subst_keyword(
3113                        &mut current_word,
3114                        &mut pending_case_headers,
3115                        &mut case_clause_depths,
3116                        depth,
3117                        &mut current_word_started_at_command_start,
3118                    );
3119                    if word_was_redirection_fd {
3120                        at_command_start = true;
3121                    }
3122
3123                    Self::push_capture_char(content, '<');
3124                    self.advance();
3125                    Self::push_capture_char(content, '<');
3126                    self.advance();
3127
3128                    if self.peek_char() == Some('<') {
3129                        Self::push_capture_char(content, '<');
3130                        self.advance();
3131                        expecting_redirection_target = true;
3132                        continue;
3133                    }
3134
3135                    let strip_tabs = if self.peek_char() == Some('-') {
3136                        Self::push_capture_char(content, '-');
3137                        self.advance();
3138                        true
3139                    } else {
3140                        false
3141                    };
3142
3143                    if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
3144                    {
3145                        pending_heredocs.push((delimiter, strip_tabs));
3146                        expecting_redirection_target = false;
3147                    } else {
3148                        expecting_redirection_target = true;
3149                    }
3150                }
3151                '>' | '<' => {
3152                    let word_was_redirection_fd = current_word_started_at_command_start
3153                        && !current_word.is_empty()
3154                        && current_word.chars().all(|current| current.is_ascii_digit());
3155                    Self::flush_command_subst_keyword(
3156                        &mut current_word,
3157                        &mut pending_case_headers,
3158                        &mut case_clause_depths,
3159                        depth,
3160                        &mut current_word_started_at_command_start,
3161                    );
3162                    if word_was_redirection_fd {
3163                        at_command_start = true;
3164                    }
3165                    Self::push_capture_char(content, c);
3166                    self.advance();
3167                    expecting_redirection_target = true;
3168                }
3169                '\n' => {
3170                    Self::flush_command_subst_keyword(
3171                        &mut current_word,
3172                        &mut pending_case_headers,
3173                        &mut case_clause_depths,
3174                        depth,
3175                        &mut current_word_started_at_command_start,
3176                    );
3177                    Self::push_capture_char(content, '\n');
3178                    self.advance();
3179                    for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
3180                        if !self.read_command_subst_pending_heredoc_into(
3181                            content, &delimiter, strip_tabs,
3182                        ) {
3183                            return false;
3184                        }
3185                    }
3186                    at_command_start = true;
3187                    expecting_redirection_target = false;
3188                }
3189                _ => {
3190                    if c.is_ascii_alphanumeric() || c == '_' {
3191                        if current_word.is_empty()
3192                            && !expecting_redirection_target
3193                            && at_command_start
3194                        {
3195                            current_word_started_at_command_start = true;
3196                            at_command_start = false;
3197                        }
3198                        current_word.push(c);
3199                    } else {
3200                        let had_word = !current_word.is_empty();
3201                        Self::flush_command_subst_keyword(
3202                            &mut current_word,
3203                            &mut pending_case_headers,
3204                            &mut case_clause_depths,
3205                            depth,
3206                            &mut current_word_started_at_command_start,
3207                        );
3208                        if had_word && expecting_redirection_target {
3209                            expecting_redirection_target = false;
3210                        }
3211                        match c {
3212                            ' ' | '\t' => {}
3213                            ';' | '|' | '&' => {
3214                                at_command_start = true;
3215                                expecting_redirection_target = false;
3216                            }
3217                            _ => {
3218                                if !expecting_redirection_target {
3219                                    at_command_start = false;
3220                                }
3221                            }
3222                        }
3223                    }
3224                    Self::push_capture_char(content, c);
3225                    self.advance();
3226                }
3227            }
3228        }
3229
3230        false
3231    }
3232
3233    /// Read parameter expansion content after `${`, handling nested braces and quotes.
3234    /// In bash, quotes inside `${...}` (e.g. `${arr["key"]}`) don't terminate the
3235    /// outer double-quoted string. Appends chars including closing `}` to `content`.
3236    fn read_param_expansion_into(
3237        &mut self,
3238        content: &mut Option<String>,
3239        segment_start: Position,
3240    ) -> bool {
3241        let mut borrowable = true;
3242        let mut depth = 1;
3243        let mut literal_brace_depth = 0usize;
3244        let mut in_single = false;
3245        let mut in_double = false;
3246        let mut double_quote_depth = 0usize;
3247        while let Some(c) = self.peek_char() {
3248            if in_single {
3249                match c {
3250                    '\\' => {
3251                        let escape_start = self.current_position();
3252                        if self.second_char() == Some('"') {
3253                            self.advance();
3254                            borrowable = false;
3255                            self.ensure_capture_from_source(content, segment_start, escape_start);
3256                            Self::push_capture_char(content, '"');
3257                            self.advance();
3258                        } else {
3259                            Self::push_capture_char(content, '\\');
3260                            self.advance();
3261                        }
3262                    }
3263                    '\'' => {
3264                        Self::push_capture_char(content, c);
3265                        self.advance();
3266                        in_single = false;
3267                    }
3268                    _ => {
3269                        Self::push_capture_char(content, c);
3270                        self.advance();
3271                    }
3272                }
3273                continue;
3274            }
3275
3276            match c {
3277                '}' if !in_single && (!in_double || depth > double_quote_depth) => {
3278                    self.advance();
3279                    Self::push_capture_char(content, '}');
3280                    if depth == 1
3281                        && literal_brace_depth > 0
3282                        && self.has_later_top_level_param_expansion_closer(depth)
3283                    {
3284                        literal_brace_depth -= 1;
3285                        continue;
3286                    }
3287                    depth -= 1;
3288                    if depth == 0 {
3289                        break;
3290                    }
3291                }
3292                '{' if !in_single && !in_double => {
3293                    literal_brace_depth += 1;
3294                    Self::push_capture_char(content, '{');
3295                    self.advance();
3296                }
3297                '"' => {
3298                    // Quotes inside ${...} are part of the expansion, not string delimiters
3299                    Self::push_capture_char(content, '"');
3300                    self.advance();
3301                    in_double = !in_double;
3302                    double_quote_depth = if in_double { depth } else { 0 };
3303                }
3304                '\'' => {
3305                    Self::push_capture_char(content, '\'');
3306                    self.advance();
3307                    if !in_double {
3308                        in_single = true;
3309                    }
3310                }
3311                '\\' => {
3312                    // Inside ${...} within double quotes, same escape rules apply:
3313                    // \", \\, \$, \` produce the escaped char; others keep backslash
3314                    let escape_start = self.current_position();
3315                    self.advance();
3316                    if let Some(esc) = self.peek_char() {
3317                        match esc {
3318                            '$' => {
3319                                borrowable = false;
3320                                self.ensure_capture_from_source(
3321                                    content,
3322                                    segment_start,
3323                                    escape_start,
3324                                );
3325                                Self::push_capture_char(content, '\x00');
3326                                Self::push_capture_char(content, '$');
3327                                self.advance();
3328                            }
3329                            '"' | '\\' | '`' => {
3330                                borrowable = false;
3331                                self.ensure_capture_from_source(
3332                                    content,
3333                                    segment_start,
3334                                    escape_start,
3335                                );
3336                                Self::push_capture_char(content, esc);
3337                                self.advance();
3338                            }
3339                            '}' => {
3340                                // \} should be a literal } without closing the expansion
3341                                Self::push_capture_char(content, '\\');
3342                                Self::push_capture_char(content, '}');
3343                                self.advance();
3344                                literal_brace_depth = literal_brace_depth.saturating_sub(1);
3345                            }
3346                            _ => {
3347                                Self::push_capture_char(content, '\\');
3348                                Self::push_capture_char(content, esc);
3349                                self.advance();
3350                            }
3351                        }
3352                    } else {
3353                        Self::push_capture_char(content, '\\');
3354                    }
3355                }
3356                '$' => {
3357                    Self::push_capture_char(content, '$');
3358                    self.advance();
3359                    if self.peek_char() == Some('(') {
3360                        if self.second_char() == Some('(') {
3361                            if !self.read_arithmetic_expansion_into(content) {
3362                                borrowable = false;
3363                            }
3364                        } else {
3365                            Self::push_capture_char(content, '(');
3366                            self.advance();
3367                            self.read_command_subst_into(content);
3368                        }
3369                    } else if self.peek_char() == Some('{') {
3370                        Self::push_capture_char(content, '{');
3371                        self.advance();
3372                        borrowable &= self.read_param_expansion_into(content, segment_start);
3373                    }
3374                }
3375                _ => {
3376                    Self::push_capture_char(content, c);
3377                    self.advance();
3378                }
3379            }
3380        }
3381        borrowable
3382    }
3383
3384    fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
3385        let mut chars = self.lookahead_chars().peekable();
3386        let mut depth = target_depth;
3387        let mut in_single = false;
3388        let mut in_double = false;
3389        let mut double_quote_depth = 0usize;
3390
3391        while let Some(ch) = chars.next() {
3392            if in_single {
3393                match ch {
3394                    '\'' => in_single = false,
3395                    '\\' if chars.peek() == Some(&'"') => {
3396                        chars.next();
3397                    }
3398                    '\\' => {}
3399                    _ => {}
3400                }
3401                continue;
3402            }
3403
3404            if in_double {
3405                match ch {
3406                    '"' => {
3407                        in_double = false;
3408                        double_quote_depth = 0;
3409                    }
3410                    '\\' => {
3411                        chars.next();
3412                    }
3413                    '$' if chars.peek() == Some(&'{') => {
3414                        chars.next();
3415                        depth += 1;
3416                    }
3417                    '}' if depth > double_quote_depth => {
3418                        depth -= 1;
3419                    }
3420                    _ => {}
3421                }
3422                continue;
3423            }
3424
3425            match ch {
3426                '\n' if depth == target_depth => return false,
3427                '\'' => in_single = true,
3428                '"' => {
3429                    in_double = true;
3430                    double_quote_depth = depth;
3431                }
3432                '\\' => {
3433                    chars.next();
3434                }
3435                '$' if chars.peek() == Some(&'{') => {
3436                    chars.next();
3437                    depth += 1;
3438                }
3439                '}' => {
3440                    if depth == target_depth {
3441                        return true;
3442                    }
3443                    depth -= 1;
3444                }
3445                _ => {}
3446            }
3447        }
3448
3449        false
3450    }
3451
3452    /// Check if the content starting with { looks like a brace expansion
3453    /// Brace expansion: {a,b,c} or {1..5} (contains , or ..)
3454    /// Brace group: { cmd; } (contains spaces, semicolons, newlines)
3455    /// Caps lookahead to prevent O(n^2) scanning when input
3456    /// contains many unmatched `{` characters (issue #997).
3457    fn looks_like_brace_expansion(&self) -> bool {
3458        const MAX_LOOKAHEAD: usize = 10_000;
3459
3460        let mut chars = self.lookahead_chars();
3461
3462        // Skip the opening {
3463        if chars.next() != Some('{') {
3464            return false;
3465        }
3466
3467        let mut depth = 1;
3468        let mut paren_depth = 0usize;
3469        let mut has_comma = false;
3470        let mut has_dot_dot = false;
3471        let mut escaped = false;
3472        let mut in_single = false;
3473        let mut in_double = false;
3474        let mut in_backtick = false;
3475        let mut prev_char = None;
3476        let mut scanned = 0usize;
3477
3478        for ch in chars {
3479            scanned += 1;
3480            if scanned > MAX_LOOKAHEAD {
3481                return false;
3482            }
3483
3484            let brace_surface_active = !in_single && !in_double && !in_backtick;
3485            let at_top_level = depth == 1 && paren_depth == 0 && brace_surface_active;
3486
3487            match ch {
3488                _ if escaped => {
3489                    escaped = false;
3490                }
3491                '\\' if !in_single => escaped = true,
3492                '\'' if !in_double && !in_backtick => in_single = !in_single,
3493                '"' if !in_single && !in_backtick => in_double = !in_double,
3494                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3495                '(' if brace_surface_active && (paren_depth > 0 || prev_char == Some('$')) => {
3496                    paren_depth += 1
3497                }
3498                ')' if brace_surface_active && paren_depth > 0 => paren_depth -= 1,
3499                '{' if !in_single && !in_double && !in_backtick => depth += 1,
3500                '}' if !in_single && !in_double && !in_backtick => {
3501                    depth -= 1;
3502                    if depth == 0 {
3503                        // Found matching }, check if we have brace expansion markers
3504                        return has_comma || has_dot_dot;
3505                    }
3506                }
3507                ',' if at_top_level => has_comma = true,
3508                '.' if at_top_level && prev_char == Some('.') => has_dot_dot = true,
3509                // Brace groups have whitespace/newlines/semicolons at depth 1
3510                ' ' | '\t' | '\n' | ';' if at_top_level => return false,
3511                _ => {}
3512            }
3513            prev_char = Some(ch);
3514        }
3515
3516        false
3517    }
3518
3519    fn consume_mid_word_brace_segment(&mut self, word: &mut Option<String>) {
3520        let mut brace_depth = 1usize;
3521        let mut paren_depth = 0usize;
3522        let mut escaped = false;
3523        let mut in_single = false;
3524        let mut in_double = false;
3525        let mut in_backtick = false;
3526        let mut prev_char = None;
3527
3528        while let Some(ch) = self.peek_char() {
3529            Self::push_capture_char(word, ch);
3530            self.advance();
3531
3532            if escaped {
3533                escaped = false;
3534                prev_char = Some(ch);
3535                continue;
3536            }
3537
3538            match ch {
3539                '\\' if !in_single => escaped = true,
3540                '\'' if !in_double && !in_backtick => in_single = !in_single,
3541                '"' if !in_single && !in_backtick => in_double = !in_double,
3542                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3543                '(' if !in_single
3544                    && !in_double
3545                    && !in_backtick
3546                    && (paren_depth > 0 || prev_char == Some('$')) =>
3547                {
3548                    paren_depth += 1
3549                }
3550                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3551                    paren_depth -= 1
3552                }
3553                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3554                '}' if !in_single && !in_double && !in_backtick => {
3555                    brace_depth -= 1;
3556                    if brace_depth == 0 {
3557                        break;
3558                    }
3559                }
3560                _ => {}
3561            }
3562
3563            prev_char = Some(ch);
3564        }
3565    }
3566
3567    fn consume_brace_word_body(&mut self, word: &mut String) {
3568        let mut brace_depth = 1usize;
3569        let mut paren_depth = 0usize;
3570        let mut escaped = false;
3571        let mut in_single = false;
3572        let mut in_double = false;
3573        let mut in_backtick = false;
3574        let mut prev_char = None;
3575
3576        while let Some(ch) = self.peek_char() {
3577            word.push(ch);
3578            self.advance();
3579
3580            if escaped {
3581                escaped = false;
3582                prev_char = Some(ch);
3583                continue;
3584            }
3585
3586            match ch {
3587                '\\' if !in_single => escaped = true,
3588                '\'' if !in_double && !in_backtick => in_single = !in_single,
3589                '"' if !in_single && !in_backtick => in_double = !in_double,
3590                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3591                '(' if !in_single
3592                    && !in_double
3593                    && !in_backtick
3594                    && (paren_depth > 0 || prev_char == Some('$')) =>
3595                {
3596                    paren_depth += 1
3597                }
3598                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3599                    paren_depth -= 1
3600                }
3601                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3602                '}' if !in_single && !in_double && !in_backtick => {
3603                    brace_depth -= 1;
3604                    if brace_depth == 0 {
3605                        break;
3606                    }
3607                }
3608                _ => {}
3609            }
3610
3611            prev_char = Some(ch);
3612        }
3613    }
3614
3615    /// Check whether a mid-word `{...}` segment can stay attached to the current
3616    /// word without crossing a top-level word boundary.
3617    fn looks_like_mid_word_brace_segment(&self) -> bool {
3618        const MAX_LOOKAHEAD: usize = 10_000;
3619
3620        let mut chars = self.lookahead_chars();
3621        if chars.next() != Some('{') {
3622            return false;
3623        }
3624
3625        let mut brace_depth = 1;
3626        let mut paren_depth = 0usize;
3627        let mut escaped = false;
3628        let mut in_single = false;
3629        let mut in_double = false;
3630        let mut in_backtick = false;
3631        let mut prev_char = None;
3632        let mut scanned = 0usize;
3633
3634        for ch in chars {
3635            scanned += 1;
3636            if scanned > MAX_LOOKAHEAD {
3637                return false;
3638            }
3639
3640            if !in_single
3641                && !in_double
3642                && !in_backtick
3643                && !escaped
3644                && brace_depth == 1
3645                && paren_depth == 0
3646                && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
3647            {
3648                return false;
3649            }
3650
3651            if escaped {
3652                escaped = false;
3653                prev_char = Some(ch);
3654                continue;
3655            }
3656
3657            match ch {
3658                '\\' => escaped = true,
3659                '\'' if !in_double && !in_backtick => in_single = !in_single,
3660                '"' if !in_single && !in_backtick => in_double = !in_double,
3661                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3662                '(' if !in_single
3663                    && !in_double
3664                    && !in_backtick
3665                    && (paren_depth > 0 || prev_char == Some('$')) =>
3666                {
3667                    paren_depth += 1
3668                }
3669                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3670                    paren_depth -= 1
3671                }
3672                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3673                '}' if !in_single && !in_double && !in_backtick => {
3674                    brace_depth -= 1;
3675                    if brace_depth == 0 {
3676                        return true;
3677                    }
3678                }
3679                _ => {}
3680            }
3681
3682            prev_char = Some(ch);
3683        }
3684
3685        false
3686    }
3687
3688    /// Check if { is followed by whitespace (brace group start)
3689    fn is_brace_group_start(&self) -> bool {
3690        let mut chars = self.lookahead_chars();
3691        // Skip the opening {
3692        if chars.next() != Some('{') {
3693            return false;
3694        }
3695        // If next char is whitespace or newline, it's a brace group
3696        matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3697    }
3698
3699    /// Check whether the text after an escaped `{` looks like a brace-expansion
3700    /// surface that should stay attached to the current word, e.g. `\{a,b}`.
3701    fn escaped_brace_sequence_looks_like_brace_expansion(&self) -> bool {
3702        const MAX_LOOKAHEAD: usize = 10_000;
3703
3704        let mut chars = self.lookahead_chars();
3705        let mut depth = 1;
3706        let mut has_comma = false;
3707        let mut has_dot_dot = false;
3708        let mut prev_char = None;
3709        let mut scanned = 0usize;
3710
3711        for ch in chars.by_ref() {
3712            scanned += 1;
3713            if scanned > MAX_LOOKAHEAD {
3714                return false;
3715            }
3716            match ch {
3717                '{' => depth += 1,
3718                '}' => {
3719                    depth -= 1;
3720                    if depth == 0 {
3721                        return has_comma || has_dot_dot;
3722                    }
3723                }
3724                ',' if depth == 1 => has_comma = true,
3725                '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3726                ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3727                _ => {}
3728            }
3729            prev_char = Some(ch);
3730        }
3731
3732        false
3733    }
3734
3735    fn brace_literal_starts_case_pattern_delimiter(&self) -> bool {
3736        let mut chars = self.lookahead_chars();
3737        if chars.next() != Some('{') {
3738            return false;
3739        }
3740        chars.next() == Some(')')
3741    }
3742
3743    /// Read a {literal} pattern without comma/dot-dot as a word
3744    fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
3745        let mut word = String::with_capacity(16);
3746
3747        if let Some('{') = self.peek_char() {
3748            word.push('{');
3749            self.advance();
3750        } else {
3751            return None;
3752        }
3753
3754        self.consume_brace_word_body(&mut word);
3755
3756        while let Some(ch) = self.peek_char() {
3757            if Self::is_word_char(ch) {
3758                if self.reinject_buf.is_empty() {
3759                    let chunk = self.cursor.eat_while(Self::is_word_char);
3760                    word.push_str(chunk);
3761                    self.advance_scanned_source_bytes(chunk.len());
3762                } else {
3763                    word.push(ch);
3764                    self.advance();
3765                }
3766            } else {
3767                break;
3768            }
3769        }
3770
3771        Some(LexedToken::owned_word(TokenKind::Word, word))
3772    }
3773
3774    /// Read a brace expansion pattern as a word
3775    fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3776        let mut word = String::with_capacity(16);
3777
3778        // Read the opening {
3779        if let Some('{') = self.peek_char() {
3780            word.push('{');
3781            self.advance();
3782        } else {
3783            return None;
3784        }
3785
3786        // Read until matching }
3787        self.consume_brace_word_body(&mut word);
3788
3789        // Continue reading any suffix after the brace pattern
3790        while let Some(ch) = self.peek_char() {
3791            if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3792                if ch == '{' {
3793                    // Another brace pattern - include it
3794                    word.push(ch);
3795                    self.advance();
3796                    self.consume_brace_word_body(&mut word);
3797                } else {
3798                    word.push(ch);
3799                    self.advance();
3800                }
3801            } else {
3802                break;
3803            }
3804        }
3805
3806        Some(LexedToken::owned_word(TokenKind::Word, word))
3807    }
3808
3809    /// Peek ahead (without consuming) to see if `=(` starts an associative
3810    /// compound assignment like `([key]=val ...)`.  Returns true when the
3811    /// first non-whitespace char after `(` is `[`.
3812    fn looks_like_assoc_assign(&self) -> bool {
3813        let mut chars = self.lookahead_chars();
3814        // Skip the `(` we haven't consumed yet
3815        if chars.next() != Some('(') {
3816            return false;
3817        }
3818        // Skip optional whitespace
3819        for ch in chars {
3820            match ch {
3821                ' ' | '\t' => continue,
3822                '[' => return true,
3823                _ => return false,
3824            }
3825        }
3826        false
3827    }
3828
3829    fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3830        text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3831    }
3832
3833    fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3834        word.segments().any(|segment| {
3835            matches!(
3836                segment.kind(),
3837                LexedWordSegmentKind::SingleQuoted
3838                    | LexedWordSegmentKind::DollarSingleQuoted
3839                    | LexedWordSegmentKind::DoubleQuoted
3840                    | LexedWordSegmentKind::DollarDoubleQuoted
3841            )
3842        }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3843    }
3844
3845    fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3846        text.contains(['*', '?'])
3847            || text.ends_with('}') && text.contains("${")
3848            || text.ends_with(']')
3849                && text
3850                    .rfind('[')
3851                    .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3852    }
3853
3854    fn is_word_char(ch: char) -> bool {
3855        !matches!(
3856            ch,
3857            ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3858        )
3859    }
3860
3861    const fn is_ascii_word_byte(byte: u8) -> bool {
3862        !matches!(
3863            byte,
3864            b' ' | b'\t'
3865                | b'\n'
3866                | b';'
3867                | b'|'
3868                | b'&'
3869                | b'>'
3870                | b'<'
3871                | b'('
3872                | b')'
3873                | b'{'
3874                | b'}'
3875                | b'\''
3876                | b'"'
3877        )
3878    }
3879
3880    const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3881        Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3882    }
3883
3884    fn is_plain_word_char(ch: char) -> bool {
3885        Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3886    }
3887
3888    /// Read here document content until the delimiter line is found
3889    pub(super) fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
3890        let mut content = String::with_capacity(64);
3891        let mut current_line = String::with_capacity(64);
3892
3893        // Save rest of current line (after the delimiter token on the command line).
3894        // For `cat <<EOF | sort`, this captures ` | sort` so the parser can
3895        // tokenize the pipe and subsequent command after the heredoc body.
3896        //
3897        // Quoted strings may span multiple lines (e.g., `cat <<EOF; echo "two\nthree"`),
3898        // so we track quoting state and continue across newlines until quotes close.
3899        let mut rest_of_line = String::with_capacity(32);
3900        let rest_of_line_start = self.current_position();
3901        let mut in_double_quote = false;
3902        let mut in_single_quote = false;
3903        let mut in_comment = false;
3904        let mut saw_non_whitespace_tail = false;
3905        let mut consecutive_backslashes = 0usize;
3906        let mut previous_tail_char = None;
3907        while let Some(ch) = self.peek_char() {
3908            self.advance();
3909            if in_comment {
3910                if ch == '\n' {
3911                    break;
3912                }
3913                rest_of_line.push(ch);
3914                previous_tail_char = Some(ch);
3915                continue;
3916            }
3917            if ch == '#'
3918                && !in_single_quote
3919                && !in_double_quote
3920                && self.comments_enabled()
3921                && heredoc_tail_hash_starts_comment(previous_tail_char)
3922            {
3923                in_comment = true;
3924                rest_of_line.push(ch);
3925                previous_tail_char = Some(ch);
3926                consecutive_backslashes = 0;
3927                continue;
3928            }
3929            let backslash_continues_line = ch == '\\'
3930                && !in_single_quote
3931                && self.peek_char() == Some('\n')
3932                && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
3933                && consecutive_backslashes.is_multiple_of(2);
3934            if backslash_continues_line {
3935                rest_of_line.push(ch);
3936                rest_of_line.push('\n');
3937                self.advance();
3938                consecutive_backslashes = 0;
3939                continue;
3940            }
3941            if ch == '\n' && !in_double_quote && !in_single_quote {
3942                break;
3943            }
3944            if ch == '"' && !in_single_quote {
3945                in_double_quote = !in_double_quote;
3946            } else if ch == '\'' && !in_double_quote {
3947                in_single_quote = !in_single_quote;
3948            } else if ch == '\\' && in_double_quote {
3949                // Escaped char inside double quotes — skip the next char too
3950                rest_of_line.push(ch);
3951                if let Some(next) = self.peek_char() {
3952                    rest_of_line.push(next);
3953                    self.advance();
3954                }
3955                continue;
3956            }
3957            rest_of_line.push(ch);
3958            if !ch.is_whitespace() {
3959                saw_non_whitespace_tail = true;
3960            }
3961            if ch == '\\' && !in_single_quote {
3962                consecutive_backslashes += 1;
3963            } else {
3964                consecutive_backslashes = 0;
3965            }
3966            previous_tail_char = Some(ch);
3967        }
3968
3969        // If we just drained a heredoc replay buffer (for example when multiple
3970        // heredocs share one command line), resume tracking from the true cursor
3971        // position before we measure the body span.
3972        self.sync_offset_to_cursor();
3973        let content_start = self.current_position();
3974        let mut current_line_start = content_start;
3975        let content_end;
3976
3977        // Read lines until we find the delimiter
3978        loop {
3979            if self.reinject_buf.is_empty() {
3980                // When the body reading drains a reinject buffer (from a
3981                // previous heredoc on the same command line), the virtual
3982                // offset drifts away from the cursor. Snap it back before
3983                // any source-based work so spans and `post_heredoc_offset`
3984                // stay within bounds.
3985                self.sync_offset_to_cursor();
3986                let rest = self.cursor.rest();
3987                if rest.is_empty() {
3988                    content_end = self.current_position();
3989                    break;
3990                }
3991
3992                let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
3993                let line = &rest[..line_len];
3994                let has_newline = line_len < rest.len();
3995
3996                if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
3997                    content_end = current_line_start;
3998                    self.consume_source_bytes(line_len);
3999                    if has_newline {
4000                        self.consume_ascii_chars(1);
4001                    }
4002                    break;
4003                }
4004
4005                content.push_str(line);
4006                self.consume_source_bytes(line_len);
4007
4008                if has_newline {
4009                    self.consume_ascii_chars(1);
4010                    content.push('\n');
4011                    current_line_start = self.current_position();
4012                    continue;
4013                }
4014
4015                content_end = self.current_position();
4016                break;
4017            }
4018
4019            match self.peek_char() {
4020                Some('\n') => {
4021                    self.advance();
4022                    // Check if current line matches delimiter
4023                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
4024                        content_end = current_line_start;
4025                        break;
4026                    }
4027                    content.push_str(&current_line);
4028                    content.push('\n');
4029                    current_line.clear();
4030                    current_line_start = self.current_position();
4031                }
4032                Some(ch) => {
4033                    current_line.push(ch);
4034                    self.advance();
4035                }
4036                None => {
4037                    // End of input - check last line
4038                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
4039                        content_end = current_line_start;
4040                        break;
4041                    }
4042                    if !current_line.is_empty() {
4043                        content.push_str(&current_line);
4044                    }
4045                    content_end = self.current_position();
4046                    break;
4047                }
4048            }
4049        }
4050
4051        // Re-inject the command-line tail so subsequent same-line tokens (pipes,
4052        // redirects, command words, additional heredocs) stay visible to the
4053        // parser. Always replay a terminating newline so parsing stops before
4054        // tokens that originally lived on later source lines, like `}` or `do`.
4055        let post_heredoc_offset = self.offset;
4056        self.offset = rest_of_line_start.offset;
4057        for ch in rest_of_line.chars() {
4058            self.reinject_buf.push_back(ch);
4059        }
4060        self.reinject_buf.push_back('\n');
4061        self.reinject_resume_offset = Some(post_heredoc_offset);
4062
4063        HeredocRead {
4064            content,
4065            content_span: Span::from_positions(content_start, content_end),
4066        }
4067    }
4068
4069    fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
4070        let mut chars = self.cursor.rest().chars();
4071        if chars.next() != Some('\n') {
4072            return false;
4073        }
4074
4075        for ch in chars {
4076            if matches!(ch, ' ' | '\t') {
4077                continue;
4078            }
4079            if ch == '\n' {
4080                return false;
4081            }
4082            return matches!(ch, '|' | '&' | ';' | '<' | '>')
4083                || (ch == '#' && self.comments_enabled());
4084        }
4085
4086        false
4087    }
4088}
4089
/// Report whether `line` terminates a heredoc whose delimiter is `delimiter`.
///
/// When `strip_tabs` is set, leading tab characters are ignored first. The
/// line matches when it begins with the delimiter and anything after it
/// consists solely of spaces and tabs (including nothing at all).
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = match strip_tabs {
        true => line.trim_start_matches('\t'),
        false => line,
    };

    // An exact match leaves an empty remainder, for which `all` is trivially true.
    candidate
        .strip_prefix(delimiter)
        .is_some_and(|remainder| remainder.bytes().all(|byte| byte == b' ' || byte == b'\t'))
}
4107
/// Decide whether a `#` in a heredoc command-line tail begins a comment,
/// judging only by the character that precedes it.
///
/// A `#` starts a comment at the very beginning of the tail, after
/// whitespace, or after one of `;`, `|`, `&`, `<`, `>`, `)`.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(';' | '|' | '&' | '<' | '>' | ')') => true,
        Some(other) => other.is_whitespace(),
    }
}
4113
/// Decode the character that starts at byte offset `index` of `input`.
///
/// Returns the character together with the byte offset just past it, or
/// `None` when `index` is out of range, not on a character boundary, or at
/// the end of the string.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    input
        .get(index..)
        .and_then(|tail| tail.chars().next())
        .map(|ch| (ch, index + ch.len_utf8()))
}
4118
4119fn line_has_unclosed_double_paren(prefix: &str) -> bool {
4120    let mut index = 0usize;
4121    let mut depth = 0usize;
4122    let mut in_single = false;
4123    let mut in_double = false;
4124    let mut in_backtick = false;
4125    let mut escaped = false;
4126
4127    while let Some((ch, next_index)) = next_char_boundary(prefix, index) {
4128        let was_escaped = escaped;
4129        if ch == '\\' && !in_single {
4130            escaped = !escaped;
4131            index = next_index;
4132            continue;
4133        }
4134        escaped = false;
4135
4136        match ch {
4137            '\'' if !in_double && !in_backtick && !was_escaped => in_single = !in_single,
4138            '"' if !in_single && !in_backtick && !was_escaped => in_double = !in_double,
4139            '`' if !in_single && !in_double && !was_escaped => in_backtick = !in_backtick,
4140            '(' if !in_single
4141                && !in_double
4142                && !in_backtick
4143                && !was_escaped
4144                && prefix[next_index..].starts_with('(') =>
4145            {
4146                depth += 1;
4147                index = next_index + '('.len_utf8();
4148                continue;
4149            }
4150            ')' if !in_single
4151                && !in_double
4152                && !in_backtick
4153                && !was_escaped
4154                && prefix[next_index..].starts_with(')') =>
4155            {
4156                depth = depth.saturating_sub(1);
4157                index = next_index + ')'.len_utf8();
4158                continue;
4159            }
4160            _ => {}
4161        }
4162
4163        index = next_index;
4164    }
4165
4166    depth > 0
4167}
4168
4169fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4170    let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4171    let prefix = &input[line_start..index];
4172    line_has_unclosed_double_paren(prefix)
4173}
4174
4175fn hash_starts_comment(input: &str, index: usize) -> bool {
4176    if inside_unclosed_double_paren_on_line(input, index) {
4177        return false;
4178    }
4179
4180    let next = &input[index + '#'.len_utf8()..];
4181    input[..index]
4182        .chars()
4183        .next_back()
4184        .is_none_or(|prev| match prev {
4185            '(' => {
4186                let whitespace_index = next.find(char::is_whitespace);
4187                let close_index = next.find(')');
4188
4189                match (whitespace_index, close_index) {
4190                    (Some(whitespace), Some(close)) => whitespace < close,
4191                    (Some(_), None) | (None, None) => true,
4192                    (None, Some(_)) => false,
4193                }
4194            }
4195            _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4196        })
4197}
4198
/// Report whether `ch` ends a heredoc delimiter word.
///
/// Nothing terminates the word while inside single or double quotes or
/// directly after a backslash escape; otherwise whitespace and the
/// characters `|`, `&`, `;`, `<`, `>`, `(`, `)` do.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }

    match ch {
        '|' | '&' | ';' | '<' | '>' | '(' | ')' => true,
        other => other.is_whitespace(),
    }
}
4210
4211fn scan_double_quoted_command_substitution_segment(
4212    input: &str,
4213    mut index: usize,
4214    subst_depth: usize,
4215) -> Option<usize> {
4216    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4217        match ch {
4218            '"' => return Some(next_index),
4219            '\\' => {
4220                index = next_index;
4221                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4222                    index = escaped_next;
4223                }
4224            }
4225            '$' if input[next_index..].starts_with('{') => {
4226                let consumed = scan_command_subst_parameter_expansion_len(
4227                    &input[next_index + '{'.len_utf8()..],
4228                    subst_depth,
4229                    0,
4230                )?;
4231                index = next_index + '{'.len_utf8() + consumed;
4232            }
4233            '$' if input[next_index..].starts_with('(')
4234                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4235            {
4236                let consumed = scan_command_substitution_body_len_inner(
4237                    &input[next_index + '('.len_utf8()..],
4238                    subst_depth + 1,
4239                )?;
4240                index = next_index + '('.len_utf8() + consumed;
4241            }
4242            _ => index = next_index,
4243        }
4244    }
4245
4246    None
4247}
4248
4249fn scan_command_subst_parameter_expansion_len(
4250    input: &str,
4251    subst_depth: usize,
4252    parameter_depth: usize,
4253) -> Option<usize> {
4254    if parameter_depth >= MAX_PARAMETER_EXPANSION_SCAN_DEPTH {
4255        return scan_command_subst_parameter_expansion_len_balanced(input, subst_depth);
4256    }
4257
4258    let mut index = 0usize;
4259    let mut in_single = false;
4260    let mut in_double = false;
4261    let mut in_ansi_c_single = false;
4262    let mut in_backtick = false;
4263    let mut escaped = false;
4264    let mut ansi_c_quote_pending = false;
4265
4266    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4267        let was_escaped = escaped;
4268        if ch == '\\' && !in_single {
4269            escaped = !escaped;
4270            index = next_index;
4271            ansi_c_quote_pending = false;
4272            continue;
4273        }
4274        escaped = false;
4275
4276        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
4277            if input[next_index..].starts_with('{')
4278                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
4279                    &input[next_index + '{'.len_utf8()..],
4280                    subst_depth,
4281                    parameter_depth + 1,
4282                )
4283            {
4284                index = next_index + '{'.len_utf8() + consumed;
4285                ansi_c_quote_pending = false;
4286                continue;
4287            }
4288
4289            if input[next_index..].starts_with('(')
4290                && !input[next_index + '('.len_utf8()..].starts_with('(')
4291                && let Some(consumed) = scan_command_substitution_body_len_inner(
4292                    &input[next_index + '('.len_utf8()..],
4293                    subst_depth + 1,
4294                )
4295            {
4296                index = next_index + '('.len_utf8() + consumed;
4297                ansi_c_quote_pending = false;
4298                continue;
4299            }
4300        }
4301
4302        if !in_single
4303            && !in_ansi_c_single
4304            && !in_double
4305            && !in_backtick
4306            && !was_escaped
4307            && matches!(ch, '<' | '>')
4308            && input[next_index..].starts_with('(')
4309            && let Some(consumed) = scan_command_substitution_body_len_inner(
4310                &input[next_index + '('.len_utf8()..],
4311                subst_depth + 1,
4312            )
4313        {
4314            index = next_index + '('.len_utf8() + consumed;
4315            ansi_c_quote_pending = false;
4316            continue;
4317        }
4318
4319        match ch {
4320            '\'' if !in_double && !in_backtick && !was_escaped => {
4321                if in_ansi_c_single {
4322                    in_ansi_c_single = false;
4323                } else if !in_single && ansi_c_quote_pending {
4324                    in_ansi_c_single = true;
4325                } else {
4326                    in_single = !in_single;
4327                }
4328            }
4329            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
4330                in_double = !in_double
4331            }
4332            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
4333                in_backtick = !in_backtick
4334            }
4335            '}' if !in_single
4336                && !in_ansi_c_single
4337                && !in_double
4338                && !in_backtick
4339                && !was_escaped =>
4340            {
4341                return Some(next_index);
4342            }
4343            _ => {}
4344        }
4345
4346        ansi_c_quote_pending = ch == '$'
4347            && !in_single
4348            && !in_ansi_c_single
4349            && !in_double
4350            && !in_backtick
4351            && !was_escaped;
4352        index = next_index;
4353    }
4354
4355    None
4356}
4357
4358fn scan_command_subst_parameter_expansion_len_balanced(
4359    input: &str,
4360    subst_depth: usize,
4361) -> Option<usize> {
4362    let mut index = 0usize;
4363    let mut brace_depth = 1usize;
4364    let mut in_single = false;
4365    let mut in_double = false;
4366    let mut in_ansi_c_single = false;
4367    let mut in_backtick = false;
4368    let mut escaped = false;
4369    let mut ansi_c_quote_pending = false;
4370
4371    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4372        let was_escaped = escaped;
4373        if ch == '\\' && !in_single {
4374            escaped = !escaped;
4375            index = next_index;
4376            ansi_c_quote_pending = false;
4377            continue;
4378        }
4379        escaped = false;
4380
4381        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
4382            if input[next_index..].starts_with('{') {
4383                brace_depth = brace_depth.saturating_add(1);
4384                index = next_index + '{'.len_utf8();
4385                ansi_c_quote_pending = false;
4386                continue;
4387            }
4388
4389            if input[next_index..].starts_with('(')
4390                && !input[next_index + '('.len_utf8()..].starts_with('(')
4391                && let Some(consumed) = scan_command_substitution_body_len_inner(
4392                    &input[next_index + '('.len_utf8()..],
4393                    subst_depth + 1,
4394                )
4395            {
4396                index = next_index + '('.len_utf8() + consumed;
4397                ansi_c_quote_pending = false;
4398                continue;
4399            }
4400        }
4401
4402        if !in_single
4403            && !in_ansi_c_single
4404            && !in_double
4405            && !in_backtick
4406            && !was_escaped
4407            && matches!(ch, '<' | '>')
4408            && input[next_index..].starts_with('(')
4409            && let Some(consumed) = scan_command_substitution_body_len_inner(
4410                &input[next_index + '('.len_utf8()..],
4411                subst_depth + 1,
4412            )
4413        {
4414            index = next_index + '('.len_utf8() + consumed;
4415            ansi_c_quote_pending = false;
4416            continue;
4417        }
4418
4419        match ch {
4420            '\'' if !in_double && !in_backtick && !was_escaped => {
4421                if in_ansi_c_single {
4422                    in_ansi_c_single = false;
4423                } else if !in_single && ansi_c_quote_pending {
4424                    in_ansi_c_single = true;
4425                } else {
4426                    in_single = !in_single;
4427                }
4428            }
4429            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
4430                in_double = !in_double
4431            }
4432            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
4433                in_backtick = !in_backtick
4434            }
4435            '}' if !in_single
4436                && !in_ansi_c_single
4437                && !in_double
4438                && !in_backtick
4439                && !was_escaped =>
4440            {
4441                brace_depth = brace_depth.saturating_sub(1);
4442                if brace_depth == 0 {
4443                    return Some(next_index);
4444                }
4445            }
4446            _ => {}
4447        }
4448
4449        ansi_c_quote_pending = ch == '$'
4450            && !in_single
4451            && !in_ansi_c_single
4452            && !in_double
4453            && !in_backtick
4454            && !was_escaped;
4455        index = next_index;
4456    }
4457
4458    None
4459}
4460
4461fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
4462    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4463        if !matches!(ch, ' ' | '\t') {
4464            break;
4465        }
4466        index = next_index;
4467    }
4468
4469    let start = index;
4470    let mut cooked = String::new();
4471    let mut in_single = false;
4472    let mut in_double = false;
4473    let mut escaped = false;
4474
4475    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4476        if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
4477            break;
4478        }
4479
4480        index = next_index;
4481        if escaped {
4482            cooked.push(ch);
4483            escaped = false;
4484            continue;
4485        }
4486
4487        match ch {
4488            '\\' if !in_single => escaped = true,
4489            '\'' if !in_double => in_single = !in_single,
4490            '"' if !in_single => in_double = !in_double,
4491            _ => cooked.push(ch),
4492        }
4493    }
4494
4495    (index > start).then_some((index, cooked))
4496}
4497
4498fn skip_command_subst_pending_heredoc(
4499    input: &str,
4500    mut index: usize,
4501    delimiter: &str,
4502    strip_tabs: bool,
4503) -> usize {
4504    while index <= input.len() {
4505        let rest = &input[index..];
4506        let line_len = rest.find('\n').unwrap_or(rest.len());
4507        let line = &rest[..line_len];
4508        let has_newline = line_len < rest.len();
4509
4510        index += line_len;
4511        if has_newline {
4512            index += '\n'.len_utf8();
4513        }
4514
4515        if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4516            return index;
4517        }
4518    }
4519
4520    index
4521}
4522
4523fn scan_command_subst_ansi_c_single_quoted_segment(
4524    input: &str,
4525    quote_index: usize,
4526) -> Option<usize> {
4527    let mut index = quote_index + '\''.len_utf8();
4528
4529    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4530        index = next_index;
4531        if ch == '\\' {
4532            if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4533                index = escaped_next;
4534            }
4535            continue;
4536        }
4537
4538        if ch == '\'' {
4539            return Some(index);
4540        }
4541    }
4542
4543    None
4544}
4545
4546fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
4547    let mut index = start;
4548
4549    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4550        index = next_index;
4551        if ch == '\\' {
4552            if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4553                index = escaped_next;
4554            }
4555            continue;
4556        }
4557
4558        if ch == '`' {
4559            return Some(index);
4560        }
4561    }
4562
4563    None
4564}
4565
4566fn flush_scanned_command_subst_keyword(
4567    current_word: &mut String,
4568    pending_case_headers: &mut usize,
4569    case_clause_depths: &mut SmallVec<[usize; 4]>,
4570    depth: usize,
4571    word_started_at_command_start: &mut bool,
4572) {
4573    if current_word.is_empty() {
4574        *word_started_at_command_start = false;
4575        return;
4576    }
4577
4578    match current_word.as_str() {
4579        "case" if *word_started_at_command_start => *pending_case_headers += 1,
4580        "in" if *pending_case_headers > 0 => {
4581            *pending_case_headers -= 1;
4582            case_clause_depths.push(depth);
4583        }
4584        "esac" if *word_started_at_command_start => {
4585            case_clause_depths.pop();
4586        }
4587        _ => {}
4588    }
4589
4590    current_word.clear();
4591    *word_started_at_command_start = false;
4592}
4593
4594pub(super) fn scan_command_substitution_body_len_inner(
4595    input: &str,
4596    subst_depth: usize,
4597) -> Option<usize> {
4598    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
4599        return None;
4600    }
4601
4602    let mut index = 0usize;
4603    let mut depth = 1;
4604    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
4605    let mut pending_case_headers = 0usize;
4606    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
4607    let mut current_word = String::with_capacity(16);
4608    let mut at_command_start = true;
4609    let mut expecting_redirection_target = false;
4610    let mut current_word_started_at_command_start = false;
4611
4612    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4613        match ch {
4614            '#' if hash_starts_comment(input, index) => {
4615                let had_word = !current_word.is_empty();
4616                flush_scanned_command_subst_keyword(
4617                    &mut current_word,
4618                    &mut pending_case_headers,
4619                    &mut case_clause_depths,
4620                    depth,
4621                    &mut current_word_started_at_command_start,
4622                );
4623                if had_word && expecting_redirection_target {
4624                    expecting_redirection_target = false;
4625                }
4626                index = next_index;
4627                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
4628                    index = comment_next;
4629                    if comment_ch == '\n' {
4630                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
4631                            index = skip_command_subst_pending_heredoc(
4632                                input, index, &delimiter, strip_tabs,
4633                            );
4634                        }
4635                        at_command_start = true;
4636                        expecting_redirection_target = false;
4637                        break;
4638                    }
4639                }
4640            }
4641            '(' => {
4642                flush_scanned_command_subst_keyword(
4643                    &mut current_word,
4644                    &mut pending_case_headers,
4645                    &mut case_clause_depths,
4646                    depth,
4647                    &mut current_word_started_at_command_start,
4648                );
4649                depth += 1;
4650                index = next_index;
4651                at_command_start = true;
4652                expecting_redirection_target = false;
4653            }
4654            ')' => {
4655                flush_scanned_command_subst_keyword(
4656                    &mut current_word,
4657                    &mut pending_case_headers,
4658                    &mut case_clause_depths,
4659                    depth,
4660                    &mut current_word_started_at_command_start,
4661                );
4662                if case_clause_depths
4663                    .last()
4664                    .is_some_and(|case_depth| *case_depth == depth)
4665                {
4666                    index = next_index;
4667                    at_command_start = true;
4668                    expecting_redirection_target = false;
4669                    continue;
4670                }
4671                depth -= 1;
4672                index = next_index;
4673                if depth == 0 {
4674                    return Some(index);
4675                }
4676                at_command_start = false;
4677                expecting_redirection_target = false;
4678            }
4679            '"' => {
4680                let had_word = !current_word.is_empty();
4681                flush_scanned_command_subst_keyword(
4682                    &mut current_word,
4683                    &mut pending_case_headers,
4684                    &mut case_clause_depths,
4685                    depth,
4686                    &mut current_word_started_at_command_start,
4687                );
4688                if had_word && expecting_redirection_target {
4689                    expecting_redirection_target = false;
4690                }
4691                index = scan_double_quoted_command_substitution_segment(
4692                    input,
4693                    next_index,
4694                    subst_depth,
4695                )?;
4696                if expecting_redirection_target {
4697                    expecting_redirection_target = false;
4698                } else {
4699                    at_command_start = false;
4700                }
4701            }
4702            '\'' => {
4703                let had_word = !current_word.is_empty();
4704                flush_scanned_command_subst_keyword(
4705                    &mut current_word,
4706                    &mut pending_case_headers,
4707                    &mut case_clause_depths,
4708                    depth,
4709                    &mut current_word_started_at_command_start,
4710                );
4711                if had_word && expecting_redirection_target {
4712                    expecting_redirection_target = false;
4713                }
4714                index = next_index;
4715                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
4716                    index = quoted_next;
4717                    if quoted_ch == '\'' {
4718                        break;
4719                    }
4720                }
4721                if expecting_redirection_target {
4722                    expecting_redirection_target = false;
4723                } else {
4724                    at_command_start = false;
4725                }
4726            }
4727            '`' => {
4728                let had_word = !current_word.is_empty();
4729                flush_scanned_command_subst_keyword(
4730                    &mut current_word,
4731                    &mut pending_case_headers,
4732                    &mut case_clause_depths,
4733                    depth,
4734                    &mut current_word_started_at_command_start,
4735                );
4736                if had_word && expecting_redirection_target {
4737                    expecting_redirection_target = false;
4738                }
4739                index = scan_command_subst_backtick_segment(input, next_index)?;
4740                if expecting_redirection_target {
4741                    expecting_redirection_target = false;
4742                } else {
4743                    at_command_start = false;
4744                }
4745            }
4746            '$' if input[next_index..].starts_with('\'') => {
4747                let had_word = !current_word.is_empty();
4748                flush_scanned_command_subst_keyword(
4749                    &mut current_word,
4750                    &mut pending_case_headers,
4751                    &mut case_clause_depths,
4752                    depth,
4753                    &mut current_word_started_at_command_start,
4754                );
4755                if had_word && expecting_redirection_target {
4756                    expecting_redirection_target = false;
4757                }
4758                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
4759                if expecting_redirection_target {
4760                    expecting_redirection_target = false;
4761                } else {
4762                    at_command_start = false;
4763                }
4764            }
4765            '\\' => {
4766                let had_word = !current_word.is_empty();
4767                flush_scanned_command_subst_keyword(
4768                    &mut current_word,
4769                    &mut pending_case_headers,
4770                    &mut case_clause_depths,
4771                    depth,
4772                    &mut current_word_started_at_command_start,
4773                );
4774                if had_word && expecting_redirection_target {
4775                    expecting_redirection_target = false;
4776                }
4777                index = next_index;
4778                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4779                    index = escaped_next;
4780                }
4781                if expecting_redirection_target {
4782                    expecting_redirection_target = false;
4783                } else {
4784                    at_command_start = false;
4785                }
4786            }
4787            '>' => {
4788                let word_was_redirection_fd = current_word_started_at_command_start
4789                    && !current_word.is_empty()
4790                    && current_word.chars().all(|current| current.is_ascii_digit());
4791                flush_scanned_command_subst_keyword(
4792                    &mut current_word,
4793                    &mut pending_case_headers,
4794                    &mut case_clause_depths,
4795                    depth,
4796                    &mut current_word_started_at_command_start,
4797                );
4798                if word_was_redirection_fd {
4799                    at_command_start = true;
4800                }
4801                index = next_index;
4802                expecting_redirection_target = true;
4803            }
4804            '<' if input[next_index..].starts_with('<') => {
4805                let word_was_redirection_fd = current_word_started_at_command_start
4806                    && !current_word.is_empty()
4807                    && current_word.chars().all(|current| current.is_ascii_digit());
4808                let had_word = !current_word.is_empty();
4809                flush_scanned_command_subst_keyword(
4810                    &mut current_word,
4811                    &mut pending_case_headers,
4812                    &mut case_clause_depths,
4813                    depth,
4814                    &mut current_word_started_at_command_start,
4815                );
4816                if had_word && expecting_redirection_target {
4817                    expecting_redirection_target = false;
4818                }
4819                if word_was_redirection_fd {
4820                    at_command_start = true;
4821                }
4822                if inside_unclosed_double_paren_on_line(input, index) {
4823                    index = next_index + '<'.len_utf8();
4824                    continue;
4825                }
4826
4827                if input[next_index + '<'.len_utf8()..].starts_with('<') {
4828                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
4829                    expecting_redirection_target = true;
4830                    continue;
4831                }
4832
4833                let strip_tabs = input[next_index..].starts_with("<-");
4834                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
4835                if let Some((delimiter_index, delimiter)) =
4836                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
4837                {
4838                    pending_heredocs.push((delimiter, strip_tabs));
4839                    index = delimiter_index;
4840                    expecting_redirection_target = false;
4841                } else {
4842                    index = next_index;
4843                    expecting_redirection_target = true;
4844                }
4845            }
4846            '\n' => {
4847                flush_scanned_command_subst_keyword(
4848                    &mut current_word,
4849                    &mut pending_case_headers,
4850                    &mut case_clause_depths,
4851                    depth,
4852                    &mut current_word_started_at_command_start,
4853                );
4854                index = next_index;
4855                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
4856                    index =
4857                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
4858                }
4859                at_command_start = true;
4860                expecting_redirection_target = false;
4861            }
4862            '$' if input[next_index..].starts_with('{') => {
4863                let had_word = !current_word.is_empty();
4864                flush_scanned_command_subst_keyword(
4865                    &mut current_word,
4866                    &mut pending_case_headers,
4867                    &mut case_clause_depths,
4868                    depth,
4869                    &mut current_word_started_at_command_start,
4870                );
4871                if had_word && expecting_redirection_target {
4872                    expecting_redirection_target = false;
4873                }
4874                let consumed = scan_command_subst_parameter_expansion_len(
4875                    &input[next_index + '{'.len_utf8()..],
4876                    subst_depth,
4877                    0,
4878                )?;
4879                index = next_index + '{'.len_utf8() + consumed;
4880                if expecting_redirection_target {
4881                    expecting_redirection_target = false;
4882                } else {
4883                    at_command_start = false;
4884                }
4885            }
4886            '$' if input[next_index..].starts_with('(')
4887                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4888            {
4889                let had_word = !current_word.is_empty();
4890                flush_scanned_command_subst_keyword(
4891                    &mut current_word,
4892                    &mut pending_case_headers,
4893                    &mut case_clause_depths,
4894                    depth,
4895                    &mut current_word_started_at_command_start,
4896                );
4897                if had_word && expecting_redirection_target {
4898                    expecting_redirection_target = false;
4899                }
4900                let consumed = scan_command_substitution_body_len_inner(
4901                    &input[next_index + '('.len_utf8()..],
4902                    subst_depth + 1,
4903                )?;
4904                index = next_index + '('.len_utf8() + consumed;
4905                if expecting_redirection_target {
4906                    expecting_redirection_target = false;
4907                } else {
4908                    at_command_start = false;
4909                }
4910            }
4911            _ => {
4912                if ch.is_ascii_alphanumeric() || ch == '_' {
4913                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
4914                    {
4915                        current_word_started_at_command_start = true;
4916                        at_command_start = false;
4917                    }
4918                    current_word.push(ch);
4919                } else {
4920                    let had_word = !current_word.is_empty();
4921                    flush_scanned_command_subst_keyword(
4922                        &mut current_word,
4923                        &mut pending_case_headers,
4924                        &mut case_clause_depths,
4925                        depth,
4926                        &mut current_word_started_at_command_start,
4927                    );
4928                    if had_word && expecting_redirection_target {
4929                        expecting_redirection_target = false;
4930                    }
4931                    match ch {
4932                        ' ' | '\t' => {}
4933                        ';' | '|' | '&' => {
4934                            at_command_start = true;
4935                            expecting_redirection_target = false;
4936                        }
4937                        _ => {
4938                            if !expecting_redirection_target {
4939                                at_command_start = false;
4940                            }
4941                        }
4942                    }
4943                }
4944                index = next_index;
4945            }
4946        }
4947    }
4948
4949    None
4950}
4951
4952pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
4953    scan_command_substitution_body_len_inner(input, 0)
4954}
4955
4956#[cfg(test)]
4957mod tests {
4958    use super::*;
4959
4960    fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
4961        match token.kind {
4962            kind if kind.is_word_like() => token.word_string(),
4963            TokenKind::Comment => token
4964                .span
4965                .slice(source)
4966                .strip_prefix('#')
4967                .map(str::to_string),
4968            TokenKind::Error => token
4969                .error_kind()
4970                .map(LexerErrorKind::message)
4971                .map(str::to_string),
4972            _ => None,
4973        }
4974    }
4975
4976    fn assert_next_token(
4977        lexer: &mut Lexer<'_>,
4978        expected_kind: TokenKind,
4979        expected_text: Option<&str>,
4980    ) {
4981        let token = lexer.next_lexed_token().unwrap();
4982        assert_eq!(token.kind, expected_kind);
4983        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4984    }
4985
4986    fn assert_next_token_with_comments(
4987        lexer: &mut Lexer<'_>,
4988        expected_kind: TokenKind,
4989        expected_text: Option<&str>,
4990    ) {
4991        let token = lexer.next_lexed_token_with_comments().unwrap();
4992        assert_eq!(token.kind, expected_kind);
4993        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4994    }
4995
4996    fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
4997        let mut lexer = Lexer::new(input);
4998
4999        while let Some(token) = lexer.next_lexed_token() {
5000            if token.kind == TokenKind::Newline {
5001                continue;
5002            }
5003
5004            assert_eq!(
5005                token.span.start.line, token.span.end.line,
5006                "token should stay on one line: {:?}",
5007                token
5008            );
5009        }
5010    }
5011
5012    #[test]
5013    fn test_simple_words() {
5014        let mut lexer = Lexer::new("echo hello world");
5015
5016        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5017        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5018        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5019        assert!(lexer.next_lexed_token().is_none());
5020    }
5021
5022    #[test]
5023    fn test_single_quoted_string() {
5024        let mut lexer = Lexer::new("echo 'hello world'");
5025
5026        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5027        // Single-quoted strings return LiteralWord (no variable expansion)
5028        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("hello world"));
5029        assert!(lexer.next_lexed_token().is_none());
5030    }
5031
5032    #[test]
5033    fn test_double_quoted_string() {
5034        let mut lexer = Lexer::new("echo \"hello world\"");
5035
5036        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5037        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("hello world"));
5038        assert!(lexer.next_lexed_token().is_none());
5039    }
5040
5041    #[test]
5042    fn test_brace_expansion_token_ignores_quoted_closers() {
5043        let mut lexer = Lexer::new("echo {\"}\",a}\n");
5044
5045        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5046        assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{"}",a}"#));
5047        assert_next_token(&mut lexer, TokenKind::Newline, None);
5048        assert!(lexer.next_lexed_token().is_none());
5049    }
5050
5051    #[test]
5052    fn test_brace_expansion_token_preserves_single_quoted_backslash_member_boundary() {
5053        let mut lexer = Lexer::new("echo {'a\\',b} next\n");
5054
5055        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5056        assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{'a\',b}"#));
5057        assert_next_token(&mut lexer, TokenKind::Word, Some("next"));
5058        assert_next_token(&mut lexer, TokenKind::Newline, None);
5059        assert!(lexer.next_lexed_token().is_none());
5060    }
5061
5062    #[test]
5063    fn test_double_quoted_expansion_token_keeps_source_backing() {
5064        let source = r#""$bar""#;
5065        let mut lexer = Lexer::new(source);
5066
5067        let token = lexer.next_lexed_token().unwrap();
5068        assert_eq!(token.kind, TokenKind::QuotedWord);
5069        assert_eq!(token.word_text(), Some("$bar"));
5070
5071        let word = token.word().unwrap();
5072        let segment = word.single_segment().unwrap();
5073        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
5074        assert_eq!(segment.span().unwrap().slice(source), "$bar");
5075    }
5076
5077    #[test]
5078    fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
5079        let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
5080        let mut lexer = Lexer::new(source);
5081
5082        let token = lexer.next_lexed_token().unwrap();
5083        assert_eq!(token.kind, TokenKind::QuotedWord);
5084        assert_eq!(
5085            token.word_text(),
5086            Some(r#"$(echo "$line" | cut -d' ' -f2-)"#)
5087        );
5088    }
5089
5090    #[test]
5091    fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
5092        let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
5093        let mut lexer = Lexer::new(source);
5094
5095        let token = lexer.next_lexed_token().unwrap();
5096        assert_eq!(token.kind, TokenKind::QuotedWord);
5097        assert_eq!(
5098            token.word_text(),
5099            Some(r#"$(echo "${@}" | tr -d '[:space:]')"#)
5100        );
5101    }
5102
5103    #[test]
5104    fn test_deep_command_substitution_preserves_simple_parameter_expansion() {
5105        let source = r#""$(echo "$(echo "$(echo "$(echo "${name}")")")")""#;
5106        let mut lexer = Lexer::new(source);
5107
5108        let token = lexer.next_lexed_token().unwrap();
5109        assert_eq!(token.kind, TokenKind::QuotedWord);
5110        assert_eq!(
5111            token.word_text(),
5112            Some(r#"$(echo "$(echo "$(echo "$(echo "${name}")")")")"#)
5113        );
5114    }
5115
5116    #[test]
5117    fn test_command_substitution_preserves_deep_parameter_operand_paren() {
5118        let source = r#""$(echo "${a:-${b:-${c:-${d:-${e:-x})}}}}")""#;
5119        let mut lexer = Lexer::new(source);
5120
5121        let token = lexer.next_lexed_token().unwrap();
5122        assert_eq!(token.kind, TokenKind::QuotedWord);
5123        assert_eq!(
5124            token.word_text(),
5125            Some(r#"$(echo "${a:-${b:-${c:-${d:-${e:-x})}}}}")"#)
5126        );
5127    }
5128
5129    #[test]
5130    fn test_mixed_word_keeps_segment_kinds() {
5131        let source = r#"foo"bar"'baz'"#;
5132        let mut lexer = Lexer::new(source);
5133
5134        let token = lexer.next_lexed_token().unwrap();
5135        assert_eq!(token.kind, TokenKind::Word);
5136
5137        let word = token.word().unwrap();
5138        let segments: Vec<_> = word
5139            .segments()
5140            .map(|segment| (segment.kind(), segment.as_str().to_string()))
5141            .collect();
5142
5143        assert_eq!(
5144            segments,
5145            vec![
5146                (LexedWordSegmentKind::Plain, "foo".to_string()),
5147                (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
5148                (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
5149            ]
5150        );
5151        assert_eq!(word.joined_text(), "foobarbaz");
5152        assert_eq!(
5153            word.segments()
5154                .next()
5155                .and_then(LexedWordSegment::span)
5156                .unwrap()
5157                .slice(source),
5158            "foo"
5159        );
5160    }
5161
5162    #[test]
5163    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
5164        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";
5165
5166        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5167        let body = &source[..consumed];
5168
5169        assert!(body.contains("field, direction"));
5170        assert!(body.ends_with(')'));
5171    }
5172
5173    #[test]
5174    fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
5175        let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";
5176
5177        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5178        let body = &source[..consumed];
5179
5180        assert!(body.contains("printf '%s' y"));
5181        assert!(body.ends_with(')'));
5182    }
5183
5184    #[test]
5185    fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
5186        let source = " (# comment with )\nprintf %s 1,2\n) )\"";
5187
5188        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5189        let body = &source[..consumed];
5190
5191        assert!(body.contains("printf %s 1,2"));
5192        assert!(body.ends_with(')'));
5193    }
5194
5195    #[test]
5196    fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
5197        let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";
5198
5199        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5200        let body = &source[..consumed];
5201
5202        assert!(body.contains("field, direction"));
5203        assert!(body.ends_with(')'));
5204    }
5205
5206    #[test]
5207    fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
5208        let source = "printf %s ${x//foo/)},1)\"";
5209
5210        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5211        let body = &source[..consumed];
5212
5213        assert!(body.contains("${x//foo/)},1"));
5214        assert!(body.ends_with(')'));
5215    }
5216
5217    #[test]
5218    fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
5219        let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";
5220
5221        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5222        let body = &source[..consumed];
5223
5224        assert!(body.contains("printf %s 1,2"));
5225        assert!(body.ends_with(')'));
5226    }
5227
5228    #[test]
5229    fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
5230        let source = "[[ \"$buf\" == (#b)(*) ]]";
5231        let index = source.find('#').expect("expected hash");
5232
5233        assert!(!hash_starts_comment(source, index));
5234    }
5235
5236    #[test]
5237    fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
5238        let source = "(#comment with )";
5239        let index = source.find('#').expect("expected hash");
5240
5241        assert!(hash_starts_comment(source, index));
5242    }
5243
5244    #[test]
5245    fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
5246        let source = "(( #c < 256 ))";
5247        let index = source.find('#').expect("expected hash");
5248
5249        assert!(!hash_starts_comment(source, index));
5250    }
5251
5252    #[test]
5253    fn test_hash_starts_comment_respects_quoted_double_parens() {
5254        let source = "printf '((' # comment";
5255        let index = source.find('#').expect("expected hash");
5256
5257        assert!(hash_starts_comment(source, index));
5258    }
5259
5260    #[test]
5261    fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
5262        let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";
5263
5264        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5265        let body = &source[..consumed];
5266
5267        assert!(body.contains("printf %s 1,2"));
5268        assert!(body.ends_with(')'));
5269    }
5270
5271    #[test]
5272    fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
5273        let source = " (#comment with )\nprintf %s 1,2\n) )\"";
5274
5275        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5276        let body = &source[..consumed];
5277
5278        assert!(body.contains("printf %s 1,2"));
5279        assert!(body.ends_with(')'));
5280    }
5281
5282    #[test]
5283    fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
5284        let source = "((x<<2))\nprintf %s 1,2\n)\"";
5285
5286        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5287        let body = &source[..consumed];
5288
5289        assert!(body.contains("printf %s 1,2"));
5290        assert!(body.ends_with(')'));
5291    }
5292
5293    #[test]
5294    fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
5295        let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";
5296
5297        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5298        let body = &source[..consumed];
5299
5300        assert!(body.contains("printf %s 1,2"));
5301        assert!(body.ends_with("))"));
5302    }
5303
5304    #[test]
5305    fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
5306        let source = "printf %s 1,2; echo case in)\"";
5307
5308        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5309        let body = &source[..consumed];
5310
5311        assert!(body.contains("echo case in"));
5312        assert!(body.ends_with(')'));
5313    }
5314
5315    #[test]
5316    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
5317        let source = "printf %s $'a\\'b'; printf %s 1,2)\"";
5318
5319        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5320        let body = &source[..consumed];
5321
5322        assert!(body.contains("$'a\\'b'"));
5323        assert!(body.contains("printf %s 1,2"));
5324        assert!(body.ends_with(')'));
5325    }
5326
5327    #[test]
5328    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
5329        let source = "printf %s `echo foo)`; printf %s ok)\"";
5330
5331        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5332        let body = &source[..consumed];
5333
5334        assert!(body.contains("`echo foo)`"));
5335        assert!(body.contains("printf %s ok"));
5336        assert!(body.ends_with(')'));
5337    }
5338
5339    #[test]
5340    fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
5341        let source = "printf %s ${x/`echo }`/foo)},1)\"";
5342
5343        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5344        let body = &source[..consumed];
5345
5346        assert!(body.contains("${x/`echo }`/foo)},1"));
5347        assert!(body.ends_with(')'));
5348    }
5349
5350    #[test]
5351    fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
5352     {
5353        let source = "printf %s ${x/<(echo })/foo)},1)\"";
5354
5355        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5356        let body = &source[..consumed];
5357
5358        assert!(body.contains("${x/<(echo })/foo)},1"));
5359        assert!(body.ends_with(')'));
5360    }
5361
5362    #[test]
5363    fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
5364        let source = "printf %s 1,2; echo case in)";
5365
5366        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5367        let body = &source[..consumed];
5368
5369        assert_eq!(body, source);
5370    }
5371
5372    #[test]
5373    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
5374        let source = "printf %s $'a\\'b'; printf %s 1,2)";
5375
5376        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5377        let body = &source[..consumed];
5378
5379        assert_eq!(body, source);
5380    }
5381
5382    #[test]
5383    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
5384        let source = "printf %s `echo foo)`; printf %s ok)";
5385
5386        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5387        let body = &source[..consumed];
5388
5389        assert_eq!(body, source);
5390    }
5391
5392    #[test]
5393    fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
5394        let source = "echo \"$line\" | cut -d' ' -f2-)";
5395
5396        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5397        let body = &source[..consumed];
5398
5399        assert_eq!(body, source);
5400    }
5401
5402    #[test]
5403    fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
5404        let source = "echo \"${@}\" | tr -d '[:space:]')";
5405
5406        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5407        let body = &source[..consumed];
5408
5409        assert_eq!(body, source);
5410    }
5411
5412    #[test]
5413    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
5414        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";
5415
5416        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5417        let body = &source[..consumed];
5418
5419        assert_eq!(body, source);
5420    }
5421
5422    #[test]
5423    fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
5424        let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";
5425
5426        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5427        let body = &source[..consumed];
5428
5429        assert_eq!(body, source);
5430    }
5431
5432    #[test]
5433    fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
5434        let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
5435        let mut lexer = Lexer::new(source);
5436
5437        let first = lexer.next_lexed_token().expect("expected first token");
5438        assert!(first.kind.is_word_like(), "{:?}", first.kind);
5439        assert_eq!(first.word_string().as_deref(), Some("echo"));
5440
5441        let second = lexer.next_lexed_token().expect("expected second token");
5442        assert!(second.kind.is_word_like(), "{:?}", second.kind);
5443        assert_eq!(
5444            second.word_string().as_deref(),
5445            Some("$(echo \"$(( $(printf ')') + 1 ))\")")
5446        );
5447    }
5448
5449    #[test]
5450    fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
5451        let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
5452        let start = source.find("$(").expect("expected command substitution") + 2;
5453        let consumed =
5454            scan_command_substitution_body_len(&source[start..]).expect("expected match");
5455        assert_eq!(&source[start..start + consumed], "echo $var | tr A-Z a-z)");
5456    }
5457
5458    #[test]
5459    fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
5460        let source = "echo $(echo $(basename $filename .fuzz))";
5461        let start = source.find("$(").expect("expected command substitution") + 2;
5462        let consumed =
5463            scan_command_substitution_body_len(&source[start..]).expect("expected match");
5464        assert_eq!(
5465            &source[start..start + consumed],
5466            "echo $(basename $filename .fuzz))"
5467        );
5468    }
5469
5470    #[test]
5471    fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
5472        let source = "\n       [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n    )";
5473        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5474        assert_eq!(consumed, source.len());
5475    }
5476
5477    #[test]
5478    fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
5479        let source = "'foo'bar";
5480        let mut lexer = Lexer::new(source);
5481
5482        let token = lexer.next_lexed_token().unwrap();
5483        assert_eq!(token.kind, TokenKind::LiteralWord);
5484
5485        let word = token.word().unwrap();
5486        let segments: Vec<_> = word
5487            .segments()
5488            .map(|segment| (segment.kind(), segment.as_str().to_string()))
5489            .collect();
5490
5491        assert_eq!(
5492            segments,
5493            vec![
5494                (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
5495                (LexedWordSegmentKind::Plain, "bar".to_string()),
5496            ]
5497        );
5498        assert_eq!(word.joined_text(), "foobar");
5499        assert_eq!(
5500            word.segments()
5501                .nth(1)
5502                .and_then(LexedWordSegment::span)
5503                .unwrap()
5504                .slice(source),
5505            "bar"
5506        );
5507    }
5508
5509    #[test]
5510    fn test_unquoted_command_substitution_word_keeps_source_backing() {
5511        let source = "$(printf hi)";
5512        let mut lexer = Lexer::new(source);
5513
5514        let token = lexer.next_lexed_token().unwrap();
5515        assert_eq!(token.kind, TokenKind::Word);
5516
5517        let word = token.word().unwrap();
5518        let segment = word.single_segment().unwrap();
5519        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5520        assert_eq!(segment.as_str(), source);
5521        assert_eq!(segment.span().unwrap().slice(source), source);
5522    }
5523
5524    #[test]
5525    fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
5526        let source = "${arr[$RANDOM % ${#arr[@]}]}";
5527        let mut lexer = Lexer::new(source);
5528
5529        let token = lexer.next_lexed_token().unwrap();
5530        assert_eq!(token.kind, TokenKind::Word);
5531
5532        let word = token.word().unwrap();
5533        let segment = word.single_segment().unwrap();
5534        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5535        assert_eq!(segment.as_str(), source);
5536        assert_eq!(segment.span().unwrap().slice(source), source);
5537    }
5538
5539    #[test]
5540    fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
5541        let source = "\"foo\"$(printf hi)";
5542        let mut lexer = Lexer::new(source);
5543
5544        let token = lexer.next_lexed_token().unwrap();
5545        assert_eq!(token.kind, TokenKind::Word);
5546
5547        let word = token.word().unwrap();
5548        let continuation = word.segments().nth(1).unwrap();
5549        assert_eq!(continuation.kind(), LexedWordSegmentKind::Plain);
5550        assert_eq!(continuation.as_str(), "$(printf hi)");
5551        assert_eq!(continuation.span().unwrap().slice(source), "$(printf hi)");
5552    }
5553
5554    #[test]
5555    fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
5556        let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
5557        let mut lexer = Lexer::new(source);
5558
5559        let token = lexer.next_lexed_token().unwrap();
5560        assert_eq!(token.kind, TokenKind::QuotedWord);
5561
5562        let word = token.word().unwrap();
5563        let segment = word.single_segment().unwrap();
5564        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
5565        assert_eq!(segment.as_str(), "${arr[$RANDOM % ${#arr[@]}]}");
5566        assert_eq!(
5567            segment.span().unwrap().slice(source),
5568            "${arr[$RANDOM % ${#arr[@]}]}"
5569        );
5570    }
5571
5572    #[test]
5573    fn test_ansi_c_control_escape_can_consume_quote() {
5574        let mut lexer = Lexer::new("echo $'\\c''");
5575
5576        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5577        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("\x07"));
5578        assert!(lexer.next_lexed_token().is_none());
5579    }
5580
5581    #[test]
5582    fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
5583        let source = r#"out_line="${out_line//'"'/'\"'}"
5584"#;
5585        let mut lexer = Lexer::new(source);
5586
5587        assert_next_token(
5588            &mut lexer,
5589            TokenKind::Word,
5590            Some(r#"out_line=${out_line//'"'/'"'}"#),
5591        );
5592        assert_next_token(&mut lexer, TokenKind::Newline, None);
5593        assert!(lexer.next_lexed_token().is_none());
5594    }
5595
5596    #[test]
5597    fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
5598        let source = r#"out_line="${out_line//'"'/'\"'}"
5599echo "Error: Missing python3!"
5600cat << 'EOF' > "${pywrapper}"
5601import os
5602EOF
5603"#;
5604        let mut lexer = Lexer::new(source);
5605
5606        assert_next_token(
5607            &mut lexer,
5608            TokenKind::Word,
5609            Some(r#"out_line=${out_line//'"'/'"'}"#),
5610        );
5611        assert_next_token(&mut lexer, TokenKind::Newline, None);
5612        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5613        assert_next_token(
5614            &mut lexer,
5615            TokenKind::QuotedWord,
5616            Some("Error: Missing python3!"),
5617        );
5618        assert_next_token(&mut lexer, TokenKind::Newline, None);
5619        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5620        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5621        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("EOF"));
5622        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5623        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("${pywrapper}"));
5624    }
5625
5626    #[test]
5627    fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
5628        let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
5629        let mut lexer = Lexer::new(source);
5630
5631        let token = lexer.next_lexed_token().unwrap();
5632        assert_eq!(token.kind, TokenKind::Word);
5633        assert_eq!(token.span.slice(source), "crypt=${crypt//\\\\/\\\\\\\\}");
5634        assert!(token.source_slice(source).is_none());
5635        assert_eq!(
5636            token.word_string().as_deref(),
5637            Some("crypt=${crypt//\\/\\\\}")
5638        );
5639        assert_next_token(&mut lexer, TokenKind::Newline, None);
5640        assert!(lexer.next_lexed_token().is_none());
5641    }
5642
5643    #[test]
5644    fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
5645        let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n  if true; then\n    txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n  fi\n}\n";
5646        let mut lexer = Lexer::new(source);
5647
5648        assert_next_token(
5649            &mut lexer,
5650            TokenKind::Word,
5651            Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
5652        );
5653        assert_next_token(&mut lexer, TokenKind::Newline, None);
5654        assert_next_token(&mut lexer, TokenKind::Newline, None);
5655        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5656        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5657        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5658        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
5659        assert_next_token(&mut lexer, TokenKind::Newline, None);
5660        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5661        assert_next_token(&mut lexer, TokenKind::Word, Some("true"));
5662        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5663        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5664        assert_next_token(&mut lexer, TokenKind::Newline, None);
5665        assert_next_token(
5666            &mut lexer,
5667            TokenKind::Word,
5668            Some(
5669                "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
5670            ),
5671        );
5672        assert_next_token(&mut lexer, TokenKind::Newline, None);
5673        assert_next_token(&mut lexer, TokenKind::Word, Some("fi"));
5674        assert_next_token(&mut lexer, TokenKind::Newline, None);
5675        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
5676        assert_next_token(&mut lexer, TokenKind::Newline, None);
5677        assert!(lexer.next_lexed_token().is_none());
5678    }
5679
5680    #[test]
5681    fn test_case_pattern_literal_left_brace_does_not_swallow_following_arms() {
5682        let source = "case \"$word\" in\n  {) : ;;\n  :) : ;;\nesac\n";
5683        let mut lexer = Lexer::new(source);
5684
5685        assert_next_token(&mut lexer, TokenKind::Word, Some("case"));
5686        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$word"));
5687        assert_next_token(&mut lexer, TokenKind::Word, Some("in"));
5688        assert_next_token(&mut lexer, TokenKind::Newline, None);
5689        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
5690        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5691        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5692        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
5693        assert_next_token(&mut lexer, TokenKind::Newline, None);
5694        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5695        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5696        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5697        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
5698        assert_next_token(&mut lexer, TokenKind::Newline, None);
5699        assert_next_token(&mut lexer, TokenKind::Word, Some("esac"));
5700        assert_next_token(&mut lexer, TokenKind::Newline, None);
5701        assert!(lexer.next_lexed_token().is_none());
5702    }
5703
5704    #[test]
5705    fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
5706        let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
5707        let mut lexer = Lexer::new(source);
5708
5709        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5710        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5711        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5712        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5713        assert_next_token(&mut lexer, TokenKind::And, None);
5714        assert_next_token(&mut lexer, TokenKind::Word, Some("!"));
5715        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5716        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5717        assert_next_token(&mut lexer, TokenKind::Word, Some("=~"));
5718        assert_next_token(&mut lexer, TokenKind::Word, Some("^{"));
5719        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5720        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5721        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5722        assert_next_token(&mut lexer, TokenKind::Newline, None);
5723        assert!(lexer.next_lexed_token().is_none());
5724    }
5725
5726    #[test]
5727    fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
5728        let source = "echo -{$(echo a),b}-\n";
5729        let mut lexer = Lexer::new(source);
5730
5731        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5732        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$(echo a),b}-"));
5733        assert_next_token(&mut lexer, TokenKind::Newline, None);
5734        assert!(lexer.next_lexed_token().is_none());
5735    }
5736
5737    #[test]
5738    fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
5739        let source = "echo -{$((1 + 2)),b}-\n";
5740        let mut lexer = Lexer::new(source);
5741
5742        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5743        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$((1 + 2)),b}-"));
5744        assert_next_token(&mut lexer, TokenKind::Newline, None);
5745        assert!(lexer.next_lexed_token().is_none());
5746    }
5747
5748    #[test]
5749    fn test_operators() {
5750        let mut lexer = Lexer::new("a |& b | c && d || e; f &");
5751
5752        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5753        assert_next_token(&mut lexer, TokenKind::PipeBoth, None);
5754        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5755        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5756        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5757        assert_next_token(&mut lexer, TokenKind::And, None);
5758        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5759        assert_next_token(&mut lexer, TokenKind::Or, None);
5760        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5761        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5762        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5763        assert_next_token(&mut lexer, TokenKind::Background, None);
5764        assert!(lexer.next_lexed_token().is_none());
5765    }
5766
5767    #[test]
5768    fn test_double_left_bracket_requires_separator() {
5769        let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");
5770
5771        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5772        assert_next_token(&mut lexer, TokenKind::Word, Some("foo"));
5773        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5774        assert_next_token(&mut lexer, TokenKind::Newline, None);
5775        assert_next_token(&mut lexer, TokenKind::Word, Some("[[z]"));
5776        assert_next_token(&mut lexer, TokenKind::Newline, None);
5777        assert!(lexer.next_lexed_token().is_none());
5778    }
5779
5780    #[test]
5781    fn test_redirects() {
5782        let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");
5783
5784        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5785        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5786        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5787        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5788        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5789        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5790        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5791        assert_next_token(&mut lexer, TokenKind::RedirectFdAppend, None);
5792        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5793        let token = lexer.next_lexed_token().unwrap();
5794        assert_eq!(token.kind, TokenKind::Clobber);
5795        assert_eq!(token.fd_value(), Some(2));
5796        assert_eq!(token_text(&token, lexer.input), None);
5797        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5798        assert_next_token(&mut lexer, TokenKind::RedirectIn, None);
5799        assert_next_token(&mut lexer, TokenKind::Word, Some("g"));
5800        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5801        assert_next_token(&mut lexer, TokenKind::Word, Some("h"));
5802        assert_next_token(&mut lexer, TokenKind::HereString, None);
5803        assert_next_token(&mut lexer, TokenKind::Word, Some("i"));
5804        assert_next_token(&mut lexer, TokenKind::RedirectBothAppend, None);
5805        assert_next_token(&mut lexer, TokenKind::Word, Some("j"));
5806        assert_next_token(&mut lexer, TokenKind::RedirectReadWrite, None);
5807        assert_next_token(&mut lexer, TokenKind::Word, Some("k"));
5808    }
5809
5810    #[test]
5811    fn test_comment() {
5812        let mut lexer = Lexer::new("echo hello # this is a comment\necho world");
5813
5814        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5815        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5816        assert_next_token(&mut lexer, TokenKind::Newline, None);
5817        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5818        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5819    }
5820
5821    #[test]
5822    fn test_comment_token_with_span() {
5823        let mut lexer = Lexer::new("# lead\necho hi # tail");
5824
5825        let comment = lexer.next_lexed_token_with_comments().unwrap();
5826        assert_eq!(comment.kind, TokenKind::Comment);
5827        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
5828        assert_eq!(comment.span.start.line, 1);
5829        assert_eq!(comment.span.start.column, 1);
5830        assert_eq!(comment.span.end.line, 1);
5831        assert_eq!(comment.span.end.column, 7);
5832
5833        assert_next_token(&mut lexer, TokenKind::Newline, None);
5834        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5835        assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));
5836
5837        let inline = lexer.next_lexed_token_with_comments().unwrap();
5838        assert_eq!(inline.kind, TokenKind::Comment);
5839        assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
5840        assert_eq!(inline.span.start.line, 2);
5841        assert_eq!(inline.span.start.column, 9);
5842    }
5843
5844    #[test]
5845    fn test_comment_token_preserves_hash_boundaries() {
5846        let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");
5847
5848        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5849        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
5850        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
5851        assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
5852        assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
5853        assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
5854        assert!(lexer.next_lexed_token_with_comments().is_none());
5855    }
5856
5857    #[test]
5858    fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5859        let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5860
5861        let mut saw_comment = false;
5862        while let Some(token) = lexer.next_lexed_token_with_comments() {
5863            if token.kind == TokenKind::Comment {
5864                saw_comment = true;
5865                break;
5866            }
5867        }
5868
5869        assert!(
5870            !saw_comment,
5871            "zsh inline glob controls inside [[ ]] should not lex as comments"
5872        );
5873    }
5874
5875    #[test]
5876    fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5877        let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5878
5879        let mut saw_comment = false;
5880        while let Some(token) = lexer.next_lexed_token_with_comments() {
5881            if token.kind == TokenKind::Comment {
5882                saw_comment = true;
5883                break;
5884            }
5885        }
5886
5887        assert!(
5888            !saw_comment,
5889            "zsh arithmetic char literals inside (( )) should not lex as comments"
5890        );
5891    }
5892
5893    #[test]
5894    fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
5895        let mut lexer = Lexer::new(
5896            "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
5897        );
5898
5899        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5900        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5901        assert_next_token(
5902            &mut lexer,
5903            TokenKind::LiteralWord,
5904            Some("\\e]133;C;cmdline_url=%s\\a"),
5905        );
5906        assert_next_token(
5907            &mut lexer,
5908            TokenKind::QuotedWord,
5909            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
5910        );
5911        assert_next_token(&mut lexer, TokenKind::Newline, None);
5912    }
5913
5914    #[test]
5915    fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
5916        let mut lexer = Lexer::new(
5917            "() {\n  builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
5918        );
5919
5920        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5921        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5922        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
5923        assert_next_token(&mut lexer, TokenKind::Newline, None);
5924        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5925        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5926        assert_next_token(
5927            &mut lexer,
5928            TokenKind::LiteralWord,
5929            Some("\\e]133;C;cmdline_url=%s\\a"),
5930        );
5931        assert_next_token(
5932            &mut lexer,
5933            TokenKind::QuotedWord,
5934            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
5935        );
5936        assert_next_token(&mut lexer, TokenKind::Newline, None);
5937        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
5938        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
5939        assert_next_token(&mut lexer, TokenKind::Newline, None);
5940    }
5941
5942    #[test]
5943    fn test_variable_words() {
5944        let mut lexer = Lexer::new("echo $HOME $USER");
5945
5946        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5947        assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
5948        assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
5949        assert!(lexer.next_lexed_token().is_none());
5950    }
5951
5952    #[test]
5953    fn test_pipeline_tokens() {
5954        let mut lexer = Lexer::new("echo hello | cat");
5955
5956        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5957        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5958        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5959        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5960        assert!(lexer.next_lexed_token().is_none());
5961    }
5962
5963    #[test]
5964    fn test_read_heredoc() {
5965        // Simulate state after reading "cat <<EOF" - positioned at newline before content
5966        let mut lexer = Lexer::new("\nhello\nworld\nEOF");
5967        let content = lexer.read_heredoc("EOF", false);
5968        assert_eq!(content.content, "hello\nworld\n");
5969    }
5970
5971    #[test]
5972    fn test_read_heredoc_single_line() {
5973        let mut lexer = Lexer::new("\ntest\nEOF");
5974        let content = lexer.read_heredoc("EOF", false);
5975        assert_eq!(content.content, "test\n");
5976    }
5977
5978    #[test]
5979    fn test_read_heredoc_full_scenario() {
5980        // Full scenario: "cat <<EOF\nhello\nworld\nEOF"
5981        let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
5982
5983        // Parser would read these tokens
5984        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5985        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5986        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5987
5988        // Now read heredoc content
5989        let content = lexer.read_heredoc("EOF", false);
5990        assert_eq!(content.content, "hello\nworld\n");
5991    }
5992
5993    #[test]
5994    fn test_read_heredoc_with_redirect() {
5995        // Rest-of-line (> file.txt) is re-injected into the lexer buffer
5996        let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
5997        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5998        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5999        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6000        let content = lexer.read_heredoc("EOF", false);
6001        assert_eq!(content.content, "hello\n");
6002        // The redirect tokens are now available from the lexer
6003        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
6004        assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
6005    }
6006
6007    #[test]
6008    fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
6009        let source = "cat <<EOF | grep hello \\\n  | sort \\\n  > out.txt\nhello\nEOF\n";
6010        let mut lexer = Lexer::new(source);
6011
6012        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6013        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6014        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6015
6016        let heredoc = lexer.read_heredoc("EOF", false);
6017        assert_eq!(heredoc.content, "hello\n");
6018
6019        assert_next_token(&mut lexer, TokenKind::Pipe, None);
6020        assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
6021        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
6022        assert_next_token(&mut lexer, TokenKind::Pipe, None);
6023        assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
6024        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
6025        assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
6026    }
6027
6028    #[test]
6029    fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
6030        let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
6031        let mut lexer = Lexer::new(source);
6032
6033        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6034        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6035        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6036
6037        let heredoc = lexer.read_heredoc("EOF", false);
6038        assert_eq!(heredoc.content, "1\n2\n3\n");
6039    }
6040
6041    #[test]
6042    fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
6043        let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
6044        let mut lexer = Lexer::new(source);
6045
6046        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6047        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6048        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6049
6050        let heredoc = lexer.read_heredoc("EOF", false);
6051        assert_eq!(heredoc.content, "body\n");
6052    }
6053
6054    #[test]
6055    fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
6056        let source = "cat <<EOF # note \\\nbody\nEOF\n";
6057        let mut lexer = Lexer::new(source);
6058
6059        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6060        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6061        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6062
6063        let heredoc = lexer.read_heredoc("EOF", false);
6064        assert_eq!(heredoc.content, "body\n");
6065    }
6066
6067    #[test]
6068    fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
6069        let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
6070        let mut lexer = Lexer::new(source);
6071
6072        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6073        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6074        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6075        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6076
6077        let heredoc = lexer.read_heredoc("EOF", false);
6078        assert_eq!(heredoc.content, "body\n");
6079
6080        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6081    }
6082
6083    #[test]
6084    fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
6085        let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
6086        let mut lexer = Lexer::new(source);
6087
6088        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6089        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6090        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6091
6092        let heredoc = lexer.read_heredoc("EOF", false);
6093        assert_eq!(heredoc.content, "1\n");
6094
6095        assert_next_token(&mut lexer, TokenKind::Pipe, None);
6096        assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
6097    }
6098
6099    #[test]
6100    fn test_read_heredoc_with_redirect_preserves_following_spans() {
6101        let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
6102        let mut lexer = Lexer::new(source);
6103
6104        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6105        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6106        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6107
6108        let heredoc = lexer.read_heredoc("EOF", false);
6109        assert_eq!(heredoc.content, "hello\n");
6110
6111        let redirect = lexer.next_lexed_token_with_comments().unwrap();
6112        assert_eq!(redirect.kind, TokenKind::RedirectOut);
6113        assert_eq!(redirect.span.slice(source), ">");
6114
6115        let target = lexer.next_lexed_token_with_comments().unwrap();
6116        assert_eq!(target.kind, TokenKind::Word);
6117        assert_eq!(
6118            token_text(&target, lexer.input).as_deref(),
6119            Some("file.txt")
6120        );
6121        assert_eq!(target.span.slice(source), "file.txt");
6122
6123        let newline = lexer.next_lexed_token_with_comments().unwrap();
6124        assert_eq!(newline.kind, TokenKind::Newline);
6125        assert_eq!(newline.span.slice(source), "\n");
6126
6127        let comment = lexer.next_lexed_token_with_comments().unwrap();
6128        assert_eq!(comment.kind, TokenKind::Comment);
6129        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
6130        assert_eq!(comment.span.slice(source), "# done");
6131    }
6132
6133    #[test]
6134    fn test_comment_with_unicode() {
6135        // Comment containing multi-byte UTF-8 characters
6136        let source = "# café résumé\necho ok";
6137        let mut lexer = Lexer::new(source);
6138
6139        let comment = lexer.next_lexed_token_with_comments().unwrap();
6140        assert_eq!(comment.kind, TokenKind::Comment);
6141        assert_eq!(
6142            token_text(&comment, lexer.input).as_deref(),
6143            Some(" café résumé")
6144        );
6145        // Span should cover exactly the comment bytes (including #)
6146        let start = comment.span.start.offset;
6147        let end = comment.span.end.offset;
6148        assert_eq!(start, 0);
6149        assert_eq!(&source[start..end], "# café résumé");
6150        assert!(source.is_char_boundary(start));
6151        assert!(source.is_char_boundary(end));
6152
6153        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6154        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
6155    }
6156
6157    #[test]
6158    fn test_comment_with_cjk_characters() {
6159        // CJK characters are 3-byte UTF-8; offsets must land on char boundaries
6160        let source = "# 你好世界\necho ok";
6161        let mut lexer = Lexer::new(source);
6162
6163        let comment = lexer.next_lexed_token_with_comments().unwrap();
6164        assert_eq!(comment.kind, TokenKind::Comment);
6165        assert_eq!(
6166            token_text(&comment, lexer.input).as_deref(),
6167            Some(" 你好世界")
6168        );
6169        let start = comment.span.start.offset;
6170        let end = comment.span.end.offset;
6171        assert_eq!(&source[start..end], "# 你好世界");
6172        assert!(source.is_char_boundary(start));
6173        assert!(source.is_char_boundary(end));
6174    }
6175
6176    #[test]
6177    fn test_heredoc_with_comments_inside() {
6178        // Comments inside heredoc body should NOT appear as comment tokens
6179        let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
6180        let mut lexer = Lexer::new(source);
6181
6182        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6183        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6184        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6185
6186        let heredoc = lexer.read_heredoc("EOF", false);
6187        assert_eq!(heredoc.content, "# not a comment\nreal line\n");
6188
6189        // After heredoc, replayed line termination should appear before
6190        // tokens from following source lines.
6191        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6192        let comment = lexer.next_lexed_token_with_comments().unwrap();
6193        assert_eq!(comment.kind, TokenKind::Comment);
6194        assert_eq!(
6195            token_text(&comment, lexer.input).as_deref(),
6196            Some(" real comment")
6197        );
6198    }
6199
6200    #[test]
6201    fn test_heredoc_with_hash_in_variable() {
6202        // ${var#pattern} inside heredoc should not produce comment tokens
6203        let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
6204        let mut lexer = Lexer::new(source);
6205
6206        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6207        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6208        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6209
6210        let heredoc = lexer.read_heredoc("EOF", false);
6211        assert_eq!(heredoc.content, "val=${x#prefix}\n");
6212    }
6213
6214    #[test]
6215    fn test_heredoc_span_does_not_leak() {
6216        // Heredoc content span must be within source bounds and must not
6217        // overlap with content before or after.
6218        let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
6219        let mut lexer = Lexer::new(source);
6220
6221        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6222        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6223        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6224
6225        let heredoc = lexer.read_heredoc("EOF", false);
6226        let start = heredoc.content_span.start.offset;
6227        let end = heredoc.content_span.end.offset;
6228        assert!(
6229            end <= source.len(),
6230            "heredoc span end ({end}) exceeds source length ({})",
6231            source.len()
6232        );
6233        assert_eq!(&source[start..end], "hello\nworld\n");
6234
6235        // Tokens after heredoc should still parse correctly
6236        assert_next_token(&mut lexer, TokenKind::Newline, None);
6237        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6238        assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
6239    }
6240
6241    #[test]
6242    fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
6243        let source = "\
6244cat <<\\_ACEOF
6245Use these variables to override the choices made by `configure' or to help
6246it to find libraries and programs with nonstandard names/locations.
6247_ACEOF
6248ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
6249ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
6250";
6251        let mut lexer = Lexer::new(source);
6252
6253        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6254        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6255        let delimiter = lexer.next_lexed_token_with_comments().unwrap();
6256        assert_eq!(delimiter.kind, TokenKind::Word);
6257        assert_eq!(delimiter.span.slice(source), "\\_ACEOF");
6258
6259        let heredoc = lexer.read_heredoc("_ACEOF", false);
6260        assert_eq!(
6261            heredoc.content,
6262            "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
6263        );
6264
6265        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6266
6267        let first = lexer.next_lexed_token_with_comments().unwrap();
6268        assert_eq!(first.kind, TokenKind::Word);
6269        assert_eq!(
6270            first.span.slice(source),
6271            "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
6272        );
6273        let first_segments = first
6274            .word()
6275            .unwrap()
6276            .segments()
6277            .map(|segment| {
6278                (
6279                    segment.kind(),
6280                    segment.as_str().to_string(),
6281                    segment.span().map(|span| span.slice(source).to_string()),
6282                )
6283            })
6284            .collect::<Vec<_>>();
6285        assert_eq!(
6286            first_segments,
6287            vec![
6288                (
6289                    LexedWordSegmentKind::Plain,
6290                    "ac_dir_suffix=/".to_string(),
6291                    Some("ac_dir_suffix=/".to_string()),
6292                ),
6293                (
6294                    LexedWordSegmentKind::Plain,
6295                    "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
6296                    Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
6297                ),
6298            ]
6299        );
6300
6301        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6302
6303        let second = lexer.next_lexed_token_with_comments().unwrap();
6304        assert_eq!(second.kind, TokenKind::Word);
6305        assert_eq!(
6306            second.span.slice(source),
6307            "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6308        );
6309        let second_segments = second
6310            .word()
6311            .unwrap()
6312            .segments()
6313            .map(|segment| {
6314                (
6315                    segment.kind(),
6316                    segment.as_str().to_string(),
6317                    segment.span().map(|span| span.slice(source).to_string()),
6318                )
6319            })
6320            .collect::<Vec<_>>();
6321        assert_eq!(
6322            second_segments,
6323            vec![
6324                (
6325                    LexedWordSegmentKind::Plain,
6326                    "ac_top_builddir_sub=".to_string(),
6327                    Some("ac_top_builddir_sub=".to_string()),
6328                ),
6329                (
6330                    LexedWordSegmentKind::Plain,
6331                    "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
6332                    Some(
6333                        "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6334                            .to_string(),
6335                    ),
6336                ),
6337            ]
6338        );
6339    }
6340
6341    #[test]
6342    fn test_heredoc_with_unicode_content() {
6343        // Heredoc containing multi-byte characters; spans must be on char boundaries
6344        let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
6345        let mut lexer = Lexer::new(source);
6346
6347        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6348        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6349        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6350
6351        let heredoc = lexer.read_heredoc("EOF", false);
6352        assert_eq!(heredoc.content, "# 你好\ncafé\n");
6353        let start = heredoc.content_span.start.offset;
6354        let end = heredoc.content_span.end.offset;
6355        assert!(
6356            source.is_char_boundary(start),
6357            "heredoc span start ({start}) not on char boundary"
6358        );
6359        assert!(
6360            source.is_char_boundary(end),
6361            "heredoc span end ({end}) not on char boundary"
6362        );
6363        assert_eq!(&source[start..end], "# 你好\ncafé\n");
6364    }
6365
6366    #[test]
6367    fn test_assoc_compound_assignment() {
6368        // declare -A m=([foo]="bar" [baz]="qux") should keep the compound
6369        // assignment as a single Word token
6370        let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6371        assert_next_token(
6372            &mut lexer,
6373            TokenKind::Word,
6374            Some(r#"m=([foo]="bar" [baz]="qux")"#),
6375        );
6376        assert!(lexer.next_lexed_token().is_none());
6377    }
6378
6379    #[test]
6380    fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6381        let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6382        let mut lexer = Lexer::new(source);
6383
6384        let token = lexer.next_lexed_token().unwrap();
6385        assert_eq!(token.kind, TokenKind::Word);
6386        assert_eq!(token.span.slice(source), source);
6387        assert!(lexer.next_lexed_token().is_none());
6388    }
6389
6390    #[test]
6391    fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6392        let source = r#"foo\_bar@(baz|qux)"#;
6393        let mut lexer = Lexer::new(source);
6394
6395        let token = lexer.next_lexed_token().unwrap();
6396        assert_eq!(token.kind, TokenKind::Word);
6397        assert_eq!(token.span.slice(source), source);
6398        assert!(lexer.next_lexed_token().is_none());
6399    }
6400
6401    #[test]
6402    fn test_indexed_array_not_collapsed() {
6403        // arr=("hello world") should NOT be collapsed — parser handles
6404        // quoted elements token-by-token via the LeftParen path
6405        let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6406        assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6407        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6408    }
6409
6410    #[test]
6411    fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
6412        let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
6413        let mut lexer = Lexer::new(source);
6414
6415        assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
6416        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6417
6418        let token = lexer.next_lexed_token().unwrap();
6419        assert_eq!(token.kind, TokenKind::Word);
6420        assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);
6421
6422        let word = token.word().unwrap();
6423        let segments: Vec<_> = word
6424            .segments()
6425            .map(|segment| (segment.kind(), segment.as_str().to_string()))
6426            .collect();
6427        assert_eq!(
6428            segments,
6429            vec![
6430                (
6431                    LexedWordSegmentKind::DoubleQuoted,
6432                    "$plugin_dir".to_string()
6433                ),
6434                (LexedWordSegmentKind::Plain, "/*".to_string()),
6435                (LexedWordSegmentKind::Plain, "(:t)".to_string()),
6436            ]
6437        );
6438
6439        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6440        assert!(lexer.next_lexed_token().is_none());
6441    }
6442
6443    #[test]
6444    fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
6445        let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
6446        let mut lexer = Lexer::new(source);
6447
6448        assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
6449        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6450
6451        let token = lexer.next_lexed_token().unwrap();
6452        assert_eq!(token.kind, TokenKind::Word);
6453        assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);
6454
6455        let word = token.word().unwrap();
6456        let segments: Vec<_> = word
6457            .segments()
6458            .map(|segment| (segment.kind(), segment.as_str().to_string()))
6459            .collect();
6460        assert_eq!(
6461            segments,
6462            vec![
6463                (
6464                    LexedWordSegmentKind::DoubleQuoted,
6465                    "$__GREP_CACHE_FILE".to_string()
6466                ),
6467                (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
6468            ]
6469        );
6470
6471        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6472        assert!(lexer.next_lexed_token().is_none());
6473    }
6474
6475    #[test]
6476    fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6477        let source = r#"$dir/${~pats}(N)"#;
6478        let mut lexer = Lexer::new(source);
6479
6480        let token = lexer.next_lexed_token().unwrap();
6481        assert_eq!(token.kind, TokenKind::Word);
6482        assert_eq!(token.span.slice(source), source);
6483        assert!(lexer.next_lexed_token().is_none());
6484    }
6485
6486    #[test]
6487    fn test_dollar_word_does_not_absorb_function_parens() {
6488        let mut lexer = Lexer::new(r#"foo$x()"#);
6489
6490        assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6491        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6492        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6493        assert!(lexer.next_lexed_token().is_none());
6494    }
6495
6496    #[test]
6497    fn test_command_substitution_word_does_not_absorb_function_parens() {
6498        let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6499
6500        assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6501        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6502        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6503        assert!(lexer.next_lexed_token().is_none());
6504    }
6505
6506    /// Regression test for fuzz crash: single digit at EOF should not panic
6507    /// (crash-13c5f6f887a11b2296d67f9857975d63b205ac4b)
6508    #[test]
6509    fn test_digit_at_eof_no_panic() {
6510        // A lone digit with no following redirect operator must not panic
6511        let mut lexer = Lexer::new("2");
6512        let token = lexer.next_lexed_token();
6513        assert!(token.is_some());
6514    }
6515
6516    /// Issue #599: Nested ${...} inside unquoted ${...} must be a single token.
6517    #[test]
6518    fn test_nested_brace_expansion_single_token() {
6519        // ${arr[${#arr[@]} - 1]} should be ONE word token, not split at inner }
6520        let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6521        assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6522        // No more tokens — everything was consumed
6523        assert!(lexer.next_lexed_token().is_none());
6524    }
6525
6526    /// Simple ${var} still works after brace depth change.
6527    #[test]
6528    fn test_simple_brace_expansion_unchanged() {
6529        let mut lexer = Lexer::new("${foo}");
6530        assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6531        assert!(lexer.next_lexed_token().is_none());
6532    }
6533
6534    #[test]
6535    fn test_nvm_fixture_lexes_without_stalling() {
6536        let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6537        let mut lexer = Lexer::new(input);
6538        let mut tokens = 0usize;
6539
6540        while lexer.next_lexed_token().is_some() {
6541            tokens += 1;
6542            assert!(
6543                tokens < 100_000,
6544                "lexer should continue making progress on the nvm fixture"
6545            );
6546        }
6547
6548        assert!(tokens > 0, "nvm fixture should produce at least one token");
6549    }
6550
6551    #[test]
6552    fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
6553        let input = concat!(
6554            "case \"${_input_type:-}\" in\n",
6555            "  html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
6556            "  org)  _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
6557            "esac\n",
6558        );
6559
6560        assert_non_newline_tokens_stay_on_one_line(input);
6561
6562        let mut lexer = Lexer::new(input);
6563        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6564            .map(|token| (token.kind, token_text(&token, input)))
6565            .collect::<Vec<_>>();
6566        assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
6567        assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
6568    }
6569
6570    #[test]
6571    fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6572        let input = concat!(
6573            "case $2 in\n",
6574            "  cygwin*) bin='cygwin32/bin' ;|\n",
6575            "esac\n",
6576        );
6577
6578        let mut lexer = Lexer::new(input);
6579        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6580            .map(|token| (token.kind, token_text(&token, input)))
6581            .collect::<Vec<_>>();
6582
6583        assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6584        assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6585        assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6586    }
6587
6588    #[test]
6589    fn test_inline_if_with_array_append_stays_line_local() {
6590        let input = concat!(
6591            "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
6592            "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
6593        );
6594
6595        assert_non_newline_tokens_stay_on_one_line(input);
6596    }
6597
6598    #[test]
6599    fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6600        let source = "unsetopt interactive_comments\n#literal\n";
6601        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6602        let mut lexer = Lexer::with_profile(source, &profile);
6603
6604        assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6605        assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6606        assert_next_token(&mut lexer, TokenKind::Newline, None);
6607        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6608    }
6609
6610    #[test]
6611    fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
6612        let source = "setopt rc_quotes\nprint 'a''b'\n";
6613        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6614        let mut lexer = Lexer::with_profile(source, &profile);
6615
6616        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6617        assert_next_token(&mut lexer, TokenKind::Word, Some("rc_quotes"));
6618        assert_next_token(&mut lexer, TokenKind::Newline, None);
6619        assert_next_token(&mut lexer, TokenKind::Word, Some("print"));
6620        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("a'b"));
6621    }
6622
6623    #[test]
6624    fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
6625        let source = "setopt ignore_braces\n{ echo }\n";
6626        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6627        let mut lexer = Lexer::with_profile(source, &profile);
6628
6629        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6630        assert_next_token(&mut lexer, TokenKind::Word, Some("ignore_braces"));
6631        assert_next_token(&mut lexer, TokenKind::Newline, None);
6632        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
6633        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6634        assert_next_token(&mut lexer, TokenKind::Word, Some("}"));
6635    }
6636
6637    #[test]
6638    fn test_heredoc_in_arithmetic_fuzz_crash() {
6639        // Regression test: the fuzzer found that heredoc re-injection inside
6640        // arithmetic context can push self.offset past self.input.len(),
6641        // causing a panic in read_unquoted_segment's borrowed-slice path.
6642        let data: &[u8] = &[
6643            35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
6644            40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
6645            33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
6646            119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
6647            0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
6648            119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
6649            122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
6650            122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
6651            122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
6652            49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
6653            119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
6654            122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
6655            61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
6656            32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
6657            122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
6658            39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
6659            122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
6660            32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
6661            0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
6662        ];
6663        let input = std::str::from_utf8(data).unwrap();
6664        let script = format!("echo $(({input}))\n");
6665        // Must not panic.
6666        let _ = crate::parser::Parser::new(&script).parse();
6667    }
6668}
shuck_parser/parser/lexer.rs

shuck_parser/parser/
lexer.rs