shuck_parser/parser/
lexer.rs

1//! Lexer for bash scripts
2//!
3//! Tokenizes input into a stream of tokens with source position tracking.
4
5use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Small bit set attached to every lexed token.
///
/// Two independent bits are tracked: whether the token carries cooked text and
/// whether the token was marked synthetic.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Bit set by [`TokenFlags::cooked_text`].
    const COOKED_TEXT: u8 = 0b01;
    /// Bit set by [`TokenFlags::with_synthetic`].
    const SYNTHETIC: u8 = 0b10;

    /// Flags with no bits set.
    const fn empty() -> Self {
        Self(0)
    }

    /// Flags with only the cooked-text bit set.
    const fn cooked_text() -> Self {
        Self::empty().with_bit(Self::COOKED_TEXT)
    }

    /// Return a copy of these flags with the synthetic bit also set.
    pub(crate) const fn with_synthetic(self) -> Self {
        self.with_bit(Self::SYNTHETIC)
    }

    /// Whether the cooked-text bit is set.
    pub(crate) const fn has_cooked_text(self) -> bool {
        self.has_bit(Self::COOKED_TEXT)
    }

    /// Whether the synthetic bit is set.
    pub(crate) const fn is_synthetic(self) -> bool {
        self.has_bit(Self::SYNTHETIC)
    }

    /// Copy of `self` with `bit` or-ed in.
    const fn with_bit(self, bit: u8) -> Self {
        Self(self.0 | bit)
    }

    /// Whether any bit of `bit` is present in `self`.
    const fn has_bit(self, bit: u8) -> bool {
        self.0 & bit != 0
    }
}
40
41#[derive(Debug, Clone, PartialEq, Eq)]
42pub(crate) enum TokenText<'a> {
43    Borrowed(&'a str),
44    Shared {
45        source: Arc<str>,
46        range: Range<usize>,
47    },
48    Owned(String),
49}
50
51impl TokenText<'_> {
52    pub(crate) fn as_str(&self) -> &str {
53        match self {
54            Self::Borrowed(text) => text,
55            Self::Shared { source, range } => &source[range.clone()],
56            Self::Owned(text) => text,
57        }
58    }
59
60    fn into_owned<'a>(self) -> TokenText<'a> {
61        match self {
62            Self::Borrowed(text) => TokenText::Owned(text.to_string()),
63            Self::Shared { source, range } => TokenText::Shared { source, range },
64            Self::Owned(text) => TokenText::Owned(text),
65        }
66    }
67
68    fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
69        match self {
70            Self::Borrowed(text) => span
71                .filter(|span| span.end.offset <= source.len())
72                .map_or_else(
73                    || TokenText::Owned(text.to_string()),
74                    |span| TokenText::Shared {
75                        source: Arc::clone(source),
76                        range: span.start.offset..span.end.offset,
77                    },
78                ),
79            Self::Shared { source, range } => TokenText::Shared { source, range },
80            Self::Owned(text) => TokenText::Owned(text),
81        }
82    }
83}
84
/// Classification of one segment inside a lexed shell word.
///
/// The kind records which lexical form the segment's text came from; it does
/// not carry the text itself.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LexedWordSegmentKind {
    /// Unquoted or otherwise plain text.
    Plain,
    /// Text from a single-quoted string.
    SingleQuoted,
    /// Text from a `$'...'` string.
    DollarSingleQuoted,
    /// Text from a double-quoted string.
    DoubleQuoted,
    /// Text from a `$"..."` string.
    DollarDoubleQuoted,
    /// Text composed from multiple lexical forms.
    Composite,
}
101
102/// One segment of a lexed shell word, optionally backed by source text.
103#[derive(Debug, Clone, PartialEq, Eq)]
104pub(crate) struct LexedWordSegment<'a> {
105    kind: LexedWordSegmentKind,
106    text: TokenText<'a>,
107    span: Option<Span>,
108    wrapper_span: Option<Span>,
109}
110
111impl<'a> LexedWordSegment<'a> {
112    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
113        Self {
114            kind,
115            text: TokenText::Borrowed(text),
116            span,
117            wrapper_span: span,
118        }
119    }
120
121    fn borrowed_with_spans(
122        kind: LexedWordSegmentKind,
123        text: &'a str,
124        span: Option<Span>,
125        wrapper_span: Option<Span>,
126    ) -> Self {
127        Self {
128            kind,
129            text: TokenText::Borrowed(text),
130            span,
131            wrapper_span,
132        }
133    }
134
135    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
136        Self {
137            kind,
138            text: TokenText::Owned(text),
139            span: None,
140            wrapper_span: None,
141        }
142    }
143
144    fn owned_with_spans(
145        kind: LexedWordSegmentKind,
146        text: String,
147        span: Option<Span>,
148        wrapper_span: Option<Span>,
149    ) -> Self {
150        Self {
151            kind,
152            text: TokenText::Owned(text),
153            span,
154            wrapper_span,
155        }
156    }
157
158    /// Borrow this segment's cooked text.
159    pub(crate) fn as_str(&self) -> &str {
160        self.text.as_str()
161    }
162
163    pub(crate) const fn text_is_source_backed(&self) -> bool {
164        matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
165    }
166
167    /// Return the lexical classification of this segment.
168    pub(crate) const fn kind(&self) -> LexedWordSegmentKind {
169        self.kind
170    }
171
172    /// Return the span of the inner text, if it is tracked.
173    pub(crate) const fn span(&self) -> Option<Span> {
174        self.span
175    }
176
177    /// Return the span including surrounding quoting syntax when available.
178    pub(crate) fn wrapper_span(&self) -> Option<Span> {
179        self.wrapper_span.or(self.span)
180    }
181
182    fn rebased(mut self, base: Position) -> Self {
183        self.span = self.span.map(|span| span.rebased(base));
184        self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
185        self
186    }
187
188    fn into_owned<'b>(self) -> LexedWordSegment<'b> {
189        LexedWordSegment {
190            kind: self.kind,
191            text: self.text.into_owned(),
192            span: self.span,
193            wrapper_span: self.wrapper_span,
194        }
195    }
196
197    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
198        LexedWordSegment {
199            kind: self.kind,
200            text: self.text.into_shared(source, self.span),
201            span: self.span,
202            wrapper_span: self.wrapper_span,
203        }
204    }
205}
206
207/// Source-backed representation of a shell word produced by the lexer.
208#[derive(Debug, Clone, PartialEq, Eq)]
209pub(crate) struct LexedWord<'a> {
210    primary_segment: LexedWordSegment<'a>,
211    trailing_segments: Vec<LexedWordSegment<'a>>,
212}
213
214impl<'a> LexedWord<'a> {
215    fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
216        Self {
217            primary_segment,
218            trailing_segments: Vec::new(),
219        }
220    }
221
222    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
223        Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
224    }
225
226    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
227        Self::from_segment(LexedWordSegment::owned(kind, text))
228    }
229
230    fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
231        self.trailing_segments.push(segment);
232    }
233
234    /// Iterate over the segments that make up this word.
235    pub(crate) fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
236        std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
237    }
238
239    /// Return the word text when it is represented by a single segment.
240    pub(crate) fn text(&self) -> Option<&str> {
241        self.single_segment().map(LexedWordSegment::as_str)
242    }
243
244    /// Join all segments into an owned string.
245    pub(crate) fn joined_text(&self) -> String {
246        let mut text = String::new();
247        for segment in self.segments() {
248            text.push_str(segment.as_str());
249        }
250        text
251    }
252
253    /// Return the only segment when this word is not segmented.
254    pub(crate) fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
255        self.trailing_segments
256            .is_empty()
257            .then_some(&self.primary_segment)
258    }
259
260    fn has_cooked_text(&self) -> bool {
261        self.segments()
262            .any(|segment| matches!(segment.text, TokenText::Owned(_)))
263    }
264
265    fn rebased(mut self, base: Position) -> Self {
266        self.primary_segment = self.primary_segment.rebased(base);
267        self.trailing_segments = self
268            .trailing_segments
269            .into_iter()
270            .map(|segment| segment.rebased(base))
271            .collect();
272        self
273    }
274
275    fn into_owned<'b>(self) -> LexedWord<'b> {
276        LexedWord {
277            primary_segment: self.primary_segment.into_owned(),
278            trailing_segments: self
279                .trailing_segments
280                .into_iter()
281                .map(LexedWordSegment::into_owned)
282                .collect(),
283        }
284    }
285
286    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
287        LexedWord {
288            primary_segment: self.primary_segment.into_shared(source),
289            trailing_segments: self
290                .trailing_segments
291                .into_iter()
292                .map(|segment| segment.into_shared(source))
293                .collect(),
294        }
295    }
296}
297
/// Reason attached to a `TokenKind::Error` token.
///
/// Every variant describes a construct that was opened but never closed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LexerErrorKind {
    /// Unterminated `$()` command substitution.
    CommandSubstitution,
    /// Unterminated backtick command substitution.
    BacktickSubstitution,
    /// Unterminated single-quoted string.
    SingleQuote,
    /// Unterminated double-quoted string.
    DoubleQuote,
}

impl LexerErrorKind {
    /// Static, human-readable description of this error kind.
    pub(crate) const fn message(self) -> &'static str {
        let description = match self {
            LexerErrorKind::SingleQuote => "unterminated single quote",
            LexerErrorKind::DoubleQuote => "unterminated double quote",
            LexerErrorKind::BacktickSubstitution => "unterminated backtick substitution",
            LexerErrorKind::CommandSubstitution => "unterminated command substitution",
        };
        description
    }
}
322
/// Extra data carried by a `LexedToken` alongside its kind and span.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenPayload<'a> {
    /// The token carries nothing beyond its kind and span.
    None,
    /// Structured word text, stored as one or more segments.
    Word(LexedWord<'a>),
    /// A single file-descriptor number.
    Fd(i32),
    /// A pair of file-descriptor numbers, exposed elsewhere in this file as
    /// `(source_fd, target_fd)`.
    FdPair(i32, i32),
    /// The reason the lexer produced an error token.
    Error(LexerErrorKind),
}
331
332/// Token produced by the shell lexer.
333///
334/// Public consumers can inspect the token kind and source span. Word payloads,
335/// descriptor payloads, and lexer recovery details are currently parser-internal
336/// so the lexer can evolve without expanding the public API.
337#[derive(Debug, Clone, PartialEq, Eq)]
338pub struct LexedToken<'a> {
339    /// Token kind used by the parser.
340    pub kind: TokenKind,
341    /// Source span covered by the token.
342    pub span: Span,
343    pub(crate) flags: TokenFlags,
344    payload: TokenPayload<'a>,
345}
346
347impl<'a> LexedToken<'a> {
348    fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
349        match kind {
350            TokenKind::Word => LexedWordSegmentKind::Plain,
351            TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
352            TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
353            _ => LexedWordSegmentKind::Composite,
354        }
355    }
356
357    pub(crate) fn punctuation(kind: TokenKind) -> Self {
358        Self {
359            kind,
360            span: Span::new(),
361            flags: TokenFlags::empty(),
362            payload: TokenPayload::None,
363        }
364    }
365
366    fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
367        let flags = if word.has_cooked_text() {
368            TokenFlags::cooked_text()
369        } else {
370            TokenFlags::empty()
371        };
372
373        Self {
374            kind,
375            span: Span::new(),
376            flags,
377            payload: TokenPayload::Word(word),
378        }
379    }
380
381    fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
382        Self::with_word_payload(
383            kind,
384            LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
385        )
386    }
387
388    fn owned_word(kind: TokenKind, text: String) -> Self {
389        Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
390    }
391
392    fn comment() -> Self {
393        Self {
394            kind: TokenKind::Comment,
395            span: Span::new(),
396            flags: TokenFlags::empty(),
397            payload: TokenPayload::None,
398        }
399    }
400
401    fn fd(kind: TokenKind, fd: i32) -> Self {
402        Self {
403            kind,
404            span: Span::new(),
405            flags: TokenFlags::empty(),
406            payload: TokenPayload::Fd(fd),
407        }
408    }
409
410    fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
411        Self {
412            kind,
413            span: Span::new(),
414            flags: TokenFlags::empty(),
415            payload: TokenPayload::FdPair(src_fd, dst_fd),
416        }
417    }
418
419    fn error(kind: LexerErrorKind) -> Self {
420        Self {
421            kind: TokenKind::Error,
422            span: Span::new(),
423            flags: TokenFlags::empty(),
424            payload: TokenPayload::Error(kind),
425        }
426    }
427
428    pub(crate) fn with_span(mut self, span: Span) -> Self {
429        self.span = span;
430        self
431    }
432
433    pub(crate) fn rebased(mut self, base: Position) -> Self {
434        self.span = self.span.rebased(base);
435        self.payload = match self.payload {
436            TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
437            payload => payload,
438        };
439        self
440    }
441
442    pub(crate) fn with_synthetic_flag(mut self) -> Self {
443        self.flags = self.flags.with_synthetic();
444        self
445    }
446
447    pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
448        let payload = match self.payload {
449            TokenPayload::None => TokenPayload::None,
450            TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
451            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
452            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
453            TokenPayload::Error(kind) => TokenPayload::Error(kind),
454        };
455
456        LexedToken {
457            kind: self.kind,
458            span: self.span,
459            flags: self.flags,
460            payload,
461        }
462    }
463
464    pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
465        let payload = match self.payload {
466            TokenPayload::None => TokenPayload::None,
467            TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
468            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
469            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
470            TokenPayload::Error(kind) => TokenPayload::Error(kind),
471        };
472
473        LexedToken {
474            kind: self.kind,
475            span: self.span,
476            flags: self.flags,
477            payload,
478        }
479    }
480
481    /// Borrow the token text when it is a single-segment word token.
482    pub(crate) fn word_text(&self) -> Option<&str> {
483        self.kind
484            .is_word_like()
485            .then_some(())
486            .and_then(|_| match &self.payload {
487                TokenPayload::Word(word) => word.text(),
488                _ => None,
489            })
490    }
491
492    /// Return an owned string containing the token's word text.
493    pub(crate) fn word_string(&self) -> Option<String> {
494        self.kind
495            .is_word_like()
496            .then_some(())
497            .and_then(|_| match &self.payload {
498                TokenPayload::Word(word) => Some(word.joined_text()),
499                _ => None,
500            })
501    }
502
503    /// Borrow the structured word payload for word-like tokens.
504    pub(crate) fn word(&self) -> Option<&LexedWord<'a>> {
505        match &self.payload {
506            TokenPayload::Word(word) => Some(word),
507            _ => None,
508        }
509    }
510
511    /// Borrow the original source slice when the token is source-backed and uncooked.
512    pub(crate) fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
513        if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
514            return None;
515        }
516
517        (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
518            .then(|| &source[self.span.start.offset..self.span.end.offset])
519    }
520
521    /// Return the file-descriptor payload for redirection tokens that carry one.
522    pub(crate) fn fd_value(&self) -> Option<i32> {
523        match self.payload {
524            TokenPayload::Fd(fd) => Some(fd),
525            _ => None,
526        }
527    }
528
529    /// Return the `(source_fd, target_fd)` payload for descriptor-pair redirections.
530    pub(crate) fn fd_pair_value(&self) -> Option<(i32, i32)> {
531        match self.payload {
532            TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
533            _ => None,
534        }
535    }
536
537    /// Return the lexer error payload when this token represents `TokenKind::Error`.
538    pub(crate) fn error_kind(&self) -> Option<LexerErrorKind> {
539        match self.payload {
540            TokenPayload::Error(kind) => Some(kind),
541            _ => None,
542        }
543    }
544}
545
/// Result of reading a heredoc body from the source.
///
/// Pairs the body text with the span it occupied so callers can report
/// positions inside the heredoc.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct HeredocRead {
    /// Decoded heredoc content.
    pub content: String,
    /// Source span covering the heredoc body content.
    pub content_span: Span,
}
554
/// Maximum nesting depth for command substitution in the lexer.
/// Prevents stack overflow from deeply nested $() patterns.
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
/// Depth limit for parameter-expansion scanning.
// NOTE(review): the code that uses this constant is outside this part of the
// file; presumably it bounds nested `${...}` lookahead. Confirm at the use site.
const MAX_PARAMETER_EXPANSION_SCAN_DEPTH: usize = 4;
559
560#[derive(Clone, Debug)]
561struct Cursor<'a> {
562    rest: &'a str,
563}
564
565impl<'a> Cursor<'a> {
566    fn new(source: &'a str) -> Self {
567        Self { rest: source }
568    }
569
570    fn first(&self) -> Option<char> {
571        self.rest.chars().next()
572    }
573
574    fn second(&self) -> Option<char> {
575        let mut chars = self.rest.chars();
576        chars.next()?;
577        chars.next()
578    }
579
580    fn third(&self) -> Option<char> {
581        let mut chars = self.rest.chars();
582        chars.next()?;
583        chars.next()?;
584        chars.next()
585    }
586
587    fn bump(&mut self) -> Option<char> {
588        let ch = self.first()?;
589        self.rest = &self.rest[ch.len_utf8()..];
590        Some(ch)
591    }
592
593    fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
594        let start = self.rest;
595        let mut end = 0;
596
597        for ch in start.chars() {
598            if !predicate(ch) {
599                break;
600            }
601            end += ch.len_utf8();
602        }
603
604        self.rest = &start[end..];
605        &start[..end]
606    }
607
608    fn rest(&self) -> &'a str {
609        self.rest
610    }
611
612    fn skip_bytes(&mut self, count: usize) {
613        self.rest = &self.rest[count..];
614    }
615
616    fn find_byte(&self, byte: u8) -> Option<usize> {
617        memchr(byte, self.rest.as_bytes())
618    }
619}
620
/// Converts byte offsets in `source` into line/column positions.
#[derive(Clone, Debug)]
struct PositionMap<'a> {
    /// Text the offsets refer to.
    source: &'a str,
    /// Byte offset at which each line begins; the first entry is always 0.
    line_starts: Arc<[usize]>,
    /// Most recently computed position, reused to speed up forward lookups.
    cached: Position,
}
627
/// Counters collected by the lexer when the `benchmarking` feature is enabled.
#[cfg(feature = "benchmarking")]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct LexerBenchmarkCounters {
    /// Number of times the lexer's `current_position` was called while
    /// counters were enabled.
    pub(crate) current_position_calls: u64,
}
633
634impl<'a> PositionMap<'a> {
635    fn new(source: &'a str) -> Self {
636        let mut line_starts =
637            Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
638        line_starts.push(0);
639        line_starts.extend(
640            source
641                .bytes()
642                .enumerate()
643                .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
644        );
645
646        Self {
647            source,
648            line_starts: line_starts.into(),
649            cached: Position::new(),
650        }
651    }
652
653    fn position(&mut self, offset: usize) -> Position {
654        if offset == self.cached.offset {
655            return self.cached;
656        }
657
658        let position = if offset > self.cached.offset && offset <= self.source.len() {
659            Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
660        } else {
661            self.position_uncached(offset)
662        };
663        self.cached = position;
664        position
665    }
666
667    fn position_uncached(&self, offset: usize) -> Position {
668        let offset = offset.min(self.source.len());
669        let line_index = self
670            .line_starts
671            .partition_point(|start| *start <= offset)
672            .saturating_sub(1);
673        let line_start = self.line_starts[line_index];
674        let line_text = &self.source[line_start..offset];
675        let column = if line_text.is_ascii() {
676            line_text.len() + 1
677        } else {
678            line_text.chars().count() + 1
679        };
680
681        Position {
682            line: line_index + 1,
683            column,
684            offset,
685        }
686    }
687
688    fn advance_from(mut position: Position, text: &str) -> Position {
689        position.offset += text.len();
690        let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
691        if newline_count == 0 {
692            position.column += if text.is_ascii() {
693                text.len()
694            } else {
695                text.chars().count()
696            };
697            return position;
698        }
699
700        position.line += newline_count;
701        let tail_start = memrchr(b'\n', text.as_bytes())
702            .map(|index| index + 1)
703            .unwrap_or_default();
704        let tail = &text[tail_start..];
705        position.column = if tail.is_ascii() {
706            tail.len() + 1
707        } else {
708            tail.chars().count() + 1
709        };
710        position
711    }
712}
713
/// Source-backed lexer for shell scripts.
///
/// The public lexer surface is intended for lower-level tooling and
/// benchmarks. It tokenizes using the default bash profile; use the parser
/// constructors when dialect or zsh option state matters.
#[derive(Clone)]
pub struct Lexer<'a> {
    // NOTE(review): methods in this file read `input` (for example when
    // capturing word text), so this `allow` looks stale. Confirm and remove.
    #[allow(dead_code)] // Stored for error reporting in future
    input: &'a str,
    /// Current byte offset in the input/reinjected stream.
    offset: usize,
    /// Unconsumed tail of `input`.
    cursor: Cursor<'a>,
    /// Translates byte offsets into line/column positions.
    position_map: PositionMap<'a>,
    /// Buffer for re-injected characters (e.g., rest-of-line after heredoc delimiter).
    /// Consumed before `cursor`.
    reinject_buf: VecDeque<char>,
    /// Cursor byte offset to restore once a heredoc replay buffer is exhausted.
    reinject_resume_offset: Option<usize>,
    /// Maximum allowed nesting depth for command substitution
    max_subst_depth: usize,
    /// Zsh option state taken from the shell profile at construction; used
    /// when no timeline entry applies yet.
    initial_zsh_options: Option<ZshOptionState>,
    /// Optional list of zsh option states keyed by source offset.
    zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    /// Number of timeline entries whose offset has already been reached.
    zsh_timeline_index: usize,
    /// Benchmark counters; `None` until explicitly enabled.
    #[cfg(feature = "benchmarking")]
    benchmark_counters: Option<LexerBenchmarkCounters>,
}
740
741impl<'a> Lexer<'a> {
742    /// Create a new bash-profile lexer for the given input.
743    pub fn new(input: &'a str) -> Self {
744        Self::with_max_subst_depth_and_profile(
745            input,
746            DEFAULT_MAX_SUBST_DEPTH,
747            &ShellProfile::native(super::ShellDialect::Bash),
748            None,
749        )
750    }
751
752    /// Create a new lexer with a custom max substitution nesting depth.
753    /// Limits recursion in read_command_subst_into().
754    pub(super) fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
755        Self::with_max_subst_depth_and_profile(
756            input,
757            max_depth,
758            &ShellProfile::native(super::ShellDialect::Bash),
759            None,
760        )
761    }
762
763    /// Create a new lexer using the provided shell profile.
764    #[cfg(test)]
765    fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
766        let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
767            .then(|| ZshOptionTimeline::build(input, shell_profile))
768            .flatten()
769            .map(Arc::new);
770        Self::with_max_subst_depth_and_profile(
771            input,
772            DEFAULT_MAX_SUBST_DEPTH,
773            shell_profile,
774            zsh_timeline,
775        )
776    }
777
778    pub(crate) fn with_max_subst_depth_and_profile(
779        input: &'a str,
780        max_depth: usize,
781        shell_profile: &ShellProfile,
782        zsh_timeline: Option<Arc<ZshOptionTimeline>>,
783    ) -> Self {
784        Self {
785            input,
786            offset: 0,
787            cursor: Cursor::new(input),
788            position_map: PositionMap::new(input),
789            reinject_buf: VecDeque::new(),
790            reinject_resume_offset: None,
791            max_subst_depth: max_depth,
792            initial_zsh_options: shell_profile.zsh_options().cloned(),
793            zsh_timeline,
794            zsh_timeline_index: 0,
795            #[cfg(feature = "benchmarking")]
796            benchmark_counters: None,
797        }
798    }
799
800    pub(super) fn position_at_offset(&self, offset: usize) -> Position {
801        self.position_map.position_uncached(offset)
802    }
803
804    fn current_position(&mut self) -> Position {
805        #[cfg(feature = "benchmarking")]
806        self.maybe_record_current_position_call();
807        self.position_map.position(self.offset)
808    }
809
810    #[cfg(feature = "benchmarking")]
811    pub(crate) fn enable_benchmark_counters(&mut self) {
812        self.benchmark_counters = Some(LexerBenchmarkCounters::default());
813    }
814
815    #[cfg(feature = "benchmarking")]
816    pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
817        self.benchmark_counters.unwrap_or_default()
818    }
819
820    #[cfg(feature = "benchmarking")]
821    fn maybe_record_current_position_call(&mut self) {
822        if let Some(counters) = &mut self.benchmark_counters {
823            counters.current_position_calls += 1;
824        }
825    }
826
827    fn sync_offset_to_cursor(&mut self) {
828        if self.reinject_buf.is_empty()
829            && let Some(offset) = self.reinject_resume_offset.take()
830        {
831            self.offset = offset;
832        }
833    }
834
835    /// Get the next token kind from the input.
836    ///
837    /// This skips whitespace and line comments, matching
838    /// [`Lexer::next_lexed_token`]. It is useful for callers that only need the
839    /// token stream shape.
840    pub fn next_token_kind(&mut self) -> Option<TokenKind> {
841        self.next_lexed_token().map(|token| token.kind)
842    }
843
844    fn peek_char(&mut self) -> Option<char> {
845        self.sync_offset_to_cursor();
846        if let Some(&ch) = self.reinject_buf.front() {
847            Some(ch)
848        } else {
849            self.cursor.first()
850        }
851    }
852
853    fn advance(&mut self) -> Option<char> {
854        self.sync_offset_to_cursor();
855        let ch = if !self.reinject_buf.is_empty() {
856            self.reinject_buf.pop_front()
857        } else {
858            self.cursor.bump()
859        };
860        if let Some(c) = ch {
861            self.offset += c.len_utf8();
862        }
863        ch
864    }
865
866    fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
867        self.reinject_buf
868            .iter()
869            .copied()
870            .chain(self.cursor.rest().chars())
871    }
872
873    fn second_char(&self) -> Option<char> {
874        match self.reinject_buf.len() {
875            0 => self.cursor.second(),
876            1 => self.cursor.first(),
877            _ => self.reinject_buf.get(1).copied(),
878        }
879    }
880
881    fn third_char(&self) -> Option<char> {
882        match self.reinject_buf.len() {
883            0 => self.cursor.third(),
884            1 => self.cursor.second(),
885            2 => self.cursor.first(),
886            _ => self.reinject_buf.get(2).copied(),
887        }
888    }
889
890    fn fourth_char(&self) -> Option<char> {
891        match self.reinject_buf.len() {
892            0 => self.cursor.rest().chars().nth(3),
893            1 => self.cursor.third(),
894            2 => self.cursor.second(),
895            3 => self.cursor.first(),
896            _ => self.reinject_buf.get(3).copied(),
897        }
898    }
899
    /// Consume `byte_len` bytes directly from the source cursor, advancing
    /// `offset` by the same amount.
    ///
    /// Must only be called while no reinjected characters are pending (checked
    /// in debug builds).
    fn consume_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        // Restore any saved resume offset before adding to `offset`.
        self.sync_offset_to_cursor();
        self.offset += byte_len;
        self.cursor.skip_bytes(byte_len);
    }
906
    /// Advance `offset` by `byte_len` without moving the cursor.
    ///
    /// Must only be called while no reinjected characters are pending (checked
    /// in debug builds).
    // NOTE(review): presumably the caller has already moved the cursor past
    // these bytes by other means; confirm at the call sites.
    fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.offset += byte_len;
    }
911
912    fn consume_ascii_chars(&mut self, count: usize) {
913        if self.reinject_buf.is_empty() {
914            self.consume_source_bytes(count);
915            return;
916        }
917
918        for _ in 0..count {
919            self.advance();
920        }
921    }
922
923    fn source_horizontal_whitespace_len(&self) -> usize {
924        self.cursor
925            .rest()
926            .as_bytes()
927            .iter()
928            .take_while(|byte| matches!(**byte, b' ' | b'\t'))
929            .count()
930    }
931
932    fn source_ascii_plain_word_len(&self) -> usize {
933        self.cursor
934            .rest()
935            .as_bytes()
936            .iter()
937            .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
938            .count()
939    }
940
941    fn find_double_quote_special(source: &str) -> Option<usize> {
942        source
943            .as_bytes()
944            .iter()
945            .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
946    }
947
948    fn ensure_capture_from_source(
949        &self,
950        capture: &mut Option<String>,
951        start: Position,
952        end: Position,
953    ) {
954        if capture.is_none() {
955            *capture = Some(self.input[start.offset..end.offset].to_string());
956        }
957    }
958
959    fn push_capture_char(capture: &mut Option<String>, ch: char) {
960        if let Some(text) = capture.as_mut() {
961            text.push(ch);
962        }
963    }
964
965    fn push_capture_str(capture: &mut Option<String>, text: &str) {
966        if let Some(current) = capture.as_mut() {
967            current.push_str(text);
968        }
969    }
970
971    fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
972        if let Some(timeline) = self.zsh_timeline.as_ref() {
973            while self.zsh_timeline_index < timeline.entries.len()
974                && timeline.entries[self.zsh_timeline_index].offset <= self.offset
975            {
976                self.zsh_timeline_index += 1;
977            }
978            return if self.zsh_timeline_index == 0 {
979                self.initial_zsh_options.as_ref()
980            } else {
981                Some(&timeline.entries[self.zsh_timeline_index - 1].state)
982            };
983        }
984
985        self.initial_zsh_options.as_ref()
986    }
987
988    fn comments_enabled(&mut self) -> bool {
989        !self
990            .current_zsh_options()
991            .is_some_and(|options| options.interactive_comments.is_definitely_off())
992    }
993
994    fn rc_quotes_enabled(&mut self) -> bool {
995        self.current_zsh_options()
996            .is_some_and(|options| options.rc_quotes.is_definitely_on())
997    }
998
999    fn ignore_braces_enabled(&mut self) -> bool {
1000        self.current_zsh_options()
1001            .is_some_and(|options| options.ignore_braces.is_definitely_on())
1002    }
1003
1004    fn ignore_close_braces_enabled(&mut self) -> bool {
1005        self.current_zsh_options().is_some_and(|options| {
1006            options.ignore_braces.is_definitely_on()
1007                || options.ignore_close_braces.is_definitely_on()
1008        })
1009    }
1010
1011    fn should_treat_hash_as_word_char(&mut self) -> bool {
1012        if !self.comments_enabled() {
1013            return true;
1014        }
1015        self.reinject_buf.is_empty()
1016            && (self
1017                .input
1018                .get(..self.offset)
1019                .and_then(|prefix| prefix.chars().next_back())
1020                .is_some_and(|prev| {
1021                    !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
1022                })
1023                || self.is_inside_unclosed_double_paren_on_line())
1024    }
1025
1026    fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
1027        capture
1028            .as_deref()
1029            .unwrap_or(&self.input[start.offset..self.offset])
1030    }
1031
1032    fn current_word_surface_is_single_char(
1033        &self,
1034        start: Position,
1035        capture: &Option<String>,
1036        target: char,
1037    ) -> bool {
1038        let text = self.current_word_text(start, capture);
1039        if !text.contains('\x00') {
1040            let mut encoded = [0; 4];
1041            return text == target.encode_utf8(&mut encoded);
1042        }
1043
1044        let mut chars = text.chars().filter(|&ch| ch != '\x00');
1045        matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
1046    }
1047
1048    fn current_word_surface_last_char<'b>(
1049        &'b self,
1050        start: Position,
1051        capture: &'b Option<String>,
1052    ) -> Option<char> {
1053        self.current_word_text(start, capture)
1054            .chars()
1055            .rev()
1056            .find(|&ch| ch != '\x00')
1057    }
1058
1059    fn current_word_surface_ends_with_char(
1060        &self,
1061        start: Position,
1062        capture: &Option<String>,
1063        target: char,
1064    ) -> bool {
1065        self.current_word_surface_last_char(start, capture) == Some(target)
1066    }
1067
1068    fn current_word_surface_ends_with_extglob_prefix(
1069        &self,
1070        start: Position,
1071        capture: &Option<String>,
1072    ) -> bool {
1073        self.current_word_surface_last_char(start, capture)
1074            .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
1075    }
1076
1077    /// Get the next source-backed token from the input, skipping line comments.
1078    ///
1079    /// Returned tokens expose their [`TokenKind`] and source [`Span`]. Comments
1080    /// are omitted from this public stream; the parser uses an internal variant
1081    /// when it needs to preserve them for AST attachment.
1082    pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
1083        self.skip_whitespace();
1084        let start = self.current_position();
1085        let token = self.next_lexed_token_inner(false)?;
1086        let end = self.current_position();
1087        Some(token.with_span(Span::from_positions(start, end)))
1088    }
1089
1090    /// Get the next source-backed token from the input, preserving line comments.
1091    pub(super) fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
1092        self.skip_whitespace();
1093        let start = self.current_position();
1094        let token = self.next_lexed_token_inner(true)?;
1095        let end = self.current_position();
1096        Some(token.with_span(Span::from_positions(start, end)))
1097    }
1098
1099    /// Internal: get next token without recording position (called after whitespace skip)
1100    fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
1101        let ch = self.peek_char()?;
1102
1103        match ch {
1104            '\n' => {
1105                self.consume_ascii_chars(1);
1106                Some(LexedToken::punctuation(TokenKind::Newline))
1107            }
1108            ';' => {
1109                if self.second_char() == Some(';') {
1110                    if self.third_char() == Some('&') {
1111                        self.consume_ascii_chars(3);
1112                        Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp)) // ;;&
1113                    } else {
1114                        self.consume_ascii_chars(2);
1115                        Some(LexedToken::punctuation(TokenKind::DoubleSemicolon)) // ;;
1116                    }
1117                } else if self.second_char() == Some('|') {
1118                    self.consume_ascii_chars(2);
1119                    Some(LexedToken::punctuation(TokenKind::SemiPipe)) // ;|
1120                } else if self.second_char() == Some('&') {
1121                    self.consume_ascii_chars(2);
1122                    Some(LexedToken::punctuation(TokenKind::SemiAmp)) // ;&
1123                } else {
1124                    self.consume_ascii_chars(1);
1125                    Some(LexedToken::punctuation(TokenKind::Semicolon))
1126                }
1127            }
1128            '|' => {
1129                if self.second_char() == Some('|') {
1130                    self.consume_ascii_chars(2);
1131                    Some(LexedToken::punctuation(TokenKind::Or))
1132                } else if self.second_char() == Some('&') {
1133                    self.consume_ascii_chars(2);
1134                    Some(LexedToken::punctuation(TokenKind::PipeBoth))
1135                } else {
1136                    self.consume_ascii_chars(1);
1137                    Some(LexedToken::punctuation(TokenKind::Pipe))
1138                }
1139            }
1140            '&' => {
1141                if self.second_char() == Some('&') {
1142                    self.consume_ascii_chars(2);
1143                    Some(LexedToken::punctuation(TokenKind::And))
1144                } else if self.second_char() == Some('>') {
1145                    if self.third_char() == Some('>') {
1146                        self.consume_ascii_chars(3);
1147                        Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
1148                    } else {
1149                        self.consume_ascii_chars(2);
1150                        Some(LexedToken::punctuation(TokenKind::RedirectBoth))
1151                    }
1152                } else if self.second_char() == Some('|') {
1153                    self.consume_ascii_chars(2);
1154                    Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
1155                } else if self.second_char() == Some('!') {
1156                    self.consume_ascii_chars(2);
1157                    Some(LexedToken::punctuation(TokenKind::BackgroundBang))
1158                } else {
1159                    self.consume_ascii_chars(1);
1160                    Some(LexedToken::punctuation(TokenKind::Background))
1161                }
1162            }
1163            '>' => {
1164                if self.second_char() == Some('>') {
1165                    if self.third_char() == Some('|') {
1166                        self.consume_ascii_chars(3);
1167                    } else {
1168                        self.consume_ascii_chars(2);
1169                    }
1170                    Some(LexedToken::punctuation(TokenKind::RedirectAppend))
1171                } else if self.second_char() == Some('|') {
1172                    self.consume_ascii_chars(2);
1173                    Some(LexedToken::punctuation(TokenKind::Clobber))
1174                } else if self.second_char() == Some('(') {
1175                    self.consume_ascii_chars(2);
1176                    Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
1177                } else if self.second_char() == Some('&') {
1178                    self.consume_ascii_chars(2);
1179                    Some(LexedToken::punctuation(TokenKind::DupOutput))
1180                } else {
1181                    self.consume_ascii_chars(1);
1182                    Some(LexedToken::punctuation(TokenKind::RedirectOut))
1183                }
1184            }
1185            '<' => {
1186                if self.second_char() == Some('<') {
1187                    if self.third_char() == Some('<') {
1188                        self.consume_ascii_chars(3);
1189                        Some(LexedToken::punctuation(TokenKind::HereString))
1190                    } else if self.third_char() == Some('-') {
1191                        self.consume_ascii_chars(3);
1192                        Some(LexedToken::punctuation(TokenKind::HereDocStrip))
1193                    } else {
1194                        self.consume_ascii_chars(2);
1195                        Some(LexedToken::punctuation(TokenKind::HereDoc))
1196                    }
1197                } else if self.second_char() == Some('>') {
1198                    self.consume_ascii_chars(2);
1199                    Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
1200                } else if self.second_char() == Some('(') {
1201                    self.consume_ascii_chars(2);
1202                    Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
1203                } else if self.second_char() == Some('&') {
1204                    self.consume_ascii_chars(2);
1205                    Some(LexedToken::punctuation(TokenKind::DupInput))
1206                } else {
1207                    self.consume_ascii_chars(1);
1208                    Some(LexedToken::punctuation(TokenKind::RedirectIn))
1209                }
1210            }
1211            '(' => {
1212                if self.second_char() == Some('(') {
1213                    self.consume_ascii_chars(2);
1214                    Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
1215                } else {
1216                    self.consume_ascii_chars(1);
1217                    Some(LexedToken::punctuation(TokenKind::LeftParen))
1218                }
1219            }
1220            ')' => {
1221                if self.second_char() == Some(')') {
1222                    self.consume_ascii_chars(2);
1223                    Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
1224                } else {
1225                    self.consume_ascii_chars(1);
1226                    Some(LexedToken::punctuation(TokenKind::RightParen))
1227                }
1228            }
1229            '{' => {
1230                let start = self.current_position();
1231                if self.ignore_braces_enabled() {
1232                    self.consume_ascii_chars(1);
1233                    match self.peek_char() {
1234                        Some(' ') | Some('\t') | Some('\n') | None => {
1235                            Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
1236                        }
1237                        _ => self.read_word_starting_with("{", start),
1238                    }
1239                } else if self.looks_like_brace_expansion() {
1240                    // Look ahead to see if this is a brace expansion like {a,b,c} or {1..5}
1241                    // vs a brace group like { cmd; }
1242                    // Note: { must be followed by space/newline to be a brace group
1243                    self.read_brace_expansion_word()
1244                } else if self.is_brace_group_start() {
1245                    self.advance();
1246                    Some(LexedToken::punctuation(TokenKind::LeftBrace))
1247                } else if self.brace_literal_starts_case_pattern_delimiter() {
1248                    self.read_word_starting_with("{", start)
1249                } else {
1250                    self.read_brace_literal_word()
1251                }
1252            }
1253            '}' => {
1254                self.consume_ascii_chars(1);
1255                if self.ignore_close_braces_enabled() {
1256                    Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
1257                } else {
1258                    Some(LexedToken::punctuation(TokenKind::RightBrace))
1259                }
1260            }
1261            '[' => {
1262                let start = self.current_position();
1263                self.consume_ascii_chars(1);
1264                if self.peek_char() == Some('[')
1265                    && matches!(
1266                        self.second_char(),
1267                        Some(' ') | Some('\t') | Some('\n') | None
1268                    )
1269                {
1270                    self.consume_ascii_chars(1);
1271                    Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
1272                } else {
1273                    // `[` can start the test command when followed by whitespace, or it can be
1274                    // ordinary word text such as a glob bracket expression.
1275                    //
1276                    // Read the whole token with the normal word scanner so forms like `[[z]`,
1277                    // `[hello"]"`, and `[+(])` stay attached to one word instead of producing
1278                    // structural tokens mid-word.
1279                    match self.peek_char() {
1280                        Some(' ') | Some('\t') | Some('\n') | None => {
1281                            Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
1282                        }
1283                        _ => self.read_word_starting_with("[", start),
1284                    }
1285                }
1286            }
1287            ']' => {
1288                if self.second_char() == Some(']') {
1289                    self.consume_ascii_chars(2);
1290                    Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
1291                } else {
1292                    self.consume_ascii_chars(1);
1293                    Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
1294                }
1295            }
1296            '\'' => self.read_single_quoted_string(),
1297            '"' => self.read_double_quoted_string(),
1298            '#' => {
1299                if self.should_treat_hash_as_word_char() {
1300                    let start = self.current_position();
1301                    return self.read_word_starting_with("#", start);
1302                }
1303                if preserve_comments {
1304                    self.read_comment();
1305                    Some(LexedToken::comment())
1306                } else {
1307                    self.skip_comment();
1308                    self.next_lexed_token_inner(false)
1309                }
1310            }
1311            // Handle file descriptor redirects like 2> or 2>&1
1312            '0'..='9' => self.read_word_or_fd_redirect(),
1313            _ => self.read_word(),
1314        }
1315    }
1316
1317    fn skip_whitespace(&mut self) {
1318        while let Some(ch) = self.peek_char() {
1319            if self.reinject_buf.is_empty() {
1320                let whitespace_len = self.source_horizontal_whitespace_len();
1321                if whitespace_len > 0 {
1322                    self.consume_source_bytes(whitespace_len);
1323                    continue;
1324                }
1325
1326                if self.cursor.rest().starts_with("\\\n") {
1327                    self.consume_source_bytes(2);
1328                    continue;
1329                }
1330            }
1331
1332            if ch == ' ' || ch == '\t' {
1333                self.consume_ascii_chars(1);
1334            } else if ch == '\\' {
1335                // Check for backslash-newline (line continuation) between tokens
1336                if self.second_char() == Some('\n') {
1337                    self.consume_ascii_chars(2);
1338                } else {
1339                    break;
1340                }
1341            } else {
1342                break;
1343            }
1344        }
1345    }
1346
1347    fn skip_comment(&mut self) {
1348        if self.reinject_buf.is_empty() {
1349            let end = self
1350                .cursor
1351                .find_byte(b'\n')
1352                .unwrap_or(self.cursor.rest().len());
1353            self.consume_source_bytes(end);
1354            return;
1355        }
1356
1357        while let Some(ch) = self.peek_char() {
1358            if ch == '\n' {
1359                break;
1360            }
1361            self.advance();
1362        }
1363    }
1364
1365    fn read_comment(&mut self) {
1366        debug_assert_eq!(self.peek_char(), Some('#'));
1367
1368        if self.reinject_buf.is_empty() {
1369            let rest = self.cursor.rest();
1370            let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1371            self.consume_source_bytes(end);
1372            return;
1373        }
1374
1375        self.advance(); // consume '#'
1376
1377        while let Some(ch) = self.peek_char() {
1378            if ch == '\n' {
1379                break;
1380            }
1381            self.advance();
1382        }
1383    }
1384
1385    fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1386        if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1387            return false;
1388        }
1389
1390        let line_start = self.input[..self.offset]
1391            .rfind('\n')
1392            .map_or(0, |index| index + 1);
1393        let prefix = &self.input[line_start..self.offset];
1394        line_has_unclosed_double_paren(prefix)
1395    }
1396
1397    /// Check if this is a file descriptor redirect (e.g., 2>, 2>>, 2>&1)
1398    /// or just a regular word starting with a digit
1399    fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
1400        if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
1401            let Some(fd) = first_digit.to_digit(10) else {
1402                unreachable!("peeked ASCII digit should convert to a base-10 digit");
1403            };
1404            let fd = fd as i32;
1405
1406            match (self.second_char(), self.third_char()) {
1407                (Some('>'), Some('>')) => {
1408                    if self.fourth_char() == Some('|') {
1409                        self.consume_ascii_chars(4);
1410                    } else {
1411                        self.consume_ascii_chars(3);
1412                    }
1413                    return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
1414                }
1415                (Some('>'), Some('|')) => {
1416                    self.consume_ascii_chars(3);
1417                    return Some(LexedToken::fd(TokenKind::Clobber, fd));
1418                }
1419                (Some('>'), Some('&')) => {
1420                    self.consume_ascii_chars(3);
1421
1422                    let mut target_str = String::with_capacity(4);
1423                    while let Some(c) = self.peek_char() {
1424                        if c.is_ascii_digit() {
1425                            target_str.push(c);
1426                            self.advance();
1427                        } else {
1428                            break;
1429                        }
1430                    }
1431
1432                    if target_str.is_empty() {
1433                        return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1434                    }
1435
1436                    let target_fd: i32 = target_str.parse().unwrap_or(1);
1437                    return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
1438                }
1439                (Some('>'), _) => {
1440                    self.consume_ascii_chars(2);
1441                    return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1442                }
1443                (Some('<'), Some('&')) => {
1444                    self.consume_ascii_chars(3);
1445
1446                    let mut target_str = String::with_capacity(4);
1447                    while let Some(c) = self.peek_char() {
1448                        if c.is_ascii_digit() || c == '-' {
1449                            target_str.push(c);
1450                            self.advance();
1451                            if c == '-' {
1452                                break;
1453                            }
1454                        } else {
1455                            break;
1456                        }
1457                    }
1458
1459                    if target_str == "-" {
1460                        return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
1461                    }
1462                    let target_fd: i32 = target_str.parse().unwrap_or(0);
1463                    return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
1464                }
1465                (Some('<'), Some('>')) => {
1466                    self.consume_ascii_chars(3);
1467                    return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
1468                }
1469                (Some('<'), Some('<')) => {}
1470                (Some('<'), _) => {
1471                    self.consume_ascii_chars(2);
1472                    return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
1473                }
1474                _ => {}
1475            }
1476        }
1477
1478        // Not a fd redirect pattern, read as regular word
1479        self.read_word()
1480    }
1481
1482    fn read_word_starting_with(
1483        &mut self,
1484        _prefix: &str,
1485        start: Position,
1486    ) -> Option<LexedToken<'a>> {
1487        let segment = match self.read_unquoted_segment(start) {
1488            Ok(segment) => segment,
1489            Err(kind) => return Some(LexedToken::error(kind)),
1490        };
1491        if segment.as_str().is_empty() {
1492            return None;
1493        }
1494        let mut lexed_word = LexedWord::from_segment(segment);
1495        if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1496            return Some(LexedToken::error(kind));
1497        }
1498        Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1499    }
1500
1501    fn read_word(&mut self) -> Option<LexedToken<'a>> {
1502        let start = self.current_position();
1503
1504        if self.reinject_buf.is_empty() {
1505            let ascii_len = self.source_ascii_plain_word_len();
1506            let chunk = if ascii_len > 0
1507                && self
1508                    .cursor
1509                    .rest()
1510                    .as_bytes()
1511                    .get(ascii_len)
1512                    .is_none_or(|byte| byte.is_ascii())
1513            {
1514                self.consume_source_bytes(ascii_len);
1515                &self.input[start.offset..self.offset]
1516            } else {
1517                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1518                self.advance_scanned_source_bytes(chunk.len());
1519                chunk
1520            };
1521            if !chunk.is_empty() {
1522                let continues = matches!(
1523                    self.peek_char(),
1524                    Some(next)
1525                        if Self::is_word_char(next)
1526                            || next == '$'
1527                            || matches!(next, '\'' | '"')
1528                            || next == '{'
1529                            || (next == '\\' && self.second_char() == Some('\n'))
1530                            || (next == '('
1531                                && (chunk.ends_with('=')
1532                                    || Self::word_can_take_parenthesized_suffix(chunk)))
1533                );
1534
1535                if !continues {
1536                    let end = self.current_position();
1537                    return Some(LexedToken::borrowed_word(
1538                        TokenKind::Word,
1539                        &self.input[start.offset..self.offset],
1540                        Some(Span::from_positions(start, end)),
1541                    ));
1542                }
1543
1544                if self.peek_char() == Some('(')
1545                    && (chunk.ends_with('=') || Self::word_can_take_parenthesized_suffix(chunk))
1546                {
1547                    return self.read_complex_word(start);
1548                }
1549
1550                let end = self.current_position();
1551                return self.finish_segmented_word(LexedWord::borrowed(
1552                    LexedWordSegmentKind::Plain,
1553                    &self.input[start.offset..self.offset],
1554                    Some(Span::from_positions(start, end)),
1555                ));
1556            }
1557        }
1558
1559        self.read_complex_word(start)
1560    }
1561
1562    fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1563        if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1564            return Some(LexedToken::error(kind));
1565        }
1566
1567        Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1568    }
1569
1570    fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1571        if self.peek_char() == Some('$') {
1572            match self.second_char() {
1573                Some('\'') => return self.read_dollar_single_quoted_string(),
1574                Some('"') => return self.read_dollar_double_quoted_string(),
1575                _ => {}
1576            }
1577        }
1578
1579        let segment = match self.read_unquoted_segment(start) {
1580            Ok(segment) => segment,
1581            Err(kind) => return Some(LexedToken::error(kind)),
1582        };
1583
1584        if segment.as_str().is_empty() {
1585            return None;
1586        }
1587
1588        self.finish_segmented_word(LexedWord::from_segment(segment))
1589    }
1590
1591    fn read_unquoted_segment(
1592        &mut self,
1593        start: Position,
1594    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1595        let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
1596        while let Some(ch) = self.peek_char() {
1597            if ch == '"' || ch == '\'' {
1598                break;
1599            } else if ch == '$' {
1600                if matches!(self.second_char(), Some('\'') | Some('"'))
1601                    && (self.current_position().offset > start.offset
1602                        || word.as_ref().is_some_and(|word| !word.is_empty()))
1603                {
1604                    break;
1605                }
1606
1607                // Handle variable references and command substitution
1608                self.advance();
1609
1610                Self::push_capture_char(&mut word, ch); // push the '$'
1611
1612                // Check for $[ / $( / ${ forms before falling back to variable text.
1613                if self.peek_char() == Some('[') {
1614                    Self::push_capture_char(&mut word, '[');
1615                    self.advance();
1616                    if !self.read_legacy_arithmetic_into(&mut word, start) {
1617                        return Err(LexerErrorKind::CommandSubstitution);
1618                    }
1619                } else if self.peek_char() == Some('(') {
1620                    if self.second_char() == Some('(') {
1621                        if !self.read_arithmetic_expansion_into(&mut word) {
1622                            return Err(LexerErrorKind::CommandSubstitution);
1623                        }
1624                    } else {
1625                        Self::push_capture_char(&mut word, '(');
1626                        self.advance();
1627                        if !self.read_command_subst_into(&mut word) {
1628                            return Err(LexerErrorKind::CommandSubstitution);
1629                        }
1630                    }
1631                } else if self.peek_char() == Some('{') {
1632                    // ${VAR} format — track nested braces so ${a[${#b[@]}]}
1633                    // doesn't stop at the inner }.
1634                    Self::push_capture_char(&mut word, '{');
1635                    self.advance();
1636                    let _ = self.read_param_expansion_into(&mut word, start);
1637                } else {
1638                    // Check for special single-character variables ($?, $#, $@, $*, $!, $$, $-, $0-$9)
1639                    if let Some(c) = self.peek_char() {
1640                        if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
1641                            || c.is_ascii_digit()
1642                        {
1643                            Self::push_capture_char(&mut word, c);
1644                            self.advance();
1645                        } else {
1646                            // Read variable name (alphanumeric + _)
1647                            while let Some(c) = self.peek_char() {
1648                                if c.is_ascii_alphanumeric() || c == '_' {
1649                                    Self::push_capture_char(&mut word, c);
1650                                    self.advance();
1651                                } else {
1652                                    break;
1653                                }
1654                            }
1655                        }
1656                    }
1657                }
1658            } else if ch == '{' {
1659                if self.looks_like_mid_word_brace_segment() {
1660                    // Keep balanced {...} forms attached to the current word so
1661                    // plain literals like foo{bar} and brace expansions stay intact.
1662                    Self::push_capture_char(&mut word, ch);
1663                    self.advance();
1664                    self.consume_mid_word_brace_segment(&mut word);
1665                } else {
1666                    // Unmatched literal braces in regexes like ^{ should not swallow
1667                    // trailing delimiters such as ]] or then.
1668                    Self::push_capture_char(&mut word, ch);
1669                    self.advance();
1670                }
1671            } else if ch == '`' {
1672                // Preserve legacy backticks verbatim so the parser can keep the
1673                // original syntax form.
1674                let capture_end = self.current_position();
1675                self.ensure_capture_from_source(&mut word, start, capture_end);
1676                Self::push_capture_char(&mut word, ch);
1677                self.advance(); // consume opening `
1678                let mut closed = false;
1679                while let Some(c) = self.peek_char() {
1680                    Self::push_capture_char(&mut word, c);
1681                    self.advance();
1682                    if c == '`' {
1683                        closed = true;
1684                        break;
1685                    }
1686                    if c == '\\'
1687                        && let Some(next) = self.peek_char()
1688                    {
1689                        Self::push_capture_char(&mut word, next);
1690                        self.advance();
1691                    }
1692                }
1693                if !closed {
1694                    return Err(LexerErrorKind::BacktickSubstitution);
1695                }
1696            } else if ch == '\\' {
1697                let capture_end = self.current_position();
1698                self.ensure_capture_from_source(&mut word, start, capture_end);
1699                self.advance();
1700                if let Some(next) = self.peek_char() {
1701                    if next == '\n' {
1702                        // Line continuation: skip backslash + newline
1703                        self.advance();
1704                    } else {
1705                        // Escaped character: backslash quotes the next char
1706                        // (quote removal — only the literal char survives).
1707                        // Preserve source/decoded alignment with a sentinel so
1708                        // downstream word decoding keeps later spans anchored.
1709                        Self::push_capture_char(&mut word, '\x00');
1710                        Self::push_capture_char(&mut word, next);
1711                        self.advance();
1712                        if next == '{'
1713                            && self.current_word_surface_is_single_char(start, &word, '{')
1714                            && self.escaped_brace_sequence_looks_like_brace_expansion()
1715                        {
1716                            let mut depth = 1;
1717                            while let Some(c) = self.peek_char() {
1718                                Self::push_capture_char(&mut word, c);
1719                                self.advance();
1720                                match c {
1721                                    '{' => depth += 1,
1722                                    '}' => {
1723                                        depth -= 1;
1724                                        if depth == 0 {
1725                                            break;
1726                                        }
1727                                    }
1728                                    _ => {}
1729                                }
1730                            }
1731                        }
1732                    }
1733                } else {
1734                    Self::push_capture_char(&mut word, '\\');
1735                }
1736            } else if ch == '('
1737                && self.current_word_surface_ends_with_char(start, &word, '=')
1738                && self.looks_like_assoc_assign()
1739            {
1740                // Associative compound assignment: var=([k]="v" ...) — keep entire
1741                // (...) as part of word so declare -A m=([k]="v") stays one token.
1742                Self::push_capture_char(&mut word, ch);
1743                self.advance();
1744                let mut depth = 1;
1745                while let Some(c) = self.peek_char() {
1746                    Self::push_capture_char(&mut word, c);
1747                    self.advance();
1748                    match c {
1749                        '(' => depth += 1,
1750                        ')' => {
1751                            depth -= 1;
1752                            if depth == 0 {
1753                                break;
1754                            }
1755                        }
1756                        '"' => {
1757                            while let Some(qc) = self.peek_char() {
1758                                Self::push_capture_char(&mut word, qc);
1759                                self.advance();
1760                                if qc == '"' {
1761                                    break;
1762                                }
1763                                if qc == '\\'
1764                                    && let Some(esc) = self.peek_char()
1765                                {
1766                                    Self::push_capture_char(&mut word, esc);
1767                                    self.advance();
1768                                }
1769                            }
1770                        }
1771                        '\'' => {
1772                            while let Some(qc) = self.peek_char() {
1773                                Self::push_capture_char(&mut word, qc);
1774                                self.advance();
1775                                if qc == '\'' {
1776                                    break;
1777                                }
1778                            }
1779                        }
1780                        '\\' => {
1781                            if let Some(esc) = self.peek_char() {
1782                                Self::push_capture_char(&mut word, esc);
1783                                self.advance();
1784                            }
1785                        }
1786                        _ => {}
1787                    }
1788                }
1789            } else if ch == '(' && self.current_word_surface_ends_with_extglob_prefix(start, &word)
1790            {
1791                // Extglob: @(...), ?(...), *(...), +(...), !(...)
1792                // Consume through matching ) including nested parens
1793                Self::push_capture_char(&mut word, ch);
1794                self.advance();
1795                let mut depth = 1;
1796                while let Some(c) = self.peek_char() {
1797                    Self::push_capture_char(&mut word, c);
1798                    self.advance();
1799                    match c {
1800                        '(' => depth += 1,
1801                        ')' => {
1802                            depth -= 1;
1803                            if depth == 0 {
1804                                break;
1805                            }
1806                        }
1807                        '\\' => {
1808                            if let Some(esc) = self.peek_char() {
1809                                Self::push_capture_char(&mut word, esc);
1810                                self.advance();
1811                            }
1812                        }
1813                        _ => {}
1814                    }
1815                }
1816            } else if Self::is_plain_word_char(ch) {
1817                if self.reinject_buf.is_empty() {
1818                    let ascii_len = self.source_ascii_plain_word_len();
1819                    let chunk = if ascii_len > 0
1820                        && self
1821                            .cursor
1822                            .rest()
1823                            .as_bytes()
1824                            .get(ascii_len)
1825                            .is_none_or(|byte| byte.is_ascii())
1826                    {
1827                        self.consume_source_bytes(ascii_len);
1828                        &self.input[self.offset - ascii_len..self.offset]
1829                    } else {
1830                        let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1831                        self.advance_scanned_source_bytes(chunk.len());
1832                        chunk
1833                    };
1834                    Self::push_capture_str(&mut word, chunk);
1835                } else {
1836                    Self::push_capture_char(&mut word, ch);
1837                    self.advance();
1838                }
1839            } else {
1840                break;
1841            }
1842        }
1843
1844        if let Some(word) = word {
1845            let span = Some(Span::from_positions(start, self.current_position()));
1846            Ok(LexedWordSegment::owned_with_spans(
1847                LexedWordSegmentKind::Plain,
1848                word,
1849                span,
1850                span,
1851            ))
1852        } else {
1853            let end = self.current_position();
1854            Ok(LexedWordSegment::borrowed(
1855                LexedWordSegmentKind::Plain,
1856                &self.input[start.offset..self.offset],
1857                Some(Span::from_positions(start, end)),
1858            ))
1859        }
1860    }
1861
1862    fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1863        let segment = match self.read_single_quoted_segment() {
1864            Ok(segment) => segment,
1865            Err(kind) => return Some(LexedToken::error(kind)),
1866        };
1867        let mut word = LexedWord::from_segment(segment);
1868        if let Err(kind) = self.append_segmented_continuation(&mut word) {
1869            return Some(LexedToken::error(kind));
1870        }
1871
1872        Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1873    }
1874
1875    fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1876        debug_assert_eq!(self.peek_char(), Some('\''));
1877
1878        let wrapper_start = self.current_position();
1879        self.consume_ascii_chars(1); // consume opening '
1880        let content_start = self.current_position();
1881        let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
1882        let mut content_end = content_start;
1883        let mut content = String::with_capacity(16);
1884        let mut closed = false;
1885
1886        if can_borrow {
1887            let rest = self.cursor.rest();
1888            if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
1889                self.consume_source_bytes(quote_index);
1890                content_end = self.current_position();
1891                self.consume_ascii_chars(1); // consume closing '
1892                closed = true;
1893            } else {
1894                self.consume_source_bytes(rest.len());
1895            }
1896        }
1897
1898        while let Some(ch) = self.peek_char() {
1899            if closed {
1900                break;
1901            }
1902            if ch == '\'' {
1903                if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
1904                    if !can_borrow {
1905                        content.push('\'');
1906                    }
1907                    self.advance();
1908                    self.advance();
1909                    continue;
1910                }
1911                content_end = self.current_position();
1912                self.consume_ascii_chars(1); // consume closing '
1913                closed = true;
1914                break;
1915            }
1916            if !can_borrow {
1917                content.push(ch);
1918            }
1919            self.advance();
1920        }
1921
1922        if !closed {
1923            return Err(LexerErrorKind::SingleQuote);
1924        }
1925
1926        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
1927        let content_span = Some(Span::from_positions(content_start, content_end));
1928
1929        if can_borrow {
1930            Ok(LexedWordSegment::borrowed_with_spans(
1931                LexedWordSegmentKind::SingleQuoted,
1932                &self.input[content_start.offset..content_end.offset],
1933                content_span,
1934                wrapper_span,
1935            ))
1936        } else {
1937            Ok(LexedWordSegment::owned_with_spans(
1938                LexedWordSegmentKind::SingleQuoted,
1939                content,
1940                content_span,
1941                wrapper_span,
1942            ))
1943        }
1944    }
1945
1946    fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1947        let segment = match self.read_dollar_single_quoted_segment() {
1948            Ok(segment) => segment,
1949            Err(kind) => return Some(LexedToken::error(kind)),
1950        };
1951        let mut word = LexedWord::from_segment(segment);
1952        if let Err(kind) = self.append_segmented_continuation(&mut word) {
1953            return Some(LexedToken::error(kind));
1954        }
1955
1956        let kind = if word.single_segment().is_some() {
1957            TokenKind::LiteralWord
1958        } else {
1959            TokenKind::Word
1960        };
1961
1962        Some(LexedToken::with_word_payload(kind, word))
1963    }
1964
1965    fn read_dollar_single_quoted_segment(
1966        &mut self,
1967    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1968        debug_assert_eq!(self.peek_char(), Some('$'));
1969        debug_assert_eq!(self.second_char(), Some('\''));
1970
1971        let wrapper_start = self.current_position();
1972        self.consume_ascii_chars(2); // consume $'
1973        let content_start = self.current_position();
1974        let mut out = String::with_capacity(16);
1975
1976        while let Some(ch) = self.peek_char() {
1977            if ch == '\'' {
1978                let content_end = self.current_position();
1979                self.advance();
1980                let wrapper_span =
1981                    Some(Span::from_positions(wrapper_start, self.current_position()));
1982                let content_span = Some(Span::from_positions(content_start, content_end));
1983                return Ok(LexedWordSegment::owned_with_spans(
1984                    LexedWordSegmentKind::DollarSingleQuoted,
1985                    out,
1986                    content_span,
1987                    wrapper_span,
1988                ));
1989            }
1990
1991            if ch == '\\' {
1992                self.advance();
1993                if let Some(esc) = self.peek_char() {
1994                    self.advance();
1995                    match esc {
1996                        'n' => out.push('\n'),
1997                        't' => out.push('\t'),
1998                        'r' => out.push('\r'),
1999                        'a' => out.push('\x07'),
2000                        'b' => out.push('\x08'),
2001                        'f' => out.push('\x0C'),
2002                        'v' => out.push('\x0B'),
2003                        'e' | 'E' => out.push('\x1B'),
2004                        '\\' => out.push('\\'),
2005                        '\'' => out.push('\''),
2006                        '"' => out.push('"'),
2007                        '?' => out.push('?'),
2008                        'c' => {
2009                            if let Some(control) = self.peek_char() {
2010                                self.advance();
2011                                out.push(((control as u32 & 0x1F) as u8) as char);
2012                            } else {
2013                                out.push('\\');
2014                                out.push('c');
2015                            }
2016                        }
2017                        'x' => {
2018                            let mut hex = String::new();
2019                            for _ in 0..2 {
2020                                if let Some(h) = self.peek_char() {
2021                                    if h.is_ascii_hexdigit() {
2022                                        hex.push(h);
2023                                        self.advance();
2024                                    } else {
2025                                        break;
2026                                    }
2027                                }
2028                            }
2029                            if let Ok(val) = u8::from_str_radix(&hex, 16) {
2030                                out.push(val as char);
2031                            }
2032                        }
2033                        'u' => {
2034                            let mut hex = String::new();
2035                            for _ in 0..4 {
2036                                if let Some(h) = self.peek_char() {
2037                                    if h.is_ascii_hexdigit() {
2038                                        hex.push(h);
2039                                        self.advance();
2040                                    } else {
2041                                        break;
2042                                    }
2043                                }
2044                            }
2045                            if let Ok(val) = u32::from_str_radix(&hex, 16)
2046                                && let Some(c) = char::from_u32(val)
2047                            {
2048                                out.push(c);
2049                            }
2050                        }
2051                        'U' => {
2052                            let mut hex = String::new();
2053                            for _ in 0..8 {
2054                                if let Some(h) = self.peek_char() {
2055                                    if h.is_ascii_hexdigit() {
2056                                        hex.push(h);
2057                                        self.advance();
2058                                    } else {
2059                                        break;
2060                                    }
2061                                }
2062                            }
2063                            if let Ok(val) = u32::from_str_radix(&hex, 16)
2064                                && let Some(c) = char::from_u32(val)
2065                            {
2066                                out.push(c);
2067                            }
2068                        }
2069                        '0'..='7' => {
2070                            let mut oct = String::new();
2071                            oct.push(esc);
2072                            for _ in 0..2 {
2073                                if let Some(o) = self.peek_char() {
2074                                    if o.is_ascii_digit() && o < '8' {
2075                                        oct.push(o);
2076                                        self.advance();
2077                                    } else {
2078                                        break;
2079                                    }
2080                                }
2081                            }
2082                            if let Ok(val) = u8::from_str_radix(&oct, 8) {
2083                                out.push(val as char);
2084                            }
2085                        }
2086                        _ => {
2087                            out.push('\\');
2088                            out.push(esc);
2089                        }
2090                    }
2091                } else {
2092                    out.push('\\');
2093                }
2094                continue;
2095            }
2096
2097            out.push(ch);
2098            self.advance();
2099        }
2100
2101        Err(LexerErrorKind::SingleQuote)
2102    }
2103
2104    fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2105        let start = self.current_position();
2106
2107        if self.reinject_buf.is_empty() {
2108            let ascii_len = self.source_ascii_plain_word_len();
2109            let chunk = if ascii_len > 0
2110                && self
2111                    .cursor
2112                    .rest()
2113                    .as_bytes()
2114                    .get(ascii_len)
2115                    .is_none_or(|byte| byte.is_ascii())
2116            {
2117                self.consume_source_bytes(ascii_len);
2118                &self.input[start.offset..self.offset]
2119            } else {
2120                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
2121                self.advance_scanned_source_bytes(chunk.len());
2122                chunk
2123            };
2124            if chunk.is_empty() {
2125                return None;
2126            }
2127
2128            let end = self.current_position();
2129            return Some(LexedWordSegment::borrowed(
2130                LexedWordSegmentKind::Plain,
2131                &self.input[start.offset..self.offset],
2132                Some(Span::from_positions(start, end)),
2133            ));
2134        }
2135
2136        let ch = self.peek_char()?;
2137        if !Self::is_plain_word_char(ch) {
2138            return None;
2139        }
2140
2141        let mut text = String::with_capacity(16);
2142        while let Some(ch) = self.peek_char() {
2143            if !Self::is_plain_word_char(ch) {
2144                break;
2145            }
2146            text.push(ch);
2147            self.advance();
2148        }
2149
2150        Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
2151    }
2152
2153    /// After a closing quote, read any adjacent quoted or unquoted word chars
2154    /// into `word`. Handles concatenation like `'foo'"bar"baz`.
2155    fn append_segmented_continuation(
2156        &mut self,
2157        word: &mut LexedWord<'a>,
2158    ) -> Result<(), LexerErrorKind> {
2159        loop {
2160            match self.peek_char() {
2161                Some('\\') if self.second_char() == Some('\n') => {
2162                    self.advance();
2163                    self.advance();
2164                    continue;
2165                }
2166                Some('\'') => {
2167                    word.push_segment(self.read_single_quoted_segment()?);
2168                }
2169                Some('"') => {
2170                    word.push_segment(self.read_double_quoted_segment()?);
2171                }
2172                Some('$') if self.second_char() == Some('\'') => {
2173                    word.push_segment(self.read_dollar_single_quoted_segment()?);
2174                }
2175                Some('$') if self.second_char() == Some('"') => {
2176                    word.push_segment(self.read_dollar_double_quoted_segment()?);
2177                }
2178                Some('(') if Self::lexed_word_can_take_parenthesized_suffix(word) => {
2179                    let Some(segment) = self.read_parenthesized_word_suffix_segment() else {
2180                        unreachable!("peeked '(' should produce a suffix segment");
2181                    };
2182                    word.push_segment(segment);
2183                }
2184                _ => {
2185                    if let Some(segment) = self.read_plain_continuation_segment() {
2186                        word.push_segment(segment);
2187                        continue;
2188                    }
2189
2190                    let start = self.current_position();
2191                    let plain = self.read_unquoted_segment(start)?;
2192                    if plain.as_str().is_empty() {
2193                        break;
2194                    }
2195                    word.push_segment(plain);
2196                }
2197            }
2198        }
2199
2200        Ok(())
2201    }
2202
2203    fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2204        debug_assert_eq!(self.peek_char(), Some('('));
2205
2206        let start = self.current_position();
2207        let mut depth = 0usize;
2208        let mut escaped = false;
2209        let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2210
2211        while let Some(ch) = self.peek_char() {
2212            if let Some(text) = text.as_mut() {
2213                text.push(ch);
2214            }
2215            self.advance();
2216
2217            if escaped {
2218                escaped = false;
2219                continue;
2220            }
2221
2222            match ch {
2223                '\\' => escaped = true,
2224                '(' => depth += 1,
2225                ')' => {
2226                    depth = depth.saturating_sub(1);
2227                    if depth == 0 {
2228                        break;
2229                    }
2230                }
2231                _ => {}
2232            }
2233        }
2234
2235        let end = self.current_position();
2236        let span = Some(Span::from_positions(start, end));
2237        if let Some(text) = text {
2238            Some(LexedWordSegment::owned_with_spans(
2239                LexedWordSegmentKind::Plain,
2240                text,
2241                span,
2242                span,
2243            ))
2244        } else {
2245            Some(LexedWordSegment::borrowed_with_spans(
2246                LexedWordSegmentKind::Plain,
2247                &self.input[start.offset..end.offset],
2248                span,
2249                span,
2250            ))
2251        }
2252    }
2253
2254    fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2255        self.read_double_quoted_word(false)
2256    }
2257
2258    fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2259        self.read_double_quoted_word(true)
2260    }
2261
2262    fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2263        let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2264            Ok(segment) => segment,
2265            Err(kind) => return Some(LexedToken::error(kind)),
2266        };
2267        let mut word = LexedWord::from_segment(segment);
2268        if let Err(kind) = self.append_segmented_continuation(&mut word) {
2269            return Some(LexedToken::error(kind));
2270        }
2271
2272        let kind = if word.single_segment().is_some() {
2273            TokenKind::QuotedWord
2274        } else {
2275            TokenKind::Word
2276        };
2277
2278        Some(LexedToken::with_word_payload(kind, word))
2279    }
2280
2281    fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2282        self.read_double_quoted_segment_with_dollar(false)
2283    }
2284
2285    fn read_dollar_double_quoted_segment(
2286        &mut self,
2287    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2288        self.read_double_quoted_segment_with_dollar(true)
2289    }
2290
2291    fn read_double_quoted_segment_with_dollar(
2292        &mut self,
2293        dollar: bool,
2294    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2295        if dollar {
2296            debug_assert_eq!(self.peek_char(), Some('$'));
2297            debug_assert_eq!(self.second_char(), Some('"'));
2298        } else {
2299            debug_assert_eq!(self.peek_char(), Some('"'));
2300        }
2301
2302        let wrapper_start = self.current_position();
2303        if dollar {
2304            self.consume_ascii_chars(2); // consume $"
2305        } else {
2306            self.consume_ascii_chars(1); // consume opening "
2307        }
2308        let content_start = self.current_position();
2309        let mut content_end = content_start;
2310        let mut simple = self.reinject_buf.is_empty();
2311        let mut borrowable = self.reinject_buf.is_empty();
2312        let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2313        let mut closed = false;
2314
2315        while let Some(ch) = self.peek_char() {
2316            if simple {
2317                if self.reinject_buf.is_empty() {
2318                    let rest = self.cursor.rest();
2319                    match Self::find_double_quote_special(rest) {
2320                        Some(index) if index > 0 => {
2321                            self.consume_source_bytes(index);
2322                            continue;
2323                        }
2324                        None => {
2325                            self.consume_source_bytes(rest.len());
2326                            return Err(LexerErrorKind::DoubleQuote);
2327                        }
2328                        _ => {}
2329                    }
2330                }
2331
2332                match ch {
2333                    '"' => {
2334                        content_end = self.current_position();
2335                        self.consume_ascii_chars(1); // consume closing "
2336                        closed = true;
2337                        break;
2338                    }
2339                    '\\' | '$' | '`' => {
2340                        simple = false;
2341                        if ch == '`' {
2342                            borrowable = false;
2343                            let capture_end = self.current_position();
2344                            self.ensure_capture_from_source(
2345                                &mut content,
2346                                content_start,
2347                                capture_end,
2348                            );
2349                        }
2350                    }
2351                    _ => {
2352                        self.advance();
2353                    }
2354                }
2355                if simple {
2356                    continue;
2357                }
2358            }
2359
2360            match ch {
2361                '"' => {
2362                    if borrowable {
2363                        content_end = self.current_position();
2364                    }
2365                    self.consume_ascii_chars(1); // consume closing "
2366                    closed = true;
2367                    break;
2368                }
2369                '\\' => {
2370                    let escape_start = self.current_position();
2371                    self.advance();
2372                    if let Some(next) = self.peek_char() {
2373                        match next {
2374                            '\n' => {
2375                                borrowable = false;
2376                                self.ensure_capture_from_source(
2377                                    &mut content,
2378                                    content_start,
2379                                    escape_start,
2380                                );
2381                                self.advance();
2382                            }
2383                            '$' => {
2384                                borrowable = false;
2385                                self.ensure_capture_from_source(
2386                                    &mut content,
2387                                    content_start,
2388                                    escape_start,
2389                                );
2390                                Self::push_capture_char(&mut content, '\x00');
2391                                Self::push_capture_char(&mut content, '$');
2392                                self.advance();
2393                            }
2394                            '"' | '\\' | '`' => {
2395                                borrowable = false;
2396                                self.ensure_capture_from_source(
2397                                    &mut content,
2398                                    content_start,
2399                                    escape_start,
2400                                );
2401                                if next == '\\' {
2402                                    Self::push_capture_char(&mut content, '\x00');
2403                                }
2404                                if next == '`' {
2405                                    Self::push_capture_char(&mut content, '\x00');
2406                                }
2407                                Self::push_capture_char(&mut content, next);
2408                                self.advance();
2409                                content_end = self.current_position();
2410                            }
2411                            _ => {
2412                                Self::push_capture_char(&mut content, '\\');
2413                                Self::push_capture_char(&mut content, next);
2414                                self.advance();
2415                                content_end = self.current_position();
2416                            }
2417                        }
2418                    }
2419                }
2420                '$' => {
2421                    Self::push_capture_char(&mut content, '$');
2422                    self.advance();
2423                    if self.peek_char() == Some('(') {
2424                        if self.second_char() == Some('(') {
2425                            self.read_arithmetic_expansion_into(&mut content);
2426                        } else {
2427                            Self::push_capture_char(&mut content, '(');
2428                            self.advance();
2429                            self.read_command_subst_into(&mut content);
2430                        }
2431                    } else if self.peek_char() == Some('{') {
2432                        Self::push_capture_char(&mut content, '{');
2433                        self.advance();
2434                        borrowable &= self.read_param_expansion_into(&mut content, content_start);
2435                    }
2436                    content_end = self.current_position();
2437                }
2438                '`' => {
2439                    borrowable = false;
2440                    let capture_end = self.current_position();
2441                    self.ensure_capture_from_source(&mut content, content_start, capture_end);
2442                    Self::push_capture_char(&mut content, '`');
2443                    self.advance(); // consume opening `
2444                    while let Some(c) = self.peek_char() {
2445                        Self::push_capture_char(&mut content, c);
2446                        self.advance();
2447                        if c == '`' {
2448                            break;
2449                        }
2450                        if c == '\\'
2451                            && let Some(next) = self.peek_char()
2452                        {
2453                            Self::push_capture_char(&mut content, next);
2454                            self.advance();
2455                        }
2456                    }
2457                    content_end = self.current_position();
2458                }
2459                _ => {
2460                    Self::push_capture_char(&mut content, ch);
2461                    self.advance();
2462                    content_end = self.current_position();
2463                }
2464            }
2465        }
2466
2467        if !closed {
2468            return Err(LexerErrorKind::DoubleQuote);
2469        }
2470
2471        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
2472        let content_span = Some(Span::from_positions(content_start, content_end));
2473
2474        if borrowable {
2475            Ok(LexedWordSegment::borrowed_with_spans(
2476                if dollar {
2477                    LexedWordSegmentKind::DollarDoubleQuoted
2478                } else {
2479                    LexedWordSegmentKind::DoubleQuoted
2480                },
2481                &self.input[content_start.offset..content_end.offset],
2482                content_span,
2483                wrapper_span,
2484            ))
2485        } else {
2486            Ok(LexedWordSegment::owned_with_spans(
2487                if dollar {
2488                    LexedWordSegmentKind::DollarDoubleQuoted
2489                } else {
2490                    LexedWordSegmentKind::DoubleQuoted
2491                },
2492                content.unwrap_or_default(),
2493                content_span,
2494                wrapper_span,
2495            ))
2496        }
2497    }
2498
2499    fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
2500        debug_assert_eq!(self.peek_char(), Some('('));
2501        debug_assert_eq!(self.second_char(), Some('('));
2502
2503        Self::push_capture_char(content, '(');
2504        self.advance();
2505        Self::push_capture_char(content, '(');
2506        self.advance();
2507
2508        let mut depth = 2;
2509        while let Some(c) = self.peek_char() {
2510            match c {
2511                '\\' => {
2512                    Self::push_capture_char(content, c);
2513                    self.advance();
2514                    if let Some(next) = self.peek_char() {
2515                        Self::push_capture_char(content, next);
2516                        self.advance();
2517                    }
2518                }
2519                '\'' => {
2520                    Self::push_capture_char(content, c);
2521                    self.advance();
2522                    while let Some(quoted) = self.peek_char() {
2523                        Self::push_capture_char(content, quoted);
2524                        self.advance();
2525                        if quoted == '\'' {
2526                            break;
2527                        }
2528                    }
2529                }
2530                '"' => {
2531                    let mut escaped = false;
2532                    Self::push_capture_char(content, c);
2533                    self.advance();
2534                    while let Some(quoted) = self.peek_char() {
2535                        Self::push_capture_char(content, quoted);
2536                        self.advance();
2537                        if escaped {
2538                            escaped = false;
2539                            continue;
2540                        }
2541                        match quoted {
2542                            '\\' => escaped = true,
2543                            '"' => break,
2544                            _ => {}
2545                        }
2546                    }
2547                }
2548                '`' => {
2549                    let mut escaped = false;
2550                    Self::push_capture_char(content, c);
2551                    self.advance();
2552                    while let Some(quoted) = self.peek_char() {
2553                        Self::push_capture_char(content, quoted);
2554                        self.advance();
2555                        if escaped {
2556                            escaped = false;
2557                            continue;
2558                        }
2559                        match quoted {
2560                            '\\' => escaped = true,
2561                            '`' => break,
2562                            _ => {}
2563                        }
2564                    }
2565                }
2566                '(' => {
2567                    Self::push_capture_char(content, c);
2568                    self.advance();
2569                    depth += 1;
2570                }
2571                ')' => {
2572                    Self::push_capture_char(content, c);
2573                    self.advance();
2574                    depth -= 1;
2575                    if depth == 0 {
2576                        return true;
2577                    }
2578                }
2579                _ => {
2580                    Self::push_capture_char(content, c);
2581                    self.advance();
2582                }
2583            }
2584        }
2585
2586        false
2587    }
2588
2589    fn read_legacy_arithmetic_into(
2590        &mut self,
2591        content: &mut Option<String>,
2592        segment_start: Position,
2593    ) -> bool {
2594        let mut bracket_depth = 1;
2595
2596        while let Some(c) = self.peek_char() {
2597            match c {
2598                '\\' => {
2599                    Self::push_capture_char(content, c);
2600                    self.advance();
2601                    if let Some(next) = self.peek_char() {
2602                        Self::push_capture_char(content, next);
2603                        self.advance();
2604                    }
2605                }
2606                '\'' => {
2607                    Self::push_capture_char(content, c);
2608                    self.advance();
2609                    while let Some(quoted) = self.peek_char() {
2610                        Self::push_capture_char(content, quoted);
2611                        self.advance();
2612                        if quoted == '\'' {
2613                            break;
2614                        }
2615                    }
2616                }
2617                '"' => {
2618                    let mut escaped = false;
2619                    Self::push_capture_char(content, c);
2620                    self.advance();
2621                    while let Some(quoted) = self.peek_char() {
2622                        Self::push_capture_char(content, quoted);
2623                        self.advance();
2624                        if escaped {
2625                            escaped = false;
2626                            continue;
2627                        }
2628                        match quoted {
2629                            '\\' => escaped = true,
2630                            '"' => break,
2631                            _ => {}
2632                        }
2633                    }
2634                }
2635                '`' => {
2636                    let mut escaped = false;
2637                    Self::push_capture_char(content, c);
2638                    self.advance();
2639                    while let Some(quoted) = self.peek_char() {
2640                        Self::push_capture_char(content, quoted);
2641                        self.advance();
2642                        if escaped {
2643                            escaped = false;
2644                            continue;
2645                        }
2646                        match quoted {
2647                            '\\' => escaped = true,
2648                            '`' => break,
2649                            _ => {}
2650                        }
2651                    }
2652                }
2653                '[' => {
2654                    Self::push_capture_char(content, c);
2655                    self.advance();
2656                    bracket_depth += 1;
2657                }
2658                ']' => {
2659                    Self::push_capture_char(content, c);
2660                    self.advance();
2661                    bracket_depth -= 1;
2662                    if bracket_depth == 0 {
2663                        return true;
2664                    }
2665                }
2666                '$' => {
2667                    Self::push_capture_char(content, c);
2668                    self.advance();
2669                    if self.peek_char() == Some('(') {
2670                        if self.second_char() == Some('(') {
2671                            if !self.read_arithmetic_expansion_into(content) {
2672                                return false;
2673                            }
2674                        } else {
2675                            Self::push_capture_char(content, '(');
2676                            self.advance();
2677                            if !self.read_command_subst_into(content) {
2678                                return false;
2679                            }
2680                        }
2681                    } else if self.peek_char() == Some('{') {
2682                        Self::push_capture_char(content, '{');
2683                        self.advance();
2684                        if !self.read_param_expansion_into(content, segment_start) {
2685                            return false;
2686                        }
2687                    } else if self.peek_char() == Some('[') {
2688                        Self::push_capture_char(content, '[');
2689                        self.advance();
2690                        if !self.read_legacy_arithmetic_into(content, segment_start) {
2691                            return false;
2692                        }
2693                    }
2694                }
2695                _ => {
2696                    Self::push_capture_char(content, c);
2697                    self.advance();
2698                }
2699            }
2700        }
2701
2702        false
2703    }
2704
2705    /// Read command substitution content after `$(`, handling nested parens and quotes.
2706    /// Appends chars to `content` and adds the closing `)`.
2707    /// `subst_depth` tracks nesting to prevent stack overflow.
2708    fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
2709        self.read_command_subst_into_depth(content, 0)
2710    }
2711
2712    fn flush_command_subst_keyword(
2713        current_word: &mut String,
2714        pending_case_headers: &mut usize,
2715        case_clause_depths: &mut SmallVec<[usize; 4]>,
2716        depth: usize,
2717        word_started_at_command_start: &mut bool,
2718    ) {
2719        if current_word.is_empty() {
2720            *word_started_at_command_start = false;
2721            return;
2722        }
2723
2724        match current_word.as_str() {
2725            "case" if *word_started_at_command_start => *pending_case_headers += 1,
2726            "in" if *pending_case_headers > 0 => {
2727                *pending_case_headers -= 1;
2728                case_clause_depths.push(depth);
2729            }
2730            "esac" if *word_started_at_command_start => {
2731                case_clause_depths.pop();
2732            }
2733            _ => {}
2734        }
2735
2736        current_word.clear();
2737        *word_started_at_command_start = false;
2738    }
2739
2740    fn read_command_subst_heredoc_delimiter_into(
2741        &mut self,
2742        content: &mut Option<String>,
2743    ) -> Option<String> {
2744        while let Some(ch) = self.peek_char() {
2745            if !matches!(ch, ' ' | '\t') {
2746                break;
2747            }
2748            Self::push_capture_char(content, ch);
2749            self.advance();
2750        }
2751
2752        let mut cooked = String::new();
2753        let mut in_single = false;
2754        let mut in_double = false;
2755        let mut escaped = false;
2756        let mut saw_any = false;
2757
2758        while let Some(ch) = self.peek_char() {
2759            if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
2760                break;
2761            }
2762
2763            saw_any = true;
2764            Self::push_capture_char(content, ch);
2765            self.advance();
2766
2767            if escaped {
2768                cooked.push(ch);
2769                escaped = false;
2770                continue;
2771            }
2772
2773            match ch {
2774                '\\' if !in_single => escaped = true,
2775                '\'' if !in_double => in_single = !in_single,
2776                '"' if !in_single => in_double = !in_double,
2777                _ => cooked.push(ch),
2778            }
2779        }
2780
2781        saw_any.then_some(cooked)
2782    }
2783
2784    fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2785        Self::push_capture_char(content, '`');
2786        self.advance();
2787        while let Some(ch) = self.peek_char() {
2788            Self::push_capture_char(content, ch);
2789            self.advance();
2790            if ch == '\\' {
2791                if let Some(esc) = self.peek_char() {
2792                    Self::push_capture_char(content, esc);
2793                    self.advance();
2794                }
2795                continue;
2796            }
2797            if ch == '`' {
2798                break;
2799            }
2800        }
2801    }
2802
2803    fn read_command_subst_pending_heredoc_into(
2804        &mut self,
2805        content: &mut Option<String>,
2806        delimiter: &str,
2807        strip_tabs: bool,
2808    ) -> bool {
2809        loop {
2810            let mut line = String::new();
2811            let mut saw_newline = false;
2812
2813            while let Some(ch) = self.peek_char() {
2814                self.advance();
2815                if ch == '\n' {
2816                    saw_newline = true;
2817                    break;
2818                }
2819                line.push(ch);
2820            }
2821
2822            Self::push_capture_str(content, &line);
2823            if saw_newline {
2824                Self::push_capture_char(content, '\n');
2825            }
2826
2827            if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2828                return true;
2829            }
2830        }
2831    }
2832
2833    fn read_command_subst_into_depth(
2834        &mut self,
2835        content: &mut Option<String>,
2836        subst_depth: usize,
2837    ) -> bool {
2838        if subst_depth >= self.max_subst_depth {
2839            // Depth limit exceeded — consume until matching ')' and emit error token
2840            let mut depth = 1;
2841            while let Some(c) = self.peek_char() {
2842                self.advance();
2843                match c {
2844                    '(' => depth += 1,
2845                    ')' => {
2846                        depth -= 1;
2847                        if depth == 0 {
2848                            Self::push_capture_char(content, ')');
2849                            return true;
2850                        }
2851                    }
2852                    _ => {}
2853                }
2854            }
2855            return false;
2856        }
2857
2858        let mut depth = 1;
2859        let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
2860        let mut pending_case_headers = 0usize;
2861        let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
2862        let mut current_word = String::with_capacity(16);
2863        let mut at_command_start = true;
2864        let mut expecting_redirection_target = false;
2865        let mut current_word_started_at_command_start = false;
2866        while let Some(c) = self.peek_char() {
2867            match c {
2868                '#' if !self.should_treat_hash_as_word_char() => {
2869                    let had_word = !current_word.is_empty();
2870                    Self::flush_command_subst_keyword(
2871                        &mut current_word,
2872                        &mut pending_case_headers,
2873                        &mut case_clause_depths,
2874                        depth,
2875                        &mut current_word_started_at_command_start,
2876                    );
2877                    if had_word && expecting_redirection_target {
2878                        expecting_redirection_target = false;
2879                    }
2880                    Self::push_capture_char(content, '#');
2881                    self.advance();
2882                    while let Some(comment_ch) = self.peek_char() {
2883                        Self::push_capture_char(content, comment_ch);
2884                        self.advance();
2885                        if comment_ch == '\n' {
2886                            for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
2887                                if !self.read_command_subst_pending_heredoc_into(
2888                                    content, &delimiter, strip_tabs,
2889                                ) {
2890                                    return false;
2891                                }
2892                            }
2893                            at_command_start = true;
2894                            expecting_redirection_target = false;
2895                            break;
2896                        }
2897                    }
2898                }
2899                '(' => {
2900                    Self::flush_command_subst_keyword(
2901                        &mut current_word,
2902                        &mut pending_case_headers,
2903                        &mut case_clause_depths,
2904                        depth,
2905                        &mut current_word_started_at_command_start,
2906                    );
2907                    depth += 1;
2908                    Self::push_capture_char(content, c);
2909                    self.advance();
2910                    at_command_start = true;
2911                    expecting_redirection_target = false;
2912                }
2913                ')' => {
2914                    Self::flush_command_subst_keyword(
2915                        &mut current_word,
2916                        &mut pending_case_headers,
2917                        &mut case_clause_depths,
2918                        depth,
2919                        &mut current_word_started_at_command_start,
2920                    );
2921                    if case_clause_depths
2922                        .last()
2923                        .is_some_and(|case_depth| *case_depth == depth)
2924                    {
2925                        Self::push_capture_char(content, ')');
2926                        self.advance();
2927                        at_command_start = true;
2928                        expecting_redirection_target = false;
2929                        continue;
2930                    }
2931                    depth -= 1;
2932                    self.advance();
2933                    if depth == 0 {
2934                        Self::push_capture_char(content, ')');
2935                        return true;
2936                    }
2937                    Self::push_capture_char(content, c);
2938                    at_command_start = false;
2939                    expecting_redirection_target = false;
2940                }
2941                '"' => {
2942                    let had_word = !current_word.is_empty();
2943                    Self::flush_command_subst_keyword(
2944                        &mut current_word,
2945                        &mut pending_case_headers,
2946                        &mut case_clause_depths,
2947                        depth,
2948                        &mut current_word_started_at_command_start,
2949                    );
2950                    if had_word && expecting_redirection_target {
2951                        expecting_redirection_target = false;
2952                    }
2953                    // Nested double-quoted string inside $()
2954                    Self::push_capture_char(content, '"');
2955                    self.advance();
2956                    while let Some(qc) = self.peek_char() {
2957                        match qc {
2958                            '"' => {
2959                                Self::push_capture_char(content, '"');
2960                                self.advance();
2961                                break;
2962                            }
2963                            '\\' => {
2964                                Self::push_capture_char(content, '\\');
2965                                self.advance();
2966                                if let Some(esc) = self.peek_char() {
2967                                    Self::push_capture_char(content, esc);
2968                                    self.advance();
2969                                }
2970                            }
2971                            '$' => {
2972                                Self::push_capture_char(content, '$');
2973                                self.advance();
2974                                if self.peek_char() == Some('(') {
2975                                    if self.second_char() == Some('(') {
2976                                        if !self.read_arithmetic_expansion_into(content) {
2977                                            return false;
2978                                        }
2979                                    } else {
2980                                        Self::push_capture_char(content, '(');
2981                                        self.advance();
2982                                        if !self
2983                                            .read_command_subst_into_depth(content, subst_depth + 1)
2984                                        {
2985                                            return false;
2986                                        }
2987                                    }
2988                                }
2989                            }
2990                            _ => {
2991                                Self::push_capture_char(content, qc);
2992                                self.advance();
2993                            }
2994                        }
2995                    }
2996                    if expecting_redirection_target {
2997                        expecting_redirection_target = false;
2998                    } else {
2999                        at_command_start = false;
3000                    }
3001                }
3002                '\'' => {
3003                    let had_word = !current_word.is_empty();
3004                    Self::flush_command_subst_keyword(
3005                        &mut current_word,
3006                        &mut pending_case_headers,
3007                        &mut case_clause_depths,
3008                        depth,
3009                        &mut current_word_started_at_command_start,
3010                    );
3011                    if had_word && expecting_redirection_target {
3012                        expecting_redirection_target = false;
3013                    }
3014                    // Single-quoted string inside $()
3015                    Self::push_capture_char(content, '\'');
3016                    self.advance();
3017                    while let Some(qc) = self.peek_char() {
3018                        Self::push_capture_char(content, qc);
3019                        self.advance();
3020                        if qc == '\'' {
3021                            break;
3022                        }
3023                    }
3024                    if expecting_redirection_target {
3025                        expecting_redirection_target = false;
3026                    } else {
3027                        at_command_start = false;
3028                    }
3029                }
3030                '`' => {
3031                    let had_word = !current_word.is_empty();
3032                    Self::flush_command_subst_keyword(
3033                        &mut current_word,
3034                        &mut pending_case_headers,
3035                        &mut case_clause_depths,
3036                        depth,
3037                        &mut current_word_started_at_command_start,
3038                    );
3039                    if had_word && expecting_redirection_target {
3040                        expecting_redirection_target = false;
3041                    }
3042                    self.read_command_subst_backtick_segment_into(content);
3043                    if expecting_redirection_target {
3044                        expecting_redirection_target = false;
3045                    } else {
3046                        at_command_start = false;
3047                    }
3048                }
3049                '$' if self.second_char() == Some('\'') => {
3050                    let had_word = !current_word.is_empty();
3051                    Self::flush_command_subst_keyword(
3052                        &mut current_word,
3053                        &mut pending_case_headers,
3054                        &mut case_clause_depths,
3055                        depth,
3056                        &mut current_word_started_at_command_start,
3057                    );
3058                    if had_word && expecting_redirection_target {
3059                        expecting_redirection_target = false;
3060                    }
3061                    Self::push_capture_char(content, '$');
3062                    self.advance();
3063                    Self::push_capture_char(content, '\'');
3064                    self.advance();
3065                    while let Some(qc) = self.peek_char() {
3066                        Self::push_capture_char(content, qc);
3067                        self.advance();
3068                        if qc == '\\' {
3069                            if let Some(esc) = self.peek_char() {
3070                                Self::push_capture_char(content, esc);
3071                                self.advance();
3072                            }
3073                            continue;
3074                        }
3075                        if qc == '\'' {
3076                            break;
3077                        }
3078                    }
3079                    if expecting_redirection_target {
3080                        expecting_redirection_target = false;
3081                    } else {
3082                        at_command_start = false;
3083                    }
3084                }
3085                '\\' => {
3086                    let had_word = !current_word.is_empty();
3087                    Self::flush_command_subst_keyword(
3088                        &mut current_word,
3089                        &mut pending_case_headers,
3090                        &mut case_clause_depths,
3091                        depth,
3092                        &mut current_word_started_at_command_start,
3093                    );
3094                    if had_word && expecting_redirection_target {
3095                        expecting_redirection_target = false;
3096                    }
3097                    Self::push_capture_char(content, '\\');
3098                    self.advance();
3099                    if let Some(esc) = self.peek_char() {
3100                        Self::push_capture_char(content, esc);
3101                        self.advance();
3102                    }
3103                    if expecting_redirection_target {
3104                        expecting_redirection_target = false;
3105                    } else {
3106                        at_command_start = false;
3107                    }
3108                }
3109                '<' if self.second_char() == Some('<') => {
3110                    let word_was_redirection_fd = current_word_started_at_command_start
3111                        && !current_word.is_empty()
3112                        && current_word.chars().all(|current| current.is_ascii_digit());
3113                    Self::flush_command_subst_keyword(
3114                        &mut current_word,
3115                        &mut pending_case_headers,
3116                        &mut case_clause_depths,
3117                        depth,
3118                        &mut current_word_started_at_command_start,
3119                    );
3120                    if word_was_redirection_fd {
3121                        at_command_start = true;
3122                    }
3123
3124                    Self::push_capture_char(content, '<');
3125                    self.advance();
3126                    Self::push_capture_char(content, '<');
3127                    self.advance();
3128
3129                    if self.peek_char() == Some('<') {
3130                        Self::push_capture_char(content, '<');
3131                        self.advance();
3132                        expecting_redirection_target = true;
3133                        continue;
3134                    }
3135
3136                    let strip_tabs = if self.peek_char() == Some('-') {
3137                        Self::push_capture_char(content, '-');
3138                        self.advance();
3139                        true
3140                    } else {
3141                        false
3142                    };
3143
3144                    if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
3145                    {
3146                        pending_heredocs.push((delimiter, strip_tabs));
3147                        expecting_redirection_target = false;
3148                    } else {
3149                        expecting_redirection_target = true;
3150                    }
3151                }
3152                '>' | '<' => {
3153                    let word_was_redirection_fd = current_word_started_at_command_start
3154                        && !current_word.is_empty()
3155                        && current_word.chars().all(|current| current.is_ascii_digit());
3156                    Self::flush_command_subst_keyword(
3157                        &mut current_word,
3158                        &mut pending_case_headers,
3159                        &mut case_clause_depths,
3160                        depth,
3161                        &mut current_word_started_at_command_start,
3162                    );
3163                    if word_was_redirection_fd {
3164                        at_command_start = true;
3165                    }
3166                    Self::push_capture_char(content, c);
3167                    self.advance();
3168                    expecting_redirection_target = true;
3169                }
3170                '\n' => {
3171                    Self::flush_command_subst_keyword(
3172                        &mut current_word,
3173                        &mut pending_case_headers,
3174                        &mut case_clause_depths,
3175                        depth,
3176                        &mut current_word_started_at_command_start,
3177                    );
3178                    Self::push_capture_char(content, '\n');
3179                    self.advance();
3180                    for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
3181                        if !self.read_command_subst_pending_heredoc_into(
3182                            content, &delimiter, strip_tabs,
3183                        ) {
3184                            return false;
3185                        }
3186                    }
3187                    at_command_start = true;
3188                    expecting_redirection_target = false;
3189                }
3190                _ => {
3191                    if c.is_ascii_alphanumeric() || c == '_' {
3192                        if current_word.is_empty()
3193                            && !expecting_redirection_target
3194                            && at_command_start
3195                        {
3196                            current_word_started_at_command_start = true;
3197                            at_command_start = false;
3198                        }
3199                        current_word.push(c);
3200                    } else {
3201                        let had_word = !current_word.is_empty();
3202                        Self::flush_command_subst_keyword(
3203                            &mut current_word,
3204                            &mut pending_case_headers,
3205                            &mut case_clause_depths,
3206                            depth,
3207                            &mut current_word_started_at_command_start,
3208                        );
3209                        if had_word && expecting_redirection_target {
3210                            expecting_redirection_target = false;
3211                        }
3212                        match c {
3213                            ' ' | '\t' => {}
3214                            ';' | '|' | '&' => {
3215                                at_command_start = true;
3216                                expecting_redirection_target = false;
3217                            }
3218                            _ => {
3219                                if !expecting_redirection_target {
3220                                    at_command_start = false;
3221                                }
3222                            }
3223                        }
3224                    }
3225                    Self::push_capture_char(content, c);
3226                    self.advance();
3227                }
3228            }
3229        }
3230
3231        false
3232    }
3233
3234    /// Read parameter expansion content after `${`, handling nested braces and quotes.
3235    /// In bash, quotes inside `${...}` (e.g. `${arr["key"]}`) don't terminate the
3236    /// outer double-quoted string. Appends chars including closing `}` to `content`.
3237    fn read_param_expansion_into(
3238        &mut self,
3239        content: &mut Option<String>,
3240        segment_start: Position,
3241    ) -> bool {
3242        let mut borrowable = true;
3243        let mut depth = 1;
3244        let mut literal_brace_depth = 0usize;
3245        let mut in_single = false;
3246        let mut in_double = false;
3247        let mut double_quote_depth = 0usize;
3248        while let Some(c) = self.peek_char() {
3249            if in_single {
3250                match c {
3251                    '\\' => {
3252                        let escape_start = self.current_position();
3253                        if self.second_char() == Some('"') {
3254                            self.advance();
3255                            borrowable = false;
3256                            self.ensure_capture_from_source(content, segment_start, escape_start);
3257                            Self::push_capture_char(content, '"');
3258                            self.advance();
3259                        } else {
3260                            Self::push_capture_char(content, '\\');
3261                            self.advance();
3262                        }
3263                    }
3264                    '\'' => {
3265                        Self::push_capture_char(content, c);
3266                        self.advance();
3267                        in_single = false;
3268                    }
3269                    _ => {
3270                        Self::push_capture_char(content, c);
3271                        self.advance();
3272                    }
3273                }
3274                continue;
3275            }
3276
3277            match c {
3278                '}' if !in_single && (!in_double || depth > double_quote_depth) => {
3279                    self.advance();
3280                    Self::push_capture_char(content, '}');
3281                    if depth == 1
3282                        && literal_brace_depth > 0
3283                        && self.has_later_top_level_param_expansion_closer(depth)
3284                    {
3285                        literal_brace_depth -= 1;
3286                        continue;
3287                    }
3288                    depth -= 1;
3289                    if depth == 0 {
3290                        break;
3291                    }
3292                }
3293                '{' if !in_single && !in_double => {
3294                    literal_brace_depth += 1;
3295                    Self::push_capture_char(content, '{');
3296                    self.advance();
3297                }
3298                '"' => {
3299                    // Quotes inside ${...} are part of the expansion, not string delimiters
3300                    Self::push_capture_char(content, '"');
3301                    self.advance();
3302                    in_double = !in_double;
3303                    double_quote_depth = if in_double { depth } else { 0 };
3304                }
3305                '\'' => {
3306                    Self::push_capture_char(content, '\'');
3307                    self.advance();
3308                    if !in_double {
3309                        in_single = true;
3310                    }
3311                }
3312                '\\' => {
3313                    // Inside ${...} within double quotes, same escape rules apply:
3314                    // \", \\, \$, \` produce the escaped char; others keep backslash
3315                    let escape_start = self.current_position();
3316                    self.advance();
3317                    if let Some(esc) = self.peek_char() {
3318                        match esc {
3319                            '$' => {
3320                                borrowable = false;
3321                                self.ensure_capture_from_source(
3322                                    content,
3323                                    segment_start,
3324                                    escape_start,
3325                                );
3326                                Self::push_capture_char(content, '\x00');
3327                                Self::push_capture_char(content, '$');
3328                                self.advance();
3329                            }
3330                            '"' | '\\' | '`' => {
3331                                borrowable = false;
3332                                self.ensure_capture_from_source(
3333                                    content,
3334                                    segment_start,
3335                                    escape_start,
3336                                );
3337                                Self::push_capture_char(content, esc);
3338                                self.advance();
3339                            }
3340                            '}' => {
3341                                // \} should be a literal } without closing the expansion
3342                                Self::push_capture_char(content, '\\');
3343                                Self::push_capture_char(content, '}');
3344                                self.advance();
3345                                literal_brace_depth = literal_brace_depth.saturating_sub(1);
3346                            }
3347                            _ => {
3348                                Self::push_capture_char(content, '\\');
3349                                Self::push_capture_char(content, esc);
3350                                self.advance();
3351                            }
3352                        }
3353                    } else {
3354                        Self::push_capture_char(content, '\\');
3355                    }
3356                }
3357                '$' => {
3358                    Self::push_capture_char(content, '$');
3359                    self.advance();
3360                    if self.peek_char() == Some('(') {
3361                        if self.second_char() == Some('(') {
3362                            if !self.read_arithmetic_expansion_into(content) {
3363                                borrowable = false;
3364                            }
3365                        } else {
3366                            Self::push_capture_char(content, '(');
3367                            self.advance();
3368                            self.read_command_subst_into(content);
3369                        }
3370                    } else if self.peek_char() == Some('{') {
3371                        Self::push_capture_char(content, '{');
3372                        self.advance();
3373                        borrowable &= self.read_param_expansion_into(content, segment_start);
3374                    }
3375                }
3376                _ => {
3377                    Self::push_capture_char(content, c);
3378                    self.advance();
3379                }
3380            }
3381        }
3382        borrowable
3383    }
3384
3385    fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
3386        let mut chars = self.lookahead_chars().peekable();
3387        let mut depth = target_depth;
3388        let mut in_single = false;
3389        let mut in_double = false;
3390        let mut double_quote_depth = 0usize;
3391
3392        while let Some(ch) = chars.next() {
3393            if in_single {
3394                match ch {
3395                    '\'' => in_single = false,
3396                    '\\' if chars.peek() == Some(&'"') => {
3397                        chars.next();
3398                    }
3399                    '\\' => {}
3400                    _ => {}
3401                }
3402                continue;
3403            }
3404
3405            if in_double {
3406                match ch {
3407                    '"' => {
3408                        in_double = false;
3409                        double_quote_depth = 0;
3410                    }
3411                    '\\' => {
3412                        chars.next();
3413                    }
3414                    '$' if chars.peek() == Some(&'{') => {
3415                        chars.next();
3416                        depth += 1;
3417                    }
3418                    '}' if depth > double_quote_depth => {
3419                        depth -= 1;
3420                    }
3421                    _ => {}
3422                }
3423                continue;
3424            }
3425
3426            match ch {
3427                '\n' if depth == target_depth => return false,
3428                '\'' => in_single = true,
3429                '"' => {
3430                    in_double = true;
3431                    double_quote_depth = depth;
3432                }
3433                '\\' => {
3434                    chars.next();
3435                }
3436                '$' if chars.peek() == Some(&'{') => {
3437                    chars.next();
3438                    depth += 1;
3439                }
3440                '}' => {
3441                    if depth == target_depth {
3442                        return true;
3443                    }
3444                    depth -= 1;
3445                }
3446                _ => {}
3447            }
3448        }
3449
3450        false
3451    }
3452
3453    /// Check if the content starting with { looks like a brace expansion
3454    /// Brace expansion: {a,b,c} or {1..5} (contains , or ..)
3455    /// Brace group: { cmd; } (contains spaces, semicolons, newlines)
3456    /// Caps lookahead to prevent O(n^2) scanning when input
3457    /// contains many unmatched `{` characters (issue #997).
3458    fn looks_like_brace_expansion(&self) -> bool {
3459        const MAX_LOOKAHEAD: usize = 10_000;
3460
3461        let mut chars = self.lookahead_chars();
3462
3463        // Skip the opening {
3464        if chars.next() != Some('{') {
3465            return false;
3466        }
3467
3468        let mut depth = 1;
3469        let mut paren_depth = 0usize;
3470        let mut has_comma = false;
3471        let mut has_dot_dot = false;
3472        let mut escaped = false;
3473        let mut in_single = false;
3474        let mut in_double = false;
3475        let mut in_backtick = false;
3476        let mut prev_char = None;
3477        let mut scanned = 0usize;
3478
3479        for ch in chars {
3480            scanned += 1;
3481            if scanned > MAX_LOOKAHEAD {
3482                return false;
3483            }
3484
3485            let brace_surface_active = !in_single && !in_double && !in_backtick;
3486            let at_top_level = depth == 1 && paren_depth == 0 && brace_surface_active;
3487
3488            match ch {
3489                _ if escaped => {
3490                    escaped = false;
3491                }
3492                '\\' if !in_single => escaped = true,
3493                '\'' if !in_double && !in_backtick => in_single = !in_single,
3494                '"' if !in_single && !in_backtick => in_double = !in_double,
3495                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3496                '(' if brace_surface_active && (paren_depth > 0 || prev_char == Some('$')) => {
3497                    paren_depth += 1
3498                }
3499                ')' if brace_surface_active && paren_depth > 0 => paren_depth -= 1,
3500                '{' if !in_single && !in_double && !in_backtick => depth += 1,
3501                '}' if !in_single && !in_double && !in_backtick => {
3502                    depth -= 1;
3503                    if depth == 0 {
3504                        // Found matching }, check if we have brace expansion markers
3505                        return has_comma || has_dot_dot;
3506                    }
3507                }
3508                ',' if at_top_level => has_comma = true,
3509                '.' if at_top_level && prev_char == Some('.') => has_dot_dot = true,
3510                // Brace groups have whitespace/newlines/semicolons at depth 1
3511                ' ' | '\t' | '\n' | ';' if at_top_level => return false,
3512                _ => {}
3513            }
3514            prev_char = Some(ch);
3515        }
3516
3517        false
3518    }
3519
3520    fn consume_mid_word_brace_segment(&mut self, word: &mut Option<String>) {
3521        let mut brace_depth = 1usize;
3522        let mut paren_depth = 0usize;
3523        let mut escaped = false;
3524        let mut in_single = false;
3525        let mut in_double = false;
3526        let mut in_backtick = false;
3527        let mut prev_char = None;
3528
3529        while let Some(ch) = self.peek_char() {
3530            Self::push_capture_char(word, ch);
3531            self.advance();
3532
3533            if escaped {
3534                escaped = false;
3535                prev_char = Some(ch);
3536                continue;
3537            }
3538
3539            match ch {
3540                '\\' if !in_single => escaped = true,
3541                '\'' if !in_double && !in_backtick => in_single = !in_single,
3542                '"' if !in_single && !in_backtick => in_double = !in_double,
3543                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3544                '(' if !in_single
3545                    && !in_double
3546                    && !in_backtick
3547                    && (paren_depth > 0 || prev_char == Some('$')) =>
3548                {
3549                    paren_depth += 1
3550                }
3551                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3552                    paren_depth -= 1
3553                }
3554                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3555                '}' if !in_single && !in_double && !in_backtick => {
3556                    brace_depth -= 1;
3557                    if brace_depth == 0 {
3558                        break;
3559                    }
3560                }
3561                _ => {}
3562            }
3563
3564            prev_char = Some(ch);
3565        }
3566    }
3567
3568    fn consume_brace_word_body(&mut self, word: &mut String) {
3569        let mut brace_depth = 1usize;
3570        let mut paren_depth = 0usize;
3571        let mut escaped = false;
3572        let mut in_single = false;
3573        let mut in_double = false;
3574        let mut in_backtick = false;
3575        let mut prev_char = None;
3576
3577        while let Some(ch) = self.peek_char() {
3578            word.push(ch);
3579            self.advance();
3580
3581            if escaped {
3582                escaped = false;
3583                prev_char = Some(ch);
3584                continue;
3585            }
3586
3587            match ch {
3588                '\\' if !in_single => escaped = true,
3589                '\'' if !in_double && !in_backtick => in_single = !in_single,
3590                '"' if !in_single && !in_backtick => in_double = !in_double,
3591                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3592                '(' if !in_single
3593                    && !in_double
3594                    && !in_backtick
3595                    && (paren_depth > 0 || prev_char == Some('$')) =>
3596                {
3597                    paren_depth += 1
3598                }
3599                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3600                    paren_depth -= 1
3601                }
3602                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3603                '}' if !in_single && !in_double && !in_backtick => {
3604                    brace_depth -= 1;
3605                    if brace_depth == 0 {
3606                        break;
3607                    }
3608                }
3609                _ => {}
3610            }
3611
3612            prev_char = Some(ch);
3613        }
3614    }
3615
3616    /// Check whether a mid-word `{...}` segment can stay attached to the current
3617    /// word without crossing a top-level word boundary.
3618    fn looks_like_mid_word_brace_segment(&self) -> bool {
3619        const MAX_LOOKAHEAD: usize = 10_000;
3620
3621        let mut chars = self.lookahead_chars();
3622        if chars.next() != Some('{') {
3623            return false;
3624        }
3625
3626        let mut brace_depth = 1;
3627        let mut paren_depth = 0usize;
3628        let mut escaped = false;
3629        let mut in_single = false;
3630        let mut in_double = false;
3631        let mut in_backtick = false;
3632        let mut prev_char = None;
3633        let mut scanned = 0usize;
3634
3635        for ch in chars {
3636            scanned += 1;
3637            if scanned > MAX_LOOKAHEAD {
3638                return false;
3639            }
3640
3641            if !in_single
3642                && !in_double
3643                && !in_backtick
3644                && !escaped
3645                && brace_depth == 1
3646                && paren_depth == 0
3647                && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
3648            {
3649                return false;
3650            }
3651
3652            if escaped {
3653                escaped = false;
3654                prev_char = Some(ch);
3655                continue;
3656            }
3657
3658            match ch {
3659                '\\' => escaped = true,
3660                '\'' if !in_double && !in_backtick => in_single = !in_single,
3661                '"' if !in_single && !in_backtick => in_double = !in_double,
3662                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3663                '(' if !in_single
3664                    && !in_double
3665                    && !in_backtick
3666                    && (paren_depth > 0 || prev_char == Some('$')) =>
3667                {
3668                    paren_depth += 1
3669                }
3670                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3671                    paren_depth -= 1
3672                }
3673                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3674                '}' if !in_single && !in_double && !in_backtick => {
3675                    brace_depth -= 1;
3676                    if brace_depth == 0 {
3677                        return true;
3678                    }
3679                }
3680                _ => {}
3681            }
3682
3683            prev_char = Some(ch);
3684        }
3685
3686        false
3687    }
3688
3689    /// Check if { is followed by whitespace (brace group start)
3690    fn is_brace_group_start(&self) -> bool {
3691        let mut chars = self.lookahead_chars();
3692        // Skip the opening {
3693        if chars.next() != Some('{') {
3694            return false;
3695        }
3696        // If next char is whitespace or newline, it's a brace group
3697        matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3698    }
3699
3700    /// Check whether the text after an escaped `{` looks like a brace-expansion
3701    /// surface that should stay attached to the current word, e.g. `\{a,b}`.
3702    fn escaped_brace_sequence_looks_like_brace_expansion(&self) -> bool {
3703        const MAX_LOOKAHEAD: usize = 10_000;
3704
3705        let mut chars = self.lookahead_chars();
3706        let mut depth = 1;
3707        let mut has_comma = false;
3708        let mut has_dot_dot = false;
3709        let mut prev_char = None;
3710        let mut scanned = 0usize;
3711
3712        for ch in chars.by_ref() {
3713            scanned += 1;
3714            if scanned > MAX_LOOKAHEAD {
3715                return false;
3716            }
3717            match ch {
3718                '{' => depth += 1,
3719                '}' => {
3720                    depth -= 1;
3721                    if depth == 0 {
3722                        return has_comma || has_dot_dot;
3723                    }
3724                }
3725                ',' if depth == 1 => has_comma = true,
3726                '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3727                ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3728                _ => {}
3729            }
3730            prev_char = Some(ch);
3731        }
3732
3733        false
3734    }
3735
3736    fn brace_literal_starts_case_pattern_delimiter(&self) -> bool {
3737        let mut chars = self.lookahead_chars();
3738        if chars.next() != Some('{') {
3739            return false;
3740        }
3741        chars.next() == Some(')')
3742    }
3743
3744    /// Read a {literal} pattern without comma/dot-dot as a word
3745    fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
3746        let mut word = String::with_capacity(16);
3747
3748        if let Some('{') = self.peek_char() {
3749            word.push('{');
3750            self.advance();
3751        } else {
3752            return None;
3753        }
3754
3755        self.consume_brace_word_body(&mut word);
3756
3757        while let Some(ch) = self.peek_char() {
3758            if Self::is_word_char(ch) {
3759                if self.reinject_buf.is_empty() {
3760                    let chunk = self.cursor.eat_while(Self::is_word_char);
3761                    word.push_str(chunk);
3762                    self.advance_scanned_source_bytes(chunk.len());
3763                } else {
3764                    word.push(ch);
3765                    self.advance();
3766                }
3767            } else {
3768                break;
3769            }
3770        }
3771
3772        Some(LexedToken::owned_word(TokenKind::Word, word))
3773    }
3774
3775    /// Read a brace expansion pattern as a word
3776    fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3777        let mut word = String::with_capacity(16);
3778
3779        // Read the opening {
3780        if let Some('{') = self.peek_char() {
3781            word.push('{');
3782            self.advance();
3783        } else {
3784            return None;
3785        }
3786
3787        // Read until matching }
3788        self.consume_brace_word_body(&mut word);
3789
3790        // Continue reading any suffix after the brace pattern
3791        while let Some(ch) = self.peek_char() {
3792            if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3793                if ch == '{' {
3794                    // Another brace pattern - include it
3795                    word.push(ch);
3796                    self.advance();
3797                    self.consume_brace_word_body(&mut word);
3798                } else {
3799                    word.push(ch);
3800                    self.advance();
3801                }
3802            } else {
3803                break;
3804            }
3805        }
3806
3807        Some(LexedToken::owned_word(TokenKind::Word, word))
3808    }
3809
3810    /// Peek ahead (without consuming) to see if `=(` starts an associative
3811    /// compound assignment like `([key]=val ...)`.  Returns true when the
3812    /// first non-whitespace char after `(` is `[`.
3813    fn looks_like_assoc_assign(&self) -> bool {
3814        let mut chars = self.lookahead_chars();
3815        // Skip the `(` we haven't consumed yet
3816        if chars.next() != Some('(') {
3817            return false;
3818        }
3819        // Skip optional whitespace
3820        for ch in chars {
3821            match ch {
3822                ' ' | '\t' => continue,
3823                '[' => return true,
3824                _ => return false,
3825            }
3826        }
3827        false
3828    }
3829
3830    fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3831        text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3832    }
3833
3834    fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3835        word.segments().any(|segment| {
3836            matches!(
3837                segment.kind(),
3838                LexedWordSegmentKind::SingleQuoted
3839                    | LexedWordSegmentKind::DollarSingleQuoted
3840                    | LexedWordSegmentKind::DoubleQuoted
3841                    | LexedWordSegmentKind::DollarDoubleQuoted
3842            )
3843        }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3844    }
3845
3846    fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3847        text.contains(['*', '?'])
3848            || text.ends_with('}') && text.contains("${")
3849            || text.ends_with(']')
3850                && text
3851                    .rfind('[')
3852                    .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3853    }
3854
3855    fn is_word_char(ch: char) -> bool {
3856        !matches!(
3857            ch,
3858            ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3859        )
3860    }
3861
3862    const fn is_ascii_word_byte(byte: u8) -> bool {
3863        !matches!(
3864            byte,
3865            b' ' | b'\t'
3866                | b'\n'
3867                | b';'
3868                | b'|'
3869                | b'&'
3870                | b'>'
3871                | b'<'
3872                | b'('
3873                | b')'
3874                | b'{'
3875                | b'}'
3876                | b'\''
3877                | b'"'
3878        )
3879    }
3880
3881    const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3882        Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3883    }
3884
3885    fn is_plain_word_char(ch: char) -> bool {
3886        Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3887    }
3888
    /// Read here document content until the delimiter line is found.
    ///
    /// The work happens in three phases:
    /// 1. Capture the remainder of the command line that follows the heredoc
    ///    operator (the "tail"), tracking quotes, comments, and backslash
    ///    line joins so a tail that spans several physical lines stays intact.
    /// 2. Collect body lines into `content` until a line satisfies
    ///    `heredoc_line_matches_delimiter`, or the input ends.
    /// 3. Queue the captured tail (plus a newline) in `reinject_buf` so the
    ///    parser still sees the tokens that shared a line with the heredoc.
    ///
    /// `delimiter` is compared against every body line. `strip_tabs` is only
    /// forwarded to that comparison; body lines are stored unmodified.
    ///
    /// Returns the body text together with the span from the first body
    /// position to the start of the delimiter line. When the input ends before
    /// a delimiter line is seen, the span runs to the final position instead.
    pub(super) fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
        let mut content = String::with_capacity(64);
        // Only used while characters come from the reinject buffer, where the
        // body has to be assembled one character at a time.
        let mut current_line = String::with_capacity(64);

        // Save rest of current line (after the delimiter token on the command line).
        // For `cat <<EOF | sort`, this captures ` | sort` so the parser can
        // tokenize the pipe and subsequent command after the heredoc body.
        //
        // Quoted strings may span multiple lines (e.g., `cat <<EOF; echo "two\nthree"`),
        // so we track quoting state and continue across newlines until quotes close.
        let mut rest_of_line = String::with_capacity(32);
        let rest_of_line_start = self.current_position();
        let mut in_double_quote = false;
        let mut in_single_quote = false;
        let mut in_comment = false;
        let mut saw_non_whitespace_tail = false;
        let mut consecutive_backslashes = 0usize;
        let mut previous_tail_char = None;
        while let Some(ch) = self.peek_char() {
            self.advance();
            // Inside a comment everything up to the newline belongs to the
            // tail verbatim; quotes and backslashes have no effect here.
            if in_comment {
                if ch == '\n' {
                    break;
                }
                rest_of_line.push(ch);
                previous_tail_char = Some(ch);
                continue;
            }
            // An unquoted `#` opens a comment only when the preceding tail
            // character allows it (see `heredoc_tail_hash_starts_comment`).
            if ch == '#'
                && !in_single_quote
                && !in_double_quote
                && self.comments_enabled()
                && heredoc_tail_hash_starts_comment(previous_tail_char)
            {
                in_comment = true;
                rest_of_line.push(ch);
                previous_tail_char = Some(ch);
                consecutive_backslashes = 0;
                continue;
            }
            // A backslash directly before the newline joins the next physical
            // line into the tail, but only when it is not itself escaped (an
            // even number of backslashes precedes it) and either the tail
            // already has content or the next line continues the command.
            let backslash_continues_line = ch == '\\'
                && !in_single_quote
                && self.peek_char() == Some('\n')
                && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
                && consecutive_backslashes.is_multiple_of(2);
            if backslash_continues_line {
                rest_of_line.push(ch);
                rest_of_line.push('\n');
                self.advance();
                consecutive_backslashes = 0;
                continue;
            }
            // An unquoted newline ends the tail; it is not stored because a
            // newline is always appended when the tail is re-injected below.
            if ch == '\n' && !in_double_quote && !in_single_quote {
                break;
            }
            if ch == '"' && !in_single_quote {
                in_double_quote = !in_double_quote;
            } else if ch == '\'' && !in_double_quote {
                in_single_quote = !in_single_quote;
            } else if ch == '\\' && in_double_quote {
                // Escaped char inside double quotes — skip the next char too
                rest_of_line.push(ch);
                if let Some(next) = self.peek_char() {
                    rest_of_line.push(next);
                    self.advance();
                }
                continue;
            }
            rest_of_line.push(ch);
            if !ch.is_whitespace() {
                saw_non_whitespace_tail = true;
            }
            if ch == '\\' && !in_single_quote {
                consecutive_backslashes += 1;
            } else {
                consecutive_backslashes = 0;
            }
            previous_tail_char = Some(ch);
        }

        // If we just drained a heredoc replay buffer (for example when multiple
        // heredocs share one command line), resume tracking from the true cursor
        // position before we measure the body span.
        self.sync_offset_to_cursor();
        let content_start = self.current_position();
        let mut current_line_start = content_start;
        // Assigned exactly once, on whichever path leaves the loop below.
        let content_end;

        // Read lines until we find the delimiter
        loop {
            // Fast path: nothing is queued for replay, so whole lines can be
            // sliced straight out of the source text.
            if self.reinject_buf.is_empty() {
                // When the body reading drains a reinject buffer (from a
                // previous heredoc on the same command line), the virtual
                // offset drifts away from the cursor. Snap it back before
                // any source-based work so spans and `post_heredoc_offset`
                // stay within bounds.
                self.sync_offset_to_cursor();
                let rest = self.cursor.rest();
                if rest.is_empty() {
                    content_end = self.current_position();
                    break;
                }

                let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
                let line = &rest[..line_len];
                let has_newline = line_len < rest.len();

                if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
                    // The body ends where the delimiter line begins; the
                    // delimiter line and its newline are consumed but not kept.
                    content_end = current_line_start;
                    self.consume_source_bytes(line_len);
                    if has_newline {
                        self.consume_ascii_chars(1);
                    }
                    break;
                }

                content.push_str(line);
                self.consume_source_bytes(line_len);

                if has_newline {
                    self.consume_ascii_chars(1);
                    content.push('\n');
                    current_line_start = self.current_position();
                    continue;
                }

                // Final line without a trailing newline and no delimiter seen.
                content_end = self.current_position();
                break;
            }

            // Slow path: characters are still pending in the reinject buffer,
            // so the current line is built up one character at a time.
            match self.peek_char() {
                Some('\n') => {
                    self.advance();
                    // Check if current line matches delimiter
                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
                        content_end = current_line_start;
                        break;
                    }
                    content.push_str(&current_line);
                    content.push('\n');
                    current_line.clear();
                    current_line_start = self.current_position();
                }
                Some(ch) => {
                    current_line.push(ch);
                    self.advance();
                }
                None => {
                    // End of input - check last line
                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
                        content_end = current_line_start;
                        break;
                    }
                    if !current_line.is_empty() {
                        content.push_str(&current_line);
                    }
                    content_end = self.current_position();
                    break;
                }
            }
        }

        // Re-inject the command-line tail so subsequent same-line tokens (pipes,
        // redirects, command words, additional heredocs) stay visible to the
        // parser. Always replay a terminating newline so parsing stops before
        // tokens that originally lived on later source lines, like `}` or `do`.
        //
        // The offset is rewound to where the tail started while it is replayed;
        // the offset reached after the body is stored in
        // `reinject_resume_offset`.
        let post_heredoc_offset = self.offset;
        self.offset = rest_of_line_start.offset;
        for ch in rest_of_line.chars() {
            self.reinject_buf.push_back(ch);
        }
        self.reinject_buf.push_back('\n');
        self.reinject_resume_offset = Some(post_heredoc_offset);

        HeredocRead {
            content,
            content_span: Span::from_positions(content_start, content_end),
        }
    }
4069
4070    fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
4071        let mut chars = self.cursor.rest().chars();
4072        if chars.next() != Some('\n') {
4073            return false;
4074        }
4075
4076        for ch in chars {
4077            if matches!(ch, ' ' | '\t') {
4078                continue;
4079            }
4080            if ch == '\n' {
4081                return false;
4082            }
4083            return matches!(ch, '|' | '&' | ';' | '<' | '>')
4084                || (ch == '#' && self.comments_enabled());
4085        }
4086
4087        false
4088    }
4089}
4090
/// Decides whether `line` terminates a heredoc whose delimiter is `delimiter`.
///
/// With `strip_tabs` set, leading tab characters are ignored before the
/// comparison. The line matches when it starts with the delimiter and
/// anything after it consists solely of spaces and tabs.
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    };

    // An exact match leaves an empty remainder, which trivially passes the
    // blank-only check, so no separate equality test is needed.
    candidate
        .strip_prefix(delimiter)
        .is_some_and(|remainder| remainder.bytes().all(|byte| byte == b' ' || byte == b'\t'))
}
4108
/// Reports whether a `#` in a heredoc command-line tail begins a comment,
/// judged by the tail character that precedes it.
///
/// A `#` at the very start of the tail (`None`) opens a comment, as does one
/// that follows whitespace or one of `;`, `|`, `&`, `<`, `>`, `)`.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(';' | '|' | '&' | '<' | '>' | ')') => true,
        Some(other) => other.is_whitespace(),
    }
}
4114
/// Decodes the character that starts at byte offset `index` of `input`.
///
/// Returns the character together with the byte offset just past it, or
/// `None` when `index` is at or beyond the end of the string or does not fall
/// on a character boundary.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    input
        .get(index..)
        .and_then(|tail| tail.chars().next())
        .map(|decoded| (decoded, index + decoded.len_utf8()))
}
4119
/// Reports whether `prefix` ends inside a `((` ... `))` group.
///
/// Every unquoted, unescaped `((` opens a group and every `))` closes one
/// (never dropping below zero). Single quotes, double quotes, and backticks
/// suppress the counting, and a backslash outside single quotes escapes the
/// character that follows it.
fn line_has_unclosed_double_paren(prefix: &str) -> bool {
    let mut open_groups = 0usize;
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut backticked = false;
    let mut backslash_pending = false;
    let mut chars = prefix.chars().peekable();

    while let Some(ch) = chars.next() {
        let was_escaped = backslash_pending;
        backslash_pending = false;

        if ch == '\\' && !single_quoted {
            // A backslash that was itself escaped does not escape anything.
            backslash_pending = !was_escaped;
            continue;
        }
        if was_escaped {
            // Escaped characters never toggle quotes or count as parens.
            continue;
        }

        match ch {
            '\'' if !double_quoted && !backticked => single_quoted = !single_quoted,
            '"' if !single_quoted && !backticked => double_quoted = !double_quoted,
            '`' if !single_quoted && !double_quoted => backticked = !backticked,
            '(' | ')' if !(single_quoted || double_quoted || backticked) => {
                // Only a doubled paren counts; the pair is consumed as a unit.
                if chars.next_if_eq(&ch).is_some() {
                    if ch == '(' {
                        open_groups += 1;
                    } else {
                        open_groups = open_groups.saturating_sub(1);
                    }
                }
            }
            _ => {}
        }
    }

    open_groups > 0
}
4169
4170fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4171    let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4172    let prefix = &input[line_start..index];
4173    line_has_unclosed_double_paren(prefix)
4174}
4175
4176fn hash_starts_comment(input: &str, index: usize) -> bool {
4177    if inside_unclosed_double_paren_on_line(input, index) {
4178        return false;
4179    }
4180
4181    let next = &input[index + '#'.len_utf8()..];
4182    input[..index]
4183        .chars()
4184        .next_back()
4185        .is_none_or(|prev| match prev {
4186            '(' => {
4187                let whitespace_index = next.find(char::is_whitespace);
4188                let close_index = next.find(')');
4189
4190                match (whitespace_index, close_index) {
4191                    (Some(whitespace), Some(close)) => whitespace < close,
4192                    (Some(_), None) | (None, None) => true,
4193                    (None, Some(_)) => false,
4194                }
4195            }
4196            _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4197        })
4198}
4199
/// Reports whether `ch` ends a heredoc delimiter word.
///
/// Inside single or double quotes, or directly after a backslash, nothing
/// terminates the word. Otherwise whitespace and the characters `|`, `&`,
/// `;`, `<`, `>`, `(`, `)` do.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }

    matches!(ch, '|' | '&' | ';' | '<' | '>' | '(' | ')') || ch.is_whitespace()
}
4211
4212fn scan_double_quoted_command_substitution_segment(
4213    input: &str,
4214    mut index: usize,
4215    subst_depth: usize,
4216) -> Option<usize> {
4217    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4218        match ch {
4219            '"' => return Some(next_index),
4220            '\\' => {
4221                index = next_index;
4222                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4223                    index = escaped_next;
4224                }
4225            }
4226            '$' if input[next_index..].starts_with('{') => {
4227                let consumed = scan_command_subst_parameter_expansion_len(
4228                    &input[next_index + '{'.len_utf8()..],
4229                    subst_depth,
4230                    0,
4231                )?;
4232                index = next_index + '{'.len_utf8() + consumed;
4233            }
4234            '$' if input[next_index..].starts_with('(')
4235                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4236            {
4237                let consumed = scan_command_substitution_body_len_inner(
4238                    &input[next_index + '('.len_utf8()..],
4239                    subst_depth + 1,
4240                )?;
4241                index = next_index + '('.len_utf8() + consumed;
4242            }
4243            _ => index = next_index,
4244        }
4245    }
4246
4247    None
4248}
4249
4250fn scan_command_subst_parameter_expansion_len(
4251    input: &str,
4252    subst_depth: usize,
4253    parameter_depth: usize,
4254) -> Option<usize> {
4255    if parameter_depth >= MAX_PARAMETER_EXPANSION_SCAN_DEPTH {
4256        return scan_command_subst_parameter_expansion_len_balanced(input, subst_depth);
4257    }
4258
4259    let mut index = 0usize;
4260    let mut in_single = false;
4261    let mut in_double = false;
4262    let mut in_ansi_c_single = false;
4263    let mut in_backtick = false;
4264    let mut escaped = false;
4265    let mut ansi_c_quote_pending = false;
4266
4267    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4268        let was_escaped = escaped;
4269        if ch == '\\' && !in_single {
4270            escaped = !escaped;
4271            index = next_index;
4272            ansi_c_quote_pending = false;
4273            continue;
4274        }
4275        escaped = false;
4276
4277        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
4278            if input[next_index..].starts_with('{')
4279                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
4280                    &input[next_index + '{'.len_utf8()..],
4281                    subst_depth,
4282                    parameter_depth + 1,
4283                )
4284            {
4285                index = next_index + '{'.len_utf8() + consumed;
4286                ansi_c_quote_pending = false;
4287                continue;
4288            }
4289
4290            if input[next_index..].starts_with('(')
4291                && !input[next_index + '('.len_utf8()..].starts_with('(')
4292                && let Some(consumed) = scan_command_substitution_body_len_inner(
4293                    &input[next_index + '('.len_utf8()..],
4294                    subst_depth + 1,
4295                )
4296            {
4297                index = next_index + '('.len_utf8() + consumed;
4298                ansi_c_quote_pending = false;
4299                continue;
4300            }
4301        }
4302
4303        if !in_single
4304            && !in_ansi_c_single
4305            && !in_double
4306            && !in_backtick
4307            && !was_escaped
4308            && matches!(ch, '<' | '>')
4309            && input[next_index..].starts_with('(')
4310            && let Some(consumed) = scan_command_substitution_body_len_inner(
4311                &input[next_index + '('.len_utf8()..],
4312                subst_depth + 1,
4313            )
4314        {
4315            index = next_index + '('.len_utf8() + consumed;
4316            ansi_c_quote_pending = false;
4317            continue;
4318        }
4319
4320        match ch {
4321            '\'' if !in_double && !in_backtick && !was_escaped => {
4322                if in_ansi_c_single {
4323                    in_ansi_c_single = false;
4324                } else if !in_single && ansi_c_quote_pending {
4325                    in_ansi_c_single = true;
4326                } else {
4327                    in_single = !in_single;
4328                }
4329            }
4330            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
4331                in_double = !in_double
4332            }
4333            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
4334                in_backtick = !in_backtick
4335            }
4336            '}' if !in_single
4337                && !in_ansi_c_single
4338                && !in_double
4339                && !in_backtick
4340                && !was_escaped =>
4341            {
4342                return Some(next_index);
4343            }
4344            _ => {}
4345        }
4346
4347        ansi_c_quote_pending = ch == '$'
4348            && !in_single
4349            && !in_ansi_c_single
4350            && !in_double
4351            && !in_backtick
4352            && !was_escaped;
4353        index = next_index;
4354    }
4355
4356    None
4357}
4358
4359fn scan_command_subst_parameter_expansion_len_balanced(
4360    input: &str,
4361    subst_depth: usize,
4362) -> Option<usize> {
4363    let mut index = 0usize;
4364    let mut brace_depth = 1usize;
4365    let mut in_single = false;
4366    let mut in_double = false;
4367    let mut in_ansi_c_single = false;
4368    let mut in_backtick = false;
4369    let mut escaped = false;
4370    let mut ansi_c_quote_pending = false;
4371
4372    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4373        let was_escaped = escaped;
4374        if ch == '\\' && !in_single {
4375            escaped = !escaped;
4376            index = next_index;
4377            ansi_c_quote_pending = false;
4378            continue;
4379        }
4380        escaped = false;
4381
4382        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
4383            if input[next_index..].starts_with('{') {
4384                brace_depth = brace_depth.saturating_add(1);
4385                index = next_index + '{'.len_utf8();
4386                ansi_c_quote_pending = false;
4387                continue;
4388            }
4389
4390            if input[next_index..].starts_with('(')
4391                && !input[next_index + '('.len_utf8()..].starts_with('(')
4392                && let Some(consumed) = scan_command_substitution_body_len_inner(
4393                    &input[next_index + '('.len_utf8()..],
4394                    subst_depth + 1,
4395                )
4396            {
4397                index = next_index + '('.len_utf8() + consumed;
4398                ansi_c_quote_pending = false;
4399                continue;
4400            }
4401        }
4402
4403        if !in_single
4404            && !in_ansi_c_single
4405            && !in_double
4406            && !in_backtick
4407            && !was_escaped
4408            && matches!(ch, '<' | '>')
4409            && input[next_index..].starts_with('(')
4410            && let Some(consumed) = scan_command_substitution_body_len_inner(
4411                &input[next_index + '('.len_utf8()..],
4412                subst_depth + 1,
4413            )
4414        {
4415            index = next_index + '('.len_utf8() + consumed;
4416            ansi_c_quote_pending = false;
4417            continue;
4418        }
4419
4420        match ch {
4421            '\'' if !in_double && !in_backtick && !was_escaped => {
4422                if in_ansi_c_single {
4423                    in_ansi_c_single = false;
4424                } else if !in_single && ansi_c_quote_pending {
4425                    in_ansi_c_single = true;
4426                } else {
4427                    in_single = !in_single;
4428                }
4429            }
4430            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
4431                in_double = !in_double
4432            }
4433            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
4434                in_backtick = !in_backtick
4435            }
4436            '}' if !in_single
4437                && !in_ansi_c_single
4438                && !in_double
4439                && !in_backtick
4440                && !was_escaped =>
4441            {
4442                brace_depth = brace_depth.saturating_sub(1);
4443                if brace_depth == 0 {
4444                    return Some(next_index);
4445                }
4446            }
4447            _ => {}
4448        }
4449
4450        ansi_c_quote_pending = ch == '$'
4451            && !in_single
4452            && !in_ansi_c_single
4453            && !in_double
4454            && !in_backtick
4455            && !was_escaped;
4456        index = next_index;
4457    }
4458
4459    None
4460}
4461
4462fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
4463    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4464        if !matches!(ch, ' ' | '\t') {
4465            break;
4466        }
4467        index = next_index;
4468    }
4469
4470    let start = index;
4471    let mut cooked = String::new();
4472    let mut in_single = false;
4473    let mut in_double = false;
4474    let mut escaped = false;
4475
4476    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4477        if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
4478            break;
4479        }
4480
4481        index = next_index;
4482        if escaped {
4483            cooked.push(ch);
4484            escaped = false;
4485            continue;
4486        }
4487
4488        match ch {
4489            '\\' if !in_single => escaped = true,
4490            '\'' if !in_double => in_single = !in_single,
4491            '"' if !in_single => in_double = !in_double,
4492            _ => cooked.push(ch),
4493        }
4494    }
4495
4496    (index > start).then_some((index, cooked))
4497}
4498
4499fn skip_command_subst_pending_heredoc(
4500    input: &str,
4501    mut index: usize,
4502    delimiter: &str,
4503    strip_tabs: bool,
4504) -> usize {
4505    while index <= input.len() {
4506        let rest = &input[index..];
4507        let line_len = rest.find('\n').unwrap_or(rest.len());
4508        let line = &rest[..line_len];
4509        let has_newline = line_len < rest.len();
4510
4511        index += line_len;
4512        if has_newline {
4513            index += '\n'.len_utf8();
4514        }
4515
4516        if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4517            return index;
4518        }
4519    }
4520
4521    index
4522}
4523
/// Scans an ANSI-C quoted string (`$'...'`) within a command substitution.
///
/// `quote_index` is the byte offset of the opening `'`. A backslash skips the
/// character that follows it. Returns the offset just past the closing `'`,
/// or `None` when the string never closes.
fn scan_command_subst_ansi_c_single_quoted_segment(
    input: &str,
    quote_index: usize,
) -> Option<usize> {
    let body_start = quote_index + '\''.len_utf8();
    let mut chars = input.get(body_start..)?.char_indices();

    while let Some((offset, ch)) = chars.next() {
        match ch {
            // Discard whatever the backslash escapes, if anything follows.
            '\\' => {
                chars.next();
            }
            '\'' => return Some(body_start + offset + ch.len_utf8()),
            _ => {}
        }
    }

    None
}
4546
/// Scans a backtick command substitution within a command substitution.
///
/// `start` is the byte offset just past the opening backtick. A backslash
/// skips the character that follows it. Returns the offset just past the
/// closing backtick, or `None` when no closing backtick is found.
fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
    let mut chars = input.get(start..)?.char_indices();

    while let Some((offset, ch)) = chars.next() {
        match ch {
            // Discard whatever the backslash escapes, if anything follows.
            '\\' => {
                chars.next();
            }
            '`' => return Some(start + offset + ch.len_utf8()),
            _ => {}
        }
    }

    None
}
4566
4567fn flush_scanned_command_subst_keyword(
4568    current_word: &mut String,
4569    pending_case_headers: &mut usize,
4570    case_clause_depths: &mut SmallVec<[usize; 4]>,
4571    depth: usize,
4572    word_started_at_command_start: &mut bool,
4573) {
4574    if current_word.is_empty() {
4575        *word_started_at_command_start = false;
4576        return;
4577    }
4578
4579    match current_word.as_str() {
4580        "case" if *word_started_at_command_start => *pending_case_headers += 1,
4581        "in" if *pending_case_headers > 0 => {
4582            *pending_case_headers -= 1;
4583            case_clause_depths.push(depth);
4584        }
4585        "esac" if *word_started_at_command_start => {
4586            case_clause_depths.pop();
4587        }
4588        _ => {}
4589    }
4590
4591    current_word.clear();
4592    *word_started_at_command_start = false;
4593}
4594
4595pub(super) fn scan_command_substitution_body_len_inner(
4596    input: &str,
4597    subst_depth: usize,
4598) -> Option<usize> {
4599    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
4600        return None;
4601    }
4602
4603    let mut index = 0usize;
4604    let mut depth = 1;
4605    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
4606    let mut pending_case_headers = 0usize;
4607    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
4608    let mut current_word = String::with_capacity(16);
4609    let mut at_command_start = true;
4610    let mut expecting_redirection_target = false;
4611    let mut current_word_started_at_command_start = false;
4612
4613    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4614        match ch {
4615            '#' if hash_starts_comment(input, index) => {
4616                let had_word = !current_word.is_empty();
4617                flush_scanned_command_subst_keyword(
4618                    &mut current_word,
4619                    &mut pending_case_headers,
4620                    &mut case_clause_depths,
4621                    depth,
4622                    &mut current_word_started_at_command_start,
4623                );
4624                if had_word && expecting_redirection_target {
4625                    expecting_redirection_target = false;
4626                }
4627                index = next_index;
4628                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
4629                    index = comment_next;
4630                    if comment_ch == '\n' {
4631                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
4632                            index = skip_command_subst_pending_heredoc(
4633                                input, index, &delimiter, strip_tabs,
4634                            );
4635                        }
4636                        at_command_start = true;
4637                        expecting_redirection_target = false;
4638                        break;
4639                    }
4640                }
4641            }
4642            '(' => {
4643                flush_scanned_command_subst_keyword(
4644                    &mut current_word,
4645                    &mut pending_case_headers,
4646                    &mut case_clause_depths,
4647                    depth,
4648                    &mut current_word_started_at_command_start,
4649                );
4650                depth += 1;
4651                index = next_index;
4652                at_command_start = true;
4653                expecting_redirection_target = false;
4654            }
4655            ')' => {
4656                flush_scanned_command_subst_keyword(
4657                    &mut current_word,
4658                    &mut pending_case_headers,
4659                    &mut case_clause_depths,
4660                    depth,
4661                    &mut current_word_started_at_command_start,
4662                );
4663                if case_clause_depths
4664                    .last()
4665                    .is_some_and(|case_depth| *case_depth == depth)
4666                {
4667                    index = next_index;
4668                    at_command_start = true;
4669                    expecting_redirection_target = false;
4670                    continue;
4671                }
4672                depth -= 1;
4673                index = next_index;
4674                if depth == 0 {
4675                    return Some(index);
4676                }
4677                at_command_start = false;
4678                expecting_redirection_target = false;
4679            }
4680            '"' => {
4681                let had_word = !current_word.is_empty();
4682                flush_scanned_command_subst_keyword(
4683                    &mut current_word,
4684                    &mut pending_case_headers,
4685                    &mut case_clause_depths,
4686                    depth,
4687                    &mut current_word_started_at_command_start,
4688                );
4689                if had_word && expecting_redirection_target {
4690                    expecting_redirection_target = false;
4691                }
4692                index = scan_double_quoted_command_substitution_segment(
4693                    input,
4694                    next_index,
4695                    subst_depth,
4696                )?;
4697                if expecting_redirection_target {
4698                    expecting_redirection_target = false;
4699                } else {
4700                    at_command_start = false;
4701                }
4702            }
4703            '\'' => {
4704                let had_word = !current_word.is_empty();
4705                flush_scanned_command_subst_keyword(
4706                    &mut current_word,
4707                    &mut pending_case_headers,
4708                    &mut case_clause_depths,
4709                    depth,
4710                    &mut current_word_started_at_command_start,
4711                );
4712                if had_word && expecting_redirection_target {
4713                    expecting_redirection_target = false;
4714                }
4715                index = next_index;
4716                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
4717                    index = quoted_next;
4718                    if quoted_ch == '\'' {
4719                        break;
4720                    }
4721                }
4722                if expecting_redirection_target {
4723                    expecting_redirection_target = false;
4724                } else {
4725                    at_command_start = false;
4726                }
4727            }
4728            '`' => {
4729                let had_word = !current_word.is_empty();
4730                flush_scanned_command_subst_keyword(
4731                    &mut current_word,
4732                    &mut pending_case_headers,
4733                    &mut case_clause_depths,
4734                    depth,
4735                    &mut current_word_started_at_command_start,
4736                );
4737                if had_word && expecting_redirection_target {
4738                    expecting_redirection_target = false;
4739                }
4740                index = scan_command_subst_backtick_segment(input, next_index)?;
4741                if expecting_redirection_target {
4742                    expecting_redirection_target = false;
4743                } else {
4744                    at_command_start = false;
4745                }
4746            }
4747            '$' if input[next_index..].starts_with('\'') => {
4748                let had_word = !current_word.is_empty();
4749                flush_scanned_command_subst_keyword(
4750                    &mut current_word,
4751                    &mut pending_case_headers,
4752                    &mut case_clause_depths,
4753                    depth,
4754                    &mut current_word_started_at_command_start,
4755                );
4756                if had_word && expecting_redirection_target {
4757                    expecting_redirection_target = false;
4758                }
4759                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
4760                if expecting_redirection_target {
4761                    expecting_redirection_target = false;
4762                } else {
4763                    at_command_start = false;
4764                }
4765            }
4766            '\\' => {
4767                let had_word = !current_word.is_empty();
4768                flush_scanned_command_subst_keyword(
4769                    &mut current_word,
4770                    &mut pending_case_headers,
4771                    &mut case_clause_depths,
4772                    depth,
4773                    &mut current_word_started_at_command_start,
4774                );
4775                if had_word && expecting_redirection_target {
4776                    expecting_redirection_target = false;
4777                }
4778                index = next_index;
4779                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4780                    index = escaped_next;
4781                }
4782                if expecting_redirection_target {
4783                    expecting_redirection_target = false;
4784                } else {
4785                    at_command_start = false;
4786                }
4787            }
4788            '>' => {
4789                let word_was_redirection_fd = current_word_started_at_command_start
4790                    && !current_word.is_empty()
4791                    && current_word.chars().all(|current| current.is_ascii_digit());
4792                flush_scanned_command_subst_keyword(
4793                    &mut current_word,
4794                    &mut pending_case_headers,
4795                    &mut case_clause_depths,
4796                    depth,
4797                    &mut current_word_started_at_command_start,
4798                );
4799                if word_was_redirection_fd {
4800                    at_command_start = true;
4801                }
4802                index = next_index;
4803                expecting_redirection_target = true;
4804            }
4805            '<' if input[next_index..].starts_with('<') => {
4806                let word_was_redirection_fd = current_word_started_at_command_start
4807                    && !current_word.is_empty()
4808                    && current_word.chars().all(|current| current.is_ascii_digit());
4809                let had_word = !current_word.is_empty();
4810                flush_scanned_command_subst_keyword(
4811                    &mut current_word,
4812                    &mut pending_case_headers,
4813                    &mut case_clause_depths,
4814                    depth,
4815                    &mut current_word_started_at_command_start,
4816                );
4817                if had_word && expecting_redirection_target {
4818                    expecting_redirection_target = false;
4819                }
4820                if word_was_redirection_fd {
4821                    at_command_start = true;
4822                }
4823                if inside_unclosed_double_paren_on_line(input, index) {
4824                    index = next_index + '<'.len_utf8();
4825                    continue;
4826                }
4827
4828                if input[next_index + '<'.len_utf8()..].starts_with('<') {
4829                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
4830                    expecting_redirection_target = true;
4831                    continue;
4832                }
4833
4834                let strip_tabs = input[next_index..].starts_with("<-");
4835                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
4836                if let Some((delimiter_index, delimiter)) =
4837                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
4838                {
4839                    pending_heredocs.push((delimiter, strip_tabs));
4840                    index = delimiter_index;
4841                    expecting_redirection_target = false;
4842                } else {
4843                    index = next_index;
4844                    expecting_redirection_target = true;
4845                }
4846            }
4847            '\n' => {
4848                flush_scanned_command_subst_keyword(
4849                    &mut current_word,
4850                    &mut pending_case_headers,
4851                    &mut case_clause_depths,
4852                    depth,
4853                    &mut current_word_started_at_command_start,
4854                );
4855                index = next_index;
4856                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
4857                    index =
4858                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
4859                }
4860                at_command_start = true;
4861                expecting_redirection_target = false;
4862            }
4863            '$' if input[next_index..].starts_with('{') => {
4864                let had_word = !current_word.is_empty();
4865                flush_scanned_command_subst_keyword(
4866                    &mut current_word,
4867                    &mut pending_case_headers,
4868                    &mut case_clause_depths,
4869                    depth,
4870                    &mut current_word_started_at_command_start,
4871                );
4872                if had_word && expecting_redirection_target {
4873                    expecting_redirection_target = false;
4874                }
4875                let consumed = scan_command_subst_parameter_expansion_len(
4876                    &input[next_index + '{'.len_utf8()..],
4877                    subst_depth,
4878                    0,
4879                )?;
4880                index = next_index + '{'.len_utf8() + consumed;
4881                if expecting_redirection_target {
4882                    expecting_redirection_target = false;
4883                } else {
4884                    at_command_start = false;
4885                }
4886            }
4887            '$' if input[next_index..].starts_with('(')
4888                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4889            {
4890                let had_word = !current_word.is_empty();
4891                flush_scanned_command_subst_keyword(
4892                    &mut current_word,
4893                    &mut pending_case_headers,
4894                    &mut case_clause_depths,
4895                    depth,
4896                    &mut current_word_started_at_command_start,
4897                );
4898                if had_word && expecting_redirection_target {
4899                    expecting_redirection_target = false;
4900                }
4901                let consumed = scan_command_substitution_body_len_inner(
4902                    &input[next_index + '('.len_utf8()..],
4903                    subst_depth + 1,
4904                )?;
4905                index = next_index + '('.len_utf8() + consumed;
4906                if expecting_redirection_target {
4907                    expecting_redirection_target = false;
4908                } else {
4909                    at_command_start = false;
4910                }
4911            }
4912            _ => {
4913                if ch.is_ascii_alphanumeric() || ch == '_' {
4914                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
4915                    {
4916                        current_word_started_at_command_start = true;
4917                        at_command_start = false;
4918                    }
4919                    current_word.push(ch);
4920                } else {
4921                    let had_word = !current_word.is_empty();
4922                    flush_scanned_command_subst_keyword(
4923                        &mut current_word,
4924                        &mut pending_case_headers,
4925                        &mut case_clause_depths,
4926                        depth,
4927                        &mut current_word_started_at_command_start,
4928                    );
4929                    if had_word && expecting_redirection_target {
4930                        expecting_redirection_target = false;
4931                    }
4932                    match ch {
4933                        ' ' | '\t' => {}
4934                        ';' | '|' | '&' => {
4935                            at_command_start = true;
4936                            expecting_redirection_target = false;
4937                        }
4938                        _ => {
4939                            if !expecting_redirection_target {
4940                                at_command_start = false;
4941                            }
4942                        }
4943                    }
4944                }
4945                index = next_index;
4946            }
4947        }
4948    }
4949
4950    None
4951}
4952
4953pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
4954    scan_command_substitution_body_len_inner(input, 0)
4955}
4956
4957#[cfg(test)]
4958mod tests {
4959    use super::*;
4960
4961    fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
4962        match token.kind {
4963            kind if kind.is_word_like() => token.word_string(),
4964            TokenKind::Comment => token
4965                .span
4966                .slice(source)
4967                .strip_prefix('#')
4968                .map(str::to_string),
4969            TokenKind::Error => token
4970                .error_kind()
4971                .map(LexerErrorKind::message)
4972                .map(str::to_string),
4973            _ => None,
4974        }
4975    }
4976
4977    fn assert_next_token(
4978        lexer: &mut Lexer<'_>,
4979        expected_kind: TokenKind,
4980        expected_text: Option<&str>,
4981    ) {
4982        let token = lexer.next_lexed_token().unwrap();
4983        assert_eq!(token.kind, expected_kind);
4984        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4985    }
4986
4987    fn assert_next_token_with_comments(
4988        lexer: &mut Lexer<'_>,
4989        expected_kind: TokenKind,
4990        expected_text: Option<&str>,
4991    ) {
4992        let token = lexer.next_lexed_token_with_comments().unwrap();
4993        assert_eq!(token.kind, expected_kind);
4994        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4995    }
4996
4997    fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
4998        let mut lexer = Lexer::new(input);
4999
5000        while let Some(token) = lexer.next_lexed_token() {
5001            if token.kind == TokenKind::Newline {
5002                continue;
5003            }
5004
5005            assert_eq!(
5006                token.span.start.line, token.span.end.line,
5007                "token should stay on one line: {:?}",
5008                token
5009            );
5010        }
5011    }
5012
5013    #[test]
5014    fn test_simple_words() {
5015        let mut lexer = Lexer::new("echo hello world");
5016
5017        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5018        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5019        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5020        assert!(lexer.next_lexed_token().is_none());
5021    }
5022
5023    #[test]
5024    fn test_single_quoted_string() {
5025        let mut lexer = Lexer::new("echo 'hello world'");
5026
5027        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5028        // Single-quoted strings return LiteralWord (no variable expansion)
5029        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("hello world"));
5030        assert!(lexer.next_lexed_token().is_none());
5031    }
5032
5033    #[test]
5034    fn test_double_quoted_string() {
5035        let mut lexer = Lexer::new("echo \"hello world\"");
5036
5037        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5038        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("hello world"));
5039        assert!(lexer.next_lexed_token().is_none());
5040    }
5041
5042    #[test]
5043    fn test_brace_expansion_token_ignores_quoted_closers() {
5044        let mut lexer = Lexer::new("echo {\"}\",a}\n");
5045
5046        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5047        assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{"}",a}"#));
5048        assert_next_token(&mut lexer, TokenKind::Newline, None);
5049        assert!(lexer.next_lexed_token().is_none());
5050    }
5051
5052    #[test]
5053    fn test_brace_expansion_token_preserves_single_quoted_backslash_member_boundary() {
5054        let mut lexer = Lexer::new("echo {'a\\',b} next\n");
5055
5056        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5057        assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{'a\',b}"#));
5058        assert_next_token(&mut lexer, TokenKind::Word, Some("next"));
5059        assert_next_token(&mut lexer, TokenKind::Newline, None);
5060        assert!(lexer.next_lexed_token().is_none());
5061    }
5062
5063    #[test]
5064    fn test_double_quoted_expansion_token_keeps_source_backing() {
5065        let source = r#""$bar""#;
5066        let mut lexer = Lexer::new(source);
5067
5068        let token = lexer.next_lexed_token().unwrap();
5069        assert_eq!(token.kind, TokenKind::QuotedWord);
5070        assert_eq!(token.word_text(), Some("$bar"));
5071
5072        let word = token.word().unwrap();
5073        let segment = word.single_segment().unwrap();
5074        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
5075        assert_eq!(segment.span().unwrap().slice(source), "$bar");
5076    }
5077
5078    #[test]
5079    fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
5080        let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
5081        let mut lexer = Lexer::new(source);
5082
5083        let token = lexer.next_lexed_token().unwrap();
5084        assert_eq!(token.kind, TokenKind::QuotedWord);
5085        assert_eq!(
5086            token.word_text(),
5087            Some(r#"$(echo "$line" | cut -d' ' -f2-)"#)
5088        );
5089    }
5090
5091    #[test]
5092    fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
5093        let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
5094        let mut lexer = Lexer::new(source);
5095
5096        let token = lexer.next_lexed_token().unwrap();
5097        assert_eq!(token.kind, TokenKind::QuotedWord);
5098        assert_eq!(
5099            token.word_text(),
5100            Some(r#"$(echo "${@}" | tr -d '[:space:]')"#)
5101        );
5102    }
5103
5104    #[test]
5105    fn test_deep_command_substitution_preserves_simple_parameter_expansion() {
5106        let source = r#""$(echo "$(echo "$(echo "$(echo "${name}")")")")""#;
5107        let mut lexer = Lexer::new(source);
5108
5109        let token = lexer.next_lexed_token().unwrap();
5110        assert_eq!(token.kind, TokenKind::QuotedWord);
5111        assert_eq!(
5112            token.word_text(),
5113            Some(r#"$(echo "$(echo "$(echo "$(echo "${name}")")")")"#)
5114        );
5115    }
5116
5117    #[test]
5118    fn test_command_substitution_preserves_deep_parameter_operand_paren() {
5119        let source = r#""$(echo "${a:-${b:-${c:-${d:-${e:-x})}}}}")""#;
5120        let mut lexer = Lexer::new(source);
5121
5122        let token = lexer.next_lexed_token().unwrap();
5123        assert_eq!(token.kind, TokenKind::QuotedWord);
5124        assert_eq!(
5125            token.word_text(),
5126            Some(r#"$(echo "${a:-${b:-${c:-${d:-${e:-x})}}}}")"#)
5127        );
5128    }
5129
5130    #[test]
5131    fn test_mixed_word_keeps_segment_kinds() {
5132        let source = r#"foo"bar"'baz'"#;
5133        let mut lexer = Lexer::new(source);
5134
5135        let token = lexer.next_lexed_token().unwrap();
5136        assert_eq!(token.kind, TokenKind::Word);
5137
5138        let word = token.word().unwrap();
5139        let segments: Vec<_> = word
5140            .segments()
5141            .map(|segment| (segment.kind(), segment.as_str().to_string()))
5142            .collect();
5143
5144        assert_eq!(
5145            segments,
5146            vec![
5147                (LexedWordSegmentKind::Plain, "foo".to_string()),
5148                (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
5149                (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
5150            ]
5151        );
5152        assert_eq!(word.joined_text(), "foobarbaz");
5153        assert_eq!(
5154            word.segments()
5155                .next()
5156                .and_then(LexedWordSegment::span)
5157                .unwrap()
5158                .slice(source),
5159            "foo"
5160        );
5161    }
5162
5163    #[test]
5164    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
5165        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";
5166
5167        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5168        let body = &source[..consumed];
5169
5170        assert!(body.contains("field, direction"));
5171        assert!(body.ends_with(')'));
5172    }
5173
5174    #[test]
5175    fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
5176        let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";
5177
5178        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5179        let body = &source[..consumed];
5180
5181        assert!(body.contains("printf '%s' y"));
5182        assert!(body.ends_with(')'));
5183    }
5184
5185    #[test]
5186    fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
5187        let source = " (# comment with )\nprintf %s 1,2\n) )\"";
5188
5189        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5190        let body = &source[..consumed];
5191
5192        assert!(body.contains("printf %s 1,2"));
5193        assert!(body.ends_with(')'));
5194    }
5195
5196    #[test]
5197    fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
5198        let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";
5199
5200        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5201        let body = &source[..consumed];
5202
5203        assert!(body.contains("field, direction"));
5204        assert!(body.ends_with(')'));
5205    }
5206
5207    #[test]
5208    fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
5209        let source = "printf %s ${x//foo/)},1)\"";
5210
5211        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5212        let body = &source[..consumed];
5213
5214        assert!(body.contains("${x//foo/)},1"));
5215        assert!(body.ends_with(')'));
5216    }
5217
5218    #[test]
5219    fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
5220        let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";
5221
5222        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5223        let body = &source[..consumed];
5224
5225        assert!(body.contains("printf %s 1,2"));
5226        assert!(body.ends_with(')'));
5227    }
5228
5229    #[test]
5230    fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
5231        let source = "[[ \"$buf\" == (#b)(*) ]]";
5232        let index = source.find('#').expect("expected hash");
5233
5234        assert!(!hash_starts_comment(source, index));
5235    }
5236
5237    #[test]
5238    fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
5239        let source = "(#comment with )";
5240        let index = source.find('#').expect("expected hash");
5241
5242        assert!(hash_starts_comment(source, index));
5243    }
5244
5245    #[test]
5246    fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
5247        let source = "(( #c < 256 ))";
5248        let index = source.find('#').expect("expected hash");
5249
5250        assert!(!hash_starts_comment(source, index));
5251    }
5252
5253    #[test]
5254    fn test_hash_starts_comment_respects_quoted_double_parens() {
5255        let source = "printf '((' # comment";
5256        let index = source.find('#').expect("expected hash");
5257
5258        assert!(hash_starts_comment(source, index));
5259    }
5260
5261    #[test]
5262    fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
5263        let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";
5264
5265        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5266        let body = &source[..consumed];
5267
5268        assert!(body.contains("printf %s 1,2"));
5269        assert!(body.ends_with(')'));
5270    }
5271
5272    #[test]
5273    fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
5274        let source = " (#comment with )\nprintf %s 1,2\n) )\"";
5275
5276        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5277        let body = &source[..consumed];
5278
5279        assert!(body.contains("printf %s 1,2"));
5280        assert!(body.ends_with(')'));
5281    }
5282
5283    #[test]
5284    fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
5285        let source = "((x<<2))\nprintf %s 1,2\n)\"";
5286
5287        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5288        let body = &source[..consumed];
5289
5290        assert!(body.contains("printf %s 1,2"));
5291        assert!(body.ends_with(')'));
5292    }
5293
5294    #[test]
5295    fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
5296        let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";
5297
5298        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5299        let body = &source[..consumed];
5300
5301        assert!(body.contains("printf %s 1,2"));
5302        assert!(body.ends_with("))"));
5303    }
5304
5305    #[test]
5306    fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
5307        let source = "printf %s 1,2; echo case in)\"";
5308
5309        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5310        let body = &source[..consumed];
5311
5312        assert!(body.contains("echo case in"));
5313        assert!(body.ends_with(')'));
5314    }
5315
5316    #[test]
5317    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
5318        let source = "printf %s $'a\\'b'; printf %s 1,2)\"";
5319
5320        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5321        let body = &source[..consumed];
5322
5323        assert!(body.contains("$'a\\'b'"));
5324        assert!(body.contains("printf %s 1,2"));
5325        assert!(body.ends_with(')'));
5326    }
5327
5328    #[test]
5329    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
5330        let source = "printf %s `echo foo)`; printf %s ok)\"";
5331
5332        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5333        let body = &source[..consumed];
5334
5335        assert!(body.contains("`echo foo)`"));
5336        assert!(body.contains("printf %s ok"));
5337        assert!(body.ends_with(')'));
5338    }
5339
5340    #[test]
5341    fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
5342        let source = "printf %s ${x/`echo }`/foo)},1)\"";
5343
5344        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5345        let body = &source[..consumed];
5346
5347        assert!(body.contains("${x/`echo }`/foo)},1"));
5348        assert!(body.ends_with(')'));
5349    }
5350
5351    #[test]
5352    fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
5353     {
5354        let source = "printf %s ${x/<(echo })/foo)},1)\"";
5355
5356        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5357        let body = &source[..consumed];
5358
5359        assert!(body.contains("${x/<(echo })/foo)},1"));
5360        assert!(body.ends_with(')'));
5361    }
5362
5363    #[test]
5364    fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
5365        let source = "printf %s 1,2; echo case in)";
5366
5367        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5368        let body = &source[..consumed];
5369
5370        assert_eq!(body, source);
5371    }
5372
5373    #[test]
5374    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
5375        let source = "printf %s $'a\\'b'; printf %s 1,2)";
5376
5377        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5378        let body = &source[..consumed];
5379
5380        assert_eq!(body, source);
5381    }
5382
5383    #[test]
5384    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
5385        let source = "printf %s `echo foo)`; printf %s ok)";
5386
5387        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5388        let body = &source[..consumed];
5389
5390        assert_eq!(body, source);
5391    }
5392
5393    #[test]
5394    fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
5395        let source = "echo \"$line\" | cut -d' ' -f2-)";
5396
5397        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5398        let body = &source[..consumed];
5399
5400        assert_eq!(body, source);
5401    }
5402
5403    #[test]
5404    fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
5405        let source = "echo \"${@}\" | tr -d '[:space:]')";
5406
5407        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5408        let body = &source[..consumed];
5409
5410        assert_eq!(body, source);
5411    }
5412
5413    #[test]
5414    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
5415        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";
5416
5417        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5418        let body = &source[..consumed];
5419
5420        assert_eq!(body, source);
5421    }
5422
5423    #[test]
5424    fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
5425        let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";
5426
5427        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5428        let body = &source[..consumed];
5429
5430        assert_eq!(body, source);
5431    }
5432
5433    #[test]
5434    fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
5435        let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
5436        let mut lexer = Lexer::new(source);
5437
5438        let first = lexer.next_lexed_token().expect("expected first token");
5439        assert!(first.kind.is_word_like(), "{:?}", first.kind);
5440        assert_eq!(first.word_string().as_deref(), Some("echo"));
5441
5442        let second = lexer.next_lexed_token().expect("expected second token");
5443        assert!(second.kind.is_word_like(), "{:?}", second.kind);
5444        assert_eq!(
5445            second.word_string().as_deref(),
5446            Some("$(echo \"$(( $(printf ')') + 1 ))\")")
5447        );
5448    }
5449
5450    #[test]
5451    fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
5452        let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
5453        let start = source.find("$(").expect("expected command substitution") + 2;
5454        let consumed =
5455            scan_command_substitution_body_len(&source[start..]).expect("expected match");
5456        assert_eq!(&source[start..start + consumed], "echo $var | tr A-Z a-z)");
5457    }
5458
5459    #[test]
5460    fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
5461        let source = "echo $(echo $(basename $filename .fuzz))";
5462        let start = source.find("$(").expect("expected command substitution") + 2;
5463        let consumed =
5464            scan_command_substitution_body_len(&source[start..]).expect("expected match");
5465        assert_eq!(
5466            &source[start..start + consumed],
5467            "echo $(basename $filename .fuzz))"
5468        );
5469    }
5470
5471    #[test]
5472    fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
5473        let source = "\n       [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n    )";
5474        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5475        assert_eq!(consumed, source.len());
5476    }
5477
5478    #[test]
5479    fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
5480        let source = "'foo'bar";
5481        let mut lexer = Lexer::new(source);
5482
5483        let token = lexer.next_lexed_token().unwrap();
5484        assert_eq!(token.kind, TokenKind::LiteralWord);
5485
5486        let word = token.word().unwrap();
5487        let segments: Vec<_> = word
5488            .segments()
5489            .map(|segment| (segment.kind(), segment.as_str().to_string()))
5490            .collect();
5491
5492        assert_eq!(
5493            segments,
5494            vec![
5495                (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
5496                (LexedWordSegmentKind::Plain, "bar".to_string()),
5497            ]
5498        );
5499        assert_eq!(word.joined_text(), "foobar");
5500        assert_eq!(
5501            word.segments()
5502                .nth(1)
5503                .and_then(LexedWordSegment::span)
5504                .unwrap()
5505                .slice(source),
5506            "bar"
5507        );
5508    }
5509
5510    #[test]
5511    fn test_unquoted_command_substitution_word_keeps_source_backing() {
5512        let source = "$(printf hi)";
5513        let mut lexer = Lexer::new(source);
5514
5515        let token = lexer.next_lexed_token().unwrap();
5516        assert_eq!(token.kind, TokenKind::Word);
5517
5518        let word = token.word().unwrap();
5519        let segment = word.single_segment().unwrap();
5520        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5521        assert_eq!(segment.as_str(), source);
5522        assert_eq!(segment.span().unwrap().slice(source), source);
5523    }
5524
5525    #[test]
5526    fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
5527        let source = "${arr[$RANDOM % ${#arr[@]}]}";
5528        let mut lexer = Lexer::new(source);
5529
5530        let token = lexer.next_lexed_token().unwrap();
5531        assert_eq!(token.kind, TokenKind::Word);
5532
5533        let word = token.word().unwrap();
5534        let segment = word.single_segment().unwrap();
5535        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5536        assert_eq!(segment.as_str(), source);
5537        assert_eq!(segment.span().unwrap().slice(source), source);
5538    }
5539
5540    #[test]
5541    fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
5542        let source = "\"foo\"$(printf hi)";
5543        let mut lexer = Lexer::new(source);
5544
5545        let token = lexer.next_lexed_token().unwrap();
5546        assert_eq!(token.kind, TokenKind::Word);
5547
5548        let word = token.word().unwrap();
5549        let continuation = word.segments().nth(1).unwrap();
5550        assert_eq!(continuation.kind(), LexedWordSegmentKind::Plain);
5551        assert_eq!(continuation.as_str(), "$(printf hi)");
5552        assert_eq!(continuation.span().unwrap().slice(source), "$(printf hi)");
5553    }
5554
5555    #[test]
5556    fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
5557        let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
5558        let mut lexer = Lexer::new(source);
5559
5560        let token = lexer.next_lexed_token().unwrap();
5561        assert_eq!(token.kind, TokenKind::QuotedWord);
5562
5563        let word = token.word().unwrap();
5564        let segment = word.single_segment().unwrap();
5565        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
5566        assert_eq!(segment.as_str(), "${arr[$RANDOM % ${#arr[@]}]}");
5567        assert_eq!(
5568            segment.span().unwrap().slice(source),
5569            "${arr[$RANDOM % ${#arr[@]}]}"
5570        );
5571    }
5572
5573    #[test]
5574    fn test_ansi_c_control_escape_can_consume_quote() {
5575        let mut lexer = Lexer::new("echo $'\\c''");
5576
5577        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5578        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("\x07"));
5579        assert!(lexer.next_lexed_token().is_none());
5580    }
5581
5582    #[test]
5583    fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
5584        let source = r#"out_line="${out_line//'"'/'\"'}"
5585"#;
5586        let mut lexer = Lexer::new(source);
5587
5588        assert_next_token(
5589            &mut lexer,
5590            TokenKind::Word,
5591            Some(r#"out_line=${out_line//'"'/'"'}"#),
5592        );
5593        assert_next_token(&mut lexer, TokenKind::Newline, None);
5594        assert!(lexer.next_lexed_token().is_none());
5595    }
5596
5597    #[test]
5598    fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
5599        let source = r#"out_line="${out_line//'"'/'\"'}"
5600echo "Error: Missing python3!"
5601cat << 'EOF' > "${pywrapper}"
5602import os
5603EOF
5604"#;
5605        let mut lexer = Lexer::new(source);
5606
5607        assert_next_token(
5608            &mut lexer,
5609            TokenKind::Word,
5610            Some(r#"out_line=${out_line//'"'/'"'}"#),
5611        );
5612        assert_next_token(&mut lexer, TokenKind::Newline, None);
5613        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5614        assert_next_token(
5615            &mut lexer,
5616            TokenKind::QuotedWord,
5617            Some("Error: Missing python3!"),
5618        );
5619        assert_next_token(&mut lexer, TokenKind::Newline, None);
5620        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5621        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5622        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("EOF"));
5623        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5624        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("${pywrapper}"));
5625    }
5626
5627    #[test]
5628    fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
5629        let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
5630        let mut lexer = Lexer::new(source);
5631
5632        let token = lexer.next_lexed_token().unwrap();
5633        assert_eq!(token.kind, TokenKind::Word);
5634        assert_eq!(token.span.slice(source), "crypt=${crypt//\\\\/\\\\\\\\}");
5635        assert!(token.source_slice(source).is_none());
5636        assert_eq!(
5637            token.word_string().as_deref(),
5638            Some("crypt=${crypt//\\/\\\\}")
5639        );
5640        assert_next_token(&mut lexer, TokenKind::Newline, None);
5641        assert!(lexer.next_lexed_token().is_none());
5642    }
5643
5644    #[test]
5645    fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
5646        let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n  if true; then\n    txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n  fi\n}\n";
5647        let mut lexer = Lexer::new(source);
5648
5649        assert_next_token(
5650            &mut lexer,
5651            TokenKind::Word,
5652            Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
5653        );
5654        assert_next_token(&mut lexer, TokenKind::Newline, None);
5655        assert_next_token(&mut lexer, TokenKind::Newline, None);
5656        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5657        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5658        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5659        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
5660        assert_next_token(&mut lexer, TokenKind::Newline, None);
5661        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5662        assert_next_token(&mut lexer, TokenKind::Word, Some("true"));
5663        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5664        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5665        assert_next_token(&mut lexer, TokenKind::Newline, None);
5666        assert_next_token(
5667            &mut lexer,
5668            TokenKind::Word,
5669            Some(
5670                "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
5671            ),
5672        );
5673        assert_next_token(&mut lexer, TokenKind::Newline, None);
5674        assert_next_token(&mut lexer, TokenKind::Word, Some("fi"));
5675        assert_next_token(&mut lexer, TokenKind::Newline, None);
5676        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
5677        assert_next_token(&mut lexer, TokenKind::Newline, None);
5678        assert!(lexer.next_lexed_token().is_none());
5679    }
5680
5681    #[test]
5682    fn test_case_pattern_literal_left_brace_does_not_swallow_following_arms() {
5683        let source = "case \"$word\" in\n  {) : ;;\n  :) : ;;\nesac\n";
5684        let mut lexer = Lexer::new(source);
5685
5686        assert_next_token(&mut lexer, TokenKind::Word, Some("case"));
5687        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$word"));
5688        assert_next_token(&mut lexer, TokenKind::Word, Some("in"));
5689        assert_next_token(&mut lexer, TokenKind::Newline, None);
5690        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
5691        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5692        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5693        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
5694        assert_next_token(&mut lexer, TokenKind::Newline, None);
5695        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5696        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5697        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5698        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
5699        assert_next_token(&mut lexer, TokenKind::Newline, None);
5700        assert_next_token(&mut lexer, TokenKind::Word, Some("esac"));
5701        assert_next_token(&mut lexer, TokenKind::Newline, None);
5702        assert!(lexer.next_lexed_token().is_none());
5703    }
5704
5705    #[test]
5706    fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
5707        let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
5708        let mut lexer = Lexer::new(source);
5709
5710        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5711        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5712        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5713        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5714        assert_next_token(&mut lexer, TokenKind::And, None);
5715        assert_next_token(&mut lexer, TokenKind::Word, Some("!"));
5716        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5717        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5718        assert_next_token(&mut lexer, TokenKind::Word, Some("=~"));
5719        assert_next_token(&mut lexer, TokenKind::Word, Some("^{"));
5720        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5721        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5722        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5723        assert_next_token(&mut lexer, TokenKind::Newline, None);
5724        assert!(lexer.next_lexed_token().is_none());
5725    }
5726
5727    #[test]
5728    fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
5729        let source = "echo -{$(echo a),b}-\n";
5730        let mut lexer = Lexer::new(source);
5731
5732        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5733        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$(echo a),b}-"));
5734        assert_next_token(&mut lexer, TokenKind::Newline, None);
5735        assert!(lexer.next_lexed_token().is_none());
5736    }
5737
5738    #[test]
5739    fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
5740        let source = "echo -{$((1 + 2)),b}-\n";
5741        let mut lexer = Lexer::new(source);
5742
5743        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5744        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$((1 + 2)),b}-"));
5745        assert_next_token(&mut lexer, TokenKind::Newline, None);
5746        assert!(lexer.next_lexed_token().is_none());
5747    }
5748
5749    #[test]
5750    fn test_operators() {
5751        let mut lexer = Lexer::new("a |& b | c && d || e; f &");
5752
5753        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5754        assert_next_token(&mut lexer, TokenKind::PipeBoth, None);
5755        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5756        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5757        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5758        assert_next_token(&mut lexer, TokenKind::And, None);
5759        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5760        assert_next_token(&mut lexer, TokenKind::Or, None);
5761        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5762        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5763        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5764        assert_next_token(&mut lexer, TokenKind::Background, None);
5765        assert!(lexer.next_lexed_token().is_none());
5766    }
5767
5768    #[test]
5769    fn test_double_left_bracket_requires_separator() {
5770        let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");
5771
5772        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5773        assert_next_token(&mut lexer, TokenKind::Word, Some("foo"));
5774        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5775        assert_next_token(&mut lexer, TokenKind::Newline, None);
5776        assert_next_token(&mut lexer, TokenKind::Word, Some("[[z]"));
5777        assert_next_token(&mut lexer, TokenKind::Newline, None);
5778        assert!(lexer.next_lexed_token().is_none());
5779    }
5780
5781    #[test]
5782    fn test_redirects() {
5783        let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");
5784
5785        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5786        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5787        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5788        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5789        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5790        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5791        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5792        assert_next_token(&mut lexer, TokenKind::RedirectFdAppend, None);
5793        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5794        let token = lexer.next_lexed_token().unwrap();
5795        assert_eq!(token.kind, TokenKind::Clobber);
5796        assert_eq!(token.fd_value(), Some(2));
5797        assert_eq!(token_text(&token, lexer.input), None);
5798        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5799        assert_next_token(&mut lexer, TokenKind::RedirectIn, None);
5800        assert_next_token(&mut lexer, TokenKind::Word, Some("g"));
5801        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5802        assert_next_token(&mut lexer, TokenKind::Word, Some("h"));
5803        assert_next_token(&mut lexer, TokenKind::HereString, None);
5804        assert_next_token(&mut lexer, TokenKind::Word, Some("i"));
5805        assert_next_token(&mut lexer, TokenKind::RedirectBothAppend, None);
5806        assert_next_token(&mut lexer, TokenKind::Word, Some("j"));
5807        assert_next_token(&mut lexer, TokenKind::RedirectReadWrite, None);
5808        assert_next_token(&mut lexer, TokenKind::Word, Some("k"));
5809    }
5810
5811    #[test]
5812    fn test_comment() {
5813        let mut lexer = Lexer::new("echo hello # this is a comment\necho world");
5814
5815        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5816        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5817        assert_next_token(&mut lexer, TokenKind::Newline, None);
5818        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5819        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5820    }
5821
5822    #[test]
5823    fn test_comment_token_with_span() {
5824        let mut lexer = Lexer::new("# lead\necho hi # tail");
5825
5826        let comment = lexer.next_lexed_token_with_comments().unwrap();
5827        assert_eq!(comment.kind, TokenKind::Comment);
5828        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
5829        assert_eq!(comment.span.start.line, 1);
5830        assert_eq!(comment.span.start.column, 1);
5831        assert_eq!(comment.span.end.line, 1);
5832        assert_eq!(comment.span.end.column, 7);
5833
5834        assert_next_token(&mut lexer, TokenKind::Newline, None);
5835        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5836        assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));
5837
5838        let inline = lexer.next_lexed_token_with_comments().unwrap();
5839        assert_eq!(inline.kind, TokenKind::Comment);
5840        assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
5841        assert_eq!(inline.span.start.line, 2);
5842        assert_eq!(inline.span.start.column, 9);
5843    }
5844
5845    #[test]
5846    fn test_comment_token_preserves_hash_boundaries() {
5847        let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");
5848
5849        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5850        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
5851        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
5852        assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
5853        assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
5854        assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
5855        assert!(lexer.next_lexed_token_with_comments().is_none());
5856    }
5857
5858    #[test]
5859    fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5860        let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5861
5862        let mut saw_comment = false;
5863        while let Some(token) = lexer.next_lexed_token_with_comments() {
5864            if token.kind == TokenKind::Comment {
5865                saw_comment = true;
5866                break;
5867            }
5868        }
5869
5870        assert!(
5871            !saw_comment,
5872            "zsh inline glob controls inside [[ ]] should not lex as comments"
5873        );
5874    }
5875
5876    #[test]
5877    fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5878        let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5879
5880        let mut saw_comment = false;
5881        while let Some(token) = lexer.next_lexed_token_with_comments() {
5882            if token.kind == TokenKind::Comment {
5883                saw_comment = true;
5884                break;
5885            }
5886        }
5887
5888        assert!(
5889            !saw_comment,
5890            "zsh arithmetic char literals inside (( )) should not lex as comments"
5891        );
5892    }
5893
5894    #[test]
5895    fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
5896        let mut lexer = Lexer::new(
5897            "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
5898        );
5899
5900        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5901        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5902        assert_next_token(
5903            &mut lexer,
5904            TokenKind::LiteralWord,
5905            Some("\\e]133;C;cmdline_url=%s\\a"),
5906        );
5907        assert_next_token(
5908            &mut lexer,
5909            TokenKind::QuotedWord,
5910            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
5911        );
5912        assert_next_token(&mut lexer, TokenKind::Newline, None);
5913    }
5914
5915    #[test]
5916    fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
5917        let mut lexer = Lexer::new(
5918            "() {\n  builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
5919        );
5920
5921        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5922        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5923        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
5924        assert_next_token(&mut lexer, TokenKind::Newline, None);
5925        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5926        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5927        assert_next_token(
5928            &mut lexer,
5929            TokenKind::LiteralWord,
5930            Some("\\e]133;C;cmdline_url=%s\\a"),
5931        );
5932        assert_next_token(
5933            &mut lexer,
5934            TokenKind::QuotedWord,
5935            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
5936        );
5937        assert_next_token(&mut lexer, TokenKind::Newline, None);
5938        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
5939        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
5940        assert_next_token(&mut lexer, TokenKind::Newline, None);
5941    }
5942
5943    #[test]
5944    fn test_variable_words() {
5945        let mut lexer = Lexer::new("echo $HOME $USER");
5946
5947        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5948        assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
5949        assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
5950        assert!(lexer.next_lexed_token().is_none());
5951    }
5952
5953    #[test]
5954    fn test_pipeline_tokens() {
5955        let mut lexer = Lexer::new("echo hello | cat");
5956
5957        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5958        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5959        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5960        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5961        assert!(lexer.next_lexed_token().is_none());
5962    }
5963
5964    #[test]
5965    fn test_read_heredoc() {
5966        // Simulate state after reading "cat <<EOF" - positioned at newline before content
5967        let mut lexer = Lexer::new("\nhello\nworld\nEOF");
5968        let content = lexer.read_heredoc("EOF", false);
5969        assert_eq!(content.content, "hello\nworld\n");
5970    }
5971
5972    #[test]
5973    fn test_read_heredoc_single_line() {
5974        let mut lexer = Lexer::new("\ntest\nEOF");
5975        let content = lexer.read_heredoc("EOF", false);
5976        assert_eq!(content.content, "test\n");
5977    }
5978
5979    #[test]
5980    fn test_read_heredoc_full_scenario() {
5981        // Full scenario: "cat <<EOF\nhello\nworld\nEOF"
5982        let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
5983
5984        // Parser would read these tokens
5985        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5986        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5987        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5988
5989        // Now read heredoc content
5990        let content = lexer.read_heredoc("EOF", false);
5991        assert_eq!(content.content, "hello\nworld\n");
5992    }
5993
5994    #[test]
5995    fn test_read_heredoc_with_redirect() {
5996        // Rest-of-line (> file.txt) is re-injected into the lexer buffer
5997        let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
5998        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5999        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6000        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6001        let content = lexer.read_heredoc("EOF", false);
6002        assert_eq!(content.content, "hello\n");
6003        // The redirect tokens are now available from the lexer
6004        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
6005        assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
6006    }
6007
6008    #[test]
6009    fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
6010        let source = "cat <<EOF | grep hello \\\n  | sort \\\n  > out.txt\nhello\nEOF\n";
6011        let mut lexer = Lexer::new(source);
6012
6013        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6014        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6015        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6016
6017        let heredoc = lexer.read_heredoc("EOF", false);
6018        assert_eq!(heredoc.content, "hello\n");
6019
6020        assert_next_token(&mut lexer, TokenKind::Pipe, None);
6021        assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
6022        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
6023        assert_next_token(&mut lexer, TokenKind::Pipe, None);
6024        assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
6025        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
6026        assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
6027    }
6028
6029    #[test]
6030    fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
6031        let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
6032        let mut lexer = Lexer::new(source);
6033
6034        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6035        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6036        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6037
6038        let heredoc = lexer.read_heredoc("EOF", false);
6039        assert_eq!(heredoc.content, "1\n2\n3\n");
6040    }
6041
6042    #[test]
6043    fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
6044        let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
6045        let mut lexer = Lexer::new(source);
6046
6047        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6048        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6049        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6050
6051        let heredoc = lexer.read_heredoc("EOF", false);
6052        assert_eq!(heredoc.content, "body\n");
6053    }
6054
6055    #[test]
6056    fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
6057        let source = "cat <<EOF # note \\\nbody\nEOF\n";
6058        let mut lexer = Lexer::new(source);
6059
6060        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6061        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6062        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6063
6064        let heredoc = lexer.read_heredoc("EOF", false);
6065        assert_eq!(heredoc.content, "body\n");
6066    }
6067
6068    #[test]
6069    fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
6070        let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
6071        let mut lexer = Lexer::new(source);
6072
6073        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6074        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6075        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6076        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6077
6078        let heredoc = lexer.read_heredoc("EOF", false);
6079        assert_eq!(heredoc.content, "body\n");
6080
6081        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6082    }
6083
6084    #[test]
6085    fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
6086        let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
6087        let mut lexer = Lexer::new(source);
6088
6089        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6090        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6091        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6092
6093        let heredoc = lexer.read_heredoc("EOF", false);
6094        assert_eq!(heredoc.content, "1\n");
6095
6096        assert_next_token(&mut lexer, TokenKind::Pipe, None);
6097        assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
6098    }
6099
6100    #[test]
6101    fn test_read_heredoc_with_redirect_preserves_following_spans() {
6102        let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
6103        let mut lexer = Lexer::new(source);
6104
6105        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6106        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6107        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6108
6109        let heredoc = lexer.read_heredoc("EOF", false);
6110        assert_eq!(heredoc.content, "hello\n");
6111
6112        let redirect = lexer.next_lexed_token_with_comments().unwrap();
6113        assert_eq!(redirect.kind, TokenKind::RedirectOut);
6114        assert_eq!(redirect.span.slice(source), ">");
6115
6116        let target = lexer.next_lexed_token_with_comments().unwrap();
6117        assert_eq!(target.kind, TokenKind::Word);
6118        assert_eq!(
6119            token_text(&target, lexer.input).as_deref(),
6120            Some("file.txt")
6121        );
6122        assert_eq!(target.span.slice(source), "file.txt");
6123
6124        let newline = lexer.next_lexed_token_with_comments().unwrap();
6125        assert_eq!(newline.kind, TokenKind::Newline);
6126        assert_eq!(newline.span.slice(source), "\n");
6127
6128        let comment = lexer.next_lexed_token_with_comments().unwrap();
6129        assert_eq!(comment.kind, TokenKind::Comment);
6130        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
6131        assert_eq!(comment.span.slice(source), "# done");
6132    }
6133
6134    #[test]
6135    fn test_comment_with_unicode() {
6136        // Comment containing multi-byte UTF-8 characters
6137        let source = "# café résumé\necho ok";
6138        let mut lexer = Lexer::new(source);
6139
6140        let comment = lexer.next_lexed_token_with_comments().unwrap();
6141        assert_eq!(comment.kind, TokenKind::Comment);
6142        assert_eq!(
6143            token_text(&comment, lexer.input).as_deref(),
6144            Some(" café résumé")
6145        );
6146        // Span should cover exactly the comment bytes (including #)
6147        let start = comment.span.start.offset;
6148        let end = comment.span.end.offset;
6149        assert_eq!(start, 0);
6150        assert_eq!(&source[start..end], "# café résumé");
6151        assert!(source.is_char_boundary(start));
6152        assert!(source.is_char_boundary(end));
6153
6154        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6155        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
6156    }
6157
6158    #[test]
6159    fn test_comment_with_cjk_characters() {
6160        // CJK characters are 3-byte UTF-8; offsets must land on char boundaries
6161        let source = "# 你好世界\necho ok";
6162        let mut lexer = Lexer::new(source);
6163
6164        let comment = lexer.next_lexed_token_with_comments().unwrap();
6165        assert_eq!(comment.kind, TokenKind::Comment);
6166        assert_eq!(
6167            token_text(&comment, lexer.input).as_deref(),
6168            Some(" 你好世界")
6169        );
6170        let start = comment.span.start.offset;
6171        let end = comment.span.end.offset;
6172        assert_eq!(&source[start..end], "# 你好世界");
6173        assert!(source.is_char_boundary(start));
6174        assert!(source.is_char_boundary(end));
6175    }
6176
6177    #[test]
6178    fn test_heredoc_with_comments_inside() {
6179        // Comments inside heredoc body should NOT appear as comment tokens
6180        let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
6181        let mut lexer = Lexer::new(source);
6182
6183        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6184        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6185        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6186
6187        let heredoc = lexer.read_heredoc("EOF", false);
6188        assert_eq!(heredoc.content, "# not a comment\nreal line\n");
6189
6190        // After heredoc, replayed line termination should appear before
6191        // tokens from following source lines.
6192        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6193        let comment = lexer.next_lexed_token_with_comments().unwrap();
6194        assert_eq!(comment.kind, TokenKind::Comment);
6195        assert_eq!(
6196            token_text(&comment, lexer.input).as_deref(),
6197            Some(" real comment")
6198        );
6199    }
6200
6201    #[test]
6202    fn test_heredoc_with_hash_in_variable() {
6203        // ${var#pattern} inside heredoc should not produce comment tokens
6204        let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
6205        let mut lexer = Lexer::new(source);
6206
6207        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6208        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6209        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6210
6211        let heredoc = lexer.read_heredoc("EOF", false);
6212        assert_eq!(heredoc.content, "val=${x#prefix}\n");
6213    }
6214
6215    #[test]
6216    fn test_heredoc_span_does_not_leak() {
6217        // Heredoc content span must be within source bounds and must not
6218        // overlap with content before or after.
6219        let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
6220        let mut lexer = Lexer::new(source);
6221
6222        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6223        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6224        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6225
6226        let heredoc = lexer.read_heredoc("EOF", false);
6227        let start = heredoc.content_span.start.offset;
6228        let end = heredoc.content_span.end.offset;
6229        assert!(
6230            end <= source.len(),
6231            "heredoc span end ({end}) exceeds source length ({})",
6232            source.len()
6233        );
6234        assert_eq!(&source[start..end], "hello\nworld\n");
6235
6236        // Tokens after heredoc should still parse correctly
6237        assert_next_token(&mut lexer, TokenKind::Newline, None);
6238        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6239        assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
6240    }
6241
6242    #[test]
6243    fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
6244        let source = "\
6245cat <<\\_ACEOF
6246Use these variables to override the choices made by `configure' or to help
6247it to find libraries and programs with nonstandard names/locations.
6248_ACEOF
6249ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
6250ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
6251";
6252        let mut lexer = Lexer::new(source);
6253
6254        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6255        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6256        let delimiter = lexer.next_lexed_token_with_comments().unwrap();
6257        assert_eq!(delimiter.kind, TokenKind::Word);
6258        assert_eq!(delimiter.span.slice(source), "\\_ACEOF");
6259
6260        let heredoc = lexer.read_heredoc("_ACEOF", false);
6261        assert_eq!(
6262            heredoc.content,
6263            "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
6264        );
6265
6266        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6267
6268        let first = lexer.next_lexed_token_with_comments().unwrap();
6269        assert_eq!(first.kind, TokenKind::Word);
6270        assert_eq!(
6271            first.span.slice(source),
6272            "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
6273        );
6274        let first_segments = first
6275            .word()
6276            .unwrap()
6277            .segments()
6278            .map(|segment| {
6279                (
6280                    segment.kind(),
6281                    segment.as_str().to_string(),
6282                    segment.span().map(|span| span.slice(source).to_string()),
6283                )
6284            })
6285            .collect::<Vec<_>>();
6286        assert_eq!(
6287            first_segments,
6288            vec![
6289                (
6290                    LexedWordSegmentKind::Plain,
6291                    "ac_dir_suffix=/".to_string(),
6292                    Some("ac_dir_suffix=/".to_string()),
6293                ),
6294                (
6295                    LexedWordSegmentKind::Plain,
6296                    "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
6297                    Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
6298                ),
6299            ]
6300        );
6301
6302        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6303
6304        let second = lexer.next_lexed_token_with_comments().unwrap();
6305        assert_eq!(second.kind, TokenKind::Word);
6306        assert_eq!(
6307            second.span.slice(source),
6308            "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6309        );
6310        let second_segments = second
6311            .word()
6312            .unwrap()
6313            .segments()
6314            .map(|segment| {
6315                (
6316                    segment.kind(),
6317                    segment.as_str().to_string(),
6318                    segment.span().map(|span| span.slice(source).to_string()),
6319                )
6320            })
6321            .collect::<Vec<_>>();
6322        assert_eq!(
6323            second_segments,
6324            vec![
6325                (
6326                    LexedWordSegmentKind::Plain,
6327                    "ac_top_builddir_sub=".to_string(),
6328                    Some("ac_top_builddir_sub=".to_string()),
6329                ),
6330                (
6331                    LexedWordSegmentKind::Plain,
6332                    "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
6333                    Some(
6334                        "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6335                            .to_string(),
6336                    ),
6337                ),
6338            ]
6339        );
6340    }
6341
6342    #[test]
6343    fn test_heredoc_with_unicode_content() {
6344        // Heredoc containing multi-byte characters; spans must be on char boundaries
6345        let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
6346        let mut lexer = Lexer::new(source);
6347
6348        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6349        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6350        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6351
6352        let heredoc = lexer.read_heredoc("EOF", false);
6353        assert_eq!(heredoc.content, "# 你好\ncafé\n");
6354        let start = heredoc.content_span.start.offset;
6355        let end = heredoc.content_span.end.offset;
6356        assert!(
6357            source.is_char_boundary(start),
6358            "heredoc span start ({start}) not on char boundary"
6359        );
6360        assert!(
6361            source.is_char_boundary(end),
6362            "heredoc span end ({end}) not on char boundary"
6363        );
6364        assert_eq!(&source[start..end], "# 你好\ncafé\n");
6365    }
6366
6367    #[test]
6368    fn test_assoc_compound_assignment() {
6369        // declare -A m=([foo]="bar" [baz]="qux") should keep the compound
6370        // assignment as a single Word token
6371        let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6372        assert_next_token(
6373            &mut lexer,
6374            TokenKind::Word,
6375            Some(r#"m=([foo]="bar" [baz]="qux")"#),
6376        );
6377        assert!(lexer.next_lexed_token().is_none());
6378    }
6379
6380    #[test]
6381    fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6382        let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6383        let mut lexer = Lexer::new(source);
6384
6385        let token = lexer.next_lexed_token().unwrap();
6386        assert_eq!(token.kind, TokenKind::Word);
6387        assert_eq!(token.span.slice(source), source);
6388        assert!(lexer.next_lexed_token().is_none());
6389    }
6390
6391    #[test]
6392    fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6393        let source = r#"foo\_bar@(baz|qux)"#;
6394        let mut lexer = Lexer::new(source);
6395
6396        let token = lexer.next_lexed_token().unwrap();
6397        assert_eq!(token.kind, TokenKind::Word);
6398        assert_eq!(token.span.slice(source), source);
6399        assert!(lexer.next_lexed_token().is_none());
6400    }
6401
6402    #[test]
6403    fn test_indexed_array_not_collapsed() {
6404        // arr=("hello world") should NOT be collapsed — parser handles
6405        // quoted elements token-by-token via the LeftParen path
6406        let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6407        assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6408        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6409    }
6410
6411    #[test]
6412    fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
6413        let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
6414        let mut lexer = Lexer::new(source);
6415
6416        assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
6417        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6418
6419        let token = lexer.next_lexed_token().unwrap();
6420        assert_eq!(token.kind, TokenKind::Word);
6421        assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);
6422
6423        let word = token.word().unwrap();
6424        let segments: Vec<_> = word
6425            .segments()
6426            .map(|segment| (segment.kind(), segment.as_str().to_string()))
6427            .collect();
6428        assert_eq!(
6429            segments,
6430            vec![
6431                (
6432                    LexedWordSegmentKind::DoubleQuoted,
6433                    "$plugin_dir".to_string()
6434                ),
6435                (LexedWordSegmentKind::Plain, "/*".to_string()),
6436                (LexedWordSegmentKind::Plain, "(:t)".to_string()),
6437            ]
6438        );
6439
6440        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6441        assert!(lexer.next_lexed_token().is_none());
6442    }
6443
6444    #[test]
6445    fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
6446        let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
6447        let mut lexer = Lexer::new(source);
6448
6449        assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
6450        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6451
6452        let token = lexer.next_lexed_token().unwrap();
6453        assert_eq!(token.kind, TokenKind::Word);
6454        assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);
6455
6456        let word = token.word().unwrap();
6457        let segments: Vec<_> = word
6458            .segments()
6459            .map(|segment| (segment.kind(), segment.as_str().to_string()))
6460            .collect();
6461        assert_eq!(
6462            segments,
6463            vec![
6464                (
6465                    LexedWordSegmentKind::DoubleQuoted,
6466                    "$__GREP_CACHE_FILE".to_string()
6467                ),
6468                (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
6469            ]
6470        );
6471
6472        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6473        assert!(lexer.next_lexed_token().is_none());
6474    }
6475
6476    #[test]
6477    fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6478        let source = r#"$dir/${~pats}(N)"#;
6479        let mut lexer = Lexer::new(source);
6480
6481        let token = lexer.next_lexed_token().unwrap();
6482        assert_eq!(token.kind, TokenKind::Word);
6483        assert_eq!(token.span.slice(source), source);
6484        assert!(lexer.next_lexed_token().is_none());
6485    }
6486
6487    #[test]
6488    fn test_dollar_word_does_not_absorb_function_parens() {
6489        let mut lexer = Lexer::new(r#"foo$x()"#);
6490
6491        assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6492        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6493        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6494        assert!(lexer.next_lexed_token().is_none());
6495    }
6496
6497    #[test]
6498    fn test_command_substitution_word_does_not_absorb_function_parens() {
6499        let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6500
6501        assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6502        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6503        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6504        assert!(lexer.next_lexed_token().is_none());
6505    }
6506
6507    /// Regression test for fuzz crash: single digit at EOF should not panic
6508    /// (crash-13c5f6f887a11b2296d67f9857975d63b205ac4b)
6509    #[test]
6510    fn test_digit_at_eof_no_panic() {
6511        // A lone digit with no following redirect operator must not panic
6512        let mut lexer = Lexer::new("2");
6513        let token = lexer.next_lexed_token();
6514        assert!(token.is_some());
6515    }
6516
6517    /// Issue #599: Nested ${...} inside unquoted ${...} must be a single token.
6518    #[test]
6519    fn test_nested_brace_expansion_single_token() {
6520        // ${arr[${#arr[@]} - 1]} should be ONE word token, not split at inner }
6521        let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6522        assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6523        // No more tokens — everything was consumed
6524        assert!(lexer.next_lexed_token().is_none());
6525    }
6526
6527    /// Simple ${var} still works after brace depth change.
6528    #[test]
6529    fn test_simple_brace_expansion_unchanged() {
6530        let mut lexer = Lexer::new("${foo}");
6531        assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6532        assert!(lexer.next_lexed_token().is_none());
6533    }
6534
6535    #[test]
6536    fn test_nvm_fixture_lexes_without_stalling() {
6537        let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6538        let mut lexer = Lexer::new(input);
6539        let mut tokens = 0usize;
6540
6541        while lexer.next_lexed_token().is_some() {
6542            tokens += 1;
6543            assert!(
6544                tokens < 100_000,
6545                "lexer should continue making progress on the nvm fixture"
6546            );
6547        }
6548
6549        assert!(tokens > 0, "nvm fixture should produce at least one token");
6550    }
6551
6552    #[test]
6553    fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
6554        let input = concat!(
6555            "case \"${_input_type:-}\" in\n",
6556            "  html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
6557            "  org)  _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
6558            "esac\n",
6559        );
6560
6561        assert_non_newline_tokens_stay_on_one_line(input);
6562
6563        let mut lexer = Lexer::new(input);
6564        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6565            .map(|token| (token.kind, token_text(&token, input)))
6566            .collect::<Vec<_>>();
6567        assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
6568        assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
6569    }
6570
6571    #[test]
6572    fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6573        let input = concat!(
6574            "case $2 in\n",
6575            "  cygwin*) bin='cygwin32/bin' ;|\n",
6576            "esac\n",
6577        );
6578
6579        let mut lexer = Lexer::new(input);
6580        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6581            .map(|token| (token.kind, token_text(&token, input)))
6582            .collect::<Vec<_>>();
6583
6584        assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6585        assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6586        assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6587    }
6588
6589    #[test]
6590    fn test_inline_if_with_array_append_stays_line_local() {
6591        let input = concat!(
6592            "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
6593            "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
6594        );
6595
6596        assert_non_newline_tokens_stay_on_one_line(input);
6597    }
6598
6599    #[test]
6600    fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6601        let source = "unsetopt interactive_comments\n#literal\n";
6602        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6603        let mut lexer = Lexer::with_profile(source, &profile);
6604
6605        assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6606        assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6607        assert_next_token(&mut lexer, TokenKind::Newline, None);
6608        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6609    }
6610
6611    #[test]
6612    fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
6613        let source = "setopt rc_quotes\nprint 'a''b'\n";
6614        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6615        let mut lexer = Lexer::with_profile(source, &profile);
6616
6617        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6618        assert_next_token(&mut lexer, TokenKind::Word, Some("rc_quotes"));
6619        assert_next_token(&mut lexer, TokenKind::Newline, None);
6620        assert_next_token(&mut lexer, TokenKind::Word, Some("print"));
6621        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("a'b"));
6622    }
6623
6624    #[test]
6625    fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
6626        let source = "setopt ignore_braces\n{ echo }\n";
6627        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6628        let mut lexer = Lexer::with_profile(source, &profile);
6629
6630        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6631        assert_next_token(&mut lexer, TokenKind::Word, Some("ignore_braces"));
6632        assert_next_token(&mut lexer, TokenKind::Newline, None);
6633        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
6634        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6635        assert_next_token(&mut lexer, TokenKind::Word, Some("}"));
6636    }
6637
6638    #[test]
6639    fn test_heredoc_in_arithmetic_fuzz_crash() {
6640        // Regression test: the fuzzer found that heredoc re-injection inside
6641        // arithmetic context can push self.offset past self.input.len(),
6642        // causing a panic in read_unquoted_segment's borrowed-slice path.
6643        let data: &[u8] = &[
6644            35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
6645            40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
6646            33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
6647            119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
6648            0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
6649            119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
6650            122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
6651            122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
6652            122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
6653            49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
6654            119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
6655            122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
6656            61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
6657            32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
6658            122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
6659            39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
6660            122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
6661            32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
6662            0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
6663        ];
6664        let input = std::str::from_utf8(data).unwrap();
6665        let script = format!("echo $(({input}))\n");
6666        // Must not panic.
6667        let _ = crate::parser::Parser::new(&script).parse();
6668    }
6669}
shuck_parser/parser/lexer.rs

shuck_parser/parser/
lexer.rs