Skip to main content

shuck_parser/parser/
lexer.rs

1//! Lexer for bash scripts
2//!
3//! Tokenizes input into a stream of tokens with source position tracking.
4
5use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Small bit set of per-token markers stored on every lexed token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Bit reported by [`TokenFlags::has_cooked_text`].
    const COOKED_TEXT: u8 = 0b0000_0001;
    /// Bit reported by [`TokenFlags::is_synthetic`].
    const SYNTHETIC: u8 = 0b0000_0010;

    /// Flag set with no bits raised.
    const fn empty() -> Self {
        Self(0)
    }

    /// Flag set with only the cooked-text bit raised.
    const fn cooked_text() -> Self {
        let bits = Self::COOKED_TEXT;
        Self(bits)
    }

    /// Return a copy of these flags with the synthetic bit raised.
    pub(crate) const fn with_synthetic(self) -> Self {
        let Self(bits) = self;
        Self(bits | Self::SYNTHETIC)
    }

    /// Whether the cooked-text bit is raised.
    pub(crate) const fn has_cooked_text(self) -> bool {
        (self.0 & Self::COOKED_TEXT) == Self::COOKED_TEXT
    }

    /// Whether the synthetic bit is raised.
    pub(crate) const fn is_synthetic(self) -> bool {
        (self.0 & Self::SYNTHETIC) == Self::SYNTHETIC
    }
}
40
/// Text carried by a word segment, in one of three storage forms.
///
/// The derived `PartialEq` compares the storage form as well as the contents, so two
/// values with equal `as_str()` output can still compare unequal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenText<'a> {
    /// Slice borrowed for the lifetime `'a`.
    Borrowed(&'a str),
    /// Byte range into a reference-counted source string.
    Shared {
        /// Shared backing string.
        source: Arc<str>,
        /// Byte range of the text inside `source`.
        range: Range<usize>,
    },
    /// Independently allocated text.
    Owned(String),
}
50
51impl TokenText<'_> {
52    pub(crate) fn as_str(&self) -> &str {
53        match self {
54            Self::Borrowed(text) => text,
55            Self::Shared { source, range } => &source[range.clone()],
56            Self::Owned(text) => text,
57        }
58    }
59
60    fn into_owned<'a>(self) -> TokenText<'a> {
61        match self {
62            Self::Borrowed(text) => TokenText::Owned(text.to_string()),
63            Self::Shared { source, range } => TokenText::Shared { source, range },
64            Self::Owned(text) => TokenText::Owned(text),
65        }
66    }
67
68    fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
69        match self {
70            Self::Borrowed(text) => span
71                .filter(|span| span.end.offset <= source.len())
72                .map_or_else(
73                    || TokenText::Owned(text.to_string()),
74                    |span| TokenText::Shared {
75                        source: Arc::clone(source),
76                        range: span.start.offset..span.end.offset,
77                    },
78                ),
79            Self::Shared { source, range } => TokenText::Shared { source, range },
80            Self::Owned(text) => TokenText::Owned(text),
81        }
82    }
83}
84
/// Classification of one segment inside a lexed shell word.
///
/// Each [`LexedWordSegment`] stores exactly one of these kinds.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexedWordSegmentKind {
    /// Unquoted or otherwise plain text.
    Plain,
    /// Text from a single-quoted string.
    SingleQuoted,
    /// Text from a `$'...'` string.
    DollarSingleQuoted,
    /// Text from a double-quoted string.
    DoubleQuoted,
    /// Text from a `$"..."` string.
    DollarDoubleQuoted,
    /// Text composed from multiple lexical forms.
    Composite,
}
101
/// One segment of a lexed shell word, optionally backed by source text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedWordSegment<'a> {
    /// Lexical classification of the segment.
    kind: LexedWordSegmentKind,
    /// Segment text, either source-backed or owned.
    text: TokenText<'a>,
    /// Span of the inner text, when tracked.
    span: Option<Span>,
    /// Span including surrounding quoting syntax, when tracked.
    wrapper_span: Option<Span>,
}
110
111impl<'a> LexedWordSegment<'a> {
112    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
113        Self {
114            kind,
115            text: TokenText::Borrowed(text),
116            span,
117            wrapper_span: span,
118        }
119    }
120
121    fn borrowed_with_spans(
122        kind: LexedWordSegmentKind,
123        text: &'a str,
124        span: Option<Span>,
125        wrapper_span: Option<Span>,
126    ) -> Self {
127        Self {
128            kind,
129            text: TokenText::Borrowed(text),
130            span,
131            wrapper_span,
132        }
133    }
134
135    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
136        Self {
137            kind,
138            text: TokenText::Owned(text),
139            span: None,
140            wrapper_span: None,
141        }
142    }
143
144    fn owned_with_spans(
145        kind: LexedWordSegmentKind,
146        text: String,
147        span: Option<Span>,
148        wrapper_span: Option<Span>,
149    ) -> Self {
150        Self {
151            kind,
152            text: TokenText::Owned(text),
153            span,
154            wrapper_span,
155        }
156    }
157
158    /// Borrow this segment's cooked text.
159    pub fn as_str(&self) -> &str {
160        self.text.as_str()
161    }
162
163    pub(crate) const fn text_is_source_backed(&self) -> bool {
164        matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
165    }
166
167    /// Return the lexical classification of this segment.
168    pub const fn kind(&self) -> LexedWordSegmentKind {
169        self.kind
170    }
171
172    /// Return the span of the inner text, if it is tracked.
173    pub const fn span(&self) -> Option<Span> {
174        self.span
175    }
176
177    /// Return the span including surrounding quoting syntax when available.
178    pub fn wrapper_span(&self) -> Option<Span> {
179        self.wrapper_span.or(self.span)
180    }
181
182    fn rebased(mut self, base: Position) -> Self {
183        self.span = self.span.map(|span| span.rebased(base));
184        self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
185        self
186    }
187
188    fn into_owned<'b>(self) -> LexedWordSegment<'b> {
189        LexedWordSegment {
190            kind: self.kind,
191            text: self.text.into_owned(),
192            span: self.span,
193            wrapper_span: self.wrapper_span,
194        }
195    }
196
197    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
198        LexedWordSegment {
199            kind: self.kind,
200            text: self.text.into_shared(source, self.span),
201            span: self.span,
202            wrapper_span: self.wrapper_span,
203        }
204    }
205}
206
/// Source-backed representation of a shell word produced by the lexer.
///
/// A word always has one segment; further segments are stored separately.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedWord<'a> {
    /// First segment of the word; always present.
    primary_segment: LexedWordSegment<'a>,
    /// Segments after the first, in order; empty for a single-segment word.
    trailing_segments: Vec<LexedWordSegment<'a>>,
}
213
214impl<'a> LexedWord<'a> {
215    fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
216        Self {
217            primary_segment,
218            trailing_segments: Vec::new(),
219        }
220    }
221
222    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
223        Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
224    }
225
226    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
227        Self::from_segment(LexedWordSegment::owned(kind, text))
228    }
229
230    fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
231        self.trailing_segments.push(segment);
232    }
233
234    /// Iterate over the segments that make up this word.
235    pub fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
236        std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
237    }
238
239    /// Return the word text when it is represented by a single segment.
240    pub fn text(&self) -> Option<&str> {
241        self.single_segment().map(LexedWordSegment::as_str)
242    }
243
244    /// Join all segments into an owned string.
245    pub fn joined_text(&self) -> String {
246        let mut text = String::new();
247        for segment in self.segments() {
248            text.push_str(segment.as_str());
249        }
250        text
251    }
252
253    /// Return the only segment when this word is not segmented.
254    pub fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
255        self.trailing_segments
256            .is_empty()
257            .then_some(&self.primary_segment)
258    }
259
260    fn has_cooked_text(&self) -> bool {
261        self.segments()
262            .any(|segment| matches!(segment.text, TokenText::Owned(_)))
263    }
264
265    fn rebased(mut self, base: Position) -> Self {
266        self.primary_segment = self.primary_segment.rebased(base);
267        self.trailing_segments = self
268            .trailing_segments
269            .into_iter()
270            .map(|segment| segment.rebased(base))
271            .collect();
272        self
273    }
274
275    fn into_owned<'b>(self) -> LexedWord<'b> {
276        LexedWord {
277            primary_segment: self.primary_segment.into_owned(),
278            trailing_segments: self
279                .trailing_segments
280                .into_iter()
281                .map(LexedWordSegment::into_owned)
282                .collect(),
283        }
284    }
285
286    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
287        LexedWord {
288            primary_segment: self.primary_segment.into_shared(source),
289            trailing_segments: self
290                .trailing_segments
291                .into_iter()
292                .map(|segment| segment.into_shared(source))
293                .collect(),
294        }
295    }
296}
297
/// Reasons the lexer can attach to a `TokenKind::Error` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexerErrorKind {
    /// A `$()` command substitution that was never closed.
    CommandSubstitution,
    /// A backtick command substitution that was never closed.
    BacktickSubstitution,
    /// A single-quoted string that was never closed.
    SingleQuote,
    /// A double-quoted string that was never closed.
    DoubleQuote,
}

impl LexerErrorKind {
    /// Static, human-readable description of this error kind.
    pub const fn message(self) -> &'static str {
        let description = match self {
            Self::SingleQuote => "unterminated single quote",
            Self::DoubleQuote => "unterminated double quote",
            Self::BacktickSubstitution => "unterminated backtick substitution",
            Self::CommandSubstitution => "unterminated command substitution",
        };
        description
    }
}
322
/// Extra data attached to a [`LexedToken`] beyond its kind and span.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenPayload<'a> {
    /// The token carries no extra data.
    None,
    /// Structured word text.
    Word(LexedWord<'a>),
    /// A single file-descriptor number.
    Fd(i32),
    /// A `(source_fd, target_fd)` pair of file-descriptor numbers.
    FdPair(i32, i32),
    /// The reason attached to an error token.
    Error(LexerErrorKind),
}
331
/// Token produced by the shell lexer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedToken<'a> {
    /// Token kind used by the parser.
    pub kind: TokenKind,
    /// Source span covered by the token.
    pub span: Span,
    /// Cooked-text and synthetic markers for this token.
    pub(crate) flags: TokenFlags,
    /// Word text, file-descriptor numbers, or error reason carried by the token.
    payload: TokenPayload<'a>,
}
342
343impl<'a> LexedToken<'a> {
344    fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
345        match kind {
346            TokenKind::Word => LexedWordSegmentKind::Plain,
347            TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
348            TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
349            _ => LexedWordSegmentKind::Composite,
350        }
351    }
352
353    pub(crate) fn punctuation(kind: TokenKind) -> Self {
354        Self {
355            kind,
356            span: Span::new(),
357            flags: TokenFlags::empty(),
358            payload: TokenPayload::None,
359        }
360    }
361
362    fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
363        let flags = if word.has_cooked_text() {
364            TokenFlags::cooked_text()
365        } else {
366            TokenFlags::empty()
367        };
368
369        Self {
370            kind,
371            span: Span::new(),
372            flags,
373            payload: TokenPayload::Word(word),
374        }
375    }
376
377    fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
378        Self::with_word_payload(
379            kind,
380            LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
381        )
382    }
383
384    fn owned_word(kind: TokenKind, text: String) -> Self {
385        Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
386    }
387
388    fn comment() -> Self {
389        Self {
390            kind: TokenKind::Comment,
391            span: Span::new(),
392            flags: TokenFlags::empty(),
393            payload: TokenPayload::None,
394        }
395    }
396
397    fn fd(kind: TokenKind, fd: i32) -> Self {
398        Self {
399            kind,
400            span: Span::new(),
401            flags: TokenFlags::empty(),
402            payload: TokenPayload::Fd(fd),
403        }
404    }
405
406    fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
407        Self {
408            kind,
409            span: Span::new(),
410            flags: TokenFlags::empty(),
411            payload: TokenPayload::FdPair(src_fd, dst_fd),
412        }
413    }
414
415    fn error(kind: LexerErrorKind) -> Self {
416        Self {
417            kind: TokenKind::Error,
418            span: Span::new(),
419            flags: TokenFlags::empty(),
420            payload: TokenPayload::Error(kind),
421        }
422    }
423
424    pub(crate) fn with_span(mut self, span: Span) -> Self {
425        self.span = span;
426        self
427    }
428
429    pub(crate) fn rebased(mut self, base: Position) -> Self {
430        self.span = self.span.rebased(base);
431        self.payload = match self.payload {
432            TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
433            payload => payload,
434        };
435        self
436    }
437
438    pub(crate) fn with_synthetic_flag(mut self) -> Self {
439        self.flags = self.flags.with_synthetic();
440        self
441    }
442
443    pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
444        let payload = match self.payload {
445            TokenPayload::None => TokenPayload::None,
446            TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
447            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
448            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
449            TokenPayload::Error(kind) => TokenPayload::Error(kind),
450        };
451
452        LexedToken {
453            kind: self.kind,
454            span: self.span,
455            flags: self.flags,
456            payload,
457        }
458    }
459
460    pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
461        let payload = match self.payload {
462            TokenPayload::None => TokenPayload::None,
463            TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
464            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
465            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
466            TokenPayload::Error(kind) => TokenPayload::Error(kind),
467        };
468
469        LexedToken {
470            kind: self.kind,
471            span: self.span,
472            flags: self.flags,
473            payload,
474        }
475    }
476
477    /// Borrow the token text when it is a single-segment word token.
478    pub fn word_text(&self) -> Option<&str> {
479        self.kind
480            .is_word_like()
481            .then_some(())
482            .and_then(|_| match &self.payload {
483                TokenPayload::Word(word) => word.text(),
484                _ => None,
485            })
486    }
487
488    /// Return an owned string containing the token's word text.
489    pub fn word_string(&self) -> Option<String> {
490        self.kind
491            .is_word_like()
492            .then_some(())
493            .and_then(|_| match &self.payload {
494                TokenPayload::Word(word) => Some(word.joined_text()),
495                _ => None,
496            })
497    }
498
499    /// Borrow the structured word payload for word-like tokens.
500    pub fn word(&self) -> Option<&LexedWord<'a>> {
501        match &self.payload {
502            TokenPayload::Word(word) => Some(word),
503            _ => None,
504        }
505    }
506
507    /// Borrow the original source slice when the token is source-backed and uncooked.
508    pub fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
509        if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
510            return None;
511        }
512
513        (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
514            .then(|| &source[self.span.start.offset..self.span.end.offset])
515    }
516
517    /// Return the file-descriptor payload for redirection tokens that carry one.
518    pub fn fd_value(&self) -> Option<i32> {
519        match self.payload {
520            TokenPayload::Fd(fd) => Some(fd),
521            _ => None,
522        }
523    }
524
525    /// Return the `(source_fd, target_fd)` payload for descriptor-pair redirections.
526    pub fn fd_pair_value(&self) -> Option<(i32, i32)> {
527        match self.payload {
528            TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
529            _ => None,
530        }
531    }
532
533    /// Return the lexer error payload when this token represents `TokenKind::Error`.
534    pub fn error_kind(&self) -> Option<LexerErrorKind> {
535        match self.payload {
536            TokenPayload::Error(kind) => Some(kind),
537            _ => None,
538        }
539    }
540}
541
/// Result of reading a heredoc body from the source.
///
/// Pairs the decoded body text with the span it was read from.
#[derive(Debug, Clone, PartialEq)]
pub struct HeredocRead {
    /// Decoded heredoc content.
    pub content: String,
    /// Source span covering the heredoc body content.
    pub content_span: Span,
}
550
/// Default maximum nesting depth for command substitution in the lexer.
///
/// Used by `Lexer::new` and `Lexer::with_profile`; `Lexer::with_max_subst_depth` accepts a
/// different limit. Bounding the depth prevents stack overflow from deeply nested `$()`
/// patterns.
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
554
/// Forward-only view over the not-yet-consumed tail of a source string.
#[derive(Clone, Debug)]
struct Cursor<'a> {
    /// Remaining, unconsumed source text.
    rest: &'a str,
}
559
560impl<'a> Cursor<'a> {
561    fn new(source: &'a str) -> Self {
562        Self { rest: source }
563    }
564
565    fn first(&self) -> Option<char> {
566        self.rest.chars().next()
567    }
568
569    fn second(&self) -> Option<char> {
570        let mut chars = self.rest.chars();
571        chars.next()?;
572        chars.next()
573    }
574
575    fn third(&self) -> Option<char> {
576        let mut chars = self.rest.chars();
577        chars.next()?;
578        chars.next()?;
579        chars.next()
580    }
581
582    fn bump(&mut self) -> Option<char> {
583        let ch = self.first()?;
584        self.rest = &self.rest[ch.len_utf8()..];
585        Some(ch)
586    }
587
588    fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
589        let start = self.rest;
590        let mut end = 0;
591
592        for ch in start.chars() {
593            if !predicate(ch) {
594                break;
595            }
596            end += ch.len_utf8();
597        }
598
599        self.rest = &start[end..];
600        &start[..end]
601    }
602
603    fn rest(&self) -> &'a str {
604        self.rest
605    }
606
607    fn skip_bytes(&mut self, count: usize) {
608        self.rest = &self.rest[count..];
609    }
610
611    fn find_byte(&self, byte: u8) -> Option<usize> {
612        memchr(byte, self.rest.as_bytes())
613    }
614}
615
/// Translates byte offsets in a source string into line/column positions.
#[derive(Clone, Debug)]
struct PositionMap<'a> {
    /// Source text the offsets refer to.
    source: &'a str,
    /// Byte offset at which each line starts; the first entry is always 0.
    line_starts: Vec<usize>,
    /// Most recently computed position, reused to answer nearby lookups.
    cached: Position,
}
622
/// Counters collected by the lexer when the `benchmarking` feature is enabled.
#[cfg(feature = "benchmarking")]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct LexerBenchmarkCounters {
    /// Number of recorded `Lexer::current_position` calls.
    pub(crate) current_position_calls: u64,
}
628
629impl<'a> PositionMap<'a> {
630    fn new(source: &'a str) -> Self {
631        let mut line_starts =
632            Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
633        line_starts.push(0);
634        line_starts.extend(
635            source
636                .bytes()
637                .enumerate()
638                .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
639        );
640
641        Self {
642            source,
643            line_starts,
644            cached: Position::new(),
645        }
646    }
647
648    fn position(&mut self, offset: usize) -> Position {
649        if offset == self.cached.offset {
650            return self.cached;
651        }
652
653        let position = if offset > self.cached.offset && offset <= self.source.len() {
654            Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
655        } else {
656            self.position_uncached(offset)
657        };
658        self.cached = position;
659        position
660    }
661
662    fn position_uncached(&self, offset: usize) -> Position {
663        let offset = offset.min(self.source.len());
664        let line_index = self
665            .line_starts
666            .partition_point(|start| *start <= offset)
667            .saturating_sub(1);
668        let line_start = self.line_starts[line_index];
669        let line_text = &self.source[line_start..offset];
670        let column = if line_text.is_ascii() {
671            line_text.len() + 1
672        } else {
673            line_text.chars().count() + 1
674        };
675
676        Position {
677            line: line_index + 1,
678            column,
679            offset,
680        }
681    }
682
683    fn advance_from(mut position: Position, text: &str) -> Position {
684        position.offset += text.len();
685        let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
686        if newline_count == 0 {
687            position.column += if text.is_ascii() {
688                text.len()
689            } else {
690                text.chars().count()
691            };
692            return position;
693        }
694
695        position.line += newline_count;
696        let tail_start = memrchr(b'\n', text.as_bytes())
697            .map(|index| index + 1)
698            .unwrap_or_default();
699        let tail = &text[tail_start..];
700        position.column = if tail.is_ascii() {
701            tail.len() + 1
702        } else {
703            tail.chars().count() + 1
704        };
705        position
706    }
707}
708
709/// Lexer for bash scripts.
710#[derive(Clone)]
711pub struct Lexer<'a> {
712    #[allow(dead_code)] // Stored for error reporting in future
713    input: &'a str,
714    /// Current byte offset in the input/reinjected stream.
715    offset: usize,
716    cursor: Cursor<'a>,
717    position_map: PositionMap<'a>,
718    /// Buffer for re-injected characters (e.g., rest-of-line after heredoc delimiter).
719    /// Consumed before `cursor`.
720    reinject_buf: VecDeque<char>,
721    /// Cursor byte offset to restore once a heredoc replay buffer is exhausted.
722    reinject_resume_offset: Option<usize>,
723    /// Maximum allowed nesting depth for command substitution
724    max_subst_depth: usize,
725    initial_zsh_options: Option<ZshOptionState>,
726    zsh_timeline: Option<Arc<ZshOptionTimeline>>,
727    zsh_timeline_index: usize,
728    #[cfg(feature = "benchmarking")]
729    benchmark_counters: Option<LexerBenchmarkCounters>,
730}
731
732impl<'a> Lexer<'a> {
733    /// Create a new lexer for the given input.
734    pub fn new(input: &'a str) -> Self {
735        Self::with_max_subst_depth_and_profile(
736            input,
737            DEFAULT_MAX_SUBST_DEPTH,
738            &ShellProfile::native(super::ShellDialect::Bash),
739            None,
740        )
741    }
742
743    /// Create a new lexer with a custom max substitution nesting depth.
744    /// Limits recursion in read_command_subst_into().
745    pub fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
746        Self::with_max_subst_depth_and_profile(
747            input,
748            max_depth,
749            &ShellProfile::native(super::ShellDialect::Bash),
750            None,
751        )
752    }
753
754    /// Create a new lexer using the provided shell profile.
755    pub fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
756        let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
757            .then(|| ZshOptionTimeline::build(input, shell_profile))
758            .flatten()
759            .map(Arc::new);
760        Self::with_max_subst_depth_and_profile(
761            input,
762            DEFAULT_MAX_SUBST_DEPTH,
763            shell_profile,
764            zsh_timeline,
765        )
766    }
767
768    pub(crate) fn with_max_subst_depth_and_profile(
769        input: &'a str,
770        max_depth: usize,
771        shell_profile: &ShellProfile,
772        zsh_timeline: Option<Arc<ZshOptionTimeline>>,
773    ) -> Self {
774        Self {
775            input,
776            offset: 0,
777            cursor: Cursor::new(input),
778            position_map: PositionMap::new(input),
779            reinject_buf: VecDeque::new(),
780            reinject_resume_offset: None,
781            max_subst_depth: max_depth,
782            initial_zsh_options: shell_profile.zsh_options().cloned(),
783            zsh_timeline,
784            zsh_timeline_index: 0,
785            #[cfg(feature = "benchmarking")]
786            benchmark_counters: None,
787        }
788    }
789
790    /// Get the current position in the input.
791    pub fn position(&self) -> Position {
792        self.position_map.position_uncached(self.offset)
793    }
794
795    fn current_position(&mut self) -> Position {
796        #[cfg(feature = "benchmarking")]
797        self.maybe_record_current_position_call();
798        self.position_map.position(self.offset)
799    }
800
801    #[cfg(feature = "benchmarking")]
802    pub(crate) fn enable_benchmark_counters(&mut self) {
803        self.benchmark_counters = Some(LexerBenchmarkCounters::default());
804    }
805
806    #[cfg(feature = "benchmarking")]
807    pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
808        self.benchmark_counters.unwrap_or_default()
809    }
810
811    #[cfg(feature = "benchmarking")]
812    fn maybe_record_current_position_call(&mut self) {
813        if let Some(counters) = &mut self.benchmark_counters {
814            counters.current_position_calls += 1;
815        }
816    }
817
818    fn sync_offset_to_cursor(&mut self) {
819        if self.reinject_buf.is_empty()
820            && let Some(offset) = self.reinject_resume_offset.take()
821        {
822            self.offset = offset;
823        }
824    }
825
826    /// Get the next token kind from the input without decoding or materializing
827    /// any payload text.
828    pub fn next_token_kind(&mut self) -> Option<TokenKind> {
829        self.next_lexed_token().map(|token| token.kind)
830    }
831
832    fn peek_char(&mut self) -> Option<char> {
833        self.sync_offset_to_cursor();
834        if let Some(&ch) = self.reinject_buf.front() {
835            Some(ch)
836        } else {
837            self.cursor.first()
838        }
839    }
840
841    fn advance(&mut self) -> Option<char> {
842        self.sync_offset_to_cursor();
843        let ch = if !self.reinject_buf.is_empty() {
844            self.reinject_buf.pop_front()
845        } else {
846            self.cursor.bump()
847        };
848        if let Some(c) = ch {
849            self.offset += c.len_utf8();
850        }
851        ch
852    }
853
854    fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
855        self.reinject_buf
856            .iter()
857            .copied()
858            .chain(self.cursor.rest().chars())
859    }
860
861    fn second_char(&self) -> Option<char> {
862        match self.reinject_buf.len() {
863            0 => self.cursor.second(),
864            1 => self.cursor.first(),
865            _ => self.reinject_buf.get(1).copied(),
866        }
867    }
868
869    fn third_char(&self) -> Option<char> {
870        match self.reinject_buf.len() {
871            0 => self.cursor.third(),
872            1 => self.cursor.second(),
873            2 => self.cursor.first(),
874            _ => self.reinject_buf.get(2).copied(),
875        }
876    }
877
878    fn fourth_char(&self) -> Option<char> {
879        match self.reinject_buf.len() {
880            0 => self.cursor.rest().chars().nth(3),
881            1 => self.cursor.third(),
882            2 => self.cursor.second(),
883            3 => self.cursor.first(),
884            _ => self.reinject_buf.get(3).copied(),
885        }
886    }
887
888    fn consume_source_bytes(&mut self, byte_len: usize) {
889        debug_assert!(self.reinject_buf.is_empty());
890        self.sync_offset_to_cursor();
891        self.offset += byte_len;
892        self.cursor.skip_bytes(byte_len);
893    }
894
895    fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
896        debug_assert!(self.reinject_buf.is_empty());
897        self.offset += byte_len;
898    }
899
900    fn consume_ascii_chars(&mut self, count: usize) {
901        if self.reinject_buf.is_empty() {
902            self.consume_source_bytes(count);
903            return;
904        }
905
906        for _ in 0..count {
907            self.advance();
908        }
909    }
910
911    fn source_horizontal_whitespace_len(&self) -> usize {
912        self.cursor
913            .rest()
914            .as_bytes()
915            .iter()
916            .take_while(|byte| matches!(**byte, b' ' | b'\t'))
917            .count()
918    }
919
920    fn source_ascii_plain_word_len(&self) -> usize {
921        self.cursor
922            .rest()
923            .as_bytes()
924            .iter()
925            .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
926            .count()
927    }
928
929    fn find_double_quote_special(source: &str) -> Option<usize> {
930        source
931            .as_bytes()
932            .iter()
933            .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
934    }
935
936    fn ensure_capture_from_source(
937        &self,
938        capture: &mut Option<String>,
939        start: Position,
940        end: Position,
941    ) {
942        if capture.is_none() {
943            *capture = Some(self.input[start.offset..end.offset].to_string());
944        }
945    }
946
947    fn push_capture_char(capture: &mut Option<String>, ch: char) {
948        if let Some(text) = capture.as_mut() {
949            text.push(ch);
950        }
951    }
952
953    fn push_capture_str(capture: &mut Option<String>, text: &str) {
954        if let Some(current) = capture.as_mut() {
955            current.push_str(text);
956        }
957    }
958
959    fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
960        if let Some(timeline) = self.zsh_timeline.as_ref() {
961            while self.zsh_timeline_index < timeline.entries.len()
962                && timeline.entries[self.zsh_timeline_index].offset <= self.offset
963            {
964                self.zsh_timeline_index += 1;
965            }
966            return if self.zsh_timeline_index == 0 {
967                self.initial_zsh_options.as_ref()
968            } else {
969                Some(&timeline.entries[self.zsh_timeline_index - 1].state)
970            };
971        }
972
973        self.initial_zsh_options.as_ref()
974    }
975
976    fn comments_enabled(&mut self) -> bool {
977        !self
978            .current_zsh_options()
979            .is_some_and(|options| options.interactive_comments.is_definitely_off())
980    }
981
982    fn rc_quotes_enabled(&mut self) -> bool {
983        self.current_zsh_options()
984            .is_some_and(|options| options.rc_quotes.is_definitely_on())
985    }
986
987    fn ignore_braces_enabled(&mut self) -> bool {
988        self.current_zsh_options()
989            .is_some_and(|options| options.ignore_braces.is_definitely_on())
990    }
991
992    fn ignore_close_braces_enabled(&mut self) -> bool {
993        self.current_zsh_options().is_some_and(|options| {
994            options.ignore_braces.is_definitely_on()
995                || options.ignore_close_braces.is_definitely_on()
996        })
997    }
998
999    fn should_treat_hash_as_word_char(&mut self) -> bool {
1000        if !self.comments_enabled() {
1001            return true;
1002        }
1003        self.reinject_buf.is_empty()
1004            && (self
1005                .input
1006                .get(..self.offset)
1007                .and_then(|prefix| prefix.chars().next_back())
1008                .is_some_and(|prev| {
1009                    !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
1010                })
1011                || self.is_inside_unclosed_double_paren_on_line())
1012    }
1013
1014    fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
1015        capture
1016            .as_deref()
1017            .unwrap_or(&self.input[start.offset..self.offset])
1018    }
1019
1020    fn current_word_surface_is_single_char(
1021        &self,
1022        start: Position,
1023        capture: &Option<String>,
1024        target: char,
1025    ) -> bool {
1026        let text = self.current_word_text(start, capture);
1027        if !text.contains('\x00') {
1028            let mut encoded = [0; 4];
1029            return text == target.encode_utf8(&mut encoded);
1030        }
1031
1032        let mut chars = text.chars().filter(|&ch| ch != '\x00');
1033        matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
1034    }
1035
1036    fn current_word_surface_last_char<'b>(
1037        &'b self,
1038        start: Position,
1039        capture: &'b Option<String>,
1040    ) -> Option<char> {
1041        self.current_word_text(start, capture)
1042            .chars()
1043            .rev()
1044            .find(|&ch| ch != '\x00')
1045    }
1046
1047    fn current_word_surface_ends_with_char(
1048        &self,
1049        start: Position,
1050        capture: &Option<String>,
1051        target: char,
1052    ) -> bool {
1053        self.current_word_surface_last_char(start, capture) == Some(target)
1054    }
1055
1056    fn current_word_surface_ends_with_extglob_prefix(
1057        &self,
1058        start: Position,
1059        capture: &Option<String>,
1060    ) -> bool {
1061        self.current_word_surface_last_char(start, capture)
1062            .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
1063    }
1064
1065    /// Get the next source-backed token from the input, skipping line comments.
1066    pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
1067        self.skip_whitespace();
1068        let start = self.current_position();
1069        let token = self.next_lexed_token_inner(false)?;
1070        let end = self.current_position();
1071        Some(token.with_span(Span::from_positions(start, end)))
1072    }
1073
1074    /// Get the next source-backed token from the input, preserving line comments.
1075    pub fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
1076        self.skip_whitespace();
1077        let start = self.current_position();
1078        let token = self.next_lexed_token_inner(true)?;
1079        let end = self.current_position();
1080        Some(token.with_span(Span::from_positions(start, end)))
1081    }
1082
1083    /// Internal: get next token without recording position (called after whitespace skip)
1084    fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
1085        let ch = self.peek_char()?;
1086
1087        match ch {
1088            '\n' => {
1089                self.consume_ascii_chars(1);
1090                Some(LexedToken::punctuation(TokenKind::Newline))
1091            }
1092            ';' => {
1093                if self.second_char() == Some(';') {
1094                    if self.third_char() == Some('&') {
1095                        self.consume_ascii_chars(3);
1096                        Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp)) // ;;&
1097                    } else {
1098                        self.consume_ascii_chars(2);
1099                        Some(LexedToken::punctuation(TokenKind::DoubleSemicolon)) // ;;
1100                    }
1101                } else if self.second_char() == Some('|') {
1102                    self.consume_ascii_chars(2);
1103                    Some(LexedToken::punctuation(TokenKind::SemiPipe)) // ;|
1104                } else if self.second_char() == Some('&') {
1105                    self.consume_ascii_chars(2);
1106                    Some(LexedToken::punctuation(TokenKind::SemiAmp)) // ;&
1107                } else {
1108                    self.consume_ascii_chars(1);
1109                    Some(LexedToken::punctuation(TokenKind::Semicolon))
1110                }
1111            }
1112            '|' => {
1113                if self.second_char() == Some('|') {
1114                    self.consume_ascii_chars(2);
1115                    Some(LexedToken::punctuation(TokenKind::Or))
1116                } else if self.second_char() == Some('&') {
1117                    self.consume_ascii_chars(2);
1118                    Some(LexedToken::punctuation(TokenKind::PipeBoth))
1119                } else {
1120                    self.consume_ascii_chars(1);
1121                    Some(LexedToken::punctuation(TokenKind::Pipe))
1122                }
1123            }
1124            '&' => {
1125                if self.second_char() == Some('&') {
1126                    self.consume_ascii_chars(2);
1127                    Some(LexedToken::punctuation(TokenKind::And))
1128                } else if self.second_char() == Some('>') {
1129                    if self.third_char() == Some('>') {
1130                        self.consume_ascii_chars(3);
1131                        Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
1132                    } else {
1133                        self.consume_ascii_chars(2);
1134                        Some(LexedToken::punctuation(TokenKind::RedirectBoth))
1135                    }
1136                } else if self.second_char() == Some('|') {
1137                    self.consume_ascii_chars(2);
1138                    Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
1139                } else if self.second_char() == Some('!') {
1140                    self.consume_ascii_chars(2);
1141                    Some(LexedToken::punctuation(TokenKind::BackgroundBang))
1142                } else {
1143                    self.consume_ascii_chars(1);
1144                    Some(LexedToken::punctuation(TokenKind::Background))
1145                }
1146            }
1147            '>' => {
1148                if self.second_char() == Some('>') {
1149                    if self.third_char() == Some('|') {
1150                        self.consume_ascii_chars(3);
1151                    } else {
1152                        self.consume_ascii_chars(2);
1153                    }
1154                    Some(LexedToken::punctuation(TokenKind::RedirectAppend))
1155                } else if self.second_char() == Some('|') {
1156                    self.consume_ascii_chars(2);
1157                    Some(LexedToken::punctuation(TokenKind::Clobber))
1158                } else if self.second_char() == Some('(') {
1159                    self.consume_ascii_chars(2);
1160                    Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
1161                } else if self.second_char() == Some('&') {
1162                    self.consume_ascii_chars(2);
1163                    Some(LexedToken::punctuation(TokenKind::DupOutput))
1164                } else {
1165                    self.consume_ascii_chars(1);
1166                    Some(LexedToken::punctuation(TokenKind::RedirectOut))
1167                }
1168            }
1169            '<' => {
1170                if self.second_char() == Some('<') {
1171                    if self.third_char() == Some('<') {
1172                        self.consume_ascii_chars(3);
1173                        Some(LexedToken::punctuation(TokenKind::HereString))
1174                    } else if self.third_char() == Some('-') {
1175                        self.consume_ascii_chars(3);
1176                        Some(LexedToken::punctuation(TokenKind::HereDocStrip))
1177                    } else {
1178                        self.consume_ascii_chars(2);
1179                        Some(LexedToken::punctuation(TokenKind::HereDoc))
1180                    }
1181                } else if self.second_char() == Some('>') {
1182                    self.consume_ascii_chars(2);
1183                    Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
1184                } else if self.second_char() == Some('(') {
1185                    self.consume_ascii_chars(2);
1186                    Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
1187                } else if self.second_char() == Some('&') {
1188                    self.consume_ascii_chars(2);
1189                    Some(LexedToken::punctuation(TokenKind::DupInput))
1190                } else {
1191                    self.consume_ascii_chars(1);
1192                    Some(LexedToken::punctuation(TokenKind::RedirectIn))
1193                }
1194            }
1195            '(' => {
1196                if self.second_char() == Some('(') {
1197                    self.consume_ascii_chars(2);
1198                    Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
1199                } else {
1200                    self.consume_ascii_chars(1);
1201                    Some(LexedToken::punctuation(TokenKind::LeftParen))
1202                }
1203            }
1204            ')' => {
1205                if self.second_char() == Some(')') {
1206                    self.consume_ascii_chars(2);
1207                    Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
1208                } else {
1209                    self.consume_ascii_chars(1);
1210                    Some(LexedToken::punctuation(TokenKind::RightParen))
1211                }
1212            }
1213            '{' => {
1214                if self.ignore_braces_enabled() {
1215                    let start = self.current_position();
1216                    self.consume_ascii_chars(1);
1217                    match self.peek_char() {
1218                        Some(' ') | Some('\t') | Some('\n') | None => {
1219                            Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
1220                        }
1221                        _ => self.read_word_starting_with("{", start),
1222                    }
1223                } else if self.looks_like_brace_expansion() {
1224                    // Look ahead to see if this is a brace expansion like {a,b,c} or {1..5}
1225                    // vs a brace group like { cmd; }
1226                    // Note: { must be followed by space/newline to be a brace group
1227                    self.read_brace_expansion_word()
1228                } else if self.is_brace_group_start() {
1229                    self.advance();
1230                    Some(LexedToken::punctuation(TokenKind::LeftBrace))
1231                } else {
1232                    // {single} without comma/dot-dot is kept as literal word
1233                    self.read_brace_literal_word()
1234                }
1235            }
1236            '}' => {
1237                self.consume_ascii_chars(1);
1238                if self.ignore_close_braces_enabled() {
1239                    Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
1240                } else {
1241                    Some(LexedToken::punctuation(TokenKind::RightBrace))
1242                }
1243            }
1244            '[' => {
1245                let start = self.current_position();
1246                self.consume_ascii_chars(1);
1247                if self.peek_char() == Some('[')
1248                    && matches!(
1249                        self.second_char(),
1250                        Some(' ') | Some('\t') | Some('\n') | None
1251                    )
1252                {
1253                    self.consume_ascii_chars(1);
1254                    Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
1255                } else {
1256                    // `[` can start the test command when followed by whitespace, or it can be
1257                    // ordinary word text such as a glob bracket expression.
1258                    //
1259                    // Read the whole token with the normal word scanner so forms like `[[z]`,
1260                    // `[hello"]"`, and `[+(])` stay attached to one word instead of producing
1261                    // structural tokens mid-word.
1262                    match self.peek_char() {
1263                        Some(' ') | Some('\t') | Some('\n') | None => {
1264                            Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
1265                        }
1266                        _ => self.read_word_starting_with("[", start),
1267                    }
1268                }
1269            }
1270            ']' => {
1271                if self.second_char() == Some(']') {
1272                    self.consume_ascii_chars(2);
1273                    Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
1274                } else {
1275                    self.consume_ascii_chars(1);
1276                    Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
1277                }
1278            }
1279            '\'' => self.read_single_quoted_string(),
1280            '"' => self.read_double_quoted_string(),
1281            '#' => {
1282                if self.should_treat_hash_as_word_char() {
1283                    let start = self.current_position();
1284                    return self.read_word_starting_with("#", start);
1285                }
1286                if preserve_comments {
1287                    self.read_comment();
1288                    Some(LexedToken::comment())
1289                } else {
1290                    self.skip_comment();
1291                    self.next_lexed_token_inner(false)
1292                }
1293            }
1294            // Handle file descriptor redirects like 2> or 2>&1
1295            '0'..='9' => self.read_word_or_fd_redirect(),
1296            _ => self.read_word(),
1297        }
1298    }
1299
1300    fn skip_whitespace(&mut self) {
1301        while let Some(ch) = self.peek_char() {
1302            if self.reinject_buf.is_empty() {
1303                let whitespace_len = self.source_horizontal_whitespace_len();
1304                if whitespace_len > 0 {
1305                    self.consume_source_bytes(whitespace_len);
1306                    continue;
1307                }
1308
1309                if self.cursor.rest().starts_with("\\\n") {
1310                    self.consume_source_bytes(2);
1311                    continue;
1312                }
1313            }
1314
1315            if ch == ' ' || ch == '\t' {
1316                self.consume_ascii_chars(1);
1317            } else if ch == '\\' {
1318                // Check for backslash-newline (line continuation) between tokens
1319                if self.second_char() == Some('\n') {
1320                    self.consume_ascii_chars(2);
1321                } else {
1322                    break;
1323                }
1324            } else {
1325                break;
1326            }
1327        }
1328    }
1329
1330    fn skip_comment(&mut self) {
1331        if self.reinject_buf.is_empty() {
1332            let end = self
1333                .cursor
1334                .find_byte(b'\n')
1335                .unwrap_or(self.cursor.rest().len());
1336            self.consume_source_bytes(end);
1337            return;
1338        }
1339
1340        while let Some(ch) = self.peek_char() {
1341            if ch == '\n' {
1342                break;
1343            }
1344            self.advance();
1345        }
1346    }
1347
1348    fn read_comment(&mut self) {
1349        debug_assert_eq!(self.peek_char(), Some('#'));
1350
1351        if self.reinject_buf.is_empty() {
1352            let rest = self.cursor.rest();
1353            let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1354            self.consume_source_bytes(end);
1355            return;
1356        }
1357
1358        self.advance(); // consume '#'
1359
1360        while let Some(ch) = self.peek_char() {
1361            if ch == '\n' {
1362                break;
1363            }
1364            self.advance();
1365        }
1366    }
1367
1368    fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1369        if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1370            return false;
1371        }
1372
1373        let line_start = self.input[..self.offset]
1374            .rfind('\n')
1375            .map_or(0, |index| index + 1);
1376        let prefix = &self.input[line_start..self.offset];
1377        line_has_unclosed_double_paren(prefix)
1378    }
1379
1380    /// Check if this is a file descriptor redirect (e.g., 2>, 2>>, 2>&1)
1381    /// or just a regular word starting with a digit
1382    fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
1383        if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
1384            let fd: i32 = first_digit.to_digit(10).unwrap() as i32;
1385
1386            match (self.second_char(), self.third_char()) {
1387                (Some('>'), Some('>')) => {
1388                    if self.fourth_char() == Some('|') {
1389                        self.consume_ascii_chars(4);
1390                    } else {
1391                        self.consume_ascii_chars(3);
1392                    }
1393                    return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
1394                }
1395                (Some('>'), Some('|')) => {
1396                    self.consume_ascii_chars(3);
1397                    return Some(LexedToken::fd(TokenKind::Clobber, fd));
1398                }
1399                (Some('>'), Some('&')) => {
1400                    self.consume_ascii_chars(3);
1401
1402                    let mut target_str = String::with_capacity(4);
1403                    while let Some(c) = self.peek_char() {
1404                        if c.is_ascii_digit() {
1405                            target_str.push(c);
1406                            self.advance();
1407                        } else {
1408                            break;
1409                        }
1410                    }
1411
1412                    if target_str.is_empty() {
1413                        return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1414                    }
1415
1416                    let target_fd: i32 = target_str.parse().unwrap_or(1);
1417                    return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
1418                }
1419                (Some('>'), _) => {
1420                    self.consume_ascii_chars(2);
1421                    return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1422                }
1423                (Some('<'), Some('&')) => {
1424                    self.consume_ascii_chars(3);
1425
1426                    let mut target_str = String::with_capacity(4);
1427                    while let Some(c) = self.peek_char() {
1428                        if c.is_ascii_digit() || c == '-' {
1429                            target_str.push(c);
1430                            self.advance();
1431                            if c == '-' {
1432                                break;
1433                            }
1434                        } else {
1435                            break;
1436                        }
1437                    }
1438
1439                    if target_str == "-" {
1440                        return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
1441                    }
1442                    let target_fd: i32 = target_str.parse().unwrap_or(0);
1443                    return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
1444                }
1445                (Some('<'), Some('>')) => {
1446                    self.consume_ascii_chars(3);
1447                    return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
1448                }
1449                (Some('<'), Some('<')) => {}
1450                (Some('<'), _) => {
1451                    self.consume_ascii_chars(2);
1452                    return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
1453                }
1454                _ => {}
1455            }
1456        }
1457
1458        // Not a fd redirect pattern, read as regular word
1459        self.read_word()
1460    }
1461
1462    fn read_word_starting_with(
1463        &mut self,
1464        _prefix: &str,
1465        start: Position,
1466    ) -> Option<LexedToken<'a>> {
1467        let segment = match self.read_unquoted_segment(start) {
1468            Ok(segment) => segment,
1469            Err(kind) => return Some(LexedToken::error(kind)),
1470        };
1471        if segment.as_str().is_empty() {
1472            return None;
1473        }
1474        let mut lexed_word = LexedWord::from_segment(segment);
1475        if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1476            return Some(LexedToken::error(kind));
1477        }
1478        Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1479    }
1480
1481    fn read_word(&mut self) -> Option<LexedToken<'a>> {
1482        let start = self.current_position();
1483
1484        if self.reinject_buf.is_empty() {
1485            let ascii_len = self.source_ascii_plain_word_len();
1486            let chunk = if ascii_len > 0
1487                && self
1488                    .cursor
1489                    .rest()
1490                    .as_bytes()
1491                    .get(ascii_len)
1492                    .is_none_or(|byte| byte.is_ascii())
1493            {
1494                self.consume_source_bytes(ascii_len);
1495                &self.input[start.offset..self.offset]
1496            } else {
1497                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1498                self.advance_scanned_source_bytes(chunk.len());
1499                chunk
1500            };
1501            if !chunk.is_empty() {
1502                let continues = matches!(
1503                    self.peek_char(),
1504                    Some(next)
1505                        if Self::is_word_char(next)
1506                            || next == '$'
1507                            || matches!(next, '\'' | '"')
1508                            || next == '{'
1509                            || (next == '('
1510                                && (chunk.ends_with('=')
1511                                    || Self::word_can_take_parenthesized_suffix(chunk)))
1512                );
1513
1514                if !continues {
1515                    let end = self.current_position();
1516                    return Some(LexedToken::borrowed_word(
1517                        TokenKind::Word,
1518                        &self.input[start.offset..self.offset],
1519                        Some(Span::from_positions(start, end)),
1520                    ));
1521                }
1522
1523                if self.peek_char() == Some('(')
1524                    && (chunk.ends_with('=') || Self::word_can_take_parenthesized_suffix(chunk))
1525                {
1526                    return self.read_complex_word(start);
1527                }
1528
1529                let end = self.current_position();
1530                return self.finish_segmented_word(LexedWord::borrowed(
1531                    LexedWordSegmentKind::Plain,
1532                    &self.input[start.offset..self.offset],
1533                    Some(Span::from_positions(start, end)),
1534                ));
1535            }
1536        }
1537
1538        self.read_complex_word(start)
1539    }
1540
1541    fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1542        if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1543            return Some(LexedToken::error(kind));
1544        }
1545
1546        Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1547    }
1548
1549    fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1550        if self.peek_char() == Some('$') {
1551            match self.second_char() {
1552                Some('\'') => return self.read_dollar_single_quoted_string(),
1553                Some('"') => return self.read_dollar_double_quoted_string(),
1554                _ => {}
1555            }
1556        }
1557
1558        let segment = match self.read_unquoted_segment(start) {
1559            Ok(segment) => segment,
1560            Err(kind) => return Some(LexedToken::error(kind)),
1561        };
1562
1563        if segment.as_str().is_empty() {
1564            return None;
1565        }
1566
1567        self.finish_segmented_word(LexedWord::from_segment(segment))
1568    }
1569
1570    fn read_unquoted_segment(
1571        &mut self,
1572        start: Position,
1573    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1574        let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
1575        while let Some(ch) = self.peek_char() {
1576            if ch == '"' || ch == '\'' {
1577                break;
1578            } else if ch == '$' {
1579                if matches!(self.second_char(), Some('\'') | Some('"'))
1580                    && (self.current_position().offset > start.offset
1581                        || word.as_ref().is_some_and(|word| !word.is_empty()))
1582                {
1583                    break;
1584                }
1585
1586                // Handle variable references and command substitution
1587                self.advance();
1588
1589                Self::push_capture_char(&mut word, ch); // push the '$'
1590
1591                // Check for $[ / $( / ${ forms before falling back to variable text.
1592                if self.peek_char() == Some('[') {
1593                    Self::push_capture_char(&mut word, '[');
1594                    self.advance();
1595                    if !self.read_legacy_arithmetic_into(&mut word, start) {
1596                        return Err(LexerErrorKind::CommandSubstitution);
1597                    }
1598                } else if self.peek_char() == Some('(') {
1599                    if self.second_char() == Some('(') {
1600                        if !self.read_arithmetic_expansion_into(&mut word) {
1601                            return Err(LexerErrorKind::CommandSubstitution);
1602                        }
1603                    } else {
1604                        Self::push_capture_char(&mut word, '(');
1605                        self.advance();
1606                        if !self.read_command_subst_into(&mut word) {
1607                            return Err(LexerErrorKind::CommandSubstitution);
1608                        }
1609                    }
1610                } else if self.peek_char() == Some('{') {
1611                    // ${VAR} format — track nested braces so ${a[${#b[@]}]}
1612                    // doesn't stop at the inner }.
1613                    Self::push_capture_char(&mut word, '{');
1614                    self.advance();
1615                    let _ = self.read_param_expansion_into(&mut word, start);
1616                } else {
1617                    // Check for special single-character variables ($?, $#, $@, $*, $!, $$, $-, $0-$9)
1618                    if let Some(c) = self.peek_char() {
1619                        if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
1620                            || c.is_ascii_digit()
1621                        {
1622                            Self::push_capture_char(&mut word, c);
1623                            self.advance();
1624                        } else {
1625                            // Read variable name (alphanumeric + _)
1626                            while let Some(c) = self.peek_char() {
1627                                if c.is_ascii_alphanumeric() || c == '_' {
1628                                    Self::push_capture_char(&mut word, c);
1629                                    self.advance();
1630                                } else {
1631                                    break;
1632                                }
1633                            }
1634                        }
1635                    }
1636                }
1637            } else if ch == '{' {
1638                if self.looks_like_mid_word_brace_segment() {
1639                    // Keep balanced {...} forms attached to the current word so
1640                    // plain literals like foo{bar} and brace expansions stay intact.
1641                    Self::push_capture_char(&mut word, ch);
1642                    self.advance();
1643                    let mut depth = 1;
1644                    while let Some(c) = self.peek_char() {
1645                        Self::push_capture_char(&mut word, c);
1646                        self.advance();
1647                        if c == '{' {
1648                            depth += 1;
1649                        } else if c == '}' {
1650                            depth -= 1;
1651                            if depth == 0 {
1652                                break;
1653                            }
1654                        }
1655                    }
1656                } else {
1657                    // Unmatched literal braces in regexes like ^{ should not swallow
1658                    // trailing delimiters such as ]] or then.
1659                    Self::push_capture_char(&mut word, ch);
1660                    self.advance();
1661                }
1662            } else if ch == '`' {
1663                // Preserve legacy backticks verbatim so the parser can keep the
1664                // original syntax form.
1665                let capture_end = self.current_position();
1666                self.ensure_capture_from_source(&mut word, start, capture_end);
1667                Self::push_capture_char(&mut word, ch);
1668                self.advance(); // consume opening `
1669                let mut closed = false;
1670                while let Some(c) = self.peek_char() {
1671                    Self::push_capture_char(&mut word, c);
1672                    self.advance();
1673                    if c == '`' {
1674                        closed = true;
1675                        break;
1676                    }
1677                    if c == '\\'
1678                        && let Some(next) = self.peek_char()
1679                    {
1680                        Self::push_capture_char(&mut word, next);
1681                        self.advance();
1682                    }
1683                }
1684                if !closed {
1685                    return Err(LexerErrorKind::BacktickSubstitution);
1686                }
1687            } else if ch == '\\' {
1688                let capture_end = self.current_position();
1689                self.ensure_capture_from_source(&mut word, start, capture_end);
1690                self.advance();
1691                if let Some(next) = self.peek_char() {
1692                    if next == '\n' {
1693                        // Line continuation: skip backslash + newline
1694                        self.advance();
1695                    } else {
1696                        // Escaped character: backslash quotes the next char
1697                        // (quote removal — only the literal char survives).
1698                        // Preserve source/decoded alignment with a sentinel so
1699                        // downstream word decoding keeps later spans anchored.
1700                        Self::push_capture_char(&mut word, '\x00');
1701                        Self::push_capture_char(&mut word, next);
1702                        self.advance();
1703                        if next == '{'
1704                            && self.current_word_surface_is_single_char(start, &word, '{')
1705                            && self.escaped_brace_sequence_looks_like_brace_expansion()
1706                        {
1707                            let mut depth = 1;
1708                            while let Some(c) = self.peek_char() {
1709                                Self::push_capture_char(&mut word, c);
1710                                self.advance();
1711                                match c {
1712                                    '{' => depth += 1,
1713                                    '}' => {
1714                                        depth -= 1;
1715                                        if depth == 0 {
1716                                            break;
1717                                        }
1718                                    }
1719                                    _ => {}
1720                                }
1721                            }
1722                        }
1723                    }
1724                } else {
1725                    Self::push_capture_char(&mut word, '\\');
1726                }
1727            } else if ch == '('
1728                && self.current_word_surface_ends_with_char(start, &word, '=')
1729                && self.looks_like_assoc_assign()
1730            {
1731                // Associative compound assignment: var=([k]="v" ...) — keep entire
1732                // (...) as part of word so declare -A m=([k]="v") stays one token.
1733                Self::push_capture_char(&mut word, ch);
1734                self.advance();
1735                let mut depth = 1;
1736                while let Some(c) = self.peek_char() {
1737                    Self::push_capture_char(&mut word, c);
1738                    self.advance();
1739                    match c {
1740                        '(' => depth += 1,
1741                        ')' => {
1742                            depth -= 1;
1743                            if depth == 0 {
1744                                break;
1745                            }
1746                        }
1747                        '"' => {
1748                            while let Some(qc) = self.peek_char() {
1749                                Self::push_capture_char(&mut word, qc);
1750                                self.advance();
1751                                if qc == '"' {
1752                                    break;
1753                                }
1754                                if qc == '\\'
1755                                    && let Some(esc) = self.peek_char()
1756                                {
1757                                    Self::push_capture_char(&mut word, esc);
1758                                    self.advance();
1759                                }
1760                            }
1761                        }
1762                        '\'' => {
1763                            while let Some(qc) = self.peek_char() {
1764                                Self::push_capture_char(&mut word, qc);
1765                                self.advance();
1766                                if qc == '\'' {
1767                                    break;
1768                                }
1769                            }
1770                        }
1771                        '\\' => {
1772                            if let Some(esc) = self.peek_char() {
1773                                Self::push_capture_char(&mut word, esc);
1774                                self.advance();
1775                            }
1776                        }
1777                        _ => {}
1778                    }
1779                }
1780            } else if ch == '(' && self.current_word_surface_ends_with_extglob_prefix(start, &word)
1781            {
1782                // Extglob: @(...), ?(...), *(...), +(...), !(...)
1783                // Consume through matching ) including nested parens
1784                Self::push_capture_char(&mut word, ch);
1785                self.advance();
1786                let mut depth = 1;
1787                while let Some(c) = self.peek_char() {
1788                    Self::push_capture_char(&mut word, c);
1789                    self.advance();
1790                    match c {
1791                        '(' => depth += 1,
1792                        ')' => {
1793                            depth -= 1;
1794                            if depth == 0 {
1795                                break;
1796                            }
1797                        }
1798                        '\\' => {
1799                            if let Some(esc) = self.peek_char() {
1800                                Self::push_capture_char(&mut word, esc);
1801                                self.advance();
1802                            }
1803                        }
1804                        _ => {}
1805                    }
1806                }
1807            } else if Self::is_plain_word_char(ch) {
1808                if self.reinject_buf.is_empty() {
1809                    let ascii_len = self.source_ascii_plain_word_len();
1810                    let chunk = if ascii_len > 0
1811                        && self
1812                            .cursor
1813                            .rest()
1814                            .as_bytes()
1815                            .get(ascii_len)
1816                            .is_none_or(|byte| byte.is_ascii())
1817                    {
1818                        self.consume_source_bytes(ascii_len);
1819                        &self.input[self.offset - ascii_len..self.offset]
1820                    } else {
1821                        let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1822                        self.advance_scanned_source_bytes(chunk.len());
1823                        chunk
1824                    };
1825                    Self::push_capture_str(&mut word, chunk);
1826                } else {
1827                    Self::push_capture_char(&mut word, ch);
1828                    self.advance();
1829                }
1830            } else {
1831                break;
1832            }
1833        }
1834
1835        if let Some(word) = word {
1836            let span = Some(Span::from_positions(start, self.current_position()));
1837            Ok(LexedWordSegment::owned_with_spans(
1838                LexedWordSegmentKind::Plain,
1839                word,
1840                span,
1841                span,
1842            ))
1843        } else {
1844            let end = self.current_position();
1845            Ok(LexedWordSegment::borrowed(
1846                LexedWordSegmentKind::Plain,
1847                &self.input[start.offset..self.offset],
1848                Some(Span::from_positions(start, end)),
1849            ))
1850        }
1851    }
1852
1853    fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1854        let segment = match self.read_single_quoted_segment() {
1855            Ok(segment) => segment,
1856            Err(kind) => return Some(LexedToken::error(kind)),
1857        };
1858        let mut word = LexedWord::from_segment(segment);
1859        if let Err(kind) = self.append_segmented_continuation(&mut word) {
1860            return Some(LexedToken::error(kind));
1861        }
1862
1863        Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1864    }
1865
1866    fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1867        debug_assert_eq!(self.peek_char(), Some('\''));
1868
1869        let wrapper_start = self.current_position();
1870        self.consume_ascii_chars(1); // consume opening '
1871        let content_start = self.current_position();
1872        let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
1873        let mut content_end = content_start;
1874        let mut content = String::with_capacity(16);
1875        let mut closed = false;
1876
1877        if can_borrow {
1878            let rest = self.cursor.rest();
1879            if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
1880                self.consume_source_bytes(quote_index);
1881                content_end = self.current_position();
1882                self.consume_ascii_chars(1); // consume closing '
1883                closed = true;
1884            } else {
1885                self.consume_source_bytes(rest.len());
1886            }
1887        }
1888
1889        while let Some(ch) = self.peek_char() {
1890            if closed {
1891                break;
1892            }
1893            if ch == '\'' {
1894                if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
1895                    if !can_borrow {
1896                        content.push('\'');
1897                    }
1898                    self.advance();
1899                    self.advance();
1900                    continue;
1901                }
1902                content_end = self.current_position();
1903                self.consume_ascii_chars(1); // consume closing '
1904                closed = true;
1905                break;
1906            }
1907            if !can_borrow {
1908                content.push(ch);
1909            }
1910            self.advance();
1911        }
1912
1913        if !closed {
1914            return Err(LexerErrorKind::SingleQuote);
1915        }
1916
1917        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
1918        let content_span = Some(Span::from_positions(content_start, content_end));
1919
1920        if can_borrow {
1921            Ok(LexedWordSegment::borrowed_with_spans(
1922                LexedWordSegmentKind::SingleQuoted,
1923                &self.input[content_start.offset..content_end.offset],
1924                content_span,
1925                wrapper_span,
1926            ))
1927        } else {
1928            Ok(LexedWordSegment::owned_with_spans(
1929                LexedWordSegmentKind::SingleQuoted,
1930                content,
1931                content_span,
1932                wrapper_span,
1933            ))
1934        }
1935    }
1936
1937    fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1938        let segment = match self.read_dollar_single_quoted_segment() {
1939            Ok(segment) => segment,
1940            Err(kind) => return Some(LexedToken::error(kind)),
1941        };
1942        let mut word = LexedWord::from_segment(segment);
1943        if let Err(kind) = self.append_segmented_continuation(&mut word) {
1944            return Some(LexedToken::error(kind));
1945        }
1946
1947        let kind = if word.single_segment().is_some() {
1948            TokenKind::LiteralWord
1949        } else {
1950            TokenKind::Word
1951        };
1952
1953        Some(LexedToken::with_word_payload(kind, word))
1954    }
1955
1956    fn read_dollar_single_quoted_segment(
1957        &mut self,
1958    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1959        debug_assert_eq!(self.peek_char(), Some('$'));
1960        debug_assert_eq!(self.second_char(), Some('\''));
1961
1962        let wrapper_start = self.current_position();
1963        self.consume_ascii_chars(2); // consume $'
1964        let content_start = self.current_position();
1965        let mut out = String::with_capacity(16);
1966
1967        while let Some(ch) = self.peek_char() {
1968            if ch == '\'' {
1969                let content_end = self.current_position();
1970                self.advance();
1971                let wrapper_span =
1972                    Some(Span::from_positions(wrapper_start, self.current_position()));
1973                let content_span = Some(Span::from_positions(content_start, content_end));
1974                return Ok(LexedWordSegment::owned_with_spans(
1975                    LexedWordSegmentKind::DollarSingleQuoted,
1976                    out,
1977                    content_span,
1978                    wrapper_span,
1979                ));
1980            }
1981
1982            if ch == '\\' {
1983                self.advance();
1984                if let Some(esc) = self.peek_char() {
1985                    self.advance();
1986                    match esc {
1987                        'n' => out.push('\n'),
1988                        't' => out.push('\t'),
1989                        'r' => out.push('\r'),
1990                        'a' => out.push('\x07'),
1991                        'b' => out.push('\x08'),
1992                        'f' => out.push('\x0C'),
1993                        'v' => out.push('\x0B'),
1994                        'e' | 'E' => out.push('\x1B'),
1995                        '\\' => out.push('\\'),
1996                        '\'' => out.push('\''),
1997                        '"' => out.push('"'),
1998                        '?' => out.push('?'),
1999                        'c' => {
2000                            if let Some(control) = self.peek_char() {
2001                                self.advance();
2002                                out.push(((control as u32 & 0x1F) as u8) as char);
2003                            } else {
2004                                out.push('\\');
2005                                out.push('c');
2006                            }
2007                        }
2008                        'x' => {
2009                            let mut hex = String::new();
2010                            for _ in 0..2 {
2011                                if let Some(h) = self.peek_char() {
2012                                    if h.is_ascii_hexdigit() {
2013                                        hex.push(h);
2014                                        self.advance();
2015                                    } else {
2016                                        break;
2017                                    }
2018                                }
2019                            }
2020                            if let Ok(val) = u8::from_str_radix(&hex, 16) {
2021                                out.push(val as char);
2022                            }
2023                        }
2024                        'u' => {
2025                            let mut hex = String::new();
2026                            for _ in 0..4 {
2027                                if let Some(h) = self.peek_char() {
2028                                    if h.is_ascii_hexdigit() {
2029                                        hex.push(h);
2030                                        self.advance();
2031                                    } else {
2032                                        break;
2033                                    }
2034                                }
2035                            }
2036                            if let Ok(val) = u32::from_str_radix(&hex, 16)
2037                                && let Some(c) = char::from_u32(val)
2038                            {
2039                                out.push(c);
2040                            }
2041                        }
2042                        'U' => {
2043                            let mut hex = String::new();
2044                            for _ in 0..8 {
2045                                if let Some(h) = self.peek_char() {
2046                                    if h.is_ascii_hexdigit() {
2047                                        hex.push(h);
2048                                        self.advance();
2049                                    } else {
2050                                        break;
2051                                    }
2052                                }
2053                            }
2054                            if let Ok(val) = u32::from_str_radix(&hex, 16)
2055                                && let Some(c) = char::from_u32(val)
2056                            {
2057                                out.push(c);
2058                            }
2059                        }
2060                        '0'..='7' => {
2061                            let mut oct = String::new();
2062                            oct.push(esc);
2063                            for _ in 0..2 {
2064                                if let Some(o) = self.peek_char() {
2065                                    if o.is_ascii_digit() && o < '8' {
2066                                        oct.push(o);
2067                                        self.advance();
2068                                    } else {
2069                                        break;
2070                                    }
2071                                }
2072                            }
2073                            if let Ok(val) = u8::from_str_radix(&oct, 8) {
2074                                out.push(val as char);
2075                            }
2076                        }
2077                        _ => {
2078                            out.push('\\');
2079                            out.push(esc);
2080                        }
2081                    }
2082                } else {
2083                    out.push('\\');
2084                }
2085                continue;
2086            }
2087
2088            out.push(ch);
2089            self.advance();
2090        }
2091
2092        Err(LexerErrorKind::SingleQuote)
2093    }
2094
2095    fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2096        let start = self.current_position();
2097
2098        if self.reinject_buf.is_empty() {
2099            let ascii_len = self.source_ascii_plain_word_len();
2100            let chunk = if ascii_len > 0
2101                && self
2102                    .cursor
2103                    .rest()
2104                    .as_bytes()
2105                    .get(ascii_len)
2106                    .is_none_or(|byte| byte.is_ascii())
2107            {
2108                self.consume_source_bytes(ascii_len);
2109                &self.input[start.offset..self.offset]
2110            } else {
2111                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
2112                self.advance_scanned_source_bytes(chunk.len());
2113                chunk
2114            };
2115            if chunk.is_empty() {
2116                return None;
2117            }
2118
2119            let end = self.current_position();
2120            return Some(LexedWordSegment::borrowed(
2121                LexedWordSegmentKind::Plain,
2122                &self.input[start.offset..self.offset],
2123                Some(Span::from_positions(start, end)),
2124            ));
2125        }
2126
2127        let ch = self.peek_char()?;
2128        if !Self::is_plain_word_char(ch) {
2129            return None;
2130        }
2131
2132        let mut text = String::with_capacity(16);
2133        while let Some(ch) = self.peek_char() {
2134            if !Self::is_plain_word_char(ch) {
2135                break;
2136            }
2137            text.push(ch);
2138            self.advance();
2139        }
2140
2141        Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
2142    }
2143
2144    /// After a closing quote, read any adjacent quoted or unquoted word chars
2145    /// into `word`. Handles concatenation like `'foo'"bar"baz`.
2146    fn append_segmented_continuation(
2147        &mut self,
2148        word: &mut LexedWord<'a>,
2149    ) -> Result<(), LexerErrorKind> {
2150        loop {
2151            match self.peek_char() {
2152                Some('\'') => {
2153                    word.push_segment(self.read_single_quoted_segment()?);
2154                }
2155                Some('"') => {
2156                    word.push_segment(self.read_double_quoted_segment()?);
2157                }
2158                Some('$') if self.second_char() == Some('\'') => {
2159                    word.push_segment(self.read_dollar_single_quoted_segment()?);
2160                }
2161                Some('$') if self.second_char() == Some('"') => {
2162                    word.push_segment(self.read_dollar_double_quoted_segment()?);
2163                }
2164                Some('(') if Self::lexed_word_can_take_parenthesized_suffix(word) => {
2165                    let segment = self
2166                        .read_parenthesized_word_suffix_segment()
2167                        .expect("peeked '(' should produce a suffix segment");
2168                    word.push_segment(segment);
2169                }
2170                _ => {
2171                    if let Some(segment) = self.read_plain_continuation_segment() {
2172                        word.push_segment(segment);
2173                        continue;
2174                    }
2175
2176                    let start = self.current_position();
2177                    let plain = self.read_unquoted_segment(start)?;
2178                    if plain.as_str().is_empty() {
2179                        break;
2180                    }
2181                    word.push_segment(plain);
2182                }
2183            }
2184        }
2185
2186        Ok(())
2187    }
2188
2189    fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2190        debug_assert_eq!(self.peek_char(), Some('('));
2191
2192        let start = self.current_position();
2193        let mut depth = 0usize;
2194        let mut escaped = false;
2195        let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2196
2197        while let Some(ch) = self.peek_char() {
2198            if let Some(text) = text.as_mut() {
2199                text.push(ch);
2200            }
2201            self.advance();
2202
2203            if escaped {
2204                escaped = false;
2205                continue;
2206            }
2207
2208            match ch {
2209                '\\' => escaped = true,
2210                '(' => depth += 1,
2211                ')' => {
2212                    depth = depth.saturating_sub(1);
2213                    if depth == 0 {
2214                        break;
2215                    }
2216                }
2217                _ => {}
2218            }
2219        }
2220
2221        let end = self.current_position();
2222        let span = Some(Span::from_positions(start, end));
2223        if let Some(text) = text {
2224            Some(LexedWordSegment::owned_with_spans(
2225                LexedWordSegmentKind::Plain,
2226                text,
2227                span,
2228                span,
2229            ))
2230        } else {
2231            Some(LexedWordSegment::borrowed_with_spans(
2232                LexedWordSegmentKind::Plain,
2233                &self.input[start.offset..end.offset],
2234                span,
2235                span,
2236            ))
2237        }
2238    }
2239
2240    fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2241        self.read_double_quoted_word(false)
2242    }
2243
2244    fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2245        self.read_double_quoted_word(true)
2246    }
2247
2248    fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2249        let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2250            Ok(segment) => segment,
2251            Err(kind) => return Some(LexedToken::error(kind)),
2252        };
2253        let mut word = LexedWord::from_segment(segment);
2254        if let Err(kind) = self.append_segmented_continuation(&mut word) {
2255            return Some(LexedToken::error(kind));
2256        }
2257
2258        let kind = if word.single_segment().is_some() {
2259            TokenKind::QuotedWord
2260        } else {
2261            TokenKind::Word
2262        };
2263
2264        Some(LexedToken::with_word_payload(kind, word))
2265    }
2266
2267    fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2268        self.read_double_quoted_segment_with_dollar(false)
2269    }
2270
2271    fn read_dollar_double_quoted_segment(
2272        &mut self,
2273    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2274        self.read_double_quoted_segment_with_dollar(true)
2275    }
2276
2277    fn read_double_quoted_segment_with_dollar(
2278        &mut self,
2279        dollar: bool,
2280    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2281        if dollar {
2282            debug_assert_eq!(self.peek_char(), Some('$'));
2283            debug_assert_eq!(self.second_char(), Some('"'));
2284        } else {
2285            debug_assert_eq!(self.peek_char(), Some('"'));
2286        }
2287
2288        let wrapper_start = self.current_position();
2289        if dollar {
2290            self.consume_ascii_chars(2); // consume $"
2291        } else {
2292            self.consume_ascii_chars(1); // consume opening "
2293        }
2294        let content_start = self.current_position();
2295        let mut content_end = content_start;
2296        let mut simple = self.reinject_buf.is_empty();
2297        let mut borrowable = self.reinject_buf.is_empty();
2298        let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2299        let mut closed = false;
2300
2301        while let Some(ch) = self.peek_char() {
2302            if simple {
2303                if self.reinject_buf.is_empty() {
2304                    let rest = self.cursor.rest();
2305                    match Self::find_double_quote_special(rest) {
2306                        Some(index) if index > 0 => {
2307                            self.consume_source_bytes(index);
2308                            continue;
2309                        }
2310                        None => {
2311                            self.consume_source_bytes(rest.len());
2312                            return Err(LexerErrorKind::DoubleQuote);
2313                        }
2314                        _ => {}
2315                    }
2316                }
2317
2318                match ch {
2319                    '"' => {
2320                        content_end = self.current_position();
2321                        self.consume_ascii_chars(1); // consume closing "
2322                        closed = true;
2323                        break;
2324                    }
2325                    '\\' | '$' | '`' => {
2326                        simple = false;
2327                        if ch == '`' {
2328                            borrowable = false;
2329                            let capture_end = self.current_position();
2330                            self.ensure_capture_from_source(
2331                                &mut content,
2332                                content_start,
2333                                capture_end,
2334                            );
2335                        }
2336                    }
2337                    _ => {
2338                        self.advance();
2339                    }
2340                }
2341                if simple {
2342                    continue;
2343                }
2344            }
2345
2346            match ch {
2347                '"' => {
2348                    if borrowable {
2349                        content_end = self.current_position();
2350                    }
2351                    self.consume_ascii_chars(1); // consume closing "
2352                    closed = true;
2353                    break;
2354                }
2355                '\\' => {
2356                    let escape_start = self.current_position();
2357                    self.advance();
2358                    if let Some(next) = self.peek_char() {
2359                        match next {
2360                            '\n' => {
2361                                borrowable = false;
2362                                self.ensure_capture_from_source(
2363                                    &mut content,
2364                                    content_start,
2365                                    escape_start,
2366                                );
2367                                self.advance();
2368                            }
2369                            '$' => {
2370                                borrowable = false;
2371                                self.ensure_capture_from_source(
2372                                    &mut content,
2373                                    content_start,
2374                                    escape_start,
2375                                );
2376                                Self::push_capture_char(&mut content, '\x00');
2377                                Self::push_capture_char(&mut content, '$');
2378                                self.advance();
2379                            }
2380                            '"' | '\\' | '`' => {
2381                                borrowable = false;
2382                                self.ensure_capture_from_source(
2383                                    &mut content,
2384                                    content_start,
2385                                    escape_start,
2386                                );
2387                                if next == '\\' {
2388                                    Self::push_capture_char(&mut content, '\x00');
2389                                }
2390                                if next == '`' {
2391                                    Self::push_capture_char(&mut content, '\x00');
2392                                }
2393                                Self::push_capture_char(&mut content, next);
2394                                self.advance();
2395                                content_end = self.current_position();
2396                            }
2397                            _ => {
2398                                Self::push_capture_char(&mut content, '\\');
2399                                Self::push_capture_char(&mut content, next);
2400                                self.advance();
2401                                content_end = self.current_position();
2402                            }
2403                        }
2404                    }
2405                }
2406                '$' => {
2407                    Self::push_capture_char(&mut content, '$');
2408                    self.advance();
2409                    if self.peek_char() == Some('(') {
2410                        if self.second_char() == Some('(') {
2411                            self.read_arithmetic_expansion_into(&mut content);
2412                        } else {
2413                            Self::push_capture_char(&mut content, '(');
2414                            self.advance();
2415                            self.read_command_subst_into(&mut content);
2416                        }
2417                    } else if self.peek_char() == Some('{') {
2418                        Self::push_capture_char(&mut content, '{');
2419                        self.advance();
2420                        borrowable &= self.read_param_expansion_into(&mut content, content_start);
2421                    }
2422                    content_end = self.current_position();
2423                }
2424                '`' => {
2425                    borrowable = false;
2426                    let capture_end = self.current_position();
2427                    self.ensure_capture_from_source(&mut content, content_start, capture_end);
2428                    Self::push_capture_char(&mut content, '`');
2429                    self.advance(); // consume opening `
2430                    while let Some(c) = self.peek_char() {
2431                        Self::push_capture_char(&mut content, c);
2432                        self.advance();
2433                        if c == '`' {
2434                            break;
2435                        }
2436                        if c == '\\'
2437                            && let Some(next) = self.peek_char()
2438                        {
2439                            Self::push_capture_char(&mut content, next);
2440                            self.advance();
2441                        }
2442                    }
2443                    content_end = self.current_position();
2444                }
2445                _ => {
2446                    Self::push_capture_char(&mut content, ch);
2447                    self.advance();
2448                    content_end = self.current_position();
2449                }
2450            }
2451        }
2452
2453        if !closed {
2454            return Err(LexerErrorKind::DoubleQuote);
2455        }
2456
2457        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
2458        let content_span = Some(Span::from_positions(content_start, content_end));
2459
2460        if borrowable {
2461            Ok(LexedWordSegment::borrowed_with_spans(
2462                if dollar {
2463                    LexedWordSegmentKind::DollarDoubleQuoted
2464                } else {
2465                    LexedWordSegmentKind::DoubleQuoted
2466                },
2467                &self.input[content_start.offset..content_end.offset],
2468                content_span,
2469                wrapper_span,
2470            ))
2471        } else {
2472            Ok(LexedWordSegment::owned_with_spans(
2473                if dollar {
2474                    LexedWordSegmentKind::DollarDoubleQuoted
2475                } else {
2476                    LexedWordSegmentKind::DoubleQuoted
2477                },
2478                content.unwrap_or_default(),
2479                content_span,
2480                wrapper_span,
2481            ))
2482        }
2483    }
2484
2485    fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
2486        debug_assert_eq!(self.peek_char(), Some('('));
2487        debug_assert_eq!(self.second_char(), Some('('));
2488
2489        Self::push_capture_char(content, '(');
2490        self.advance();
2491        Self::push_capture_char(content, '(');
2492        self.advance();
2493
2494        let mut depth = 2;
2495        while let Some(c) = self.peek_char() {
2496            match c {
2497                '\\' => {
2498                    Self::push_capture_char(content, c);
2499                    self.advance();
2500                    if let Some(next) = self.peek_char() {
2501                        Self::push_capture_char(content, next);
2502                        self.advance();
2503                    }
2504                }
2505                '\'' => {
2506                    Self::push_capture_char(content, c);
2507                    self.advance();
2508                    while let Some(quoted) = self.peek_char() {
2509                        Self::push_capture_char(content, quoted);
2510                        self.advance();
2511                        if quoted == '\'' {
2512                            break;
2513                        }
2514                    }
2515                }
2516                '"' => {
2517                    let mut escaped = false;
2518                    Self::push_capture_char(content, c);
2519                    self.advance();
2520                    while let Some(quoted) = self.peek_char() {
2521                        Self::push_capture_char(content, quoted);
2522                        self.advance();
2523                        if escaped {
2524                            escaped = false;
2525                            continue;
2526                        }
2527                        match quoted {
2528                            '\\' => escaped = true,
2529                            '"' => break,
2530                            _ => {}
2531                        }
2532                    }
2533                }
2534                '`' => {
2535                    let mut escaped = false;
2536                    Self::push_capture_char(content, c);
2537                    self.advance();
2538                    while let Some(quoted) = self.peek_char() {
2539                        Self::push_capture_char(content, quoted);
2540                        self.advance();
2541                        if escaped {
2542                            escaped = false;
2543                            continue;
2544                        }
2545                        match quoted {
2546                            '\\' => escaped = true,
2547                            '`' => break,
2548                            _ => {}
2549                        }
2550                    }
2551                }
2552                '(' => {
2553                    Self::push_capture_char(content, c);
2554                    self.advance();
2555                    depth += 1;
2556                }
2557                ')' => {
2558                    Self::push_capture_char(content, c);
2559                    self.advance();
2560                    depth -= 1;
2561                    if depth == 0 {
2562                        return true;
2563                    }
2564                }
2565                _ => {
2566                    Self::push_capture_char(content, c);
2567                    self.advance();
2568                }
2569            }
2570        }
2571
2572        false
2573    }
2574
2575    fn read_legacy_arithmetic_into(
2576        &mut self,
2577        content: &mut Option<String>,
2578        segment_start: Position,
2579    ) -> bool {
2580        let mut bracket_depth = 1;
2581
2582        while let Some(c) = self.peek_char() {
2583            match c {
2584                '\\' => {
2585                    Self::push_capture_char(content, c);
2586                    self.advance();
2587                    if let Some(next) = self.peek_char() {
2588                        Self::push_capture_char(content, next);
2589                        self.advance();
2590                    }
2591                }
2592                '\'' => {
2593                    Self::push_capture_char(content, c);
2594                    self.advance();
2595                    while let Some(quoted) = self.peek_char() {
2596                        Self::push_capture_char(content, quoted);
2597                        self.advance();
2598                        if quoted == '\'' {
2599                            break;
2600                        }
2601                    }
2602                }
2603                '"' => {
2604                    let mut escaped = false;
2605                    Self::push_capture_char(content, c);
2606                    self.advance();
2607                    while let Some(quoted) = self.peek_char() {
2608                        Self::push_capture_char(content, quoted);
2609                        self.advance();
2610                        if escaped {
2611                            escaped = false;
2612                            continue;
2613                        }
2614                        match quoted {
2615                            '\\' => escaped = true,
2616                            '"' => break,
2617                            _ => {}
2618                        }
2619                    }
2620                }
2621                '`' => {
2622                    let mut escaped = false;
2623                    Self::push_capture_char(content, c);
2624                    self.advance();
2625                    while let Some(quoted) = self.peek_char() {
2626                        Self::push_capture_char(content, quoted);
2627                        self.advance();
2628                        if escaped {
2629                            escaped = false;
2630                            continue;
2631                        }
2632                        match quoted {
2633                            '\\' => escaped = true,
2634                            '`' => break,
2635                            _ => {}
2636                        }
2637                    }
2638                }
2639                '[' => {
2640                    Self::push_capture_char(content, c);
2641                    self.advance();
2642                    bracket_depth += 1;
2643                }
2644                ']' => {
2645                    Self::push_capture_char(content, c);
2646                    self.advance();
2647                    bracket_depth -= 1;
2648                    if bracket_depth == 0 {
2649                        return true;
2650                    }
2651                }
2652                '$' => {
2653                    Self::push_capture_char(content, c);
2654                    self.advance();
2655                    if self.peek_char() == Some('(') {
2656                        if self.second_char() == Some('(') {
2657                            if !self.read_arithmetic_expansion_into(content) {
2658                                return false;
2659                            }
2660                        } else {
2661                            Self::push_capture_char(content, '(');
2662                            self.advance();
2663                            if !self.read_command_subst_into(content) {
2664                                return false;
2665                            }
2666                        }
2667                    } else if self.peek_char() == Some('{') {
2668                        Self::push_capture_char(content, '{');
2669                        self.advance();
2670                        if !self.read_param_expansion_into(content, segment_start) {
2671                            return false;
2672                        }
2673                    } else if self.peek_char() == Some('[') {
2674                        Self::push_capture_char(content, '[');
2675                        self.advance();
2676                        if !self.read_legacy_arithmetic_into(content, segment_start) {
2677                            return false;
2678                        }
2679                    }
2680                }
2681                _ => {
2682                    Self::push_capture_char(content, c);
2683                    self.advance();
2684                }
2685            }
2686        }
2687
2688        false
2689    }
2690
2691    /// Read command substitution content after `$(`, handling nested parens and quotes.
2692    /// Appends chars to `content` and adds the closing `)`.
2693    /// `subst_depth` tracks nesting to prevent stack overflow.
2694    fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
2695        self.read_command_subst_into_depth(content, 0)
2696    }
2697
2698    fn flush_command_subst_keyword(
2699        current_word: &mut String,
2700        pending_case_headers: &mut usize,
2701        case_clause_depths: &mut SmallVec<[usize; 4]>,
2702        depth: usize,
2703        word_started_at_command_start: &mut bool,
2704    ) {
2705        if current_word.is_empty() {
2706            *word_started_at_command_start = false;
2707            return;
2708        }
2709
2710        match current_word.as_str() {
2711            "case" if *word_started_at_command_start => *pending_case_headers += 1,
2712            "in" if *pending_case_headers > 0 => {
2713                *pending_case_headers -= 1;
2714                case_clause_depths.push(depth);
2715            }
2716            "esac" if *word_started_at_command_start => {
2717                case_clause_depths.pop();
2718            }
2719            _ => {}
2720        }
2721
2722        current_word.clear();
2723        *word_started_at_command_start = false;
2724    }
2725
2726    fn read_command_subst_heredoc_delimiter_into(
2727        &mut self,
2728        content: &mut Option<String>,
2729    ) -> Option<String> {
2730        while let Some(ch) = self.peek_char() {
2731            if !matches!(ch, ' ' | '\t') {
2732                break;
2733            }
2734            Self::push_capture_char(content, ch);
2735            self.advance();
2736        }
2737
2738        let mut cooked = String::new();
2739        let mut in_single = false;
2740        let mut in_double = false;
2741        let mut escaped = false;
2742        let mut saw_any = false;
2743
2744        while let Some(ch) = self.peek_char() {
2745            if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
2746                break;
2747            }
2748
2749            saw_any = true;
2750            Self::push_capture_char(content, ch);
2751            self.advance();
2752
2753            if escaped {
2754                cooked.push(ch);
2755                escaped = false;
2756                continue;
2757            }
2758
2759            match ch {
2760                '\\' if !in_single => escaped = true,
2761                '\'' if !in_double => in_single = !in_single,
2762                '"' if !in_single => in_double = !in_double,
2763                _ => cooked.push(ch),
2764            }
2765        }
2766
2767        saw_any.then_some(cooked)
2768    }
2769
2770    fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2771        Self::push_capture_char(content, '`');
2772        self.advance();
2773        while let Some(ch) = self.peek_char() {
2774            Self::push_capture_char(content, ch);
2775            self.advance();
2776            if ch == '\\' {
2777                if let Some(esc) = self.peek_char() {
2778                    Self::push_capture_char(content, esc);
2779                    self.advance();
2780                }
2781                continue;
2782            }
2783            if ch == '`' {
2784                break;
2785            }
2786        }
2787    }
2788
2789    fn read_command_subst_pending_heredoc_into(
2790        &mut self,
2791        content: &mut Option<String>,
2792        delimiter: &str,
2793        strip_tabs: bool,
2794    ) -> bool {
2795        loop {
2796            let mut line = String::new();
2797            let mut saw_newline = false;
2798
2799            while let Some(ch) = self.peek_char() {
2800                self.advance();
2801                if ch == '\n' {
2802                    saw_newline = true;
2803                    break;
2804                }
2805                line.push(ch);
2806            }
2807
2808            Self::push_capture_str(content, &line);
2809            if saw_newline {
2810                Self::push_capture_char(content, '\n');
2811            }
2812
2813            if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2814                return true;
2815            }
2816        }
2817    }
2818
2819    fn read_command_subst_into_depth(
2820        &mut self,
2821        content: &mut Option<String>,
2822        subst_depth: usize,
2823    ) -> bool {
2824        if subst_depth >= self.max_subst_depth {
2825            // Depth limit exceeded — consume until matching ')' and emit error token
2826            let mut depth = 1;
2827            while let Some(c) = self.peek_char() {
2828                self.advance();
2829                match c {
2830                    '(' => depth += 1,
2831                    ')' => {
2832                        depth -= 1;
2833                        if depth == 0 {
2834                            Self::push_capture_char(content, ')');
2835                            return true;
2836                        }
2837                    }
2838                    _ => {}
2839                }
2840            }
2841            return false;
2842        }
2843
2844        let mut depth = 1;
2845        let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
2846        let mut pending_case_headers = 0usize;
2847        let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
2848        let mut current_word = String::with_capacity(16);
2849        let mut at_command_start = true;
2850        let mut expecting_redirection_target = false;
2851        let mut current_word_started_at_command_start = false;
2852        while let Some(c) = self.peek_char() {
2853            match c {
2854                '#' if !self.should_treat_hash_as_word_char() => {
2855                    let had_word = !current_word.is_empty();
2856                    Self::flush_command_subst_keyword(
2857                        &mut current_word,
2858                        &mut pending_case_headers,
2859                        &mut case_clause_depths,
2860                        depth,
2861                        &mut current_word_started_at_command_start,
2862                    );
2863                    if had_word && expecting_redirection_target {
2864                        expecting_redirection_target = false;
2865                    }
2866                    Self::push_capture_char(content, '#');
2867                    self.advance();
2868                    while let Some(comment_ch) = self.peek_char() {
2869                        Self::push_capture_char(content, comment_ch);
2870                        self.advance();
2871                        if comment_ch == '\n' {
2872                            for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
2873                                if !self.read_command_subst_pending_heredoc_into(
2874                                    content, &delimiter, strip_tabs,
2875                                ) {
2876                                    return false;
2877                                }
2878                            }
2879                            at_command_start = true;
2880                            expecting_redirection_target = false;
2881                            break;
2882                        }
2883                    }
2884                }
2885                '(' => {
2886                    Self::flush_command_subst_keyword(
2887                        &mut current_word,
2888                        &mut pending_case_headers,
2889                        &mut case_clause_depths,
2890                        depth,
2891                        &mut current_word_started_at_command_start,
2892                    );
2893                    depth += 1;
2894                    Self::push_capture_char(content, c);
2895                    self.advance();
2896                    at_command_start = true;
2897                    expecting_redirection_target = false;
2898                }
2899                ')' => {
2900                    Self::flush_command_subst_keyword(
2901                        &mut current_word,
2902                        &mut pending_case_headers,
2903                        &mut case_clause_depths,
2904                        depth,
2905                        &mut current_word_started_at_command_start,
2906                    );
2907                    if case_clause_depths
2908                        .last()
2909                        .is_some_and(|case_depth| *case_depth == depth)
2910                    {
2911                        Self::push_capture_char(content, ')');
2912                        self.advance();
2913                        at_command_start = true;
2914                        expecting_redirection_target = false;
2915                        continue;
2916                    }
2917                    depth -= 1;
2918                    self.advance();
2919                    if depth == 0 {
2920                        Self::push_capture_char(content, ')');
2921                        return true;
2922                    }
2923                    Self::push_capture_char(content, c);
2924                    at_command_start = false;
2925                    expecting_redirection_target = false;
2926                }
2927                '"' => {
2928                    let had_word = !current_word.is_empty();
2929                    Self::flush_command_subst_keyword(
2930                        &mut current_word,
2931                        &mut pending_case_headers,
2932                        &mut case_clause_depths,
2933                        depth,
2934                        &mut current_word_started_at_command_start,
2935                    );
2936                    if had_word && expecting_redirection_target {
2937                        expecting_redirection_target = false;
2938                    }
2939                    // Nested double-quoted string inside $()
2940                    Self::push_capture_char(content, '"');
2941                    self.advance();
2942                    while let Some(qc) = self.peek_char() {
2943                        match qc {
2944                            '"' => {
2945                                Self::push_capture_char(content, '"');
2946                                self.advance();
2947                                break;
2948                            }
2949                            '\\' => {
2950                                Self::push_capture_char(content, '\\');
2951                                self.advance();
2952                                if let Some(esc) = self.peek_char() {
2953                                    Self::push_capture_char(content, esc);
2954                                    self.advance();
2955                                }
2956                            }
2957                            '$' => {
2958                                Self::push_capture_char(content, '$');
2959                                self.advance();
2960                                if self.peek_char() == Some('(') {
2961                                    if self.second_char() == Some('(') {
2962                                        if !self.read_arithmetic_expansion_into(content) {
2963                                            return false;
2964                                        }
2965                                    } else {
2966                                        Self::push_capture_char(content, '(');
2967                                        self.advance();
2968                                        if !self
2969                                            .read_command_subst_into_depth(content, subst_depth + 1)
2970                                        {
2971                                            return false;
2972                                        }
2973                                    }
2974                                }
2975                            }
2976                            _ => {
2977                                Self::push_capture_char(content, qc);
2978                                self.advance();
2979                            }
2980                        }
2981                    }
2982                    if expecting_redirection_target {
2983                        expecting_redirection_target = false;
2984                    } else {
2985                        at_command_start = false;
2986                    }
2987                }
2988                '\'' => {
2989                    let had_word = !current_word.is_empty();
2990                    Self::flush_command_subst_keyword(
2991                        &mut current_word,
2992                        &mut pending_case_headers,
2993                        &mut case_clause_depths,
2994                        depth,
2995                        &mut current_word_started_at_command_start,
2996                    );
2997                    if had_word && expecting_redirection_target {
2998                        expecting_redirection_target = false;
2999                    }
3000                    // Single-quoted string inside $()
3001                    Self::push_capture_char(content, '\'');
3002                    self.advance();
3003                    while let Some(qc) = self.peek_char() {
3004                        Self::push_capture_char(content, qc);
3005                        self.advance();
3006                        if qc == '\'' {
3007                            break;
3008                        }
3009                    }
3010                    if expecting_redirection_target {
3011                        expecting_redirection_target = false;
3012                    } else {
3013                        at_command_start = false;
3014                    }
3015                }
3016                '`' => {
3017                    let had_word = !current_word.is_empty();
3018                    Self::flush_command_subst_keyword(
3019                        &mut current_word,
3020                        &mut pending_case_headers,
3021                        &mut case_clause_depths,
3022                        depth,
3023                        &mut current_word_started_at_command_start,
3024                    );
3025                    if had_word && expecting_redirection_target {
3026                        expecting_redirection_target = false;
3027                    }
3028                    self.read_command_subst_backtick_segment_into(content);
3029                    if expecting_redirection_target {
3030                        expecting_redirection_target = false;
3031                    } else {
3032                        at_command_start = false;
3033                    }
3034                }
3035                '$' if self.second_char() == Some('\'') => {
3036                    let had_word = !current_word.is_empty();
3037                    Self::flush_command_subst_keyword(
3038                        &mut current_word,
3039                        &mut pending_case_headers,
3040                        &mut case_clause_depths,
3041                        depth,
3042                        &mut current_word_started_at_command_start,
3043                    );
3044                    if had_word && expecting_redirection_target {
3045                        expecting_redirection_target = false;
3046                    }
3047                    Self::push_capture_char(content, '$');
3048                    self.advance();
3049                    Self::push_capture_char(content, '\'');
3050                    self.advance();
3051                    while let Some(qc) = self.peek_char() {
3052                        Self::push_capture_char(content, qc);
3053                        self.advance();
3054                        if qc == '\\' {
3055                            if let Some(esc) = self.peek_char() {
3056                                Self::push_capture_char(content, esc);
3057                                self.advance();
3058                            }
3059                            continue;
3060                        }
3061                        if qc == '\'' {
3062                            break;
3063                        }
3064                    }
3065                    if expecting_redirection_target {
3066                        expecting_redirection_target = false;
3067                    } else {
3068                        at_command_start = false;
3069                    }
3070                }
3071                '\\' => {
3072                    let had_word = !current_word.is_empty();
3073                    Self::flush_command_subst_keyword(
3074                        &mut current_word,
3075                        &mut pending_case_headers,
3076                        &mut case_clause_depths,
3077                        depth,
3078                        &mut current_word_started_at_command_start,
3079                    );
3080                    if had_word && expecting_redirection_target {
3081                        expecting_redirection_target = false;
3082                    }
3083                    Self::push_capture_char(content, '\\');
3084                    self.advance();
3085                    if let Some(esc) = self.peek_char() {
3086                        Self::push_capture_char(content, esc);
3087                        self.advance();
3088                    }
3089                    if expecting_redirection_target {
3090                        expecting_redirection_target = false;
3091                    } else {
3092                        at_command_start = false;
3093                    }
3094                }
3095                '<' if self.second_char() == Some('<') => {
3096                    let word_was_redirection_fd = current_word_started_at_command_start
3097                        && !current_word.is_empty()
3098                        && current_word.chars().all(|current| current.is_ascii_digit());
3099                    Self::flush_command_subst_keyword(
3100                        &mut current_word,
3101                        &mut pending_case_headers,
3102                        &mut case_clause_depths,
3103                        depth,
3104                        &mut current_word_started_at_command_start,
3105                    );
3106                    if word_was_redirection_fd {
3107                        at_command_start = true;
3108                    }
3109
3110                    Self::push_capture_char(content, '<');
3111                    self.advance();
3112                    Self::push_capture_char(content, '<');
3113                    self.advance();
3114
3115                    if self.peek_char() == Some('<') {
3116                        Self::push_capture_char(content, '<');
3117                        self.advance();
3118                        expecting_redirection_target = true;
3119                        continue;
3120                    }
3121
3122                    let strip_tabs = if self.peek_char() == Some('-') {
3123                        Self::push_capture_char(content, '-');
3124                        self.advance();
3125                        true
3126                    } else {
3127                        false
3128                    };
3129
3130                    if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
3131                    {
3132                        pending_heredocs.push((delimiter, strip_tabs));
3133                        expecting_redirection_target = false;
3134                    } else {
3135                        expecting_redirection_target = true;
3136                    }
3137                }
3138                '>' | '<' => {
3139                    let word_was_redirection_fd = current_word_started_at_command_start
3140                        && !current_word.is_empty()
3141                        && current_word.chars().all(|current| current.is_ascii_digit());
3142                    Self::flush_command_subst_keyword(
3143                        &mut current_word,
3144                        &mut pending_case_headers,
3145                        &mut case_clause_depths,
3146                        depth,
3147                        &mut current_word_started_at_command_start,
3148                    );
3149                    if word_was_redirection_fd {
3150                        at_command_start = true;
3151                    }
3152                    Self::push_capture_char(content, c);
3153                    self.advance();
3154                    expecting_redirection_target = true;
3155                }
3156                '\n' => {
3157                    Self::flush_command_subst_keyword(
3158                        &mut current_word,
3159                        &mut pending_case_headers,
3160                        &mut case_clause_depths,
3161                        depth,
3162                        &mut current_word_started_at_command_start,
3163                    );
3164                    Self::push_capture_char(content, '\n');
3165                    self.advance();
3166                    for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
3167                        if !self.read_command_subst_pending_heredoc_into(
3168                            content, &delimiter, strip_tabs,
3169                        ) {
3170                            return false;
3171                        }
3172                    }
3173                    at_command_start = true;
3174                    expecting_redirection_target = false;
3175                }
3176                _ => {
3177                    if c.is_ascii_alphanumeric() || c == '_' {
3178                        if current_word.is_empty()
3179                            && !expecting_redirection_target
3180                            && at_command_start
3181                        {
3182                            current_word_started_at_command_start = true;
3183                            at_command_start = false;
3184                        }
3185                        current_word.push(c);
3186                    } else {
3187                        let had_word = !current_word.is_empty();
3188                        Self::flush_command_subst_keyword(
3189                            &mut current_word,
3190                            &mut pending_case_headers,
3191                            &mut case_clause_depths,
3192                            depth,
3193                            &mut current_word_started_at_command_start,
3194                        );
3195                        if had_word && expecting_redirection_target {
3196                            expecting_redirection_target = false;
3197                        }
3198                        match c {
3199                            ' ' | '\t' => {}
3200                            ';' | '|' | '&' => {
3201                                at_command_start = true;
3202                                expecting_redirection_target = false;
3203                            }
3204                            _ => {
3205                                if !expecting_redirection_target {
3206                                    at_command_start = false;
3207                                }
3208                            }
3209                        }
3210                    }
3211                    Self::push_capture_char(content, c);
3212                    self.advance();
3213                }
3214            }
3215        }
3216
3217        false
3218    }
3219
3220    /// Read parameter expansion content after `${`, handling nested braces and quotes.
3221    /// In bash, quotes inside `${...}` (e.g. `${arr["key"]}`) don't terminate the
3222    /// outer double-quoted string. Appends chars including closing `}` to `content`.
        ///
        /// Escapes that are rewritten while copying (`\"`, `\\`, `` \` `` and `\$`) first call
        /// `ensure_capture_from_source(content, segment_start, escape_start)` and clear the
        /// flag that this function returns.
        ///
        /// Returns `true` only when no such rewrite happened here and every nested arithmetic
        /// or parameter expansion also reported `true`. The local name `borrowable` suggests
        /// callers may then reuse the source text unchanged (TODO confirm at the call sites).
3223    fn read_param_expansion_into(
3224        &mut self,
3225        content: &mut Option<String>,
3226        segment_start: Position,
3227    ) -> bool {
3228        let mut borrowable = true;
            // Starts at 1 for the `${` that was consumed before this call. Nothing in this
            // function increments it (nested `${` recurse instead), so it only drops to 0.
3229        let mut depth = 1;
            // Number of unquoted `{` seen so far that have not been paired with a `}` yet.
3230        let mut literal_brace_depth = 0usize;
3231        let mut in_single = false;
3232        let mut in_double = false;
            // Value of `depth` when the current double quote opened; 0 while not inside one.
3233        let mut double_quote_depth = 0usize;
3234        while let Some(c) = self.peek_char() {
3235            if in_single {
                    // Inside '...': characters are copied through until the closing quote.
3236                match c {
3237                    '\\' => {
3238                        let escape_start = self.current_position();
3239                        if self.second_char() == Some('"') {
                                // `\"` is emitted as a bare `"`: the backslash is skipped and
                                // `ensure_capture_from_source` runs before the quote is pushed.
3240                            self.advance();
3241                            borrowable = false;
3242                            self.ensure_capture_from_source(content, segment_start, escape_start);
3243                            Self::push_capture_char(content, '"');
3244                            self.advance();
3245                        } else {
                                // Any other backslash is kept; the following char is handled
                                // by the next loop iteration.
3246                            Self::push_capture_char(content, '\\');
3247                            self.advance();
3248                        }
3249                    }
3250                    '\'' => {
3251                        Self::push_capture_char(content, c);
3252                        self.advance();
3253                        in_single = false;
3254                    }
3255                    _ => {
3256                        Self::push_capture_char(content, c);
3257                        self.advance();
3258                    }
3259                }
3260                continue;
3261            }
3262
3263            match c {
                    // Inside double quotes a `}` is honored only when `depth` exceeds the depth
                    // recorded at the opening quote; otherwise the guard fails and the `}` is
                    // copied verbatim by the catch-all arm at the bottom.
3264                '}' if !in_single && (!in_double || depth > double_quote_depth) => {
3265                    self.advance();
3266                    Self::push_capture_char(content, '}');
3267                    if depth == 1
3268                        && literal_brace_depth > 0
3269                        && self.has_later_top_level_param_expansion_closer(depth)
3270                    {
                            // This `}` is paired with an earlier literal `{` because another
                            // closer is still ahead; keep reading instead of ending here.
3271                        literal_brace_depth -= 1;
3272                        continue;
3273                    }
3274                    depth -= 1;
3275                    if depth == 0 {
3276                        break;
3277                    }
3278                }
3279                '{' if !in_single && !in_double => {
3280                    literal_brace_depth += 1;
3281                    Self::push_capture_char(content, '{');
3282                    self.advance();
3283                }
3284                '"' => {
3285                    // Quotes inside ${...} are part of the expansion, not string delimiters
3286                    Self::push_capture_char(content, '"');
3287                    self.advance();
3288                    in_double = !in_double;
3289                    double_quote_depth = if in_double { depth } else { 0 };
3290                }
3291                '\'' => {
                        // The quote is always copied, but it only opens a single-quoted run
                        // when we are not inside double quotes.
3292                    Self::push_capture_char(content, '\'');
3293                    self.advance();
3294                    if !in_double {
3295                        in_single = true;
3296                    }
3297                }
3298                '\\' => {
3299                    // Inside ${...} within double quotes, same escape rules apply:
3300                    // \", \\, \$, \` produce the escaped char; others keep backslash
3301                    let escape_start = self.current_position();
3302                    self.advance();
3303                    if let Some(esc) = self.peek_char() {
3304                        match esc {
3305                            '$' => {
                                    // `\$` is emitted as NUL followed by `$`.
                                    // NOTE(review): the NUL presumably marks the dollar as
                                    // escaped for a later stage; confirm against the consumer.
3306                                borrowable = false;
3307                                self.ensure_capture_from_source(
3308                                    content,
3309                                    segment_start,
3310                                    escape_start,
3311                                );
3312                                Self::push_capture_char(content, '\x00');
3313                                Self::push_capture_char(content, '$');
3314                                self.advance();
3315                            }
3316                            '"' | '\\' | '`' => {
                                    // The backslash is dropped and only the escaped char is kept.
3317                                borrowable = false;
3318                                self.ensure_capture_from_source(
3319                                    content,
3320                                    segment_start,
3321                                    escape_start,
3322                                );
3323                                Self::push_capture_char(content, esc);
3324                                self.advance();
3325                            }
3326                            '}' => {
3327                                // \} should be a literal } without closing the expansion
                                    // It also releases one pending literal `{`, saturating at zero.
3328                                Self::push_capture_char(content, '\\');
3329                                Self::push_capture_char(content, '}');
3330                                self.advance();
3331                                literal_brace_depth = literal_brace_depth.saturating_sub(1);
3332                            }
3333                            _ => {
                                    // Unrecognized escape: backslash and char are both kept.
3334                                Self::push_capture_char(content, '\\');
3335                                Self::push_capture_char(content, esc);
3336                                self.advance();
3337                            }
3338                        }
3339                    } else {
                            // Backslash was the last character of the input; keep it as-is.
3340                        Self::push_capture_char(content, '\\');
3341                    }
3342                }
3343                '$' => {
3344                    Self::push_capture_char(content, '$');
3345                    self.advance();
3346                    if self.peek_char() == Some('(') {
3347                        if self.second_char() == Some('(') {
                                // `$((`: delegate to the arithmetic reader; a `false` result
                                // clears `borrowable`.
3348                            if !self.read_arithmetic_expansion_into(content) {
3349                                borrowable = false;
3350                            }
3351                        } else {
                                // `$(`: delegate to the command substitution reader. Its return
                                // value is not inspected here.
3352                            Self::push_capture_char(content, '(');
3353                            self.advance();
3354                            self.read_command_subst_into(content);
3355                        }
3356                    } else if self.peek_char() == Some('{') {
                            // Nested `${`: recurse and fold the nested result into ours.
3357                        Self::push_capture_char(content, '{');
3358                        self.advance();
3359                        borrowable &= self.read_param_expansion_into(content, segment_start);
3360                    }
3361                }
3362                _ => {
3363                    Self::push_capture_char(content, c);
3364                    self.advance();
3365                }
3366            }
3367        }
3368        borrowable
3369    }
3370
3371    fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
3372        let mut chars = self.lookahead_chars().peekable();
3373        let mut depth = target_depth;
3374        let mut in_single = false;
3375        let mut in_double = false;
3376        let mut double_quote_depth = 0usize;
3377
3378        while let Some(ch) = chars.next() {
3379            if in_single {
3380                match ch {
3381                    '\'' => in_single = false,
3382                    '\\' if chars.peek() == Some(&'"') => {
3383                        chars.next();
3384                    }
3385                    '\\' => {}
3386                    _ => {}
3387                }
3388                continue;
3389            }
3390
3391            if in_double {
3392                match ch {
3393                    '"' => {
3394                        in_double = false;
3395                        double_quote_depth = 0;
3396                    }
3397                    '\\' => {
3398                        chars.next();
3399                    }
3400                    '$' if chars.peek() == Some(&'{') => {
3401                        chars.next();
3402                        depth += 1;
3403                    }
3404                    '}' if depth > double_quote_depth => {
3405                        depth -= 1;
3406                    }
3407                    _ => {}
3408                }
3409                continue;
3410            }
3411
3412            match ch {
3413                '\n' if depth == target_depth => return false,
3414                '\'' => in_single = true,
3415                '"' => {
3416                    in_double = true;
3417                    double_quote_depth = depth;
3418                }
3419                '\\' => {
3420                    chars.next();
3421                }
3422                '$' if chars.peek() == Some(&'{') => {
3423                    chars.next();
3424                    depth += 1;
3425                }
3426                '}' => {
3427                    if depth == target_depth {
3428                        return true;
3429                    }
3430                    depth -= 1;
3431                }
3432                _ => {}
3433            }
3434        }
3435
3436        false
3437    }
3438
3439    /// Check if the content starting with { looks like a brace expansion
3440    /// Brace expansion: {a,b,c} or {1..5} (contains , or ..)
3441    /// Brace group: { cmd; } (contains spaces, semicolons, newlines)
3442    /// Caps lookahead to prevent O(n^2) scanning when input
3443    /// contains many unmatched `{` characters (issue #997).
3444    fn looks_like_brace_expansion(&self) -> bool {
3445        const MAX_LOOKAHEAD: usize = 10_000;
3446
3447        let mut chars = self.lookahead_chars();
3448
3449        // Skip the opening {
3450        if chars.next() != Some('{') {
3451            return false;
3452        }
3453
3454        let mut depth = 1;
3455        let mut has_comma = false;
3456        let mut has_dot_dot = false;
3457        let mut prev_char = None;
3458        let mut scanned = 0usize;
3459
3460        for ch in chars {
3461            scanned += 1;
3462            if scanned > MAX_LOOKAHEAD {
3463                return false;
3464            }
3465            match ch {
3466                '{' => depth += 1,
3467                '}' => {
3468                    depth -= 1;
3469                    if depth == 0 {
3470                        // Found matching }, check if we have brace expansion markers
3471                        return has_comma || has_dot_dot;
3472                    }
3473                }
3474                ',' if depth == 1 => has_comma = true,
3475                '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3476                // Brace groups have whitespace/newlines/semicolons at depth 1
3477                ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3478                _ => {}
3479            }
3480            prev_char = Some(ch);
3481        }
3482
3483        false
3484    }
3485
3486    /// Check whether a mid-word `{...}` segment can stay attached to the current
3487    /// word without crossing a top-level word boundary.
3488    fn looks_like_mid_word_brace_segment(&self) -> bool {
3489        const MAX_LOOKAHEAD: usize = 10_000;
3490
3491        let mut chars = self.lookahead_chars();
3492        if chars.next() != Some('{') {
3493            return false;
3494        }
3495
3496        let mut brace_depth = 1;
3497        let mut paren_depth = 0usize;
3498        let mut escaped = false;
3499        let mut in_single = false;
3500        let mut in_double = false;
3501        let mut in_backtick = false;
3502        let mut prev_char = None;
3503        let mut scanned = 0usize;
3504
3505        for ch in chars {
3506            scanned += 1;
3507            if scanned > MAX_LOOKAHEAD {
3508                return false;
3509            }
3510
3511            if !in_single
3512                && !in_double
3513                && !in_backtick
3514                && !escaped
3515                && brace_depth == 1
3516                && paren_depth == 0
3517                && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
3518            {
3519                return false;
3520            }
3521
3522            if escaped {
3523                escaped = false;
3524                prev_char = Some(ch);
3525                continue;
3526            }
3527
3528            match ch {
3529                '\\' => escaped = true,
3530                '\'' if !in_double && !in_backtick => in_single = !in_single,
3531                '"' if !in_single && !in_backtick => in_double = !in_double,
3532                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3533                '(' if !in_single
3534                    && !in_double
3535                    && !in_backtick
3536                    && (paren_depth > 0 || prev_char == Some('$')) =>
3537                {
3538                    paren_depth += 1
3539                }
3540                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3541                    paren_depth -= 1
3542                }
3543                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3544                '}' => {
3545                    brace_depth -= 1;
3546                    if brace_depth == 0 {
3547                        return true;
3548                    }
3549                }
3550                _ => {}
3551            }
3552
3553            prev_char = Some(ch);
3554        }
3555
3556        false
3557    }
3558
3559    /// Check if { is followed by whitespace (brace group start)
3560    fn is_brace_group_start(&self) -> bool {
3561        let mut chars = self.lookahead_chars();
3562        // Skip the opening {
3563        if chars.next() != Some('{') {
3564            return false;
3565        }
3566        // If next char is whitespace or newline, it's a brace group
3567        matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3568    }
3569
3570    /// Check whether the text after an escaped `{` looks like a brace-expansion
3571    /// surface that should stay attached to the current word, e.g. `\{a,b}`.
3572    fn escaped_brace_sequence_looks_like_brace_expansion(&self) -> bool {
3573        const MAX_LOOKAHEAD: usize = 10_000;
3574
3575        let mut chars = self.lookahead_chars();
3576        let mut depth = 1;
3577        let mut has_comma = false;
3578        let mut has_dot_dot = false;
3579        let mut prev_char = None;
3580        let mut scanned = 0usize;
3581
3582        for ch in chars.by_ref() {
3583            scanned += 1;
3584            if scanned > MAX_LOOKAHEAD {
3585                return false;
3586            }
3587            match ch {
3588                '{' => depth += 1,
3589                '}' => {
3590                    depth -= 1;
3591                    if depth == 0 {
3592                        return has_comma || has_dot_dot;
3593                    }
3594                }
3595                ',' if depth == 1 => has_comma = true,
3596                '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3597                ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3598                _ => {}
3599            }
3600            prev_char = Some(ch);
3601        }
3602
3603        false
3604    }
3605
3606    /// Read a {literal} pattern without comma/dot-dot as a word
3607    fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
3608        let mut word = String::with_capacity(16);
3609
3610        // Read the opening {
3611        if let Some('{') = self.peek_char() {
3612            word.push('{');
3613            self.advance();
3614        } else {
3615            return None;
3616        }
3617
3618        // Read until matching }
3619        let mut depth = 1;
3620        while let Some(ch) = self.peek_char() {
3621            word.push(ch);
3622            self.advance();
3623            match ch {
3624                '{' => depth += 1,
3625                '}' => {
3626                    depth -= 1;
3627                    if depth == 0 {
3628                        break;
3629                    }
3630                }
3631                _ => {}
3632            }
3633        }
3634
3635        // Continue reading any suffix
3636        while let Some(ch) = self.peek_char() {
3637            if Self::is_word_char(ch) {
3638                if self.reinject_buf.is_empty() {
3639                    let chunk = self.cursor.eat_while(Self::is_word_char);
3640                    word.push_str(chunk);
3641                    self.advance_scanned_source_bytes(chunk.len());
3642                } else {
3643                    word.push(ch);
3644                    self.advance();
3645                }
3646            } else {
3647                break;
3648            }
3649        }
3650
3651        Some(LexedToken::owned_word(TokenKind::Word, word))
3652    }
3653
3654    /// Read a brace expansion pattern as a word
3655    fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3656        let mut word = String::with_capacity(16);
3657
3658        // Read the opening {
3659        if let Some('{') = self.peek_char() {
3660            word.push('{');
3661            self.advance();
3662        } else {
3663            return None;
3664        }
3665
3666        // Read until matching }
3667        let mut depth = 1;
3668        while let Some(ch) = self.peek_char() {
3669            word.push(ch);
3670            self.advance();
3671            match ch {
3672                '{' => depth += 1,
3673                '}' => {
3674                    depth -= 1;
3675                    if depth == 0 {
3676                        break;
3677                    }
3678                }
3679                _ => {}
3680            }
3681        }
3682
3683        // Continue reading any suffix after the brace pattern
3684        while let Some(ch) = self.peek_char() {
3685            if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3686                if ch == '{' {
3687                    // Another brace pattern - include it
3688                    word.push(ch);
3689                    self.advance();
3690                    let mut inner_depth = 1;
3691                    while let Some(c) = self.peek_char() {
3692                        word.push(c);
3693                        self.advance();
3694                        match c {
3695                            '{' => inner_depth += 1,
3696                            '}' => {
3697                                inner_depth -= 1;
3698                                if inner_depth == 0 {
3699                                    break;
3700                                }
3701                            }
3702                            _ => {}
3703                        }
3704                    }
3705                } else {
3706                    word.push(ch);
3707                    self.advance();
3708                }
3709            } else {
3710                break;
3711            }
3712        }
3713
3714        Some(LexedToken::owned_word(TokenKind::Word, word))
3715    }
3716
3717    /// Peek ahead (without consuming) to see if `=(` starts an associative
3718    /// compound assignment like `([key]=val ...)`.  Returns true when the
3719    /// first non-whitespace char after `(` is `[`.
3720    fn looks_like_assoc_assign(&self) -> bool {
3721        let mut chars = self.lookahead_chars();
3722        // Skip the `(` we haven't consumed yet
3723        if chars.next() != Some('(') {
3724            return false;
3725        }
3726        // Skip optional whitespace
3727        for ch in chars {
3728            match ch {
3729                ' ' | '\t' => continue,
3730                '[' => return true,
3731                _ => return false,
3732            }
3733        }
3734        false
3735    }
3736
3737    fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3738        text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3739    }
3740
3741    fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3742        word.segments().any(|segment| {
3743            matches!(
3744                segment.kind(),
3745                LexedWordSegmentKind::SingleQuoted
3746                    | LexedWordSegmentKind::DollarSingleQuoted
3747                    | LexedWordSegmentKind::DoubleQuoted
3748                    | LexedWordSegmentKind::DollarDoubleQuoted
3749            )
3750        }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3751    }
3752
/// True when `text` matches any of these shapes:
///
/// * it contains `*` or `?`;
/// * it ends with `}` and contains `${`;
/// * it ends with `]` and its last `[` is not directly preceded by `$`.
fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
    if text.contains(|ch: char| ch == '*' || ch == '?') {
        return true;
    }
    if text.ends_with('}') && text.contains("${") {
        return true;
    }
    if !text.ends_with(']') {
        return false;
    }
    match text.rfind('[') {
        Some(open_bracket) => !text[..open_bracket].ends_with('$'),
        None => false,
    }
}
3761
/// True for every character except the ones listed below: blanks and newline,
/// `;`, `|`, `&`, `>`, `<`, parentheses, braces and both quote characters.
fn is_word_char(ch: char) -> bool {
    match ch {
        ' ' | '\t' | '\n' => false,
        ';' | '|' | '&' => false,
        '>' | '<' => false,
        '(' | ')' | '{' | '}' => false,
        '\'' | '"' => false,
        _ => true,
    }
}
3768
/// Byte-level counterpart of `is_word_char`: true for every byte except blanks and
/// newline, `;`, `|`, `&`, `>`, `<`, parentheses, braces and both quote bytes.
const fn is_ascii_word_byte(byte: u8) -> bool {
    match byte {
        b' ' | b'\t' | b'\n' => false,
        b';' | b'|' | b'&' => false,
        b'>' | b'<' => false,
        b'(' | b')' | b'{' | b'}' => false,
        b'\'' | b'"' => false,
        _ => true,
    }
}
3787
3788    const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3789        Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3790    }
3791
3792    fn is_plain_word_char(ch: char) -> bool {
3793        Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3794    }
3795
3796    /// Read here document content until the delimiter line is found
        ///
        /// The call first collects the remainder of the current command line, then reads
        /// body lines until one satisfies
        /// `heredoc_line_matches_delimiter(line, delimiter, strip_tabs)` or the input ends.
        /// The collected command-line tail plus a trailing newline is pushed onto
        /// `reinject_buf`.
        ///
        /// `strip_tabs` is only forwarded to the delimiter matcher; body lines are copied
        /// into the result unchanged by this function.
        ///
        /// Returns the body text (delimiter line excluded) together with the span from the
        /// first body position to the start of the delimiter line, or to the end of input
        /// when no delimiter line was found.
3797    pub fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
3798        let mut content = String::with_capacity(64);
            // Scratch line used only while characters are replayed from `reinject_buf`.
3799        let mut current_line = String::with_capacity(64);
3800
3801        // Save rest of current line (after the delimiter token on the command line).
3802        // For `cat <<EOF | sort`, this captures ` | sort` so the parser can
3803        // tokenize the pipe and subsequent command after the heredoc body.
3804        //
3805        // Quoted strings may span multiple lines (e.g., `cat <<EOF; echo "two\nthree"`),
3806        // so we track quoting state and continue across newlines until quotes close.
3807        let mut rest_of_line = String::with_capacity(32);
3808        let rest_of_line_start = self.current_position();
3809        let mut in_double_quote = false;
3810        let mut in_single_quote = false;
3811        let mut in_comment = false;
3812        let mut saw_non_whitespace_tail = false;
            // Length of the backslash run immediately before the current character,
            // counted outside single quotes.
3813        let mut consecutive_backslashes = 0usize;
3814        let mut previous_tail_char = None;
3815        while let Some(ch) = self.peek_char() {
3816            self.advance();
3817            if in_comment {
                    // Inside a `#` comment: copy up to the newline, which ends the tail.
3818                if ch == '\n' {
3819                    break;
3820                }
3821                rest_of_line.push(ch);
3822                previous_tail_char = Some(ch);
3823                continue;
3824            }
                // An unquoted `#` starts a comment only when comments are enabled and
                // `heredoc_tail_hash_starts_comment` accepts the preceding tail character.
3825            if ch == '#'
3826                && !in_single_quote
3827                && !in_double_quote
3828                && self.comments_enabled()
3829                && heredoc_tail_hash_starts_comment(previous_tail_char)
3830            {
3831                in_comment = true;
3832                rest_of_line.push(ch);
3833                previous_tail_char = Some(ch);
3834                consecutive_backslashes = 0;
3835                continue;
3836            }
                // A backslash directly before a newline joins the next line into the tail when
                // it is outside single quotes, follows an even number of backslashes, and
                // either tail content was already seen or
                // `heredoc_tail_line_join_stays_in_tail()` agrees.
3837            let backslash_continues_line = ch == '\\'
3838                && !in_single_quote
3839                && self.peek_char() == Some('\n')
3840                && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
3841                && consecutive_backslashes.is_multiple_of(2);
3842            if backslash_continues_line {
3843                rest_of_line.push(ch);
3844                rest_of_line.push('\n');
3845                self.advance();
3846                consecutive_backslashes = 0;
3847                continue;
3848            }
                // An unquoted newline ends the command-line tail; it is not stored.
3849            if ch == '\n' && !in_double_quote && !in_single_quote {
3850                break;
3851            }
3852            if ch == '"' && !in_single_quote {
3853                in_double_quote = !in_double_quote;
3854            } else if ch == '\'' && !in_double_quote {
3855                in_single_quote = !in_single_quote;
3856            } else if ch == '\\' && in_double_quote {
3857                // Escaped char inside double quotes — skip the next char too
3858                rest_of_line.push(ch);
3859                if let Some(next) = self.peek_char() {
3860                    rest_of_line.push(next);
3861                    self.advance();
3862                }
3863                continue;
3864            }
3865            rest_of_line.push(ch);
3866            if !ch.is_whitespace() {
3867                saw_non_whitespace_tail = true;
3868            }
3869            if ch == '\\' && !in_single_quote {
3870                consecutive_backslashes += 1;
3871            } else {
3872                consecutive_backslashes = 0;
3873            }
3874            previous_tail_char = Some(ch);
3875        }
3876
3877        // If we just drained a heredoc replay buffer (for example when multiple
3878        // heredocs share one command line), resume tracking from the true cursor
3879        // position before we measure the body span.
3880        self.sync_offset_to_cursor();
3881        let content_start = self.current_position();
3882        let mut current_line_start = content_start;
            // Assigned exactly once, on whichever branch below leaves the loop.
3883        let content_end;
3884
3885        // Read lines until we find the delimiter
3886        loop {
                // Fast path: nothing is being replayed, so whole lines are taken straight
                // from the cursor.
3887            if self.reinject_buf.is_empty() {
3888                // When the body reading drains a reinject buffer (from a
3889                // previous heredoc on the same command line), the virtual
3890                // offset drifts away from the cursor. Snap it back before
3891                // any source-based work so spans and `post_heredoc_offset`
3892                // stay within bounds.
3893                self.sync_offset_to_cursor();
3894                let rest = self.cursor.rest();
3895                if rest.is_empty() {
3896                    content_end = self.current_position();
3897                    break;
3898                }
3899
3900                let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
3901                let line = &rest[..line_len];
3902                let has_newline = line_len < rest.len();
3903
3904                if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
                        // The body ends where the delimiter line starts; the delimiter line
                        // and its newline are consumed but not stored.
3905                    content_end = current_line_start;
3906                    self.consume_source_bytes(line_len);
3907                    if has_newline {
3908                        self.consume_ascii_chars(1);
3909                    }
3910                    break;
3911                }
3912
3913                content.push_str(line);
3914                self.consume_source_bytes(line_len);
3915
3916                if has_newline {
3917                    self.consume_ascii_chars(1);
3918                    content.push('\n');
3919                    current_line_start = self.current_position();
3920                    continue;
3921                }
3922
                    // Last line had no trailing newline: the input ended without a delimiter.
3923                content_end = self.current_position();
3924                break;
3925            }
3926
                // Slow path: characters are still pending in `reinject_buf`, so the line is
                // assembled one character at a time in `current_line`.
3927            match self.peek_char() {
3928                Some('\n') => {
3929                    self.advance();
3930                    // Check if current line matches delimiter
3931                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
3932                        content_end = current_line_start;
3933                        break;
3934                    }
3935                    content.push_str(&current_line);
3936                    content.push('\n');
3937                    current_line.clear();
3938                    current_line_start = self.current_position();
3939                }
3940                Some(ch) => {
3941                    current_line.push(ch);
3942                    self.advance();
3943                }
3944                None => {
3945                    // End of input - check last line
3946                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
3947                        content_end = current_line_start;
3948                        break;
3949                    }
3950                    if !current_line.is_empty() {
3951                        content.push_str(&current_line);
3952                    }
3953                    content_end = self.current_position();
3954                    break;
3955                }
3956            }
3957        }
3958
3959        // Re-inject the command-line tail so subsequent same-line tokens (pipes,
3960        // redirects, command words, additional heredocs) stay visible to the
3961        // parser. Always replay a terminating newline so parsing stops before
3962        // tokens that originally lived on later source lines, like `}` or `do`.
            //
            // `self.offset` is rewound to where the tail began, and the offset reached after
            // the body is stored in `reinject_resume_offset`.
            // NOTE(review): presumably that offset is restored once the replay buffer
            // drains; confirm where `reinject_resume_offset` is consumed.
3963        let post_heredoc_offset = self.offset;
3964        self.offset = rest_of_line_start.offset;
3965        for ch in rest_of_line.chars() {
3966            self.reinject_buf.push_back(ch);
3967        }
3968        self.reinject_buf.push_back('\n');
3969        self.reinject_resume_offset = Some(post_heredoc_offset);
3970
3971        HeredocRead {
3972            content,
3973            content_span: Span::from_positions(content_start, content_end),
3974        }
3975    }
3976
3977    fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
3978        let mut chars = self.cursor.rest().chars();
3979        if chars.next() != Some('\n') {
3980            return false;
3981        }
3982
3983        for ch in chars {
3984            if matches!(ch, ' ' | '\t') {
3985                continue;
3986            }
3987            if ch == '\n' {
3988                return false;
3989            }
3990            return matches!(ch, '|' | '&' | ';' | '<' | '>')
3991                || (ch == '#' && self.comments_enabled());
3992        }
3993
3994        false
3995    }
3996}
3997
/// Returns `true` when `line` terminates a heredoc whose delimiter is `delimiter`.
///
/// With `strip_tabs` set (the `<<-` form) leading tabs are removed from `line` first.
/// The remaining text must start with `delimiter` and may be followed only by spaces
/// and tabs; an exact match is the case where nothing follows.
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    };

    candidate
        .strip_prefix(delimiter)
        .is_some_and(|padding| padding.bytes().all(|byte| byte == b' ' || byte == b'\t'))
}
4015
/// Decides whether a `#` in a heredoc command tail begins a comment, given the
/// character that precedes it.
///
/// A `#` with no preceding character, or one preceded by whitespace or any of
/// `;`, `|`, `&`, `<`, `>`, `)`, starts a comment.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(';' | '|' | '&' | '<' | '>' | ')') => true,
        Some(other) => other.is_whitespace(),
    }
}
4021
/// Decodes the character that starts at byte `index` of `input`.
///
/// Returns the character together with the byte index just past it, or `None` when
/// `index` is at/after the end of `input` or does not fall on a character boundary.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    input
        .get(index..)
        .and_then(|tail| tail.chars().next())
        .map(|ch| (ch, index + ch.len_utf8()))
}
4026
4027fn line_has_unclosed_double_paren(prefix: &str) -> bool {
4028    let mut index = 0usize;
4029    let mut depth = 0usize;
4030    let mut in_single = false;
4031    let mut in_double = false;
4032    let mut in_backtick = false;
4033    let mut escaped = false;
4034
4035    while let Some((ch, next_index)) = next_char_boundary(prefix, index) {
4036        let was_escaped = escaped;
4037        if ch == '\\' && !in_single {
4038            escaped = !escaped;
4039            index = next_index;
4040            continue;
4041        }
4042        escaped = false;
4043
4044        match ch {
4045            '\'' if !in_double && !in_backtick && !was_escaped => in_single = !in_single,
4046            '"' if !in_single && !in_backtick && !was_escaped => in_double = !in_double,
4047            '`' if !in_single && !in_double && !was_escaped => in_backtick = !in_backtick,
4048            '(' if !in_single
4049                && !in_double
4050                && !in_backtick
4051                && !was_escaped
4052                && prefix[next_index..].starts_with('(') =>
4053            {
4054                depth += 1;
4055                index = next_index + '('.len_utf8();
4056                continue;
4057            }
4058            ')' if !in_single
4059                && !in_double
4060                && !in_backtick
4061                && !was_escaped
4062                && prefix[next_index..].starts_with(')') =>
4063            {
4064                depth = depth.saturating_sub(1);
4065                index = next_index + ')'.len_utf8();
4066                continue;
4067            }
4068            _ => {}
4069        }
4070
4071        index = next_index;
4072    }
4073
4074    depth > 0
4075}
4076
4077fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4078    let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4079    let prefix = &input[line_start..index];
4080    line_has_unclosed_double_paren(prefix)
4081}
4082
4083fn hash_starts_comment(input: &str, index: usize) -> bool {
4084    if inside_unclosed_double_paren_on_line(input, index) {
4085        return false;
4086    }
4087
4088    let next = &input[index + '#'.len_utf8()..];
4089    input[..index]
4090        .chars()
4091        .next_back()
4092        .is_none_or(|prev| match prev {
4093            '(' => {
4094                let whitespace_index = next.find(char::is_whitespace);
4095                let close_index = next.find(')');
4096
4097                match (whitespace_index, close_index) {
4098                    (Some(whitespace), Some(close)) => whitespace < close,
4099                    (Some(_), None) | (None, None) => true,
4100                    (None, Some(_)) => false,
4101                }
4102            }
4103            _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4104        })
4105}
4106
/// Returns `true` when `ch` ends a heredoc delimiter word.
///
/// Nothing terminates the word while inside single or double quotes or right after a
/// backslash. Otherwise whitespace and the characters `|`, `&`, `;`, `<`, `>`, `(`,
/// `)` do.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }

    match ch {
        '|' | '&' | ';' | '<' | '>' | '(' | ')' => true,
        other => other.is_whitespace(),
    }
}
4118
4119fn scan_double_quoted_command_substitution_segment(
4120    input: &str,
4121    mut index: usize,
4122    subst_depth: usize,
4123) -> Option<usize> {
4124    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4125        match ch {
4126            '"' => return Some(next_index),
4127            '\\' => {
4128                index = next_index;
4129                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4130                    index = escaped_next;
4131                }
4132            }
4133            '$' if input[next_index..].starts_with('{') => {
4134                let consumed = scan_command_subst_parameter_expansion_len(
4135                    &input[next_index + '{'.len_utf8()..],
4136                    subst_depth,
4137                )?;
4138                index = next_index + '{'.len_utf8() + consumed;
4139            }
4140            '$' if input[next_index..].starts_with('(')
4141                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4142            {
4143                let consumed = scan_command_substitution_body_len_inner(
4144                    &input[next_index + '('.len_utf8()..],
4145                    subst_depth + 1,
4146                )?;
4147                index = next_index + '('.len_utf8() + consumed;
4148            }
4149            _ => index = next_index,
4150        }
4151    }
4152
4153    None
4154}
4155
/// Measures the body of a `${...}` parameter expansion.
///
/// `input` begins immediately after the opening `{` (callers slice past it before
/// calling). Returns the byte offset just past the first `}` that is not
/// backslash-escaped and not inside single, ANSI-C (`$'...'`), double, or backtick
/// quoting, or `None` when `input` ends before such a brace is found.
///
/// `subst_depth` is forwarded unchanged to nested `${...}` scans and incremented for
/// nested `$(...)`, `<(...)` and `>(...)` scans.
fn scan_command_subst_parameter_expansion_len(input: &str, subst_depth: usize) -> Option<usize> {
    let mut index = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // Set when the previous character was an unquoted, unescaped `$`, so that a `'`
    // right after it opens an ANSI-C quote instead of a plain single quote.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        // Outside plain single quotes a backslash toggles the escape flag, so a pair of
        // backslashes leaves the following character unescaped.
        if ch == '\\' && !in_single {
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        // Nested `${...}` and `$(...)` (but not `$((`) are skipped as a unit. When the
        // nested scan returns `None` the `$` falls through and is handled as an
        // ordinary character below.
        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            if input[next_index..].starts_with('{')
                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )
            {
                index = next_index + '{'.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }

            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // Fully unquoted `<(` / `>(` is skipped with the same body scanner as `$(...)`.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                // Close an open ANSI-C quote first; otherwise open one if a `$` came
                // immediately before; otherwise toggle a plain single quote.
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                // The unquoted closing brace ends the expansion.
                return Some(next_index);
            }
            _ => {}
        }

        // Evaluated after the quote flags were updated for this character.
        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4255
4256fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
4257    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4258        if !matches!(ch, ' ' | '\t') {
4259            break;
4260        }
4261        index = next_index;
4262    }
4263
4264    let start = index;
4265    let mut cooked = String::new();
4266    let mut in_single = false;
4267    let mut in_double = false;
4268    let mut escaped = false;
4269
4270    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4271        if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
4272            break;
4273        }
4274
4275        index = next_index;
4276        if escaped {
4277            cooked.push(ch);
4278            escaped = false;
4279            continue;
4280        }
4281
4282        match ch {
4283            '\\' if !in_single => escaped = true,
4284            '\'' if !in_double => in_single = !in_single,
4285            '"' if !in_single => in_double = !in_double,
4286            _ => cooked.push(ch),
4287        }
4288    }
4289
4290    (index > start).then_some((index, cooked))
4291}
4292
4293fn skip_command_subst_pending_heredoc(
4294    input: &str,
4295    mut index: usize,
4296    delimiter: &str,
4297    strip_tabs: bool,
4298) -> usize {
4299    while index <= input.len() {
4300        let rest = &input[index..];
4301        let line_len = rest.find('\n').unwrap_or(rest.len());
4302        let line = &rest[..line_len];
4303        let has_newline = line_len < rest.len();
4304
4305        index += line_len;
4306        if has_newline {
4307            index += '\n'.len_utf8();
4308        }
4309
4310        if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4311            return index;
4312        }
4313    }
4314
4315    index
4316}
4317
4318fn scan_command_subst_ansi_c_single_quoted_segment(
4319    input: &str,
4320    quote_index: usize,
4321) -> Option<usize> {
4322    let mut index = quote_index + '\''.len_utf8();
4323
4324    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4325        index = next_index;
4326        if ch == '\\' {
4327            if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4328                index = escaped_next;
4329            }
4330            continue;
4331        }
4332
4333        if ch == '\'' {
4334            return Some(index);
4335        }
4336    }
4337
4338    None
4339}
4340
4341fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
4342    let mut index = start;
4343
4344    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4345        index = next_index;
4346        if ch == '\\' {
4347            if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4348                index = escaped_next;
4349            }
4350            continue;
4351        }
4352
4353        if ch == '`' {
4354            return Some(index);
4355        }
4356    }
4357
4358    None
4359}
4360
4361fn flush_scanned_command_subst_keyword(
4362    current_word: &mut String,
4363    pending_case_headers: &mut usize,
4364    case_clause_depths: &mut SmallVec<[usize; 4]>,
4365    depth: usize,
4366    word_started_at_command_start: &mut bool,
4367) {
4368    if current_word.is_empty() {
4369        *word_started_at_command_start = false;
4370        return;
4371    }
4372
4373    match current_word.as_str() {
4374        "case" if *word_started_at_command_start => *pending_case_headers += 1,
4375        "in" if *pending_case_headers > 0 => {
4376            *pending_case_headers -= 1;
4377            case_clause_depths.push(depth);
4378        }
4379        "esac" if *word_started_at_command_start => {
4380            case_clause_depths.pop();
4381        }
4382        _ => {}
4383    }
4384
4385    current_word.clear();
4386    *word_started_at_command_start = false;
4387}
4388
/// Scans the body of a `$(...)` command substitution and returns its byte length.
///
/// `input` starts just after the opening `(`. Parenthesis depth starts at 1 and the
/// function returns `Some(offset)`, where `offset` is the index just past the `)` that
/// brings the depth back to zero.
///
/// Returns `None` when `subst_depth` has reached `DEFAULT_MAX_SUBST_DEPTH`, when a
/// nested double-quote, backtick, ANSI-C quote, `${...}` or `$(...)` scan fails, or
/// when `input` ends before the closing `)` is found.
///
/// While scanning it skips comments, quoted segments, `${...}` expansions, nested
/// `$(...)` substitutions and the bodies of pending heredocs, and it tracks
/// `case ... in ... esac` so that a `)` closing a case pattern is not counted as a
/// closing parenthesis.
fn scan_command_substitution_body_len_inner(input: &str, subst_depth: usize) -> Option<usize> {
    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
        return None;
    }

    let mut index = 0usize;
    let mut depth = 1;
    // Heredocs announced on the current line as (delimiter, strip_tabs); their bodies
    // are skipped once the next newline is reached.
    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
    let mut pending_case_headers = 0usize;
    // Parenthesis depth recorded for each `case ... in` that is currently open.
    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
    let mut current_word = String::with_capacity(16);
    let mut at_command_start = true;
    let mut expecting_redirection_target = false;
    let mut current_word_started_at_command_start = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        match ch {
            '#' if hash_starts_comment(input, index) => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                // Skip to the end of the comment line; the newline that ends it also
                // triggers skipping of any pending heredoc bodies.
                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
                    index = comment_next;
                    if comment_ch == '\n' {
                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                            index = skip_command_subst_pending_heredoc(
                                input, index, &delimiter, strip_tabs,
                            );
                        }
                        at_command_start = true;
                        expecting_redirection_target = false;
                        break;
                    }
                }
            }
            '(' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                depth += 1;
                index = next_index;
                at_command_start = true;
                expecting_redirection_target = false;
            }
            ')' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                // A `)` at the depth of the innermost open case clause is consumed
                // without changing the depth.
                if case_clause_depths
                    .last()
                    .is_some_and(|case_depth| *case_depth == depth)
                {
                    index = next_index;
                    at_command_start = true;
                    expecting_redirection_target = false;
                    continue;
                }
                depth -= 1;
                index = next_index;
                if depth == 0 {
                    return Some(index);
                }
                at_command_start = false;
                expecting_redirection_target = false;
            }
            '"' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_double_quoted_command_substitution_segment(
                    input,
                    next_index,
                    subst_depth,
                )?;
                // A quoted segment either satisfies a pending redirection target or
                // counts as command text.
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '\'' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                // Plain single quotes have no escapes: skip to the next `'`. If none is
                // found this consumes the rest of the input.
                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
                    index = quoted_next;
                    if quoted_ch == '\'' {
                        break;
                    }
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '`' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_backtick_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // `$'...'` ANSI-C quoting; `next_index` is the position of the opening `'`.
            '$' if input[next_index..].starts_with('\'') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '\\' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                // Skip the escaped character, if there is one.
                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
                    index = escaped_next;
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '>' => {
                // A digits-only word that began at command start is treated as a
                // file-descriptor prefix, so the command-start state is restored.
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                index = next_index;
                expecting_redirection_target = true;
            }
            '<' if input[next_index..].starts_with('<') => {
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                // After an unclosed `((` on this line the `<<` is skipped without any
                // heredoc handling (presumably an arithmetic shift).
                if inside_unclosed_double_paren_on_line(input, index) {
                    index = next_index + '<'.len_utf8();
                    continue;
                }

                // `<<<`: skip all three characters and expect a redirection target.
                if input[next_index + '<'.len_utf8()..].starts_with('<') {
                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
                    expecting_redirection_target = true;
                    continue;
                }

                // `<<` or `<<-`: read the delimiter and queue the heredoc so its body
                // is skipped at the next newline.
                let strip_tabs = input[next_index..].starts_with("<-");
                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
                if let Some((delimiter_index, delimiter)) =
                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
                {
                    pending_heredocs.push((delimiter, strip_tabs));
                    index = delimiter_index;
                    expecting_redirection_target = false;
                } else {
                    index = next_index;
                    expecting_redirection_target = true;
                }
            }
            '\n' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                index = next_index;
                // Heredoc bodies begin on the line after their operator.
                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                    index =
                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
                }
                at_command_start = true;
                expecting_redirection_target = false;
            }
            '$' if input[next_index..].starts_with('{') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )?;
                index = next_index + '{'.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Nested `$(...)`; `$((` is excluded and handled by the generic arms.
            '$' if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
            {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )?;
                index = next_index + '('.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            _ => {
                // ASCII alphanumerics and `_` accumulate into the current word so that
                // `case` / `in` / `esac` can be recognised when the word is flushed.
                if ch.is_ascii_alphanumeric() || ch == '_' {
                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
                    {
                        current_word_started_at_command_start = true;
                        at_command_start = false;
                    }
                    current_word.push(ch);
                } else {
                    let had_word = !current_word.is_empty();
                    flush_scanned_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    match ch {
                        ' ' | '\t' => {}
                        // Command separators put the scanner back at command start.
                        ';' | '|' | '&' => {
                            at_command_start = true;
                            expecting_redirection_target = false;
                        }
                        _ => {
                            if !expecting_redirection_target {
                                at_command_start = false;
                            }
                        }
                    }
                }
                index = next_index;
            }
        }
    }

    None
}
4742
4743pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
4744    scan_command_substitution_body_len_inner(input, 0)
4745}
4746
4747#[cfg(test)]
4748mod tests {
4749    use super::*;
4750
4751    fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
4752        match token.kind {
4753            kind if kind.is_word_like() => token.word_string(),
4754            TokenKind::Comment => token
4755                .span
4756                .slice(source)
4757                .strip_prefix('#')
4758                .map(str::to_string),
4759            TokenKind::Error => token
4760                .error_kind()
4761                .map(LexerErrorKind::message)
4762                .map(str::to_string),
4763            _ => None,
4764        }
4765    }
4766
4767    fn assert_next_token(
4768        lexer: &mut Lexer<'_>,
4769        expected_kind: TokenKind,
4770        expected_text: Option<&str>,
4771    ) {
4772        let token = lexer.next_lexed_token().unwrap();
4773        assert_eq!(token.kind, expected_kind);
4774        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4775    }
4776
4777    fn assert_next_token_with_comments(
4778        lexer: &mut Lexer<'_>,
4779        expected_kind: TokenKind,
4780        expected_text: Option<&str>,
4781    ) {
4782        let token = lexer.next_lexed_token_with_comments().unwrap();
4783        assert_eq!(token.kind, expected_kind);
4784        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4785    }
4786
4787    fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
4788        let mut lexer = Lexer::new(input);
4789
4790        while let Some(token) = lexer.next_lexed_token() {
4791            if token.kind == TokenKind::Newline {
4792                continue;
4793            }
4794
4795            assert_eq!(
4796                token.span.start.line, token.span.end.line,
4797                "token should stay on one line: {:?}",
4798                token
4799            );
4800        }
4801    }
4802
4803    #[test]
4804    fn test_simple_words() {
4805        let mut lexer = Lexer::new("echo hello world");
4806
4807        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4808        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
4809        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
4810        assert!(lexer.next_lexed_token().is_none());
4811    }
4812
4813    #[test]
4814    fn test_single_quoted_string() {
4815        let mut lexer = Lexer::new("echo 'hello world'");
4816
4817        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4818        // Single-quoted strings return LiteralWord (no variable expansion)
4819        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("hello world"));
4820        assert!(lexer.next_lexed_token().is_none());
4821    }
4822
4823    #[test]
4824    fn test_double_quoted_string() {
4825        let mut lexer = Lexer::new("echo \"hello world\"");
4826
4827        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4828        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("hello world"));
4829        assert!(lexer.next_lexed_token().is_none());
4830    }
4831
4832    #[test]
4833    fn test_double_quoted_expansion_token_keeps_source_backing() {
4834        let source = r#""$bar""#;
4835        let mut lexer = Lexer::new(source);
4836
4837        let token = lexer.next_lexed_token().unwrap();
4838        assert_eq!(token.kind, TokenKind::QuotedWord);
4839        assert_eq!(token.word_text(), Some("$bar"));
4840
4841        let word = token.word().unwrap();
4842        let segment = word.single_segment().unwrap();
4843        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
4844        assert_eq!(segment.span().unwrap().slice(source), "$bar");
4845    }
4846
4847    #[test]
4848    fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
4849        let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
4850        let mut lexer = Lexer::new(source);
4851
4852        let token = lexer.next_lexed_token().unwrap();
4853        assert_eq!(token.kind, TokenKind::QuotedWord);
4854        assert_eq!(
4855            token.word_text(),
4856            Some(r#"$(echo "$line" | cut -d' ' -f2-)"#)
4857        );
4858    }
4859
4860    #[test]
4861    fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
4862        let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
4863        let mut lexer = Lexer::new(source);
4864
4865        let token = lexer.next_lexed_token().unwrap();
4866        assert_eq!(token.kind, TokenKind::QuotedWord);
4867        assert_eq!(
4868            token.word_text(),
4869            Some(r#"$(echo "${@}" | tr -d '[:space:]')"#)
4870        );
4871    }
4872
4873    #[test]
4874    fn test_mixed_word_keeps_segment_kinds() {
4875        let source = r#"foo"bar"'baz'"#;
4876        let mut lexer = Lexer::new(source);
4877
4878        let token = lexer.next_lexed_token().unwrap();
4879        assert_eq!(token.kind, TokenKind::Word);
4880
4881        let word = token.word().unwrap();
4882        let segments: Vec<_> = word
4883            .segments()
4884            .map(|segment| (segment.kind(), segment.as_str().to_string()))
4885            .collect();
4886
4887        assert_eq!(
4888            segments,
4889            vec![
4890                (LexedWordSegmentKind::Plain, "foo".to_string()),
4891                (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
4892                (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
4893            ]
4894        );
4895        assert_eq!(word.joined_text(), "foobarbaz");
4896        assert_eq!(
4897            word.segments()
4898                .next()
4899                .and_then(LexedWordSegment::span)
4900                .unwrap()
4901                .slice(source),
4902            "foo"
4903        );
4904    }
4905
4906    #[test]
4907    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
4908        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";
4909
4910        let consumed = scan_command_substitution_body_len(source).expect("expected match");
4911        let body = &source[..consumed];
4912
4913        assert!(body.contains("field, direction"));
4914        assert!(body.ends_with(')'));
4915    }
4916
4917    #[test]
4918    fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
4919        let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";
4920
4921        let consumed = scan_command_substitution_body_len(source).expect("expected match");
4922        let body = &source[..consumed];
4923
4924        assert!(body.contains("printf '%s' y"));
4925        assert!(body.ends_with(')'));
4926    }
4927
4928    #[test]
4929    fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
4930        let source = " (# comment with )\nprintf %s 1,2\n) )\"";
4931
4932        let consumed = scan_command_substitution_body_len(source).expect("expected match");
4933        let body = &source[..consumed];
4934
4935        assert!(body.contains("printf %s 1,2"));
4936        assert!(body.ends_with(')'));
4937    }
4938
4939    #[test]
4940    fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
4941        let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";
4942
4943        let consumed = scan_command_substitution_body_len(source).expect("expected match");
4944        let body = &source[..consumed];
4945
4946        assert!(body.contains("field, direction"));
4947        assert!(body.ends_with(')'));
4948    }
4949
4950    #[test]
4951    fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
4952        let source = "printf %s ${x//foo/)},1)\"";
4953
4954        let consumed = scan_command_substitution_body_len(source).expect("expected match");
4955        let body = &source[..consumed];
4956
4957        assert!(body.contains("${x//foo/)},1"));
4958        assert!(body.ends_with(')'));
4959    }
4960
4961    #[test]
4962    fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
4963        let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";
4964
4965        let consumed = scan_command_substitution_body_len(source).expect("expected match");
4966        let body = &source[..consumed];
4967
4968        assert!(body.contains("printf %s 1,2"));
4969        assert!(body.ends_with(')'));
4970    }
4971
4972    #[test]
4973    fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
4974        let source = "[[ \"$buf\" == (#b)(*) ]]";
4975        let index = source.find('#').expect("expected hash");
4976
4977        assert!(!hash_starts_comment(source, index));
4978    }
4979
4980    #[test]
4981    fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
4982        let source = "(#comment with )";
4983        let index = source.find('#').expect("expected hash");
4984
4985        assert!(hash_starts_comment(source, index));
4986    }
4987
4988    #[test]
4989    fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
4990        let source = "(( #c < 256 ))";
4991        let index = source.find('#').expect("expected hash");
4992
4993        assert!(!hash_starts_comment(source, index));
4994    }
4995
4996    #[test]
4997    fn test_hash_starts_comment_respects_quoted_double_parens() {
4998        let source = "printf '((' # comment";
4999        let index = source.find('#').expect("expected hash");
5000
5001        assert!(hash_starts_comment(source, index));
5002    }
5003
5004    #[test]
5005    fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
5006        let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";
5007
5008        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5009        let body = &source[..consumed];
5010
5011        assert!(body.contains("printf %s 1,2"));
5012        assert!(body.ends_with(')'));
5013    }
5014
5015    #[test]
5016    fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
5017        let source = " (#comment with )\nprintf %s 1,2\n) )\"";
5018
5019        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5020        let body = &source[..consumed];
5021
5022        assert!(body.contains("printf %s 1,2"));
5023        assert!(body.ends_with(')'));
5024    }
5025
5026    #[test]
5027    fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
5028        let source = "((x<<2))\nprintf %s 1,2\n)\"";
5029
5030        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5031        let body = &source[..consumed];
5032
5033        assert!(body.contains("printf %s 1,2"));
5034        assert!(body.ends_with(')'));
5035    }
5036
5037    #[test]
5038    fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
5039        let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";
5040
5041        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5042        let body = &source[..consumed];
5043
5044        assert!(body.contains("printf %s 1,2"));
5045        assert!(body.ends_with("))"));
5046    }
5047
5048    #[test]
5049    fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
5050        let source = "printf %s 1,2; echo case in)\"";
5051
5052        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5053        let body = &source[..consumed];
5054
5055        assert!(body.contains("echo case in"));
5056        assert!(body.ends_with(')'));
5057    }
5058
5059    #[test]
5060    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
5061        let source = "printf %s $'a\\'b'; printf %s 1,2)\"";
5062
5063        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5064        let body = &source[..consumed];
5065
5066        assert!(body.contains("$'a\\'b'"));
5067        assert!(body.contains("printf %s 1,2"));
5068        assert!(body.ends_with(')'));
5069    }
5070
5071    #[test]
5072    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
5073        let source = "printf %s `echo foo)`; printf %s ok)\"";
5074
5075        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5076        let body = &source[..consumed];
5077
5078        assert!(body.contains("`echo foo)`"));
5079        assert!(body.contains("printf %s ok"));
5080        assert!(body.ends_with(')'));
5081    }
5082
5083    #[test]
5084    fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
5085        let source = "printf %s ${x/`echo }`/foo)},1)\"";
5086
5087        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5088        let body = &source[..consumed];
5089
5090        assert!(body.contains("${x/`echo }`/foo)},1"));
5091        assert!(body.ends_with(')'));
5092    }
5093
5094    #[test]
5095    fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
5096     {
5097        let source = "printf %s ${x/<(echo })/foo)},1)\"";
5098
5099        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5100        let body = &source[..consumed];
5101
5102        assert!(body.contains("${x/<(echo })/foo)},1"));
5103        assert!(body.ends_with(')'));
5104    }
5105
5106    #[test]
5107    fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
5108        let source = "printf %s 1,2; echo case in)";
5109
5110        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5111        let body = &source[..consumed];
5112
5113        assert_eq!(body, source);
5114    }
5115
5116    #[test]
5117    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
5118        let source = "printf %s $'a\\'b'; printf %s 1,2)";
5119
5120        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5121        let body = &source[..consumed];
5122
5123        assert_eq!(body, source);
5124    }
5125
5126    #[test]
5127    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
5128        let source = "printf %s `echo foo)`; printf %s ok)";
5129
5130        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5131        let body = &source[..consumed];
5132
5133        assert_eq!(body, source);
5134    }
5135
5136    #[test]
5137    fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
5138        let source = "echo \"$line\" | cut -d' ' -f2-)";
5139
5140        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5141        let body = &source[..consumed];
5142
5143        assert_eq!(body, source);
5144    }
5145
5146    #[test]
5147    fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
5148        let source = "echo \"${@}\" | tr -d '[:space:]')";
5149
5150        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5151        let body = &source[..consumed];
5152
5153        assert_eq!(body, source);
5154    }
5155
5156    #[test]
5157    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
5158        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";
5159
5160        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5161        let body = &source[..consumed];
5162
5163        assert_eq!(body, source);
5164    }
5165
5166    #[test]
5167    fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
5168        let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";
5169
5170        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5171        let body = &source[..consumed];
5172
5173        assert_eq!(body, source);
5174    }
5175
5176    #[test]
5177    fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
5178        let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
5179        let mut lexer = Lexer::new(source);
5180
5181        let first = lexer.next_lexed_token().expect("expected first token");
5182        assert!(first.kind.is_word_like(), "{:?}", first.kind);
5183        assert_eq!(first.word_string().as_deref(), Some("echo"));
5184
5185        let second = lexer.next_lexed_token().expect("expected second token");
5186        assert!(second.kind.is_word_like(), "{:?}", second.kind);
5187        assert_eq!(
5188            second.word_string().as_deref(),
5189            Some("$(echo \"$(( $(printf ')') + 1 ))\")")
5190        );
5191    }
5192
5193    #[test]
5194    fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
5195        let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
5196        let start = source.find("$(").expect("expected command substitution") + 2;
5197        let consumed =
5198            scan_command_substitution_body_len(&source[start..]).expect("expected match");
5199        assert_eq!(&source[start..start + consumed], "echo $var | tr A-Z a-z)");
5200    }
5201
5202    #[test]
5203    fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
5204        let source = "echo $(echo $(basename $filename .fuzz))";
5205        let start = source.find("$(").expect("expected command substitution") + 2;
5206        let consumed =
5207            scan_command_substitution_body_len(&source[start..]).expect("expected match");
5208        assert_eq!(
5209            &source[start..start + consumed],
5210            "echo $(basename $filename .fuzz))"
5211        );
5212    }
5213
5214    #[test]
5215    fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
5216        let source = "\n       [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n    )";
5217        let consumed = scan_command_substitution_body_len(source).expect("expected match");
5218        assert_eq!(consumed, source.len());
5219    }
5220
5221    #[test]
5222    fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
5223        let source = "'foo'bar";
5224        let mut lexer = Lexer::new(source);
5225
5226        let token = lexer.next_lexed_token().unwrap();
5227        assert_eq!(token.kind, TokenKind::LiteralWord);
5228
5229        let word = token.word().unwrap();
5230        let segments: Vec<_> = word
5231            .segments()
5232            .map(|segment| (segment.kind(), segment.as_str().to_string()))
5233            .collect();
5234
5235        assert_eq!(
5236            segments,
5237            vec![
5238                (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
5239                (LexedWordSegmentKind::Plain, "bar".to_string()),
5240            ]
5241        );
5242        assert_eq!(word.joined_text(), "foobar");
5243        assert_eq!(
5244            word.segments()
5245                .nth(1)
5246                .and_then(LexedWordSegment::span)
5247                .unwrap()
5248                .slice(source),
5249            "bar"
5250        );
5251    }
5252
5253    #[test]
5254    fn test_unquoted_command_substitution_word_keeps_source_backing() {
5255        let source = "$(printf hi)";
5256        let mut lexer = Lexer::new(source);
5257
5258        let token = lexer.next_lexed_token().unwrap();
5259        assert_eq!(token.kind, TokenKind::Word);
5260
5261        let word = token.word().unwrap();
5262        let segment = word.single_segment().unwrap();
5263        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5264        assert_eq!(segment.as_str(), source);
5265        assert_eq!(segment.span().unwrap().slice(source), source);
5266    }
5267
5268    #[test]
5269    fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
5270        let source = "${arr[$RANDOM % ${#arr[@]}]}";
5271        let mut lexer = Lexer::new(source);
5272
5273        let token = lexer.next_lexed_token().unwrap();
5274        assert_eq!(token.kind, TokenKind::Word);
5275
5276        let word = token.word().unwrap();
5277        let segment = word.single_segment().unwrap();
5278        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5279        assert_eq!(segment.as_str(), source);
5280        assert_eq!(segment.span().unwrap().slice(source), source);
5281    }
5282
5283    #[test]
5284    fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
5285        let source = "\"foo\"$(printf hi)";
5286        let mut lexer = Lexer::new(source);
5287
5288        let token = lexer.next_lexed_token().unwrap();
5289        assert_eq!(token.kind, TokenKind::Word);
5290
5291        let word = token.word().unwrap();
5292        let continuation = word.segments().nth(1).unwrap();
5293        assert_eq!(continuation.kind(), LexedWordSegmentKind::Plain);
5294        assert_eq!(continuation.as_str(), "$(printf hi)");
5295        assert_eq!(continuation.span().unwrap().slice(source), "$(printf hi)");
5296    }
5297
5298    #[test]
5299    fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
5300        let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
5301        let mut lexer = Lexer::new(source);
5302
5303        let token = lexer.next_lexed_token().unwrap();
5304        assert_eq!(token.kind, TokenKind::QuotedWord);
5305
5306        let word = token.word().unwrap();
5307        let segment = word.single_segment().unwrap();
5308        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
5309        assert_eq!(segment.as_str(), "${arr[$RANDOM % ${#arr[@]}]}");
5310        assert_eq!(
5311            segment.span().unwrap().slice(source),
5312            "${arr[$RANDOM % ${#arr[@]}]}"
5313        );
5314    }
5315
5316    #[test]
5317    fn test_ansi_c_control_escape_can_consume_quote() {
5318        let mut lexer = Lexer::new("echo $'\\c''");
5319
5320        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5321        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("\x07"));
5322        assert!(lexer.next_lexed_token().is_none());
5323    }
5324
5325    #[test]
5326    fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
5327        let source = r#"out_line="${out_line//'"'/'\"'}"
5328"#;
5329        let mut lexer = Lexer::new(source);
5330
5331        assert_next_token(
5332            &mut lexer,
5333            TokenKind::Word,
5334            Some(r#"out_line=${out_line//'"'/'"'}"#),
5335        );
5336        assert_next_token(&mut lexer, TokenKind::Newline, None);
5337        assert!(lexer.next_lexed_token().is_none());
5338    }
5339
5340    #[test]
5341    fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
5342        let source = r#"out_line="${out_line//'"'/'\"'}"
5343echo "Error: Missing python3!"
5344cat << 'EOF' > "${pywrapper}"
5345import os
5346EOF
5347"#;
5348        let mut lexer = Lexer::new(source);
5349
5350        assert_next_token(
5351            &mut lexer,
5352            TokenKind::Word,
5353            Some(r#"out_line=${out_line//'"'/'"'}"#),
5354        );
5355        assert_next_token(&mut lexer, TokenKind::Newline, None);
5356        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5357        assert_next_token(
5358            &mut lexer,
5359            TokenKind::QuotedWord,
5360            Some("Error: Missing python3!"),
5361        );
5362        assert_next_token(&mut lexer, TokenKind::Newline, None);
5363        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5364        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5365        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("EOF"));
5366        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5367        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("${pywrapper}"));
5368    }
5369
5370    #[test]
5371    fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
5372        let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
5373        let mut lexer = Lexer::new(source);
5374
5375        let token = lexer.next_lexed_token().unwrap();
5376        assert_eq!(token.kind, TokenKind::Word);
5377        assert_eq!(token.span.slice(source), "crypt=${crypt//\\\\/\\\\\\\\}");
5378        assert!(token.source_slice(source).is_none());
5379        assert_eq!(
5380            token.word_string().as_deref(),
5381            Some("crypt=${crypt//\\/\\\\}")
5382        );
5383        assert_next_token(&mut lexer, TokenKind::Newline, None);
5384        assert!(lexer.next_lexed_token().is_none());
5385    }
5386
5387    #[test]
5388    fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
5389        let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n  if true; then\n    txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n  fi\n}\n";
5390        let mut lexer = Lexer::new(source);
5391
5392        assert_next_token(
5393            &mut lexer,
5394            TokenKind::Word,
5395            Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
5396        );
5397        assert_next_token(&mut lexer, TokenKind::Newline, None);
5398        assert_next_token(&mut lexer, TokenKind::Newline, None);
5399        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5400        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5401        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5402        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
5403        assert_next_token(&mut lexer, TokenKind::Newline, None);
5404        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5405        assert_next_token(&mut lexer, TokenKind::Word, Some("true"));
5406        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5407        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5408        assert_next_token(&mut lexer, TokenKind::Newline, None);
5409        assert_next_token(
5410            &mut lexer,
5411            TokenKind::Word,
5412            Some(
5413                "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
5414            ),
5415        );
5416        assert_next_token(&mut lexer, TokenKind::Newline, None);
5417        assert_next_token(&mut lexer, TokenKind::Word, Some("fi"));
5418        assert_next_token(&mut lexer, TokenKind::Newline, None);
5419        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
5420        assert_next_token(&mut lexer, TokenKind::Newline, None);
5421        assert!(lexer.next_lexed_token().is_none());
5422    }
5423
5424    #[test]
5425    fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
5426        let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
5427        let mut lexer = Lexer::new(source);
5428
5429        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5430        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5431        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5432        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5433        assert_next_token(&mut lexer, TokenKind::And, None);
5434        assert_next_token(&mut lexer, TokenKind::Word, Some("!"));
5435        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5436        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5437        assert_next_token(&mut lexer, TokenKind::Word, Some("=~"));
5438        assert_next_token(&mut lexer, TokenKind::Word, Some("^{"));
5439        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5440        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5441        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5442        assert_next_token(&mut lexer, TokenKind::Newline, None);
5443        assert!(lexer.next_lexed_token().is_none());
5444    }
5445
5446    #[test]
5447    fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
5448        let source = "echo -{$(echo a),b}-\n";
5449        let mut lexer = Lexer::new(source);
5450
5451        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5452        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$(echo a),b}-"));
5453        assert_next_token(&mut lexer, TokenKind::Newline, None);
5454        assert!(lexer.next_lexed_token().is_none());
5455    }
5456
5457    #[test]
5458    fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
5459        let source = "echo -{$((1 + 2)),b}-\n";
5460        let mut lexer = Lexer::new(source);
5461
5462        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5463        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$((1 + 2)),b}-"));
5464        assert_next_token(&mut lexer, TokenKind::Newline, None);
5465        assert!(lexer.next_lexed_token().is_none());
5466    }
5467
5468    #[test]
5469    fn test_operators() {
5470        let mut lexer = Lexer::new("a |& b | c && d || e; f &");
5471
5472        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5473        assert_next_token(&mut lexer, TokenKind::PipeBoth, None);
5474        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5475        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5476        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5477        assert_next_token(&mut lexer, TokenKind::And, None);
5478        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5479        assert_next_token(&mut lexer, TokenKind::Or, None);
5480        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5481        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5482        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5483        assert_next_token(&mut lexer, TokenKind::Background, None);
5484        assert!(lexer.next_lexed_token().is_none());
5485    }
5486
5487    #[test]
5488    fn test_double_left_bracket_requires_separator() {
5489        let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");
5490
5491        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5492        assert_next_token(&mut lexer, TokenKind::Word, Some("foo"));
5493        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5494        assert_next_token(&mut lexer, TokenKind::Newline, None);
5495        assert_next_token(&mut lexer, TokenKind::Word, Some("[[z]"));
5496        assert_next_token(&mut lexer, TokenKind::Newline, None);
5497        assert!(lexer.next_lexed_token().is_none());
5498    }
5499
5500    #[test]
5501    fn test_redirects() {
5502        let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");
5503
5504        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5505        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5506        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5507        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5508        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5509        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5510        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5511        assert_next_token(&mut lexer, TokenKind::RedirectFdAppend, None);
5512        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5513        let token = lexer.next_lexed_token().unwrap();
5514        assert_eq!(token.kind, TokenKind::Clobber);
5515        assert_eq!(token.fd_value(), Some(2));
5516        assert_eq!(token_text(&token, lexer.input), None);
5517        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5518        assert_next_token(&mut lexer, TokenKind::RedirectIn, None);
5519        assert_next_token(&mut lexer, TokenKind::Word, Some("g"));
5520        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5521        assert_next_token(&mut lexer, TokenKind::Word, Some("h"));
5522        assert_next_token(&mut lexer, TokenKind::HereString, None);
5523        assert_next_token(&mut lexer, TokenKind::Word, Some("i"));
5524        assert_next_token(&mut lexer, TokenKind::RedirectBothAppend, None);
5525        assert_next_token(&mut lexer, TokenKind::Word, Some("j"));
5526        assert_next_token(&mut lexer, TokenKind::RedirectReadWrite, None);
5527        assert_next_token(&mut lexer, TokenKind::Word, Some("k"));
5528    }
5529
5530    #[test]
5531    fn test_comment() {
5532        let mut lexer = Lexer::new("echo hello # this is a comment\necho world");
5533
5534        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5535        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5536        assert_next_token(&mut lexer, TokenKind::Newline, None);
5537        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5538        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5539    }
5540
5541    #[test]
5542    fn test_comment_token_with_span() {
5543        let mut lexer = Lexer::new("# lead\necho hi # tail");
5544
5545        let comment = lexer.next_lexed_token_with_comments().unwrap();
5546        assert_eq!(comment.kind, TokenKind::Comment);
5547        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
5548        assert_eq!(comment.span.start.line, 1);
5549        assert_eq!(comment.span.start.column, 1);
5550        assert_eq!(comment.span.end.line, 1);
5551        assert_eq!(comment.span.end.column, 7);
5552
5553        assert_next_token(&mut lexer, TokenKind::Newline, None);
5554        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5555        assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));
5556
5557        let inline = lexer.next_lexed_token_with_comments().unwrap();
5558        assert_eq!(inline.kind, TokenKind::Comment);
5559        assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
5560        assert_eq!(inline.span.start.line, 2);
5561        assert_eq!(inline.span.start.column, 9);
5562    }
5563
5564    #[test]
5565    fn test_comment_token_preserves_hash_boundaries() {
5566        let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");
5567
5568        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5569        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
5570        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
5571        assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
5572        assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
5573        assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
5574        assert!(lexer.next_lexed_token_with_comments().is_none());
5575    }
5576
5577    #[test]
5578    fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5579        let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5580
5581        let mut saw_comment = false;
5582        while let Some(token) = lexer.next_lexed_token_with_comments() {
5583            if token.kind == TokenKind::Comment {
5584                saw_comment = true;
5585                break;
5586            }
5587        }
5588
5589        assert!(
5590            !saw_comment,
5591            "zsh inline glob controls inside [[ ]] should not lex as comments"
5592        );
5593    }
5594
5595    #[test]
5596    fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5597        let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5598
5599        let mut saw_comment = false;
5600        while let Some(token) = lexer.next_lexed_token_with_comments() {
5601            if token.kind == TokenKind::Comment {
5602                saw_comment = true;
5603                break;
5604            }
5605        }
5606
5607        assert!(
5608            !saw_comment,
5609            "zsh arithmetic char literals inside (( )) should not lex as comments"
5610        );
5611    }
5612
5613    #[test]
5614    fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
5615        let mut lexer = Lexer::new(
5616            "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
5617        );
5618
5619        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5620        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5621        assert_next_token(
5622            &mut lexer,
5623            TokenKind::LiteralWord,
5624            Some("\\e]133;C;cmdline_url=%s\\a"),
5625        );
5626        assert_next_token(
5627            &mut lexer,
5628            TokenKind::QuotedWord,
5629            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
5630        );
5631        assert_next_token(&mut lexer, TokenKind::Newline, None);
5632    }
5633
5634    #[test]
5635    fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
5636        let mut lexer = Lexer::new(
5637            "() {\n  builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
5638        );
5639
5640        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5641        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5642        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
5643        assert_next_token(&mut lexer, TokenKind::Newline, None);
5644        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5645        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5646        assert_next_token(
5647            &mut lexer,
5648            TokenKind::LiteralWord,
5649            Some("\\e]133;C;cmdline_url=%s\\a"),
5650        );
5651        assert_next_token(
5652            &mut lexer,
5653            TokenKind::QuotedWord,
5654            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
5655        );
5656        assert_next_token(&mut lexer, TokenKind::Newline, None);
5657        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
5658        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
5659        assert_next_token(&mut lexer, TokenKind::Newline, None);
5660    }
5661
5662    #[test]
5663    fn test_variable_words() {
5664        let mut lexer = Lexer::new("echo $HOME $USER");
5665
5666        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5667        assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
5668        assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
5669        assert!(lexer.next_lexed_token().is_none());
5670    }
5671
5672    #[test]
5673    fn test_pipeline_tokens() {
5674        let mut lexer = Lexer::new("echo hello | cat");
5675
5676        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5677        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5678        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5679        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5680        assert!(lexer.next_lexed_token().is_none());
5681    }
5682
5683    #[test]
5684    fn test_read_heredoc() {
5685        // Simulate state after reading "cat <<EOF" - positioned at newline before content
5686        let mut lexer = Lexer::new("\nhello\nworld\nEOF");
5687        let content = lexer.read_heredoc("EOF", false);
5688        assert_eq!(content.content, "hello\nworld\n");
5689    }
5690
5691    #[test]
5692    fn test_read_heredoc_single_line() {
5693        let mut lexer = Lexer::new("\ntest\nEOF");
5694        let content = lexer.read_heredoc("EOF", false);
5695        assert_eq!(content.content, "test\n");
5696    }
5697
5698    #[test]
5699    fn test_read_heredoc_full_scenario() {
5700        // Full scenario: "cat <<EOF\nhello\nworld\nEOF"
5701        let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
5702
5703        // Parser would read these tokens
5704        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5705        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5706        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5707
5708        // Now read heredoc content
5709        let content = lexer.read_heredoc("EOF", false);
5710        assert_eq!(content.content, "hello\nworld\n");
5711    }
5712
5713    #[test]
5714    fn test_read_heredoc_with_redirect() {
5715        // Rest-of-line (> file.txt) is re-injected into the lexer buffer
5716        let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
5717        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5718        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5719        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5720        let content = lexer.read_heredoc("EOF", false);
5721        assert_eq!(content.content, "hello\n");
5722        // The redirect tokens are now available from the lexer
5723        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5724        assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
5725    }
5726
5727    #[test]
5728    fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
5729        let source = "cat <<EOF | grep hello \\\n  | sort \\\n  > out.txt\nhello\nEOF\n";
5730        let mut lexer = Lexer::new(source);
5731
5732        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5733        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5734        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5735
5736        let heredoc = lexer.read_heredoc("EOF", false);
5737        assert_eq!(heredoc.content, "hello\n");
5738
5739        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5740        assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
5741        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5742        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5743        assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
5744        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5745        assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
5746    }
5747
5748    #[test]
5749    fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
5750        let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
5751        let mut lexer = Lexer::new(source);
5752
5753        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5754        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5755        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5756
5757        let heredoc = lexer.read_heredoc("EOF", false);
5758        assert_eq!(heredoc.content, "1\n2\n3\n");
5759    }
5760
5761    #[test]
5762    fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
5763        let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
5764        let mut lexer = Lexer::new(source);
5765
5766        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5767        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5768        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5769
5770        let heredoc = lexer.read_heredoc("EOF", false);
5771        assert_eq!(heredoc.content, "body\n");
5772    }
5773
5774    #[test]
5775    fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
5776        let source = "cat <<EOF # note \\\nbody\nEOF\n";
5777        let mut lexer = Lexer::new(source);
5778
5779        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5780        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5781        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5782
5783        let heredoc = lexer.read_heredoc("EOF", false);
5784        assert_eq!(heredoc.content, "body\n");
5785    }
5786
5787    #[test]
5788    fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
5789        let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
5790        let mut lexer = Lexer::new(source);
5791
5792        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5793        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5794        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5795        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5796
5797        let heredoc = lexer.read_heredoc("EOF", false);
5798        assert_eq!(heredoc.content, "body\n");
5799
5800        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5801    }
5802
5803    #[test]
5804    fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
5805        let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
5806        let mut lexer = Lexer::new(source);
5807
5808        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5809        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5810        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5811
5812        let heredoc = lexer.read_heredoc("EOF", false);
5813        assert_eq!(heredoc.content, "1\n");
5814
5815        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5816        assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
5817    }
5818
5819    #[test]
5820    fn test_read_heredoc_with_redirect_preserves_following_spans() {
5821        let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
5822        let mut lexer = Lexer::new(source);
5823
5824        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5825        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5826        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5827
5828        let heredoc = lexer.read_heredoc("EOF", false);
5829        assert_eq!(heredoc.content, "hello\n");
5830
5831        let redirect = lexer.next_lexed_token_with_comments().unwrap();
5832        assert_eq!(redirect.kind, TokenKind::RedirectOut);
5833        assert_eq!(redirect.span.slice(source), ">");
5834
5835        let target = lexer.next_lexed_token_with_comments().unwrap();
5836        assert_eq!(target.kind, TokenKind::Word);
5837        assert_eq!(
5838            token_text(&target, lexer.input).as_deref(),
5839            Some("file.txt")
5840        );
5841        assert_eq!(target.span.slice(source), "file.txt");
5842
5843        let newline = lexer.next_lexed_token_with_comments().unwrap();
5844        assert_eq!(newline.kind, TokenKind::Newline);
5845        assert_eq!(newline.span.slice(source), "\n");
5846
5847        let comment = lexer.next_lexed_token_with_comments().unwrap();
5848        assert_eq!(comment.kind, TokenKind::Comment);
5849        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
5850        assert_eq!(comment.span.slice(source), "# done");
5851    }
5852
5853    #[test]
5854    fn test_comment_with_unicode() {
5855        // Comment containing multi-byte UTF-8 characters
5856        let source = "# café résumé\necho ok";
5857        let mut lexer = Lexer::new(source);
5858
5859        let comment = lexer.next_lexed_token_with_comments().unwrap();
5860        assert_eq!(comment.kind, TokenKind::Comment);
5861        assert_eq!(
5862            token_text(&comment, lexer.input).as_deref(),
5863            Some(" café résumé")
5864        );
5865        // Span should cover exactly the comment bytes (including #)
5866        let start = comment.span.start.offset;
5867        let end = comment.span.end.offset;
5868        assert_eq!(start, 0);
5869        assert_eq!(&source[start..end], "# café résumé");
5870        assert!(source.is_char_boundary(start));
5871        assert!(source.is_char_boundary(end));
5872
5873        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
5874        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5875    }
5876
5877    #[test]
5878    fn test_comment_with_cjk_characters() {
5879        // CJK characters are 3-byte UTF-8; offsets must land on char boundaries
5880        let source = "# 你好世界\necho ok";
5881        let mut lexer = Lexer::new(source);
5882
5883        let comment = lexer.next_lexed_token_with_comments().unwrap();
5884        assert_eq!(comment.kind, TokenKind::Comment);
5885        assert_eq!(
5886            token_text(&comment, lexer.input).as_deref(),
5887            Some(" 你好世界")
5888        );
5889        let start = comment.span.start.offset;
5890        let end = comment.span.end.offset;
5891        assert_eq!(&source[start..end], "# 你好世界");
5892        assert!(source.is_char_boundary(start));
5893        assert!(source.is_char_boundary(end));
5894    }
5895
5896    #[test]
5897    fn test_heredoc_with_comments_inside() {
5898        // Comments inside heredoc body should NOT appear as comment tokens
5899        let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
5900        let mut lexer = Lexer::new(source);
5901
5902        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
5903        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
5904        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
5905
5906        let heredoc = lexer.read_heredoc("EOF", false);
5907        assert_eq!(heredoc.content, "# not a comment\nreal line\n");
5908
5909        // After heredoc, replayed line termination should appear before
5910        // tokens from following source lines.
5911        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
5912        let comment = lexer.next_lexed_token_with_comments().unwrap();
5913        assert_eq!(comment.kind, TokenKind::Comment);
5914        assert_eq!(
5915            token_text(&comment, lexer.input).as_deref(),
5916            Some(" real comment")
5917        );
5918    }
5919
5920    #[test]
5921    fn test_heredoc_with_hash_in_variable() {
5922        // ${var#pattern} inside heredoc should not produce comment tokens
5923        let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
5924        let mut lexer = Lexer::new(source);
5925
5926        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
5927        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
5928        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
5929
5930        let heredoc = lexer.read_heredoc("EOF", false);
5931        assert_eq!(heredoc.content, "val=${x#prefix}\n");
5932    }
5933
5934    #[test]
5935    fn test_heredoc_span_does_not_leak() {
5936        // Heredoc content span must be within source bounds and must not
5937        // overlap with content before or after.
5938        let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
5939        let mut lexer = Lexer::new(source);
5940
5941        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5942        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5943        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5944
5945        let heredoc = lexer.read_heredoc("EOF", false);
5946        let start = heredoc.content_span.start.offset;
5947        let end = heredoc.content_span.end.offset;
5948        assert!(
5949            end <= source.len(),
5950            "heredoc span end ({end}) exceeds source length ({})",
5951            source.len()
5952        );
5953        assert_eq!(&source[start..end], "hello\nworld\n");
5954
5955        // Tokens after heredoc should still parse correctly
5956        assert_next_token(&mut lexer, TokenKind::Newline, None);
5957        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5958        assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
5959    }
5960
5961    #[test]
5962    fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
5963        let source = "\
5964cat <<\\_ACEOF
5965Use these variables to override the choices made by `configure' or to help
5966it to find libraries and programs with nonstandard names/locations.
5967_ACEOF
5968ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
5969ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
5970";
5971        let mut lexer = Lexer::new(source);
5972
5973        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
5974        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
5975        let delimiter = lexer.next_lexed_token_with_comments().unwrap();
5976        assert_eq!(delimiter.kind, TokenKind::Word);
5977        assert_eq!(delimiter.span.slice(source), "\\_ACEOF");
5978
5979        let heredoc = lexer.read_heredoc("_ACEOF", false);
5980        assert_eq!(
5981            heredoc.content,
5982            "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
5983        );
5984
5985        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
5986
5987        let first = lexer.next_lexed_token_with_comments().unwrap();
5988        assert_eq!(first.kind, TokenKind::Word);
5989        assert_eq!(
5990            first.span.slice(source),
5991            "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
5992        );
5993        let first_segments = first
5994            .word()
5995            .unwrap()
5996            .segments()
5997            .map(|segment| {
5998                (
5999                    segment.kind(),
6000                    segment.as_str().to_string(),
6001                    segment.span().map(|span| span.slice(source).to_string()),
6002                )
6003            })
6004            .collect::<Vec<_>>();
6005        assert_eq!(
6006            first_segments,
6007            vec![
6008                (
6009                    LexedWordSegmentKind::Plain,
6010                    "ac_dir_suffix=/".to_string(),
6011                    Some("ac_dir_suffix=/".to_string()),
6012                ),
6013                (
6014                    LexedWordSegmentKind::Plain,
6015                    "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
6016                    Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
6017                ),
6018            ]
6019        );
6020
6021        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6022
6023        let second = lexer.next_lexed_token_with_comments().unwrap();
6024        assert_eq!(second.kind, TokenKind::Word);
6025        assert_eq!(
6026            second.span.slice(source),
6027            "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6028        );
6029        let second_segments = second
6030            .word()
6031            .unwrap()
6032            .segments()
6033            .map(|segment| {
6034                (
6035                    segment.kind(),
6036                    segment.as_str().to_string(),
6037                    segment.span().map(|span| span.slice(source).to_string()),
6038                )
6039            })
6040            .collect::<Vec<_>>();
6041        assert_eq!(
6042            second_segments,
6043            vec![
6044                (
6045                    LexedWordSegmentKind::Plain,
6046                    "ac_top_builddir_sub=".to_string(),
6047                    Some("ac_top_builddir_sub=".to_string()),
6048                ),
6049                (
6050                    LexedWordSegmentKind::Plain,
6051                    "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
6052                    Some(
6053                        "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6054                            .to_string(),
6055                    ),
6056                ),
6057            ]
6058        );
6059    }
6060
6061    #[test]
6062    fn test_heredoc_with_unicode_content() {
6063        // Heredoc containing multi-byte characters; spans must be on char boundaries
6064        let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
6065        let mut lexer = Lexer::new(source);
6066
6067        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6068        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6069        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6070
6071        let heredoc = lexer.read_heredoc("EOF", false);
6072        assert_eq!(heredoc.content, "# 你好\ncafé\n");
6073        let start = heredoc.content_span.start.offset;
6074        let end = heredoc.content_span.end.offset;
6075        assert!(
6076            source.is_char_boundary(start),
6077            "heredoc span start ({start}) not on char boundary"
6078        );
6079        assert!(
6080            source.is_char_boundary(end),
6081            "heredoc span end ({end}) not on char boundary"
6082        );
6083        assert_eq!(&source[start..end], "# 你好\ncafé\n");
6084    }
6085
6086    #[test]
6087    fn test_assoc_compound_assignment() {
6088        // declare -A m=([foo]="bar" [baz]="qux") should keep the compound
6089        // assignment as a single Word token
6090        let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6091        assert_next_token(
6092            &mut lexer,
6093            TokenKind::Word,
6094            Some(r#"m=([foo]="bar" [baz]="qux")"#),
6095        );
6096        assert!(lexer.next_lexed_token().is_none());
6097    }
6098
6099    #[test]
6100    fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6101        let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6102        let mut lexer = Lexer::new(source);
6103
6104        let token = lexer.next_lexed_token().unwrap();
6105        assert_eq!(token.kind, TokenKind::Word);
6106        assert_eq!(token.span.slice(source), source);
6107        assert!(lexer.next_lexed_token().is_none());
6108    }
6109
6110    #[test]
6111    fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6112        let source = r#"foo\_bar@(baz|qux)"#;
6113        let mut lexer = Lexer::new(source);
6114
6115        let token = lexer.next_lexed_token().unwrap();
6116        assert_eq!(token.kind, TokenKind::Word);
6117        assert_eq!(token.span.slice(source), source);
6118        assert!(lexer.next_lexed_token().is_none());
6119    }
6120
6121    #[test]
6122    fn test_indexed_array_not_collapsed() {
6123        // arr=("hello world") should NOT be collapsed — parser handles
6124        // quoted elements token-by-token via the LeftParen path
6125        let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6126        assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6127        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6128    }
6129
6130    #[test]
6131    fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
6132        let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
6133        let mut lexer = Lexer::new(source);
6134
6135        assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
6136        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6137
6138        let token = lexer.next_lexed_token().unwrap();
6139        assert_eq!(token.kind, TokenKind::Word);
6140        assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);
6141
6142        let word = token.word().unwrap();
6143        let segments: Vec<_> = word
6144            .segments()
6145            .map(|segment| (segment.kind(), segment.as_str().to_string()))
6146            .collect();
6147        assert_eq!(
6148            segments,
6149            vec![
6150                (
6151                    LexedWordSegmentKind::DoubleQuoted,
6152                    "$plugin_dir".to_string()
6153                ),
6154                (LexedWordSegmentKind::Plain, "/*".to_string()),
6155                (LexedWordSegmentKind::Plain, "(:t)".to_string()),
6156            ]
6157        );
6158
6159        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6160        assert!(lexer.next_lexed_token().is_none());
6161    }
6162
6163    #[test]
6164    fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
6165        let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
6166        let mut lexer = Lexer::new(source);
6167
6168        assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
6169        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6170
6171        let token = lexer.next_lexed_token().unwrap();
6172        assert_eq!(token.kind, TokenKind::Word);
6173        assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);
6174
6175        let word = token.word().unwrap();
6176        let segments: Vec<_> = word
6177            .segments()
6178            .map(|segment| (segment.kind(), segment.as_str().to_string()))
6179            .collect();
6180        assert_eq!(
6181            segments,
6182            vec![
6183                (
6184                    LexedWordSegmentKind::DoubleQuoted,
6185                    "$__GREP_CACHE_FILE".to_string()
6186                ),
6187                (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
6188            ]
6189        );
6190
6191        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6192        assert!(lexer.next_lexed_token().is_none());
6193    }
6194
6195    #[test]
6196    fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6197        let source = r#"$dir/${~pats}(N)"#;
6198        let mut lexer = Lexer::new(source);
6199
6200        let token = lexer.next_lexed_token().unwrap();
6201        assert_eq!(token.kind, TokenKind::Word);
6202        assert_eq!(token.span.slice(source), source);
6203        assert!(lexer.next_lexed_token().is_none());
6204    }
6205
6206    #[test]
6207    fn test_dollar_word_does_not_absorb_function_parens() {
6208        let mut lexer = Lexer::new(r#"foo$x()"#);
6209
6210        assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6211        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6212        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6213        assert!(lexer.next_lexed_token().is_none());
6214    }
6215
6216    #[test]
6217    fn test_command_substitution_word_does_not_absorb_function_parens() {
6218        let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6219
6220        assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6221        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6222        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6223        assert!(lexer.next_lexed_token().is_none());
6224    }
6225
6226    /// Regression test for fuzz crash: single digit at EOF should not panic
6227    /// (crash-13c5f6f887a11b2296d67f9857975d63b205ac4b)
6228    #[test]
6229    fn test_digit_at_eof_no_panic() {
6230        // A lone digit with no following redirect operator must not panic
6231        let mut lexer = Lexer::new("2");
6232        let token = lexer.next_lexed_token();
6233        assert!(token.is_some());
6234    }
6235
6236    /// Issue #599: Nested ${...} inside unquoted ${...} must be a single token.
6237    #[test]
6238    fn test_nested_brace_expansion_single_token() {
6239        // ${arr[${#arr[@]} - 1]} should be ONE word token, not split at inner }
6240        let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6241        assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6242        // No more tokens — everything was consumed
6243        assert!(lexer.next_lexed_token().is_none());
6244    }
6245
6246    /// Simple ${var} still works after brace depth change.
6247    #[test]
6248    fn test_simple_brace_expansion_unchanged() {
6249        let mut lexer = Lexer::new("${foo}");
6250        assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6251        assert!(lexer.next_lexed_token().is_none());
6252    }
6253
6254    #[test]
6255    fn test_nvm_fixture_lexes_without_stalling() {
6256        let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6257        let mut lexer = Lexer::new(input);
6258        let mut tokens = 0usize;
6259
6260        while lexer.next_lexed_token().is_some() {
6261            tokens += 1;
6262            assert!(
6263                tokens < 100_000,
6264                "lexer should continue making progress on the nvm fixture"
6265            );
6266        }
6267
6268        assert!(tokens > 0, "nvm fixture should produce at least one token");
6269    }
6270
6271    #[test]
6272    fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
6273        let input = concat!(
6274            "case \"${_input_type:-}\" in\n",
6275            "  html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
6276            "  org)  _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
6277            "esac\n",
6278        );
6279
6280        assert_non_newline_tokens_stay_on_one_line(input);
6281
6282        let mut lexer = Lexer::new(input);
6283        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6284            .map(|token| (token.kind, token_text(&token, input)))
6285            .collect::<Vec<_>>();
6286        assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
6287        assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
6288    }
6289
6290    #[test]
6291    fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6292        let input = concat!(
6293            "case $2 in\n",
6294            "  cygwin*) bin='cygwin32/bin' ;|\n",
6295            "esac\n",
6296        );
6297
6298        let mut lexer = Lexer::new(input);
6299        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6300            .map(|token| (token.kind, token_text(&token, input)))
6301            .collect::<Vec<_>>();
6302
6303        assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6304        assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6305        assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6306    }
6307
6308    #[test]
6309    fn test_inline_if_with_array_append_stays_line_local() {
6310        let input = concat!(
6311            "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
6312            "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
6313        );
6314
6315        assert_non_newline_tokens_stay_on_one_line(input);
6316    }
6317
6318    #[test]
6319    fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6320        let source = "unsetopt interactive_comments\n#literal\n";
6321        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6322        let mut lexer = Lexer::with_profile(source, &profile);
6323
6324        assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6325        assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6326        assert_next_token(&mut lexer, TokenKind::Newline, None);
6327        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6328    }
6329
6330    #[test]
6331    fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
6332        let source = "setopt rc_quotes\nprint 'a''b'\n";
6333        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6334        let mut lexer = Lexer::with_profile(source, &profile);
6335
6336        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6337        assert_next_token(&mut lexer, TokenKind::Word, Some("rc_quotes"));
6338        assert_next_token(&mut lexer, TokenKind::Newline, None);
6339        assert_next_token(&mut lexer, TokenKind::Word, Some("print"));
6340        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("a'b"));
6341    }
6342
6343    #[test]
6344    fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
6345        let source = "setopt ignore_braces\n{ echo }\n";
6346        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6347        let mut lexer = Lexer::with_profile(source, &profile);
6348
6349        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6350        assert_next_token(&mut lexer, TokenKind::Word, Some("ignore_braces"));
6351        assert_next_token(&mut lexer, TokenKind::Newline, None);
6352        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
6353        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6354        assert_next_token(&mut lexer, TokenKind::Word, Some("}"));
6355    }
6356
6357    #[test]
6358    fn test_heredoc_in_arithmetic_fuzz_crash() {
6359        // Regression test: the fuzzer found that heredoc re-injection inside
6360        // arithmetic context can push self.offset past self.input.len(),
6361        // causing a panic in read_unquoted_segment's borrowed-slice path.
6362        let data: &[u8] = &[
6363            35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
6364            40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
6365            33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
6366            119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
6367            0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
6368            119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
6369            122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
6370            122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
6371            122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
6372            49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
6373            119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
6374            122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
6375            61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
6376            32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
6377            122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
6378            39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
6379            122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
6380            32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
6381            0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
6382        ];
6383        let input = std::str::from_utf8(data).unwrap();
6384        let script = format!("echo $(({input}))\n");
6385        // Must not panic.
6386        let _ = crate::parser::Parser::new(&script).parse();
6387    }
6388}