1use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Compact bit set of per-token attributes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Bit set when the token's text was rewritten and no longer matches the
    /// raw source bytes.
    const COOKED_TEXT: u8 = 1 << 0;
    /// Bit set when the token was injected by the lexer rather than read
    /// directly from the input.
    const SYNTHETIC: u8 = 1 << 1;

    /// Tests whether every bit in `mask` is present.
    const fn contains(self, mask: u8) -> bool {
        self.0 & mask != 0
    }

    /// No flags set.
    const fn empty() -> Self {
        Self(0)
    }

    /// Only the cooked-text flag set.
    const fn cooked_text() -> Self {
        Self(Self::COOKED_TEXT)
    }

    /// Copy of `self` with the synthetic flag added.
    pub(crate) const fn with_synthetic(self) -> Self {
        Self(self.0 | Self::SYNTHETIC)
    }

    /// True when the cooked-text flag is present.
    pub(crate) const fn has_cooked_text(self) -> bool {
        self.contains(Self::COOKED_TEXT)
    }

    /// True when the synthetic flag is present.
    pub(crate) const fn is_synthetic(self) -> bool {
        self.contains(Self::SYNTHETIC)
    }
}
40
41#[derive(Debug, Clone, PartialEq, Eq)]
42pub(crate) enum TokenText<'a> {
43 Borrowed(&'a str),
44 Shared {
45 source: Arc<str>,
46 range: Range<usize>,
47 },
48 Owned(String),
49}
50
51impl TokenText<'_> {
52 pub(crate) fn as_str(&self) -> &str {
53 match self {
54 Self::Borrowed(text) => text,
55 Self::Shared { source, range } => &source[range.clone()],
56 Self::Owned(text) => text,
57 }
58 }
59
60 fn into_owned<'a>(self) -> TokenText<'a> {
61 match self {
62 Self::Borrowed(text) => TokenText::Owned(text.to_string()),
63 Self::Shared { source, range } => TokenText::Shared { source, range },
64 Self::Owned(text) => TokenText::Owned(text),
65 }
66 }
67
68 fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
69 match self {
70 Self::Borrowed(text) => span
71 .filter(|span| span.end.offset <= source.len())
72 .map_or_else(
73 || TokenText::Owned(text.to_string()),
74 |span| TokenText::Shared {
75 source: Arc::clone(source),
76 range: span.start.offset..span.end.offset,
77 },
78 ),
79 Self::Shared { source, range } => TokenText::Shared { source, range },
80 Self::Owned(text) => TokenText::Owned(text),
81 }
82 }
83}
84
/// Classifies how a word segment was quoted in the source.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexedWordSegmentKind {
    /// Unquoted text.
    Plain,
    /// Text from a `'...'` region.
    SingleQuoted,
    /// Text from a `$'...'` region.
    DollarSingleQuoted,
    /// Text from a `"..."` region.
    DoubleQuoted,
    /// Text from a `$"..."` region.
    DollarDoubleQuoted,
    /// Mixed or other quoting collapsed into one segment.
    Composite,
}
101
/// One quoting-delimited piece of a lexed word.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedWordSegment<'a> {
    // Quoting style of this segment.
    kind: LexedWordSegmentKind,
    // Segment text (borrowed, shared, or owned).
    text: TokenText<'a>,
    // Span of the segment's text content, when known.
    span: Option<Span>,
    // Span including surrounding quote characters; accessor falls back to `span`.
    wrapper_span: Option<Span>,
}
110
111impl<'a> LexedWordSegment<'a> {
112 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
113 Self {
114 kind,
115 text: TokenText::Borrowed(text),
116 span,
117 wrapper_span: span,
118 }
119 }
120
121 fn borrowed_with_spans(
122 kind: LexedWordSegmentKind,
123 text: &'a str,
124 span: Option<Span>,
125 wrapper_span: Option<Span>,
126 ) -> Self {
127 Self {
128 kind,
129 text: TokenText::Borrowed(text),
130 span,
131 wrapper_span,
132 }
133 }
134
135 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
136 Self {
137 kind,
138 text: TokenText::Owned(text),
139 span: None,
140 wrapper_span: None,
141 }
142 }
143
144 fn owned_with_spans(
145 kind: LexedWordSegmentKind,
146 text: String,
147 span: Option<Span>,
148 wrapper_span: Option<Span>,
149 ) -> Self {
150 Self {
151 kind,
152 text: TokenText::Owned(text),
153 span,
154 wrapper_span,
155 }
156 }
157
158 pub fn as_str(&self) -> &str {
160 self.text.as_str()
161 }
162
163 pub(crate) const fn text_is_source_backed(&self) -> bool {
164 matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
165 }
166
167 pub const fn kind(&self) -> LexedWordSegmentKind {
169 self.kind
170 }
171
172 pub const fn span(&self) -> Option<Span> {
174 self.span
175 }
176
177 pub fn wrapper_span(&self) -> Option<Span> {
179 self.wrapper_span.or(self.span)
180 }
181
182 fn rebased(mut self, base: Position) -> Self {
183 self.span = self.span.map(|span| span.rebased(base));
184 self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
185 self
186 }
187
188 fn into_owned<'b>(self) -> LexedWordSegment<'b> {
189 LexedWordSegment {
190 kind: self.kind,
191 text: self.text.into_owned(),
192 span: self.span,
193 wrapper_span: self.wrapper_span,
194 }
195 }
196
197 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
198 LexedWordSegment {
199 kind: self.kind,
200 text: self.text.into_shared(source, self.span),
201 span: self.span,
202 wrapper_span: self.wrapper_span,
203 }
204 }
205}
206
/// A lexed word: one primary segment plus any directly adjoining segments.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedWord<'a> {
    // First segment of the word.
    primary_segment: LexedWordSegment<'a>,
    // Further segments concatenated after the primary (mixed quoting).
    trailing_segments: Vec<LexedWordSegment<'a>>,
}
213
214impl<'a> LexedWord<'a> {
215 fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
216 Self {
217 primary_segment,
218 trailing_segments: Vec::new(),
219 }
220 }
221
222 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
223 Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
224 }
225
226 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
227 Self::from_segment(LexedWordSegment::owned(kind, text))
228 }
229
230 fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
231 self.trailing_segments.push(segment);
232 }
233
234 pub fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
236 std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
237 }
238
239 pub fn text(&self) -> Option<&str> {
241 self.single_segment().map(LexedWordSegment::as_str)
242 }
243
244 pub fn joined_text(&self) -> String {
246 let mut text = String::new();
247 for segment in self.segments() {
248 text.push_str(segment.as_str());
249 }
250 text
251 }
252
253 pub fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
255 self.trailing_segments
256 .is_empty()
257 .then_some(&self.primary_segment)
258 }
259
260 fn has_cooked_text(&self) -> bool {
261 self.segments()
262 .any(|segment| matches!(segment.text, TokenText::Owned(_)))
263 }
264
265 fn rebased(mut self, base: Position) -> Self {
266 self.primary_segment = self.primary_segment.rebased(base);
267 self.trailing_segments = self
268 .trailing_segments
269 .into_iter()
270 .map(|segment| segment.rebased(base))
271 .collect();
272 self
273 }
274
275 fn into_owned<'b>(self) -> LexedWord<'b> {
276 LexedWord {
277 primary_segment: self.primary_segment.into_owned(),
278 trailing_segments: self
279 .trailing_segments
280 .into_iter()
281 .map(LexedWordSegment::into_owned)
282 .collect(),
283 }
284 }
285
286 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
287 LexedWord {
288 primary_segment: self.primary_segment.into_shared(source),
289 trailing_segments: self
290 .trailing_segments
291 .into_iter()
292 .map(|segment| segment.into_shared(source))
293 .collect(),
294 }
295 }
296}
297
/// Recoverable lexer errors attached to error tokens.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexerErrorKind {
    CommandSubstitution,
    BacktickSubstitution,
    SingleQuote,
    DoubleQuote,
}

impl LexerErrorKind {
    /// Human-readable description of the error.
    pub const fn message(self) -> &'static str {
        match self {
            Self::SingleQuote => "unterminated single quote",
            Self::DoubleQuote => "unterminated double quote",
            Self::CommandSubstitution => "unterminated command substitution",
            Self::BacktickSubstitution => "unterminated backtick substitution",
        }
    }
}
322
/// Kind-specific data carried by a `LexedToken`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenPayload<'a> {
    /// No extra data (punctuation, operators, comments).
    None,
    /// A word-like token's segmented text.
    Word(LexedWord<'a>),
    /// A single file-descriptor number.
    Fd(i32),
    /// A pair of file-descriptor numbers (source, destination).
    FdPair(i32, i32),
    /// A recoverable lexing error.
    Error(LexerErrorKind),
}
331
/// A single token produced by the lexer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedToken<'a> {
    /// Syntactic kind of the token.
    pub kind: TokenKind,
    /// Source span the token covers.
    pub span: Span,
    // Per-token attribute bits (cooked text, synthetic).
    pub(crate) flags: TokenFlags,
    // Kind-specific data.
    payload: TokenPayload<'a>,
}
342
impl<'a> LexedToken<'a> {
    /// Maps a word-producing token kind to the segment kind of its text.
    fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
        match kind {
            TokenKind::Word => LexedWordSegmentKind::Plain,
            TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
            TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
            _ => LexedWordSegmentKind::Composite,
        }
    }

    /// Data-free token (operators, delimiters); the span is stamped later.
    pub(crate) fn punctuation(kind: TokenKind) -> Self {
        Self {
            kind,
            span: Span::new(),
            flags: TokenFlags::empty(),
            payload: TokenPayload::None,
        }
    }

    /// Word token; sets the cooked-text flag when any segment owns its text.
    fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
        let flags = if word.has_cooked_text() {
            TokenFlags::cooked_text()
        } else {
            TokenFlags::empty()
        };

        Self {
            kind,
            span: Span::new(),
            flags,
            payload: TokenPayload::Word(word),
        }
    }

    /// Word token borrowing its text from the source.
    fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
        Self::with_word_payload(
            kind,
            LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
        )
    }

    /// Word token with owned (rewritten) text.
    fn owned_word(kind: TokenKind, text: String) -> Self {
        Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
    }

    /// Comment token with no payload.
    fn comment() -> Self {
        Self {
            kind: TokenKind::Comment,
            span: Span::new(),
            flags: TokenFlags::empty(),
            payload: TokenPayload::None,
        }
    }

    /// Token carrying a single file-descriptor number.
    fn fd(kind: TokenKind, fd: i32) -> Self {
        Self {
            kind,
            span: Span::new(),
            flags: TokenFlags::empty(),
            payload: TokenPayload::Fd(fd),
        }
    }

    /// Token carrying a source/destination file-descriptor pair.
    fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
        Self {
            kind,
            span: Span::new(),
            flags: TokenFlags::empty(),
            payload: TokenPayload::FdPair(src_fd, dst_fd),
        }
    }

    /// Error token wrapping a recoverable lexer error.
    fn error(kind: LexerErrorKind) -> Self {
        Self {
            kind: TokenKind::Error,
            span: Span::new(),
            flags: TokenFlags::empty(),
            payload: TokenPayload::Error(kind),
        }
    }

    /// Returns the token with its span replaced.
    pub(crate) fn with_span(mut self, span: Span) -> Self {
        self.span = span;
        self
    }

    /// Shifts the token's span, and any word payload spans, by `base`.
    pub(crate) fn rebased(mut self, base: Position) -> Self {
        self.span = self.span.rebased(base);
        self.payload = match self.payload {
            TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
            payload => payload,
        };
        self
    }

    /// Marks the token as synthesized by the lexer.
    pub(crate) fn with_synthetic_flag(mut self) -> Self {
        self.flags = self.flags.with_synthetic();
        self
    }

    /// Detaches the token from the input lifetime by copying borrowed text.
    pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
        // Non-word payloads carry no borrowed data but must still be rebuilt
        // to change the lifetime parameter.
        let payload = match self.payload {
            TokenPayload::None => TokenPayload::None,
            TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
            TokenPayload::Error(kind) => TokenPayload::Error(kind),
        };

        LexedToken {
            kind: self.kind,
            span: self.span,
            flags: self.flags,
            payload,
        }
    }

    /// Re-homes borrowed word text onto a shared `Arc` copy of the source.
    pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
        let payload = match self.payload {
            TokenPayload::None => TokenPayload::None,
            TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
            TokenPayload::Error(kind) => TokenPayload::Error(kind),
        };

        LexedToken {
            kind: self.kind,
            span: self.span,
            flags: self.flags,
            payload,
        }
    }

    /// Single-segment word text; `None` for non-word-like tokens or
    /// multi-segment words.
    pub fn word_text(&self) -> Option<&str> {
        self.kind
            .is_word_like()
            .then_some(())
            .and_then(|_| match &self.payload {
                TokenPayload::Word(word) => word.text(),
                _ => None,
            })
    }

    /// Joined text of all word segments, for word-like tokens only.
    pub fn word_string(&self) -> Option<String> {
        self.kind
            .is_word_like()
            .then_some(())
            .and_then(|_| match &self.payload {
                TokenPayload::Word(word) => Some(word.joined_text()),
                _ => None,
            })
    }

    /// The word payload, regardless of token kind.
    pub fn word(&self) -> Option<&LexedWord<'a>> {
        match &self.payload {
            TokenPayload::Word(word) => Some(word),
            _ => None,
        }
    }

    /// Raw source text covered by the token's span — only for word-like,
    /// non-cooked, non-synthetic tokens whose span lies within `source`.
    pub fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
        if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
            return None;
        }

        (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
            .then(|| &source[self.span.start.offset..self.span.end.offset])
    }

    /// Descriptor number for `Fd` payloads.
    pub fn fd_value(&self) -> Option<i32> {
        match self.payload {
            TokenPayload::Fd(fd) => Some(fd),
            _ => None,
        }
    }

    /// Descriptor pair for `FdPair` payloads.
    pub fn fd_pair_value(&self) -> Option<(i32, i32)> {
        match self.payload {
            TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
            _ => None,
        }
    }

    /// Error detail for `Error` payloads.
    pub fn error_kind(&self) -> Option<LexerErrorKind> {
        match self.payload {
            TokenPayload::Error(kind) => Some(kind),
            _ => None,
        }
    }
}
541
/// A here-document body collected after its operator was lexed.
#[derive(Debug, Clone, PartialEq)]
pub struct HeredocRead {
    /// The body text of the here-document.
    pub content: String,
    /// Span covering the body in the source.
    pub content_span: Span,
}
550
/// Default limit for nested substitutions (see `Lexer::max_subst_depth`).
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
554
555#[derive(Clone, Debug)]
556struct Cursor<'a> {
557 rest: &'a str,
558}
559
560impl<'a> Cursor<'a> {
561 fn new(source: &'a str) -> Self {
562 Self { rest: source }
563 }
564
565 fn first(&self) -> Option<char> {
566 self.rest.chars().next()
567 }
568
569 fn second(&self) -> Option<char> {
570 let mut chars = self.rest.chars();
571 chars.next()?;
572 chars.next()
573 }
574
575 fn third(&self) -> Option<char> {
576 let mut chars = self.rest.chars();
577 chars.next()?;
578 chars.next()?;
579 chars.next()
580 }
581
582 fn bump(&mut self) -> Option<char> {
583 let ch = self.first()?;
584 self.rest = &self.rest[ch.len_utf8()..];
585 Some(ch)
586 }
587
588 fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
589 let start = self.rest;
590 let mut end = 0;
591
592 for ch in start.chars() {
593 if !predicate(ch) {
594 break;
595 }
596 end += ch.len_utf8();
597 }
598
599 self.rest = &start[end..];
600 &start[..end]
601 }
602
603 fn rest(&self) -> &'a str {
604 self.rest
605 }
606
607 fn skip_bytes(&mut self, count: usize) {
608 self.rest = &self.rest[count..];
609 }
610
611 fn find_byte(&self, byte: u8) -> Option<usize> {
612 memchr(byte, self.rest.as_bytes())
613 }
614}
615
/// Byte-offset to line/column translator backed by a precomputed line-start
/// table plus a single-entry cache for forward queries.
#[derive(Clone, Debug)]
struct PositionMap<'a> {
    // The full source text being mapped.
    source: &'a str,
    // Byte offset of the start of each line; first entry is always 0.
    line_starts: Vec<usize>,
    // Most recently computed position, reused for forward queries.
    cached: Position,
}
622
/// Counters collected during lexing when the `benchmarking` feature is on.
#[cfg(feature = "benchmarking")]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct LexerBenchmarkCounters {
    /// Number of calls made to `Lexer::current_position`.
    pub(crate) current_position_calls: u64,
}
628
629impl<'a> PositionMap<'a> {
630 fn new(source: &'a str) -> Self {
631 let mut line_starts =
632 Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
633 line_starts.push(0);
634 line_starts.extend(
635 source
636 .bytes()
637 .enumerate()
638 .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
639 );
640
641 Self {
642 source,
643 line_starts,
644 cached: Position::new(),
645 }
646 }
647
648 fn position(&mut self, offset: usize) -> Position {
649 if offset == self.cached.offset {
650 return self.cached;
651 }
652
653 let position = if offset > self.cached.offset && offset <= self.source.len() {
654 Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
655 } else {
656 self.position_uncached(offset)
657 };
658 self.cached = position;
659 position
660 }
661
662 fn position_uncached(&self, offset: usize) -> Position {
663 let offset = offset.min(self.source.len());
664 let line_index = self
665 .line_starts
666 .partition_point(|start| *start <= offset)
667 .saturating_sub(1);
668 let line_start = self.line_starts[line_index];
669 let line_text = &self.source[line_start..offset];
670 let column = if line_text.is_ascii() {
671 line_text.len() + 1
672 } else {
673 line_text.chars().count() + 1
674 };
675
676 Position {
677 line: line_index + 1,
678 column,
679 offset,
680 }
681 }
682
683 fn advance_from(mut position: Position, text: &str) -> Position {
684 position.offset += text.len();
685 let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
686 if newline_count == 0 {
687 position.column += if text.is_ascii() {
688 text.len()
689 } else {
690 text.chars().count()
691 };
692 return position;
693 }
694
695 position.line += newline_count;
696 let tail_start = memrchr(b'\n', text.as_bytes())
697 .map(|index| index + 1)
698 .unwrap_or_default();
699 let tail = &text[tail_start..];
700 position.column = if tail.is_ascii() {
701 tail.len() + 1
702 } else {
703 tail.chars().count() + 1
704 };
705 position
706 }
707}
708
/// Streaming shell lexer over a borrowed input string.
#[derive(Clone)]
pub struct Lexer<'a> {
    // Full original input; methods slice it by span offsets.
    #[allow(dead_code)]
    input: &'a str,
    // Byte offset of the next character to lex.
    offset: usize,
    // Window over the not-yet-consumed input.
    cursor: Cursor<'a>,
    // Byte-offset -> line/column translator.
    position_map: PositionMap<'a>,
    // Characters pushed back for re-lexing ahead of the cursor.
    reinject_buf: VecDeque<char>,
    // Offset to restore once `reinject_buf` drains, if any.
    reinject_resume_offset: Option<usize>,
    // Limit for nested substitutions.
    max_subst_depth: usize,
    // Zsh option state at the start of input, when the profile provides one.
    initial_zsh_options: Option<ZshOptionState>,
    // Precomputed option-state changes keyed by source offset (zsh only).
    zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    // Index of the next timeline entry not yet applied.
    zsh_timeline_index: usize,
    #[cfg(feature = "benchmarking")]
    benchmark_counters: Option<LexerBenchmarkCounters>,
}
731
732impl<'a> Lexer<'a> {
    /// Lexer over `input` with the default substitution depth and a native
    /// Bash profile.
    pub fn new(input: &'a str) -> Self {
        Self::with_max_subst_depth_and_profile(
            input,
            DEFAULT_MAX_SUBST_DEPTH,
            &ShellProfile::native(super::ShellDialect::Bash),
            None,
        )
    }
742
    /// Lexer over `input` with a caller-chosen substitution depth limit and a
    /// native Bash profile.
    pub fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
        Self::with_max_subst_depth_and_profile(
            input,
            max_depth,
            &ShellProfile::native(super::ShellDialect::Bash),
            None,
        )
    }
753
754 pub fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
756 let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
757 .then(|| ZshOptionTimeline::build(input, shell_profile))
758 .flatten()
759 .map(Arc::new);
760 Self::with_max_subst_depth_and_profile(
761 input,
762 DEFAULT_MAX_SUBST_DEPTH,
763 shell_profile,
764 zsh_timeline,
765 )
766 }
767
    /// Fully-parameterized constructor shared by the public entry points.
    pub(crate) fn with_max_subst_depth_and_profile(
        input: &'a str,
        max_depth: usize,
        shell_profile: &ShellProfile,
        zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    ) -> Self {
        Self {
            input,
            offset: 0,
            cursor: Cursor::new(input),
            position_map: PositionMap::new(input),
            reinject_buf: VecDeque::new(),
            reinject_resume_offset: None,
            max_subst_depth: max_depth,
            initial_zsh_options: shell_profile.zsh_options().cloned(),
            zsh_timeline,
            zsh_timeline_index: 0,
            #[cfg(feature = "benchmarking")]
            benchmark_counters: None,
        }
    }
789
    /// Current position, computed without touching the cache (usable through
    /// a shared reference).
    pub fn position(&self) -> Position {
        self.position_map.position_uncached(self.offset)
    }

    /// Current position via the caching fast path; also counted when
    /// benchmarking is enabled.
    fn current_position(&mut self) -> Position {
        #[cfg(feature = "benchmarking")]
        self.maybe_record_current_position_call();
        self.position_map.position(self.offset)
    }
800
    /// Turns on collection of lexer benchmark counters.
    #[cfg(feature = "benchmarking")]
    pub(crate) fn enable_benchmark_counters(&mut self) {
        self.benchmark_counters = Some(LexerBenchmarkCounters::default());
    }

    /// Snapshot of the counters; zeroed when collection was never enabled.
    #[cfg(feature = "benchmarking")]
    pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
        self.benchmark_counters.unwrap_or_default()
    }

    /// Bumps the `current_position` call counter when collection is enabled.
    #[cfg(feature = "benchmarking")]
    fn maybe_record_current_position_call(&mut self) {
        if let Some(counters) = &mut self.benchmark_counters {
            counters.current_position_calls += 1;
        }
    }
817
    /// Once the reinjection buffer drains, jumps `offset` back to where raw
    /// source scanning should resume.
    fn sync_offset_to_cursor(&mut self) {
        if self.reinject_buf.is_empty()
            && let Some(offset) = self.reinject_resume_offset.take()
        {
            self.offset = offset;
        }
    }
825
    /// Convenience wrapper returning only the next token's kind.
    pub fn next_token_kind(&mut self) -> Option<TokenKind> {
        self.next_lexed_token().map(|token| token.kind)
    }
831
832 fn peek_char(&mut self) -> Option<char> {
833 self.sync_offset_to_cursor();
834 if let Some(&ch) = self.reinject_buf.front() {
835 Some(ch)
836 } else {
837 self.cursor.first()
838 }
839 }
840
841 fn advance(&mut self) -> Option<char> {
842 self.sync_offset_to_cursor();
843 let ch = if !self.reinject_buf.is_empty() {
844 self.reinject_buf.pop_front()
845 } else {
846 self.cursor.bump()
847 };
848 if let Some(c) = ch {
849 self.offset += c.len_utf8();
850 }
851 ch
852 }
853
    /// Iterator over upcoming characters: pending reinjected characters
    /// first, then the unread source.
    fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
        self.reinject_buf
            .iter()
            .copied()
            .chain(self.cursor.rest().chars())
    }
860
    /// Second upcoming character. The match on the reinjection buffer length
    /// is a fast path: it answers directly from the buffer or the cursor
    /// without building the chained lookahead iterator.
    fn second_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.second(),
            1 => self.cursor.first(),
            _ => self.reinject_buf.get(1).copied(),
        }
    }

    /// Third upcoming character (same fast-path structure as `second_char`).
    fn third_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.third(),
            1 => self.cursor.second(),
            2 => self.cursor.first(),
            _ => self.reinject_buf.get(2).copied(),
        }
    }

    /// Fourth upcoming character (same fast-path structure as `second_char`).
    fn fourth_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.rest().chars().nth(3),
            1 => self.cursor.third(),
            2 => self.cursor.second(),
            3 => self.cursor.first(),
            _ => self.reinject_buf.get(3).copied(),
        }
    }
887
    /// Advances both the offset and the cursor by `byte_len` raw bytes. Must
    /// not be called while reinjected characters are pending.
    fn consume_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.sync_offset_to_cursor();
        self.offset += byte_len;
        self.cursor.skip_bytes(byte_len);
    }

    /// Advances only the offset by `byte_len`.
    // NOTE(review): the cursor is not moved here — callers appear to advance
    // it separately after scanning; confirm at call sites.
    fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.offset += byte_len;
    }
899
    /// Consumes `count` characters. When no reinjected characters are
    /// pending this takes the raw-byte fast path, which is valid only for
    /// ASCII input (one char == one byte).
    fn consume_ascii_chars(&mut self, count: usize) {
        if self.reinject_buf.is_empty() {
            self.consume_source_bytes(count);
            return;
        }

        // Slow path: drain through the reinjection-aware interface.
        for _ in 0..count {
            self.advance();
        }
    }
910
911 fn source_horizontal_whitespace_len(&self) -> usize {
912 self.cursor
913 .rest()
914 .as_bytes()
915 .iter()
916 .take_while(|byte| matches!(**byte, b' ' | b'\t'))
917 .count()
918 }
919
920 fn source_ascii_plain_word_len(&self) -> usize {
921 self.cursor
922 .rest()
923 .as_bytes()
924 .iter()
925 .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
926 .count()
927 }
928
929 fn find_double_quote_special(source: &str) -> Option<usize> {
930 source
931 .as_bytes()
932 .iter()
933 .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
934 }
935
936 fn ensure_capture_from_source(
937 &self,
938 capture: &mut Option<String>,
939 start: Position,
940 end: Position,
941 ) {
942 if capture.is_none() {
943 *capture = Some(self.input[start.offset..end.offset].to_string());
944 }
945 }
946
947 fn push_capture_char(capture: &mut Option<String>, ch: char) {
948 if let Some(text) = capture.as_mut() {
949 text.push(ch);
950 }
951 }
952
953 fn push_capture_str(capture: &mut Option<String>, text: &str) {
954 if let Some(current) = capture.as_mut() {
955 current.push_str(text);
956 }
957 }
958
    /// Option state in effect at the current offset: replays timeline entries
    /// whose offset is at or before `self.offset` and returns the most recent
    /// state; falls back to the initial state when no entry applies or no
    /// timeline exists.
    fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
        if let Some(timeline) = self.zsh_timeline.as_ref() {
            // The index only moves forward; lexing proceeds monotonically.
            while self.zsh_timeline_index < timeline.entries.len()
                && timeline.entries[self.zsh_timeline_index].offset <= self.offset
            {
                self.zsh_timeline_index += 1;
            }
            return if self.zsh_timeline_index == 0 {
                self.initial_zsh_options.as_ref()
            } else {
                Some(&timeline.entries[self.zsh_timeline_index - 1].state)
            };
        }

        self.initial_zsh_options.as_ref()
    }
975
976 fn comments_enabled(&mut self) -> bool {
977 !self
978 .current_zsh_options()
979 .is_some_and(|options| options.interactive_comments.is_definitely_off())
980 }
981
982 fn rc_quotes_enabled(&mut self) -> bool {
983 self.current_zsh_options()
984 .is_some_and(|options| options.rc_quotes.is_definitely_on())
985 }
986
987 fn ignore_braces_enabled(&mut self) -> bool {
988 self.current_zsh_options()
989 .is_some_and(|options| options.ignore_braces.is_definitely_on())
990 }
991
992 fn ignore_close_braces_enabled(&mut self) -> bool {
993 self.current_zsh_options().is_some_and(|options| {
994 options.ignore_braces.is_definitely_on()
995 || options.ignore_close_braces.is_definitely_on()
996 })
997 }
998
    /// Decides whether `#` starts a word rather than a comment: always a word
    /// char when comments are disabled; otherwise only when it directly
    /// follows a non-delimiter character or sits inside an unclosed `((` on
    /// the current line.
    fn should_treat_hash_as_word_char(&mut self) -> bool {
        if !self.comments_enabled() {
            return true;
        }
        self.reinject_buf.is_empty()
            && (self
                .input
                .get(..self.offset)
                .and_then(|prefix| prefix.chars().next_back())
                .is_some_and(|prev| {
                    !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
                })
                || self.is_inside_unclosed_double_paren_on_line())
    }
1013
1014 fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
1015 capture
1016 .as_deref()
1017 .unwrap_or(&self.input[start.offset..self.offset])
1018 }
1019
    /// True when the word's surface text is exactly the single character
    /// `target`, ignoring embedded NUL characters.
    fn current_word_surface_is_single_char(
        &self,
        start: Position,
        capture: &Option<String>,
        target: char,
    ) -> bool {
        let text = self.current_word_text(start, capture);
        if !text.contains('\x00') {
            // Fast path: no NULs, compare the raw text against the encoded char.
            let mut encoded = [0; 4];
            return text == target.encode_utf8(&mut encoded);
        }

        // Slow path: skip NULs and require exactly one remaining char.
        let mut chars = text.chars().filter(|&ch| ch != '\x00');
        matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
    }
1035
1036 fn current_word_surface_last_char<'b>(
1037 &'b self,
1038 start: Position,
1039 capture: &'b Option<String>,
1040 ) -> Option<char> {
1041 self.current_word_text(start, capture)
1042 .chars()
1043 .rev()
1044 .find(|&ch| ch != '\x00')
1045 }
1046
1047 fn current_word_surface_ends_with_char(
1048 &self,
1049 start: Position,
1050 capture: &Option<String>,
1051 target: char,
1052 ) -> bool {
1053 self.current_word_surface_last_char(start, capture) == Some(target)
1054 }
1055
1056 fn current_word_surface_ends_with_extglob_prefix(
1057 &self,
1058 start: Position,
1059 capture: &Option<String>,
1060 ) -> bool {
1061 self.current_word_surface_last_char(start, capture)
1062 .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
1063 }
1064
    /// Lexes the next token (skipping whitespace and comments) and stamps it
    /// with the span it covered.
    pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
        self.skip_whitespace();
        let start = self.current_position();
        let token = self.next_lexed_token_inner(false)?;
        let end = self.current_position();
        Some(token.with_span(Span::from_positions(start, end)))
    }

    /// Like `next_lexed_token`, but yields `Comment` tokens instead of
    /// silently skipping comments.
    pub fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
        self.skip_whitespace();
        let start = self.current_position();
        let token = self.next_lexed_token_inner(true)?;
        let end = self.current_position();
        Some(token.with_span(Span::from_positions(start, end)))
    }
1082
    /// Core tokenizer: dispatches on the next character and consumes exactly
    /// one token's worth of input. Span stamping happens in the callers.
    /// `preserve_comments` controls whether `#` comments become `Comment`
    /// tokens or are skipped. Returns `None` at end of input.
    fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
        let ch = self.peek_char()?;

        match ch {
            '\n' => {
                self.consume_ascii_chars(1);
                Some(LexedToken::punctuation(TokenKind::Newline))
            }
            // ';' family: ";;&", ";;", ";|", ";&", ";".
            ';' => {
                if self.second_char() == Some(';') {
                    if self.third_char() == Some('&') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemicolon))
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiPipe))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiAmp))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Semicolon))
                }
            }
            // '|' family: "||", "|&", "|".
            '|' => {
                if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Or))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::PipeBoth))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Pipe))
                }
            }
            // '&' family: "&&", "&>>", "&>", "&|", "&!", "&".
            '&' => {
                if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::And))
                } else if self.second_char() == Some('>') {
                    if self.third_char() == Some('>') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::RedirectBoth))
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
                } else if self.second_char() == Some('!') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundBang))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Background))
                }
            }
            // '>' family: ">>|"/">>", ">|", ">(", ">&", ">".
            '>' => {
                if self.second_char() == Some('>') {
                    // ">>|" consumes the '|' as well but is still lexed as a
                    // plain append redirect.
                    if self.third_char() == Some('|') {
                        self.consume_ascii_chars(3);
                    } else {
                        self.consume_ascii_chars(2);
                    }
                    Some(LexedToken::punctuation(TokenKind::RedirectAppend))
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Clobber))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupOutput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectOut))
                }
            }
            // '<' family: "<<<", "<<-", "<<", "<>", "<(", "<&", "<".
            '<' => {
                if self.second_char() == Some('<') {
                    if self.third_char() == Some('<') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereString))
                    } else if self.third_char() == Some('-') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereDocStrip))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::HereDoc))
                    }
                } else if self.second_char() == Some('>') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupInput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectIn))
                }
            }
            '(' => {
                if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::LeftParen))
                }
            }
            ')' => {
                if self.second_char() == Some(')') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RightParen))
                }
            }
            // '{' is context sensitive: with zsh ignore_braces it acts as a
            // word character; otherwise it may open a brace expansion, a
            // brace group, or just start a literal word.
            '{' => {
                if self.ignore_braces_enabled() {
                    let start = self.current_position();
                    self.consume_ascii_chars(1);
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
                        }
                        _ => self.read_word_starting_with("{", start),
                    }
                } else if self.looks_like_brace_expansion() {
                    self.read_brace_expansion_word()
                } else if self.is_brace_group_start() {
                    self.advance();
                    Some(LexedToken::punctuation(TokenKind::LeftBrace))
                } else {
                    self.read_brace_literal_word()
                }
            }
            '}' => {
                self.consume_ascii_chars(1);
                if self.ignore_close_braces_enabled() {
                    Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
                } else {
                    Some(LexedToken::punctuation(TokenKind::RightBrace))
                }
            }
            // "[[" is a token only when followed by a delimiter; otherwise
            // '[' begins a word.
            '[' => {
                let start = self.current_position();
                self.consume_ascii_chars(1);
                if self.peek_char() == Some('[')
                    && matches!(
                        self.second_char(),
                        Some(' ') | Some('\t') | Some('\n') | None
                    )
                {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
                } else {
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
                        }
                        _ => self.read_word_starting_with("[", start),
                    }
                }
            }
            ']' => {
                if self.second_char() == Some(']') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
                }
            }
            '\'' => self.read_single_quoted_string(),
            '"' => self.read_double_quoted_string(),
            // '#': either part of a word, a preserved Comment token, or
            // skipped entirely (then lex the following token).
            '#' => {
                if self.should_treat_hash_as_word_char() {
                    let start = self.current_position();
                    return self.read_word_starting_with("#", start);
                }
                if preserve_comments {
                    self.read_comment();
                    Some(LexedToken::comment())
                } else {
                    self.skip_comment();
                    self.next_lexed_token_inner(false)
                }
            }
            // Digits may be an fd prefix of a redirect (e.g. "2>").
            '0'..='9' => self.read_word_or_fd_redirect(),
            _ => self.read_word(),
        }
    }
1299
1300 fn skip_whitespace(&mut self) {
1301 while let Some(ch) = self.peek_char() {
1302 if self.reinject_buf.is_empty() {
1303 let whitespace_len = self.source_horizontal_whitespace_len();
1304 if whitespace_len > 0 {
1305 self.consume_source_bytes(whitespace_len);
1306 continue;
1307 }
1308
1309 if self.cursor.rest().starts_with("\\\n") {
1310 self.consume_source_bytes(2);
1311 continue;
1312 }
1313 }
1314
1315 if ch == ' ' || ch == '\t' {
1316 self.consume_ascii_chars(1);
1317 } else if ch == '\\' {
1318 if self.second_char() == Some('\n') {
1320 self.consume_ascii_chars(2);
1321 } else {
1322 break;
1323 }
1324 } else {
1325 break;
1326 }
1327 }
1328 }
1329
1330 fn skip_comment(&mut self) {
1331 if self.reinject_buf.is_empty() {
1332 let end = self
1333 .cursor
1334 .find_byte(b'\n')
1335 .unwrap_or(self.cursor.rest().len());
1336 self.consume_source_bytes(end);
1337 return;
1338 }
1339
1340 while let Some(ch) = self.peek_char() {
1341 if ch == '\n' {
1342 break;
1343 }
1344 self.advance();
1345 }
1346 }
1347
1348 fn read_comment(&mut self) {
1349 debug_assert_eq!(self.peek_char(), Some('#'));
1350
1351 if self.reinject_buf.is_empty() {
1352 let rest = self.cursor.rest();
1353 let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1354 self.consume_source_bytes(end);
1355 return;
1356 }
1357
1358 self.advance(); while let Some(ch) = self.peek_char() {
1361 if ch == '\n' {
1362 break;
1363 }
1364 self.advance();
1365 }
1366 }
1367
1368 fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1369 if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1370 return false;
1371 }
1372
1373 let line_start = self.input[..self.offset]
1374 .rfind('\n')
1375 .map_or(0, |index| index + 1);
1376 let prefix = &self.input[line_start..self.offset];
1377 line_has_unclosed_double_paren(prefix)
1378 }
1379
    /// Lexes a token that begins with an ASCII digit: either an fd-prefixed
    /// redirection (`2>`, `2>>`, `2>&1`, `0<&3`, `3<>`, `2>|`, ...) or an
    /// ordinary word that happens to start with digits.
    ///
    /// Only a single leading digit is considered as the file descriptor; the
    /// decision is made from the two (or three) characters that follow it.
    /// Anything that does not form a redirect operator falls through to
    /// [`Self::read_word`].
    fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
        if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
            let Some(fd) = first_digit.to_digit(10) else {
                unreachable!("peeked ASCII digit should convert to a base-10 digit");
            };
            // The single leading digit is the candidate descriptor.
            let fd = fd as i32;

            match (self.second_char(), self.third_char()) {
                (Some('>'), Some('>')) => {
                    // `N>>` append; `N>>|` consumes the trailing `|` but is
                    // reported with the same kind.
                    if self.fourth_char() == Some('|') {
                        self.consume_ascii_chars(4);
                    } else {
                        self.consume_ascii_chars(3);
                    }
                    return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
                }
                (Some('>'), Some('|')) => {
                    // `N>|`: clobber redirect.
                    self.consume_ascii_chars(3);
                    return Some(LexedToken::fd(TokenKind::Clobber, fd));
                }
                (Some('>'), Some('&')) => {
                    // `N>&M`: duplicate output descriptor N onto M.
                    self.consume_ascii_chars(3);

                    // Collect the digits of the target descriptor, if any.
                    let mut target_str = String::with_capacity(4);
                    while let Some(c) = self.peek_char() {
                        if c.is_ascii_digit() {
                            target_str.push(c);
                            self.advance();
                        } else {
                            break;
                        }
                    }

                    if target_str.is_empty() {
                        // Bare `N>&` (e.g. `2>& file`): fall back to a plain
                        // fd output redirect.
                        return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
                    }

                    // NOTE(review): a target that fails to parse as i32
                    // (e.g. overflow) silently becomes 1 — confirm intended.
                    let target_fd: i32 = target_str.parse().unwrap_or(1);
                    return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
                }
                (Some('>'), _) => {
                    // `N>`: plain output redirect from descriptor N.
                    self.consume_ascii_chars(2);
                    return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
                }
                (Some('<'), Some('&')) => {
                    // `N<&M` duplicates input fd M onto N; `N<&-` closes N.
                    self.consume_ascii_chars(3);

                    let mut target_str = String::with_capacity(4);
                    while let Some(c) = self.peek_char() {
                        if c.is_ascii_digit() || c == '-' {
                            target_str.push(c);
                            self.advance();
                            // A `-` ends the target (`N<&-` / `N<&M-`).
                            if c == '-' {
                                break;
                            }
                        } else {
                            break;
                        }
                    }

                    if target_str == "-" {
                        return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
                    }
                    // NOTE(review): a form like `3<&4-` yields "4-", which
                    // fails to parse and falls back to target fd 0; confirm
                    // the move-fd form is meant to be handled elsewhere.
                    let target_fd: i32 = target_str.parse().unwrap_or(0);
                    return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
                }
                (Some('<'), Some('>')) => {
                    // `N<>`: open descriptor N for reading and writing.
                    self.consume_ascii_chars(3);
                    return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
                }
                // `N<<` (fd-prefixed here-doc) is deliberately not consumed
                // here; it falls through to the word path below.
                (Some('<'), Some('<')) => {}
                (Some('<'), _) => {
                    // `N<`: plain input redirect into descriptor N.
                    self.consume_ascii_chars(2);
                    return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
                }
                _ => {}
            }
        }

        // Not a redirect operator: lex as an ordinary word.
        self.read_word()
    }
1464
1465 fn read_word_starting_with(
1466 &mut self,
1467 _prefix: &str,
1468 start: Position,
1469 ) -> Option<LexedToken<'a>> {
1470 let segment = match self.read_unquoted_segment(start) {
1471 Ok(segment) => segment,
1472 Err(kind) => return Some(LexedToken::error(kind)),
1473 };
1474 if segment.as_str().is_empty() {
1475 return None;
1476 }
1477 let mut lexed_word = LexedWord::from_segment(segment);
1478 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1479 return Some(LexedToken::error(kind));
1480 }
1481 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1482 }
1483
    /// Lexes a word starting at the current position.
    ///
    /// Tries a zero-copy fast path first: when no reinjected characters are
    /// pending, a run of plain word bytes is sliced straight out of the
    /// source. Falls back to [`Self::read_complex_word`] when the word
    /// involves quoting, expansions, or other structure.
    fn read_word(&mut self) -> Option<LexedToken<'a>> {
        let start = self.current_position();

        if self.reinject_buf.is_empty() {
            // Measure the leading run of plain ASCII word bytes.
            let ascii_len = self.source_ascii_plain_word_len();
            let chunk = if ascii_len > 0
                && self
                    .cursor
                    .rest()
                    .as_bytes()
                    .get(ascii_len)
                    .is_none_or(|byte| byte.is_ascii())
            {
                // Run ends at an ASCII boundary (or end of input), so the
                // byte count equals the char count: consume it wholesale.
                self.consume_source_bytes(ascii_len);
                &self.input[start.offset..self.offset]
            } else {
                // Non-ASCII follows: scan char-by-char instead.
                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                self.advance_scanned_source_bytes(chunk.len());
                chunk
            };
            if !chunk.is_empty() {
                // Does the word continue past the plain run? Another word
                // char, an expansion (`$`), a quote, a brace, or `(` after
                // `=`/a suffix-taking word all extend it.
                let continues = matches!(
                    self.peek_char(),
                    Some(next)
                        if Self::is_word_char(next)
                            || next == '$'
                            || matches!(next, '\'' | '"')
                            || next == '{'
                            || (next == '('
                                && (chunk.ends_with('=')
                                    || Self::word_can_take_parenthesized_suffix(chunk)))
                );

                if !continues {
                    // Entire word was plain: emit a borrowed token.
                    let end = self.current_position();
                    return Some(LexedToken::borrowed_word(
                        TokenKind::Word,
                        &self.input[start.offset..self.offset],
                        Some(Span::from_positions(start, end)),
                    ));
                }

                if self.peek_char() == Some('(')
                    && (chunk.ends_with('=') || Self::word_can_take_parenthesized_suffix(chunk))
                {
                    // e.g. `arr=(...)`: restart as a complex word so the
                    // parenthesized part is captured with its prefix.
                    return self.read_complex_word(start);
                }

                // Plain prefix followed by quotes/expansions: keep the
                // borrowed prefix as the first segment and append the rest.
                let end = self.current_position();
                return self.finish_segmented_word(LexedWord::borrowed(
                    LexedWordSegmentKind::Plain,
                    &self.input[start.offset..self.offset],
                    Some(Span::from_positions(start, end)),
                ));
            }
        }

        self.read_complex_word(start)
    }
1543
1544 fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1545 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1546 return Some(LexedToken::error(kind));
1547 }
1548
1549 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1550 }
1551
1552 fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1553 if self.peek_char() == Some('$') {
1554 match self.second_char() {
1555 Some('\'') => return self.read_dollar_single_quoted_string(),
1556 Some('"') => return self.read_dollar_double_quoted_string(),
1557 _ => {}
1558 }
1559 }
1560
1561 let segment = match self.read_unquoted_segment(start) {
1562 Ok(segment) => segment,
1563 Err(kind) => return Some(LexedToken::error(kind)),
1564 };
1565
1566 if segment.as_str().is_empty() {
1567 return None;
1568 }
1569
1570 self.finish_segmented_word(LexedWord::from_segment(segment))
1571 }
1572
    /// Reads one unquoted word segment starting at `start`.
    ///
    /// Stops at a quote boundary (`'`, `"`, or `$'`/`$"` once something has
    /// already been captured) or at any character that cannot be part of a
    /// plain word. Expansions (`$var`, `${...}`, `$(...)`, `$((...))`,
    /// `$[...]`), backtick substitutions, backslash escapes, mid-word brace
    /// segments, extglob groups, and `name=(...)` values are captured into
    /// the segment text.
    ///
    /// Capture strategy: while reading straight from the source (no
    /// reinjected characters) `word` stays `None` and the final segment
    /// borrows `self.input[start..offset]`; the first construct that needs
    /// rewriting switches to an owned buffer via `ensure_capture_from_source`
    /// and `push_capture_char` (a no-op while `word` is `None`).
    ///
    /// # Errors
    /// `LexerErrorKind::CommandSubstitution` for an unterminated
    /// substitution/arithmetic form, `LexerErrorKind::BacktickSubstitution`
    /// for an unterminated backtick.
    fn read_unquoted_segment(
        &mut self,
        start: Position,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        // Owned capture buffer; allocated only when characters come from the
        // reinjection buffer and therefore cannot be borrowed from input.
        let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
        while let Some(ch) = self.peek_char() {
            if ch == '"' || ch == '\'' {
                // A quote starts a new segment; this one ends here.
                break;
            } else if ch == '$' {
                if matches!(self.second_char(), Some('\'') | Some('"'))
                    && (self.current_position().offset > start.offset
                        || word.as_ref().is_some_and(|word| !word.is_empty()))
                {
                    // `$'...'`/`$"..."` after captured text: leave it for the
                    // dedicated quoted-segment reader.
                    break;
                }

                self.advance();

                Self::push_capture_char(&mut word, ch);
                if self.peek_char() == Some('[') {
                    // Legacy arithmetic `$[...]`.
                    Self::push_capture_char(&mut word, '[');
                    self.advance();
                    if !self.read_legacy_arithmetic_into(&mut word, start) {
                        return Err(LexerErrorKind::CommandSubstitution);
                    }
                } else if self.peek_char() == Some('(') {
                    if self.second_char() == Some('(') {
                        // Arithmetic expansion `$((...))`.
                        if !self.read_arithmetic_expansion_into(&mut word) {
                            return Err(LexerErrorKind::CommandSubstitution);
                        }
                    } else {
                        // Command substitution `$(...)`.
                        Self::push_capture_char(&mut word, '(');
                        self.advance();
                        if !self.read_command_subst_into(&mut word) {
                            return Err(LexerErrorKind::CommandSubstitution);
                        }
                    }
                } else if self.peek_char() == Some('{') {
                    // Parameter expansion `${...}`; a malformed expansion is
                    // tolerated here and the captured text kept as-is.
                    Self::push_capture_char(&mut word, '{');
                    self.advance();
                    let _ = self.read_param_expansion_into(&mut word, start);
                } else {
                    if let Some(c) = self.peek_char() {
                        if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
                            || c.is_ascii_digit()
                        {
                            // Special one-character parameter ($?, $#, $1, ...).
                            Self::push_capture_char(&mut word, c);
                            self.advance();
                        } else {
                            // Plain variable name: [A-Za-z0-9_]+.
                            while let Some(c) = self.peek_char() {
                                if c.is_ascii_alphanumeric() || c == '_' {
                                    Self::push_capture_char(&mut word, c);
                                    self.advance();
                                } else {
                                    break;
                                }
                            }
                        }
                    }
                }
            } else if ch == '{' {
                if self.looks_like_mid_word_brace_segment() {
                    // Brace-expansion text mid-word: capture the balanced
                    // `{...}` run, tracking nesting depth.
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                    let mut depth = 1;
                    while let Some(c) = self.peek_char() {
                        Self::push_capture_char(&mut word, c);
                        self.advance();
                        if c == '{' {
                            depth += 1;
                        } else if c == '}' {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                    }
                } else {
                    // Literal `{` inside a word.
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                }
            } else if ch == '`' {
                // Backtick substitution: needs an owned buffer from here on.
                let capture_end = self.current_position();
                self.ensure_capture_from_source(&mut word, start, capture_end);
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut closed = false;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    if c == '`' {
                        closed = true;
                        break;
                    }
                    if c == '\\'
                        && let Some(next) = self.peek_char()
                    {
                        // Keep the escaped char so an escaped backtick does
                        // not terminate the substitution.
                        Self::push_capture_char(&mut word, next);
                        self.advance();
                    }
                }
                if !closed {
                    return Err(LexerErrorKind::BacktickSubstitution);
                }
            } else if ch == '\\' {
                let capture_end = self.current_position();
                self.ensure_capture_from_source(&mut word, start, capture_end);
                self.advance();
                if let Some(next) = self.peek_char() {
                    if next == '\n' {
                        // Line continuation: both characters are dropped.
                        self.advance();
                    } else {
                        // A NUL marker byte flags the following char as
                        // escaped for later word processing.
                        Self::push_capture_char(&mut word, '\x00');
                        Self::push_capture_char(&mut word, next);
                        self.advance();
                        if next == '{'
                            && self.current_word_surface_is_single_char(start, &word, '{')
                            && self.escaped_brace_sequence_looks_like_brace_expansion()
                        {
                            // `\{a,b\}`-style escaped brace expansion:
                            // capture through the matching close brace.
                            let mut depth = 1;
                            while let Some(c) = self.peek_char() {
                                Self::push_capture_char(&mut word, c);
                                self.advance();
                                match c {
                                    '{' => depth += 1,
                                    '}' => {
                                        depth -= 1;
                                        if depth == 0 {
                                            break;
                                        }
                                    }
                                    _ => {}
                                }
                            }
                        }
                    }
                } else {
                    // Trailing backslash at end of input: keep it literally.
                    Self::push_capture_char(&mut word, '\\');
                }
            } else if ch == '('
                && self.current_word_surface_ends_with_char(start, &word, '=')
                && self.looks_like_assoc_assign()
            {
                // `name=(...)` array/assoc assignment: capture the whole
                // parenthesized value, honoring nesting, quotes and escapes.
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut depth = 1;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    match c {
                        '(' => depth += 1,
                        ')' => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        '"' => {
                            // Double-quoted run: parens inside do not count
                            // toward the depth; `\x` pairs are kept whole.
                            while let Some(qc) = self.peek_char() {
                                Self::push_capture_char(&mut word, qc);
                                self.advance();
                                if qc == '"' {
                                    break;
                                }
                                if qc == '\\'
                                    && let Some(esc) = self.peek_char()
                                {
                                    Self::push_capture_char(&mut word, esc);
                                    self.advance();
                                }
                            }
                        }
                        '\'' => {
                            // Single-quoted run: no escapes inside.
                            while let Some(qc) = self.peek_char() {
                                Self::push_capture_char(&mut word, qc);
                                self.advance();
                                if qc == '\'' {
                                    break;
                                }
                            }
                        }
                        '\\' => {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(&mut word, esc);
                                self.advance();
                            }
                        }
                        _ => {}
                    }
                }
            } else if ch == '(' && self.current_word_surface_ends_with_extglob_prefix(start, &word)
            {
                // Extended glob group (e.g. `@(...)`): capture the balanced
                // parenthesized body.
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut depth = 1;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    match c {
                        '(' => depth += 1,
                        ')' => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        '\\' => {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(&mut word, esc);
                                self.advance();
                            }
                        }
                        _ => {}
                    }
                }
            } else if Self::is_plain_word_char(ch) {
                if self.reinject_buf.is_empty() {
                    // Bulk-consume a plain run straight from the source.
                    let ascii_len = self.source_ascii_plain_word_len();
                    let chunk = if ascii_len > 0
                        && self
                            .cursor
                            .rest()
                            .as_bytes()
                            .get(ascii_len)
                            .is_none_or(|byte| byte.is_ascii())
                    {
                        self.consume_source_bytes(ascii_len);
                        &self.input[self.offset - ascii_len..self.offset]
                    } else {
                        let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                        self.advance_scanned_source_bytes(chunk.len());
                        chunk
                    };
                    Self::push_capture_str(&mut word, chunk);
                } else {
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                }
            } else {
                // Metacharacter: the segment ends before it.
                break;
            }
        }

        if let Some(word) = word {
            // Owned capture was needed at some point.
            let span = Some(Span::from_positions(start, self.current_position()));
            Ok(LexedWordSegment::owned_with_spans(
                LexedWordSegmentKind::Plain,
                word,
                span,
                span,
            ))
        } else {
            // Never needed rewriting: borrow directly from the input.
            let end = self.current_position();
            Ok(LexedWordSegment::borrowed(
                LexedWordSegmentKind::Plain,
                &self.input[start.offset..self.offset],
                Some(Span::from_positions(start, end)),
            ))
        }
    }
1855
1856 fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1857 let segment = match self.read_single_quoted_segment() {
1858 Ok(segment) => segment,
1859 Err(kind) => return Some(LexedToken::error(kind)),
1860 };
1861 let mut word = LexedWord::from_segment(segment);
1862 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1863 return Some(LexedToken::error(kind));
1864 }
1865
1866 Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1867 }
1868
    /// Reads a `'...'` segment; the cursor must be on the opening quote.
    ///
    /// Fast path: with no reinjected characters and `RC_QUOTES` disabled,
    /// the closing quote is located with `memchr` and the content is
    /// borrowed from the input. Otherwise characters are copied one at a
    /// time; with `RC_QUOTES` enabled, a doubled `''` inside the string
    /// yields a literal single quote.
    ///
    /// # Errors
    /// `LexerErrorKind::SingleQuote` when the closing quote is missing.
    fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        debug_assert_eq!(self.peek_char(), Some('\''));

        let wrapper_start = self.current_position();
        self.consume_ascii_chars(1);
        let content_start = self.current_position();
        // Borrowing is possible only when reading pure source text and no
        // RC_QUOTES rewriting can occur.
        let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
        let mut content_end = content_start;
        let mut content = String::with_capacity(16);
        let mut closed = false;

        if can_borrow {
            let rest = self.cursor.rest();
            if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
                self.consume_source_bytes(quote_index);
                content_end = self.current_position();
                self.consume_ascii_chars(1);
                closed = true;
            } else {
                // No closing quote anywhere: consume the remainder so the
                // unterminated-quote error below fires.
                self.consume_source_bytes(rest.len());
            }
        }

        // Slow path (copying); on the fast path this loop exits immediately.
        while let Some(ch) = self.peek_char() {
            if closed {
                break;
            }
            if ch == '\'' {
                if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
                    // RC_QUOTES: `''` inside the string is a literal quote.
                    if !can_borrow {
                        content.push('\'');
                    }
                    self.advance();
                    self.advance();
                    continue;
                }
                content_end = self.current_position();
                self.consume_ascii_chars(1);
                closed = true;
                break;
            }
            if !can_borrow {
                content.push(ch);
            }
            self.advance();
        }

        if !closed {
            return Err(LexerErrorKind::SingleQuote);
        }

        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
        let content_span = Some(Span::from_positions(content_start, content_end));

        if can_borrow {
            Ok(LexedWordSegment::borrowed_with_spans(
                LexedWordSegmentKind::SingleQuoted,
                &self.input[content_start.offset..content_end.offset],
                content_span,
                wrapper_span,
            ))
        } else {
            Ok(LexedWordSegment::owned_with_spans(
                LexedWordSegmentKind::SingleQuoted,
                content,
                content_span,
                wrapper_span,
            ))
        }
    }
1939
1940 fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1941 let segment = match self.read_dollar_single_quoted_segment() {
1942 Ok(segment) => segment,
1943 Err(kind) => return Some(LexedToken::error(kind)),
1944 };
1945 let mut word = LexedWord::from_segment(segment);
1946 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1947 return Some(LexedToken::error(kind));
1948 }
1949
1950 let kind = if word.single_segment().is_some() {
1951 TokenKind::LiteralWord
1952 } else {
1953 TokenKind::Word
1954 };
1955
1956 Some(LexedToken::with_word_payload(kind, word))
1957 }
1958
    /// Reads a `$'...'` (ANSI-C quoting) segment, decoding backslash
    /// escapes into the owned result text. The cursor must be on the `$`.
    ///
    /// Supported escapes: C escapes (`\n`, `\t`, `\r`, `\a`, `\b`, `\f`,
    /// `\v`), `\e`/`\E` (ESC), `\\`, `\'`, `\"`, `\?`, `\cX` (control),
    /// `\xHH`, `\uHHHH`, `\UHHHHHHHH`, and 1–3 digit octal. Unknown escapes
    /// are kept as `\` plus the character.
    ///
    /// # Errors
    /// `LexerErrorKind::SingleQuote` when the closing quote is missing.
    fn read_dollar_single_quoted_segment(
        &mut self,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        debug_assert_eq!(self.peek_char(), Some('$'));
        debug_assert_eq!(self.second_char(), Some('\''));

        let wrapper_start = self.current_position();
        self.consume_ascii_chars(2);
        let content_start = self.current_position();
        let mut out = String::with_capacity(16);

        while let Some(ch) = self.peek_char() {
            if ch == '\'' {
                // Closing quote: the segment is complete.
                let content_end = self.current_position();
                self.advance();
                let wrapper_span =
                    Some(Span::from_positions(wrapper_start, self.current_position()));
                let content_span = Some(Span::from_positions(content_start, content_end));
                return Ok(LexedWordSegment::owned_with_spans(
                    LexedWordSegmentKind::DollarSingleQuoted,
                    out,
                    content_span,
                    wrapper_span,
                ));
            }

            if ch == '\\' {
                self.advance();
                if let Some(esc) = self.peek_char() {
                    self.advance();
                    match esc {
                        'n' => out.push('\n'),
                        't' => out.push('\t'),
                        'r' => out.push('\r'),
                        'a' => out.push('\x07'),
                        'b' => out.push('\x08'),
                        'f' => out.push('\x0C'),
                        'v' => out.push('\x0B'),
                        'e' | 'E' => out.push('\x1B'),
                        '\\' => out.push('\\'),
                        '\'' => out.push('\''),
                        '"' => out.push('"'),
                        '?' => out.push('?'),
                        'c' => {
                            // `\cX`: the control character X & 0x1F.
                            if let Some(control) = self.peek_char() {
                                self.advance();
                                out.push(((control as u32 & 0x1F) as u8) as char);
                            } else {
                                // `\c` at end of input: keep it literally.
                                out.push('\\');
                                out.push('c');
                            }
                        }
                        'x' => {
                            // `\xHH`: up to two hex digits.
                            let mut hex = String::new();
                            for _ in 0..2 {
                                if let Some(h) = self.peek_char() {
                                    if h.is_ascii_hexdigit() {
                                        hex.push(h);
                                        self.advance();
                                    } else {
                                        break;
                                    }
                                }
                            }
                            // NOTE(review): with zero hex digits the parse
                            // fails and the `\x` is silently dropped —
                            // confirm this matches the intended shell
                            // behavior. The byte is pushed as a Unicode
                            // scalar, not a raw byte.
                            if let Ok(val) = u8::from_str_radix(&hex, 16) {
                                out.push(val as char);
                            }
                        }
                        'u' => {
                            // `\uHHHH`: up to four hex digits.
                            let mut hex = String::new();
                            for _ in 0..4 {
                                if let Some(h) = self.peek_char() {
                                    if h.is_ascii_hexdigit() {
                                        hex.push(h);
                                        self.advance();
                                    } else {
                                        break;
                                    }
                                }
                            }
                            // Invalid scalar values are silently dropped.
                            if let Ok(val) = u32::from_str_radix(&hex, 16)
                                && let Some(c) = char::from_u32(val)
                            {
                                out.push(c);
                            }
                        }
                        'U' => {
                            // `\UHHHHHHHH`: up to eight hex digits.
                            let mut hex = String::new();
                            for _ in 0..8 {
                                if let Some(h) = self.peek_char() {
                                    if h.is_ascii_hexdigit() {
                                        hex.push(h);
                                        self.advance();
                                    } else {
                                        break;
                                    }
                                }
                            }
                            if let Ok(val) = u32::from_str_radix(&hex, 16)
                                && let Some(c) = char::from_u32(val)
                            {
                                out.push(c);
                            }
                        }
                        '0'..='7' => {
                            // Octal: the digit just read plus up to two more.
                            let mut oct = String::new();
                            oct.push(esc);
                            for _ in 0..2 {
                                if let Some(o) = self.peek_char() {
                                    if o.is_ascii_digit() && o < '8' {
                                        oct.push(o);
                                        self.advance();
                                    } else {
                                        break;
                                    }
                                }
                            }
                            // NOTE(review): values above 0o377 overflow u8
                            // and the escape is silently dropped.
                            if let Ok(val) = u8::from_str_radix(&oct, 8) {
                                out.push(val as char);
                            }
                        }
                        _ => {
                            // Unknown escape: keep backslash + char.
                            out.push('\\');
                            out.push(esc);
                        }
                    }
                } else {
                    // Trailing backslash at end of input.
                    out.push('\\');
                }
                continue;
            }

            out.push(ch);
            self.advance();
        }

        // Input ended before the closing quote.
        Err(LexerErrorKind::SingleQuote)
    }
2097
2098 fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2099 let start = self.current_position();
2100
2101 if self.reinject_buf.is_empty() {
2102 let ascii_len = self.source_ascii_plain_word_len();
2103 let chunk = if ascii_len > 0
2104 && self
2105 .cursor
2106 .rest()
2107 .as_bytes()
2108 .get(ascii_len)
2109 .is_none_or(|byte| byte.is_ascii())
2110 {
2111 self.consume_source_bytes(ascii_len);
2112 &self.input[start.offset..self.offset]
2113 } else {
2114 let chunk = self.cursor.eat_while(Self::is_plain_word_char);
2115 self.advance_scanned_source_bytes(chunk.len());
2116 chunk
2117 };
2118 if chunk.is_empty() {
2119 return None;
2120 }
2121
2122 let end = self.current_position();
2123 return Some(LexedWordSegment::borrowed(
2124 LexedWordSegmentKind::Plain,
2125 &self.input[start.offset..self.offset],
2126 Some(Span::from_positions(start, end)),
2127 ));
2128 }
2129
2130 let ch = self.peek_char()?;
2131 if !Self::is_plain_word_char(ch) {
2132 return None;
2133 }
2134
2135 let mut text = String::with_capacity(16);
2136 while let Some(ch) = self.peek_char() {
2137 if !Self::is_plain_word_char(ch) {
2138 break;
2139 }
2140 text.push(ch);
2141 self.advance();
2142 }
2143
2144 Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
2145 }
2146
2147 fn append_segmented_continuation(
2150 &mut self,
2151 word: &mut LexedWord<'a>,
2152 ) -> Result<(), LexerErrorKind> {
2153 loop {
2154 match self.peek_char() {
2155 Some('\'') => {
2156 word.push_segment(self.read_single_quoted_segment()?);
2157 }
2158 Some('"') => {
2159 word.push_segment(self.read_double_quoted_segment()?);
2160 }
2161 Some('$') if self.second_char() == Some('\'') => {
2162 word.push_segment(self.read_dollar_single_quoted_segment()?);
2163 }
2164 Some('$') if self.second_char() == Some('"') => {
2165 word.push_segment(self.read_dollar_double_quoted_segment()?);
2166 }
2167 Some('(') if Self::lexed_word_can_take_parenthesized_suffix(word) => {
2168 let Some(segment) = self.read_parenthesized_word_suffix_segment() else {
2169 unreachable!("peeked '(' should produce a suffix segment");
2170 };
2171 word.push_segment(segment);
2172 }
2173 _ => {
2174 if let Some(segment) = self.read_plain_continuation_segment() {
2175 word.push_segment(segment);
2176 continue;
2177 }
2178
2179 let start = self.current_position();
2180 let plain = self.read_unquoted_segment(start)?;
2181 if plain.as_str().is_empty() {
2182 break;
2183 }
2184 word.push_segment(plain);
2185 }
2186 }
2187 }
2188
2189 Ok(())
2190 }
2191
2192 fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2193 debug_assert_eq!(self.peek_char(), Some('('));
2194
2195 let start = self.current_position();
2196 let mut depth = 0usize;
2197 let mut escaped = false;
2198 let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2199
2200 while let Some(ch) = self.peek_char() {
2201 if let Some(text) = text.as_mut() {
2202 text.push(ch);
2203 }
2204 self.advance();
2205
2206 if escaped {
2207 escaped = false;
2208 continue;
2209 }
2210
2211 match ch {
2212 '\\' => escaped = true,
2213 '(' => depth += 1,
2214 ')' => {
2215 depth = depth.saturating_sub(1);
2216 if depth == 0 {
2217 break;
2218 }
2219 }
2220 _ => {}
2221 }
2222 }
2223
2224 let end = self.current_position();
2225 let span = Some(Span::from_positions(start, end));
2226 if let Some(text) = text {
2227 Some(LexedWordSegment::owned_with_spans(
2228 LexedWordSegmentKind::Plain,
2229 text,
2230 span,
2231 span,
2232 ))
2233 } else {
2234 Some(LexedWordSegment::borrowed_with_spans(
2235 LexedWordSegmentKind::Plain,
2236 &self.input[start.offset..end.offset],
2237 span,
2238 span,
2239 ))
2240 }
2241 }
2242
    /// Lexes a word beginning with a `"..."` segment.
    fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(false)
    }
2246
    /// Lexes a word beginning with a `$"..."` segment.
    fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(true)
    }
2250
2251 fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2252 let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2253 Ok(segment) => segment,
2254 Err(kind) => return Some(LexedToken::error(kind)),
2255 };
2256 let mut word = LexedWord::from_segment(segment);
2257 if let Err(kind) = self.append_segmented_continuation(&mut word) {
2258 return Some(LexedToken::error(kind));
2259 }
2260
2261 let kind = if word.single_segment().is_some() {
2262 TokenKind::QuotedWord
2263 } else {
2264 TokenKind::Word
2265 };
2266
2267 Some(LexedToken::with_word_payload(kind, word))
2268 }
2269
    /// Reads a single `"..."` segment (no continuation handling).
    fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(false)
    }
2273
    /// Reads a single `$"..."` segment (no continuation handling).
    fn read_dollar_double_quoted_segment(
        &mut self,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(true)
    }
2279
    /// Reads a `"..."` (or, when `dollar`, `$"..."`) segment; the cursor
    /// must be on the opening `"` (or the `$`).
    ///
    /// Operates in "simple" mode while no `\`, `$`, or backtick has been
    /// seen: there the source fast path skips ahead using
    /// `find_double_quote_special`, and the final content can be borrowed
    /// from the input. The first construct that requires rewriting
    /// (escapes, backticks, certain parameter expansions) switches to an
    /// owned capture buffer and/or clears `borrowable`.
    ///
    /// Escape handling: `\` before newline is a line continuation; `\$`,
    /// `\"`, `\\`, and `` \` `` escape the character (a NUL marker byte is
    /// captured before `$`, `\`, and backtick so later stages see them as
    /// escaped); any other `\` pair is kept verbatim.
    ///
    /// # Errors
    /// `LexerErrorKind::DoubleQuote` when the closing quote is missing.
    fn read_double_quoted_segment_with_dollar(
        &mut self,
        dollar: bool,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        if dollar {
            debug_assert_eq!(self.peek_char(), Some('$'));
            debug_assert_eq!(self.second_char(), Some('"'));
        } else {
            debug_assert_eq!(self.peek_char(), Some('"'));
        }

        let wrapper_start = self.current_position();
        // Consume the opener: `$"` or `"`.
        if dollar {
            self.consume_ascii_chars(2);
        } else {
            self.consume_ascii_chars(1);
        }
        let content_start = self.current_position();
        let mut content_end = content_start;
        // `simple`: no special character seen yet; `borrowable`: content can
        // still be sliced from the input verbatim.
        let mut simple = self.reinject_buf.is_empty();
        let mut borrowable = self.reinject_buf.is_empty();
        let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
        let mut closed = false;

        while let Some(ch) = self.peek_char() {
            if simple {
                if self.reinject_buf.is_empty() {
                    // Fast path: jump to the next character that needs
                    // individual handling (`"`, `\`, `$`, or backtick).
                    let rest = self.cursor.rest();
                    match Self::find_double_quote_special(rest) {
                        Some(index) if index > 0 => {
                            self.consume_source_bytes(index);
                            continue;
                        }
                        None => {
                            // No special char at all: unterminated string.
                            self.consume_source_bytes(rest.len());
                            return Err(LexerErrorKind::DoubleQuote);
                        }
                        _ => {}
                    }
                }

                match ch {
                    '"' => {
                        // Closing quote while still simple.
                        content_end = self.current_position();
                        self.consume_ascii_chars(1);
                        closed = true;
                        break;
                    }
                    '\\' | '$' | '`' => {
                        // Leave simple mode; backticks additionally force an
                        // owned capture of everything read so far.
                        simple = false;
                        if ch == '`' {
                            borrowable = false;
                            let capture_end = self.current_position();
                            self.ensure_capture_from_source(
                                &mut content,
                                content_start,
                                capture_end,
                            );
                        }
                    }
                    _ => {
                        self.advance();
                    }
                }
                if simple {
                    continue;
                }
                // Fall through to the detailed handling with the same `ch`.
            }

            match ch {
                '"' => {
                    // Closing quote.
                    if borrowable {
                        content_end = self.current_position();
                    }
                    self.consume_ascii_chars(1);
                    closed = true;
                    break;
                }
                '\\' => {
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        match next {
                            '\n' => {
                                // Line continuation: drop both characters.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                self.advance();
                            }
                            '$' => {
                                // Escaped `$`: NUL marker + `$`.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                Self::push_capture_char(&mut content, '\x00');
                                Self::push_capture_char(&mut content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                // Escaped quote/backslash/backtick; `\` and
                                // the backtick also get the NUL marker.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                if next == '\\' {
                                    Self::push_capture_char(&mut content, '\x00');
                                }
                                if next == '`' {
                                    Self::push_capture_char(&mut content, '\x00');
                                }
                                Self::push_capture_char(&mut content, next);
                                self.advance();
                                content_end = self.current_position();
                            }
                            _ => {
                                // Any other pair is kept verbatim.
                                Self::push_capture_char(&mut content, '\\');
                                Self::push_capture_char(&mut content, next);
                                self.advance();
                                content_end = self.current_position();
                            }
                        }
                    }
                }
                '$' => {
                    // Expansion inside the quotes; plain `$name` just passes
                    // the `$` through and lets later iterations capture the
                    // name characters.
                    Self::push_capture_char(&mut content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            // `$((...))` arithmetic expansion.
                            self.read_arithmetic_expansion_into(&mut content);
                        } else {
                            // `$(...)` command substitution.
                            Self::push_capture_char(&mut content, '(');
                            self.advance();
                            self.read_command_subst_into(&mut content);
                        }
                    } else if self.peek_char() == Some('{') {
                        // `${...}` parameter expansion; the reader decides
                        // whether the content remains borrowable.
                        Self::push_capture_char(&mut content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(&mut content, content_start);
                    }
                    content_end = self.current_position();
                }
                '`' => {
                    // Backtick substitution: copy through the closing tick,
                    // keeping escape pairs intact.
                    borrowable = false;
                    let capture_end = self.current_position();
                    self.ensure_capture_from_source(&mut content, content_start, capture_end);
                    Self::push_capture_char(&mut content, '`');
                    self.advance();
                    while let Some(c) = self.peek_char() {
                        Self::push_capture_char(&mut content, c);
                        self.advance();
                        if c == '`' {
                            break;
                        }
                        if c == '\\'
                            && let Some(next) = self.peek_char()
                        {
                            Self::push_capture_char(&mut content, next);
                            self.advance();
                        }
                    }
                    content_end = self.current_position();
                }
                _ => {
                    // Ordinary character.
                    Self::push_capture_char(&mut content, ch);
                    self.advance();
                    content_end = self.current_position();
                }
            }
        }

        if !closed {
            return Err(LexerErrorKind::DoubleQuote);
        }

        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
        let content_span = Some(Span::from_positions(content_start, content_end));

        if borrowable {
            Ok(LexedWordSegment::borrowed_with_spans(
                if dollar {
                    LexedWordSegmentKind::DollarDoubleQuoted
                } else {
                    LexedWordSegmentKind::DoubleQuoted
                },
                &self.input[content_start.offset..content_end.offset],
                content_span,
                wrapper_span,
            ))
        } else {
            Ok(LexedWordSegment::owned_with_spans(
                if dollar {
                    LexedWordSegmentKind::DollarDoubleQuoted
                } else {
                    LexedWordSegmentKind::DoubleQuoted
                },
                content.unwrap_or_default(),
                content_span,
                wrapper_span,
            ))
        }
    }
2487
    /// Captures a `$(( ... ))` arithmetic expansion verbatim into `content`.
    ///
    /// Both opening parentheses must be pending (debug-asserted). The scan
    /// copies characters until the matching `))`, balancing nested
    /// parentheses; quoted regions and `\x` escape pairs are copied wholesale
    /// so that parentheses inside them cannot affect the balance.
    ///
    /// Returns `true` when the expansion closed, `false` on EOF.
    fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
        debug_assert_eq!(self.peek_char(), Some('('));
        debug_assert_eq!(self.second_char(), Some('('));

        Self::push_capture_char(content, '(');
        self.advance();
        Self::push_capture_char(content, '(');
        self.advance();

        // Both opening parens count toward the balance, so the closing `))`
        // brings this back to zero.
        let mut depth = 2;
        while let Some(c) = self.peek_char() {
            match c {
                // Copy an escape pair without interpreting the second char.
                '\\' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        Self::push_capture_char(content, next);
                        self.advance();
                    }
                }
                // Single quotes: verbatim copy up to the closing quote
                // (no escapes recognized inside).
                '\'' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if quoted == '\'' {
                            break;
                        }
                    }
                }
                // Double quotes: escape-aware verbatim copy so an escaped `"`
                // does not terminate the quoted region.
                '"' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '"' => break,
                            _ => {}
                        }
                    }
                }
                // Backticks: same escape-aware verbatim copy as double quotes.
                '`' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '`' => break,
                            _ => {}
                        }
                    }
                }
                '(' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    depth += 1;
                }
                ')' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    depth -= 1;
                    if depth == 0 {
                        return true;
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        // EOF before the expansion closed.
        false
    }
2577
    /// Captures legacy `$[ ... ]` arithmetic verbatim into `content`.
    ///
    /// Called with the opening `[` already consumed (bracket depth starts at
    /// one — note there is no assertion, unlike the `$((` reader). Balances
    /// nested brackets, copies quoted/escaped regions wholesale, and descends
    /// into nested `$(( ))`, `$( )`, `${ }`, and `$[ ]` forms so their
    /// delimiters cannot disturb the bracket balance.
    ///
    /// Returns `true` when the closing `]` was found, `false` on EOF or when
    /// a nested expansion failed to close.
    fn read_legacy_arithmetic_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        let mut bracket_depth = 1;

        while let Some(c) = self.peek_char() {
            match c {
                // Copy an escape pair without interpreting the second char.
                '\\' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        Self::push_capture_char(content, next);
                        self.advance();
                    }
                }
                // Single quotes: verbatim copy up to the closing quote.
                '\'' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if quoted == '\'' {
                            break;
                        }
                    }
                }
                // Double quotes: escape-aware verbatim copy.
                '"' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '"' => break,
                            _ => {}
                        }
                    }
                }
                // Backticks: escape-aware verbatim copy.
                '`' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '`' => break,
                            _ => {}
                        }
                    }
                }
                '[' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    bracket_depth += 1;
                }
                ']' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    bracket_depth -= 1;
                    if bracket_depth == 0 {
                        return true;
                    }
                }
                // Nested `$`-introduced expansions are delegated so their
                // internal brackets/parens don't leak into our balance.
                '$' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            if !self.read_arithmetic_expansion_into(content) {
                                return false;
                            }
                        } else {
                            Self::push_capture_char(content, '(');
                            self.advance();
                            if !self.read_command_subst_into(content) {
                                return false;
                            }
                        }
                    } else if self.peek_char() == Some('{') {
                        Self::push_capture_char(content, '{');
                        self.advance();
                        if !self.read_param_expansion_into(content, segment_start) {
                            return false;
                        }
                    } else if self.peek_char() == Some('[') {
                        Self::push_capture_char(content, '[');
                        self.advance();
                        if !self.read_legacy_arithmetic_into(content, segment_start) {
                            return false;
                        }
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        // EOF before the closing `]`.
        false
    }
2693
    /// Captures a `$( ... )` command substitution verbatim into `content`.
    ///
    /// Entry point that starts the structure-aware scanner at substitution
    /// depth 0; see `read_command_subst_into_depth` for the scanning rules.
    /// Returns `true` when the closing `)` was found, `false` on EOF.
    fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
        self.read_command_subst_into_depth(content, 0)
    }
2700
2701 fn flush_command_subst_keyword(
2702 current_word: &mut String,
2703 pending_case_headers: &mut usize,
2704 case_clause_depths: &mut SmallVec<[usize; 4]>,
2705 depth: usize,
2706 word_started_at_command_start: &mut bool,
2707 ) {
2708 if current_word.is_empty() {
2709 *word_started_at_command_start = false;
2710 return;
2711 }
2712
2713 match current_word.as_str() {
2714 "case" if *word_started_at_command_start => *pending_case_headers += 1,
2715 "in" if *pending_case_headers > 0 => {
2716 *pending_case_headers -= 1;
2717 case_clause_depths.push(depth);
2718 }
2719 "esac" if *word_started_at_command_start => {
2720 case_clause_depths.pop();
2721 }
2722 _ => {}
2723 }
2724
2725 current_word.clear();
2726 *word_started_at_command_start = false;
2727 }
2728
2729 fn read_command_subst_heredoc_delimiter_into(
2730 &mut self,
2731 content: &mut Option<String>,
2732 ) -> Option<String> {
2733 while let Some(ch) = self.peek_char() {
2734 if !matches!(ch, ' ' | '\t') {
2735 break;
2736 }
2737 Self::push_capture_char(content, ch);
2738 self.advance();
2739 }
2740
2741 let mut cooked = String::new();
2742 let mut in_single = false;
2743 let mut in_double = false;
2744 let mut escaped = false;
2745 let mut saw_any = false;
2746
2747 while let Some(ch) = self.peek_char() {
2748 if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
2749 break;
2750 }
2751
2752 saw_any = true;
2753 Self::push_capture_char(content, ch);
2754 self.advance();
2755
2756 if escaped {
2757 cooked.push(ch);
2758 escaped = false;
2759 continue;
2760 }
2761
2762 match ch {
2763 '\\' if !in_single => escaped = true,
2764 '\'' if !in_double => in_single = !in_single,
2765 '"' if !in_single => in_double = !in_double,
2766 _ => cooked.push(ch),
2767 }
2768 }
2769
2770 saw_any.then_some(cooked)
2771 }
2772
2773 fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2774 Self::push_capture_char(content, '`');
2775 self.advance();
2776 while let Some(ch) = self.peek_char() {
2777 Self::push_capture_char(content, ch);
2778 self.advance();
2779 if ch == '\\' {
2780 if let Some(esc) = self.peek_char() {
2781 Self::push_capture_char(content, esc);
2782 self.advance();
2783 }
2784 continue;
2785 }
2786 if ch == '`' {
2787 break;
2788 }
2789 }
2790 }
2791
2792 fn read_command_subst_pending_heredoc_into(
2793 &mut self,
2794 content: &mut Option<String>,
2795 delimiter: &str,
2796 strip_tabs: bool,
2797 ) -> bool {
2798 loop {
2799 let mut line = String::new();
2800 let mut saw_newline = false;
2801
2802 while let Some(ch) = self.peek_char() {
2803 self.advance();
2804 if ch == '\n' {
2805 saw_newline = true;
2806 break;
2807 }
2808 line.push(ch);
2809 }
2810
2811 Self::push_capture_str(content, &line);
2812 if saw_newline {
2813 Self::push_capture_char(content, '\n');
2814 }
2815
2816 if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2817 return true;
2818 }
2819 }
2820 }
2821
    /// Captures a `$( ... )` command substitution verbatim into `content`,
    /// scanning until the `)` that actually closes the substitution.
    ///
    /// Raw paren counting is not enough: `)` may legitimately appear inside
    /// quotes, comments, heredoc bodies, and `case` patterns, so the scanner
    /// tracks just enough shell structure (quote state, heredoc delimiters,
    /// `case … in … esac` clause depths, redirection targets) to know which
    /// `)` characters do not count. `subst_depth` limits recursion through
    /// nested `$( ... )` forms.
    ///
    /// Returns `true` when the closing `)` was found, `false` on EOF.
    fn read_command_subst_into_depth(
        &mut self,
        content: &mut Option<String>,
        subst_depth: usize,
    ) -> bool {
        // At the nesting limit, degrade to bare parenthesis balancing with no
        // quote/comment awareness.
        // NOTE(review): in this fallback the scanned characters are consumed
        // but NOT pushed into `content` (only the final `)` is) — the capture
        // is truncated. Presumably acceptable at pathological depth; confirm.
        if subst_depth >= self.max_subst_depth {
            let mut depth = 1;
            while let Some(c) = self.peek_char() {
                self.advance();
                match c {
                    '(' => depth += 1,
                    ')' => {
                        depth -= 1;
                        if depth == 0 {
                            Self::push_capture_char(content, ')');
                            return true;
                        }
                    }
                    _ => {}
                }
            }
            return false;
        }

        let mut depth = 1;
        // Heredocs announced on the current line; their bodies are consumed
        // after the next newline.
        let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
        // `case` bookkeeping: a `)` that terminates a case pattern must not be
        // counted as a closing parenthesis.
        let mut pending_case_headers = 0usize;
        let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
        // Bare word currently being accumulated, used only to spot the
        // `case` / `in` / `esac` keywords and numeric redirection fds.
        let mut current_word = String::with_capacity(16);
        let mut at_command_start = true;
        let mut expecting_redirection_target = false;
        let mut current_word_started_at_command_start = false;
        while let Some(c) = self.peek_char() {
            match c {
                // Comment: copy through end of line, then satisfy any heredocs
                // whose bodies begin after that newline.
                '#' if !self.should_treat_hash_as_word_char() => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '#');
                    self.advance();
                    while let Some(comment_ch) = self.peek_char() {
                        Self::push_capture_char(content, comment_ch);
                        self.advance();
                        if comment_ch == '\n' {
                            for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                                if !self.read_command_subst_pending_heredoc_into(
                                    content, &delimiter, strip_tabs,
                                ) {
                                    return false;
                                }
                            }
                            at_command_start = true;
                            expecting_redirection_target = false;
                            break;
                        }
                    }
                }
                '(' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    depth += 1;
                    Self::push_capture_char(content, c);
                    self.advance();
                    at_command_start = true;
                    expecting_redirection_target = false;
                }
                ')' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    // A `)` at the depth of an open `case … in` clause ends a
                    // case pattern, not a parenthesis group.
                    if case_clause_depths
                        .last()
                        .is_some_and(|case_depth| *case_depth == depth)
                    {
                        Self::push_capture_char(content, ')');
                        self.advance();
                        at_command_start = true;
                        expecting_redirection_target = false;
                        continue;
                    }
                    depth -= 1;
                    self.advance();
                    if depth == 0 {
                        Self::push_capture_char(content, ')');
                        return true;
                    }
                    Self::push_capture_char(content, c);
                    at_command_start = false;
                    expecting_redirection_target = false;
                }
                // Double-quoted string: copy verbatim; nested `$( ... )` and
                // `$(( ... ))` inside the quotes are still scanned so their
                // parens can't unbalance us.
                '"' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '"');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        match qc {
                            '"' => {
                                Self::push_capture_char(content, '"');
                                self.advance();
                                break;
                            }
                            '\\' => {
                                Self::push_capture_char(content, '\\');
                                self.advance();
                                if let Some(esc) = self.peek_char() {
                                    Self::push_capture_char(content, esc);
                                    self.advance();
                                }
                            }
                            '$' => {
                                Self::push_capture_char(content, '$');
                                self.advance();
                                if self.peek_char() == Some('(') {
                                    if self.second_char() == Some('(') {
                                        if !self.read_arithmetic_expansion_into(content) {
                                            return false;
                                        }
                                    } else {
                                        Self::push_capture_char(content, '(');
                                        self.advance();
                                        if !self
                                            .read_command_subst_into_depth(content, subst_depth + 1)
                                        {
                                            return false;
                                        }
                                    }
                                }
                            }
                            _ => {
                                Self::push_capture_char(content, qc);
                                self.advance();
                            }
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Single-quoted string: verbatim up to the closing quote.
                '\'' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        Self::push_capture_char(content, qc);
                        self.advance();
                        if qc == '\'' {
                            break;
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Backtick substitution: delegated verbatim copy.
                '`' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    self.read_command_subst_backtick_segment_into(content);
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // ANSI-C quoting `$'...'`: verbatim, honoring `\x` pairs.
                '$' if self.second_char() == Some('\'') => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '$');
                    self.advance();
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        Self::push_capture_char(content, qc);
                        self.advance();
                        if qc == '\\' {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            continue;
                        }
                        if qc == '\'' {
                            break;
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Escape pair: copy both characters untouched.
                '\\' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '\\');
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        Self::push_capture_char(content, esc);
                        self.advance();
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // `<<` heredoc operator (or `<<<` herestring, `<<-` strip-tab
                // variant). A purely numeric word before it is an fd prefix,
                // not a command, so command-start state is restored.
                '<' if self.second_char() == Some('<') => {
                    let word_was_redirection_fd = current_word_started_at_command_start
                        && !current_word.is_empty()
                        && current_word.chars().all(|current| current.is_ascii_digit());
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if word_was_redirection_fd {
                        at_command_start = true;
                    }

                    Self::push_capture_char(content, '<');
                    self.advance();
                    Self::push_capture_char(content, '<');
                    self.advance();

                    if self.peek_char() == Some('<') {
                        Self::push_capture_char(content, '<');
                        self.advance();
                        expecting_redirection_target = true;
                        continue;
                    }

                    let strip_tabs = if self.peek_char() == Some('-') {
                        Self::push_capture_char(content, '-');
                        self.advance();
                        true
                    } else {
                        false
                    };

                    if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
                    {
                        pending_heredocs.push((delimiter, strip_tabs));
                        expecting_redirection_target = false;
                    } else {
                        // No delimiter yet; the next word is the target.
                        expecting_redirection_target = true;
                    }
                }
                // Plain redirection operators.
                '>' | '<' => {
                    let word_was_redirection_fd = current_word_started_at_command_start
                        && !current_word.is_empty()
                        && current_word.chars().all(|current| current.is_ascii_digit());
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if word_was_redirection_fd {
                        at_command_start = true;
                    }
                    Self::push_capture_char(content, c);
                    self.advance();
                    expecting_redirection_target = true;
                }
                // Newline: consume any heredoc bodies announced on this line.
                '\n' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    Self::push_capture_char(content, '\n');
                    self.advance();
                    for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                        if !self.read_command_subst_pending_heredoc_into(
                            content, &delimiter, strip_tabs,
                        ) {
                            return false;
                        }
                    }
                    at_command_start = true;
                    expecting_redirection_target = false;
                }
                _ => {
                    if c.is_ascii_alphanumeric() || c == '_' {
                        // Accumulate a keyword candidate; remember whether it
                        // began in command position.
                        if current_word.is_empty()
                            && !expecting_redirection_target
                            && at_command_start
                        {
                            current_word_started_at_command_start = true;
                            at_command_start = false;
                        }
                        current_word.push(c);
                    } else {
                        let had_word = !current_word.is_empty();
                        Self::flush_command_subst_keyword(
                            &mut current_word,
                            &mut pending_case_headers,
                            &mut case_clause_depths,
                            depth,
                            &mut current_word_started_at_command_start,
                        );
                        if had_word && expecting_redirection_target {
                            expecting_redirection_target = false;
                        }
                        match c {
                            ' ' | '\t' => {}
                            // Command separators reset to command position.
                            ';' | '|' | '&' => {
                                at_command_start = true;
                                expecting_redirection_target = false;
                            }
                            _ => {
                                if !expecting_redirection_target {
                                    at_command_start = false;
                                }
                            }
                        }
                    }
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        // EOF before the substitution closed.
        false
    }
3222
    /// Captures a `${ ... }` parameter expansion into `content`, starting
    /// just after the opening `${`.
    ///
    /// Returns `true` while the captured text still matches the raw source
    /// byte-for-byte (so the caller may borrow the source slice); `false`
    /// once any cooked rewriting occurred (e.g. `\$`, `\"`, `\\`, or
    /// `` \` `` being shortened).
    ///
    /// Balances nested `${ ... }`, tracks single/double quote state, and
    /// counts unmatched literal `{`s so a `}` belonging to a literal brace
    /// does not close the expansion prematurely.
    fn read_param_expansion_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        let mut borrowable = true;
        let mut depth = 1;
        // Unmatched literal `{`s seen inside the expansion.
        let mut literal_brace_depth = 0usize;
        let mut in_single = false;
        let mut in_double = false;
        // Depth at which the current double-quoted region was opened; only
        // expansions opened deeper than this may be closed inside the quotes.
        let mut double_quote_depth = 0usize;
        while let Some(c) = self.peek_char() {
            if in_single {
                match c {
                    '\\' => {
                        let escape_start = self.current_position();
                        if self.second_char() == Some('"') {
                            // `\"` inside single quotes is cooked to a bare
                            // `"`, so the capture diverges from the source and
                            // can no longer be borrowed.
                            self.advance();
                            borrowable = false;
                            self.ensure_capture_from_source(content, segment_start, escape_start);
                            Self::push_capture_char(content, '"');
                            self.advance();
                        } else {
                            Self::push_capture_char(content, '\\');
                            self.advance();
                        }
                    }
                    '\'' => {
                        Self::push_capture_char(content, c);
                        self.advance();
                        in_single = false;
                    }
                    _ => {
                        Self::push_capture_char(content, c);
                        self.advance();
                    }
                }
                continue;
            }

            match c {
                // Candidate closer. Inside double quotes only expansions that
                // opened after the quote may close here.
                '}' if !in_single && (!in_double || depth > double_quote_depth) => {
                    self.advance();
                    Self::push_capture_char(content, '}');
                    // At top level with pending literal `{`s: if another
                    // top-level closer exists later, treat this `}` as closing
                    // a literal brace rather than the expansion itself.
                    if depth == 1
                        && literal_brace_depth > 0
                        && self.has_later_top_level_param_expansion_closer(depth)
                    {
                        literal_brace_depth -= 1;
                        continue;
                    }
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                '{' if !in_single && !in_double => {
                    literal_brace_depth += 1;
                    Self::push_capture_char(content, '{');
                    self.advance();
                }
                '"' => {
                    Self::push_capture_char(content, '"');
                    self.advance();
                    in_double = !in_double;
                    double_quote_depth = if in_double { depth } else { 0 };
                }
                '\'' => {
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    // Single quotes are only significant outside double quotes.
                    if !in_double {
                        in_single = true;
                    }
                }
                '\\' => {
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        match esc {
                            '$' => {
                                // Escaped `$` is cooked to a NUL sentinel plus
                                // `$` — presumably decoded by a later stage;
                                // TODO(review): confirm the consumer.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, '\x00');
                                Self::push_capture_char(content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                // Backslash dropped; capture diverges from the
                                // raw source.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            '}' => {
                                // `\}` stays escaped in the capture but
                                // cancels one pending literal brace.
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, '}');
                                self.advance();
                                literal_brace_depth = literal_brace_depth.saturating_sub(1);
                            }
                            _ => {
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                        }
                    } else {
                        // Trailing backslash at EOF is kept as-is.
                        Self::push_capture_char(content, '\\');
                    }
                }
                // Nested expansions are delegated so their delimiters cannot
                // disturb the brace balance.
                '$' => {
                    Self::push_capture_char(content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            if !self.read_arithmetic_expansion_into(content) {
                                borrowable = false;
                            }
                        } else {
                            Self::push_capture_char(content, '(');
                            self.advance();
                            // NOTE(review): the success flag is discarded —
                            // an unterminated nested command substitution is
                            // tolerated here; confirm this is intentional.
                            self.read_command_subst_into(content);
                        }
                    } else if self.peek_char() == Some('{') {
                        Self::push_capture_char(content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(content, segment_start);
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }
        borrowable
    }
3373
    /// Looks ahead (without consuming input) for a `}` that would close a
    /// `${ ... }` currently open at `target_depth`.
    ///
    /// Used to disambiguate a `}` seen while unmatched literal `{`s are
    /// pending: if a later closer at `target_depth` exists, the current `}`
    /// can be attributed to a literal brace instead. The scan tracks single
    /// and double quotes, backslash escapes, and nested `${` openers, and
    /// gives up at an unquoted newline while still at `target_depth`.
    fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
        let mut chars = self.lookahead_chars().peekable();
        let mut depth = target_depth;
        let mut in_single = false;
        let mut in_double = false;
        let mut double_quote_depth = 0usize;

        while let Some(ch) = chars.next() {
            if in_single {
                match ch {
                    '\'' => in_single = false,
                    // `\"` inside single quotes is skipped as a pair,
                    // mirroring how `read_param_expansion_into` cooks it.
                    '\\' if chars.peek() == Some(&'"') => {
                        chars.next();
                    }
                    '\\' => {}
                    _ => {}
                }
                continue;
            }

            if in_double {
                match ch {
                    '"' => {
                        in_double = false;
                        double_quote_depth = 0;
                    }
                    '\\' => {
                        chars.next();
                    }
                    '$' if chars.peek() == Some(&'{') => {
                        chars.next();
                        depth += 1;
                    }
                    // Only expansions opened after the quote may close here.
                    '}' if depth > double_quote_depth => {
                        depth -= 1;
                    }
                    _ => {}
                }
                continue;
            }

            match ch {
                '\n' if depth == target_depth => return false,
                '\'' => in_single = true,
                '"' => {
                    in_double = true;
                    double_quote_depth = depth;
                }
                '\\' => {
                    chars.next();
                }
                '$' if chars.peek() == Some(&'{') => {
                    chars.next();
                    depth += 1;
                }
                '}' => {
                    if depth == target_depth {
                        return true;
                    }
                    depth -= 1;
                }
                _ => {}
            }
        }

        false
    }
3441
3442 fn looks_like_brace_expansion(&self) -> bool {
3448 const MAX_LOOKAHEAD: usize = 10_000;
3449
3450 let mut chars = self.lookahead_chars();
3451
3452 if chars.next() != Some('{') {
3454 return false;
3455 }
3456
3457 let mut depth = 1;
3458 let mut has_comma = false;
3459 let mut has_dot_dot = false;
3460 let mut prev_char = None;
3461 let mut scanned = 0usize;
3462
3463 for ch in chars {
3464 scanned += 1;
3465 if scanned > MAX_LOOKAHEAD {
3466 return false;
3467 }
3468 match ch {
3469 '{' => depth += 1,
3470 '}' => {
3471 depth -= 1;
3472 if depth == 0 {
3473 return has_comma || has_dot_dot;
3475 }
3476 }
3477 ',' if depth == 1 => has_comma = true,
3478 '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3479 ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3481 _ => {}
3482 }
3483 prev_char = Some(ch);
3484 }
3485
3486 false
3487 }
3488
3489 fn looks_like_mid_word_brace_segment(&self) -> bool {
3492 const MAX_LOOKAHEAD: usize = 10_000;
3493
3494 let mut chars = self.lookahead_chars();
3495 if chars.next() != Some('{') {
3496 return false;
3497 }
3498
3499 let mut brace_depth = 1;
3500 let mut paren_depth = 0usize;
3501 let mut escaped = false;
3502 let mut in_single = false;
3503 let mut in_double = false;
3504 let mut in_backtick = false;
3505 let mut prev_char = None;
3506 let mut scanned = 0usize;
3507
3508 for ch in chars {
3509 scanned += 1;
3510 if scanned > MAX_LOOKAHEAD {
3511 return false;
3512 }
3513
3514 if !in_single
3515 && !in_double
3516 && !in_backtick
3517 && !escaped
3518 && brace_depth == 1
3519 && paren_depth == 0
3520 && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
3521 {
3522 return false;
3523 }
3524
3525 if escaped {
3526 escaped = false;
3527 prev_char = Some(ch);
3528 continue;
3529 }
3530
3531 match ch {
3532 '\\' => escaped = true,
3533 '\'' if !in_double && !in_backtick => in_single = !in_single,
3534 '"' if !in_single && !in_backtick => in_double = !in_double,
3535 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3536 '(' if !in_single
3537 && !in_double
3538 && !in_backtick
3539 && (paren_depth > 0 || prev_char == Some('$')) =>
3540 {
3541 paren_depth += 1
3542 }
3543 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3544 paren_depth -= 1
3545 }
3546 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3547 '}' => {
3548 brace_depth -= 1;
3549 if brace_depth == 0 {
3550 return true;
3551 }
3552 }
3553 _ => {}
3554 }
3555
3556 prev_char = Some(ch);
3557 }
3558
3559 false
3560 }
3561
3562 fn is_brace_group_start(&self) -> bool {
3564 let mut chars = self.lookahead_chars();
3565 if chars.next() != Some('{') {
3567 return false;
3568 }
3569 matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3571 }
3572
3573 fn escaped_brace_sequence_looks_like_brace_expansion(&self) -> bool {
3576 const MAX_LOOKAHEAD: usize = 10_000;
3577
3578 let mut chars = self.lookahead_chars();
3579 let mut depth = 1;
3580 let mut has_comma = false;
3581 let mut has_dot_dot = false;
3582 let mut prev_char = None;
3583 let mut scanned = 0usize;
3584
3585 for ch in chars.by_ref() {
3586 scanned += 1;
3587 if scanned > MAX_LOOKAHEAD {
3588 return false;
3589 }
3590 match ch {
3591 '{' => depth += 1,
3592 '}' => {
3593 depth -= 1;
3594 if depth == 0 {
3595 return has_comma || has_dot_dot;
3596 }
3597 }
3598 ',' if depth == 1 => has_comma = true,
3599 '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3600 ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3601 _ => {}
3602 }
3603 prev_char = Some(ch);
3604 }
3605
3606 false
3607 }
3608
    /// Reads a `{ ... }` run that is *not* a brace expansion as one owned
    /// literal `Word` token: the balanced brace group plus any word
    /// characters glued after the closing brace. Returns `None` when the
    /// cursor is not on `{`.
    fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
        let mut word = String::with_capacity(16);

        if let Some('{') = self.peek_char() {
            word.push('{');
            self.advance();
        } else {
            return None;
        }

        // Copy the balanced brace group, including both braces.
        let mut depth = 1;
        while let Some(ch) = self.peek_char() {
            word.push(ch);
            self.advance();
            match ch {
                '{' => depth += 1,
                '}' => {
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                _ => {}
            }
        }

        // Append word characters glued to the closing brace. With no
        // reinjected characters pending we can bulk-consume straight from the
        // cursor; otherwise fall back to char-by-char so the reinject buffer
        // is honored.
        while let Some(ch) = self.peek_char() {
            if Self::is_word_char(ch) {
                if self.reinject_buf.is_empty() {
                    let chunk = self.cursor.eat_while(Self::is_word_char);
                    word.push_str(chunk);
                    self.advance_scanned_source_bytes(chunk.len());
                } else {
                    word.push(ch);
                    self.advance();
                }
            } else {
                break;
            }
        }

        Some(LexedToken::owned_word(TokenKind::Word, word))
    }
3656
3657 fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3659 let mut word = String::with_capacity(16);
3660
3661 if let Some('{') = self.peek_char() {
3663 word.push('{');
3664 self.advance();
3665 } else {
3666 return None;
3667 }
3668
3669 let mut depth = 1;
3671 while let Some(ch) = self.peek_char() {
3672 word.push(ch);
3673 self.advance();
3674 match ch {
3675 '{' => depth += 1,
3676 '}' => {
3677 depth -= 1;
3678 if depth == 0 {
3679 break;
3680 }
3681 }
3682 _ => {}
3683 }
3684 }
3685
3686 while let Some(ch) = self.peek_char() {
3688 if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3689 if ch == '{' {
3690 word.push(ch);
3692 self.advance();
3693 let mut inner_depth = 1;
3694 while let Some(c) = self.peek_char() {
3695 word.push(c);
3696 self.advance();
3697 match c {
3698 '{' => inner_depth += 1,
3699 '}' => {
3700 inner_depth -= 1;
3701 if inner_depth == 0 {
3702 break;
3703 }
3704 }
3705 _ => {}
3706 }
3707 }
3708 } else {
3709 word.push(ch);
3710 self.advance();
3711 }
3712 } else {
3713 break;
3714 }
3715 }
3716
3717 Some(LexedToken::owned_word(TokenKind::Word, word))
3718 }
3719
3720 fn looks_like_assoc_assign(&self) -> bool {
3724 let mut chars = self.lookahead_chars();
3725 if chars.next() != Some('(') {
3727 return false;
3728 }
3729 for ch in chars {
3731 match ch {
3732 ' ' | '\t' => continue,
3733 '[' => return true,
3734 _ => return false,
3735 }
3736 }
3737 false
3738 }
3739
3740 fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3741 text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3742 }
3743
3744 fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3745 word.segments().any(|segment| {
3746 matches!(
3747 segment.kind(),
3748 LexedWordSegmentKind::SingleQuoted
3749 | LexedWordSegmentKind::DollarSingleQuoted
3750 | LexedWordSegmentKind::DoubleQuoted
3751 | LexedWordSegmentKind::DollarDoubleQuoted
3752 )
3753 }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3754 }
3755
3756 fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3757 text.contains(['*', '?'])
3758 || text.ends_with('}') && text.contains("${")
3759 || text.ends_with(']')
3760 && text
3761 .rfind('[')
3762 .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3763 }
3764
3765 fn is_word_char(ch: char) -> bool {
3766 !matches!(
3767 ch,
3768 ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3769 )
3770 }
3771
3772 const fn is_ascii_word_byte(byte: u8) -> bool {
3773 !matches!(
3774 byte,
3775 b' ' | b'\t'
3776 | b'\n'
3777 | b';'
3778 | b'|'
3779 | b'&'
3780 | b'>'
3781 | b'<'
3782 | b'('
3783 | b')'
3784 | b'{'
3785 | b'}'
3786 | b'\''
3787 | b'"'
3788 )
3789 }
3790
3791 const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3792 Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3793 }
3794
3795 fn is_plain_word_char(ch: char) -> bool {
3796 Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3797 }
3798
3799 pub fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
3801 let mut content = String::with_capacity(64);
3802 let mut current_line = String::with_capacity(64);
3803
3804 let mut rest_of_line = String::with_capacity(32);
3811 let rest_of_line_start = self.current_position();
3812 let mut in_double_quote = false;
3813 let mut in_single_quote = false;
3814 let mut in_comment = false;
3815 let mut saw_non_whitespace_tail = false;
3816 let mut consecutive_backslashes = 0usize;
3817 let mut previous_tail_char = None;
3818 while let Some(ch) = self.peek_char() {
3819 self.advance();
3820 if in_comment {
3821 if ch == '\n' {
3822 break;
3823 }
3824 rest_of_line.push(ch);
3825 previous_tail_char = Some(ch);
3826 continue;
3827 }
3828 if ch == '#'
3829 && !in_single_quote
3830 && !in_double_quote
3831 && self.comments_enabled()
3832 && heredoc_tail_hash_starts_comment(previous_tail_char)
3833 {
3834 in_comment = true;
3835 rest_of_line.push(ch);
3836 previous_tail_char = Some(ch);
3837 consecutive_backslashes = 0;
3838 continue;
3839 }
3840 let backslash_continues_line = ch == '\\'
3841 && !in_single_quote
3842 && self.peek_char() == Some('\n')
3843 && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
3844 && consecutive_backslashes.is_multiple_of(2);
3845 if backslash_continues_line {
3846 rest_of_line.push(ch);
3847 rest_of_line.push('\n');
3848 self.advance();
3849 consecutive_backslashes = 0;
3850 continue;
3851 }
3852 if ch == '\n' && !in_double_quote && !in_single_quote {
3853 break;
3854 }
3855 if ch == '"' && !in_single_quote {
3856 in_double_quote = !in_double_quote;
3857 } else if ch == '\'' && !in_double_quote {
3858 in_single_quote = !in_single_quote;
3859 } else if ch == '\\' && in_double_quote {
3860 rest_of_line.push(ch);
3862 if let Some(next) = self.peek_char() {
3863 rest_of_line.push(next);
3864 self.advance();
3865 }
3866 continue;
3867 }
3868 rest_of_line.push(ch);
3869 if !ch.is_whitespace() {
3870 saw_non_whitespace_tail = true;
3871 }
3872 if ch == '\\' && !in_single_quote {
3873 consecutive_backslashes += 1;
3874 } else {
3875 consecutive_backslashes = 0;
3876 }
3877 previous_tail_char = Some(ch);
3878 }
3879
3880 self.sync_offset_to_cursor();
3884 let content_start = self.current_position();
3885 let mut current_line_start = content_start;
3886 let content_end;
3887
3888 loop {
3890 if self.reinject_buf.is_empty() {
3891 self.sync_offset_to_cursor();
3897 let rest = self.cursor.rest();
3898 if rest.is_empty() {
3899 content_end = self.current_position();
3900 break;
3901 }
3902
3903 let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
3904 let line = &rest[..line_len];
3905 let has_newline = line_len < rest.len();
3906
3907 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
3908 content_end = current_line_start;
3909 self.consume_source_bytes(line_len);
3910 if has_newline {
3911 self.consume_ascii_chars(1);
3912 }
3913 break;
3914 }
3915
3916 content.push_str(line);
3917 self.consume_source_bytes(line_len);
3918
3919 if has_newline {
3920 self.consume_ascii_chars(1);
3921 content.push('\n');
3922 current_line_start = self.current_position();
3923 continue;
3924 }
3925
3926 content_end = self.current_position();
3927 break;
3928 }
3929
3930 match self.peek_char() {
3931 Some('\n') => {
3932 self.advance();
3933 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
3935 content_end = current_line_start;
3936 break;
3937 }
3938 content.push_str(¤t_line);
3939 content.push('\n');
3940 current_line.clear();
3941 current_line_start = self.current_position();
3942 }
3943 Some(ch) => {
3944 current_line.push(ch);
3945 self.advance();
3946 }
3947 None => {
3948 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
3950 content_end = current_line_start;
3951 break;
3952 }
3953 if !current_line.is_empty() {
3954 content.push_str(¤t_line);
3955 }
3956 content_end = self.current_position();
3957 break;
3958 }
3959 }
3960 }
3961
3962 let post_heredoc_offset = self.offset;
3967 self.offset = rest_of_line_start.offset;
3968 for ch in rest_of_line.chars() {
3969 self.reinject_buf.push_back(ch);
3970 }
3971 self.reinject_buf.push_back('\n');
3972 self.reinject_resume_offset = Some(post_heredoc_offset);
3973
3974 HeredocRead {
3975 content,
3976 content_span: Span::from_positions(content_start, content_end),
3977 }
3978 }
3979
3980 fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
3981 let mut chars = self.cursor.rest().chars();
3982 if chars.next() != Some('\n') {
3983 return false;
3984 }
3985
3986 for ch in chars {
3987 if matches!(ch, ' ' | '\t') {
3988 continue;
3989 }
3990 if ch == '\n' {
3991 return false;
3992 }
3993 return matches!(ch, '|' | '&' | ';' | '<' | '>')
3994 || (ch == '#' && self.comments_enabled());
3995 }
3996
3997 false
3998 }
3999}
4000
/// Checks whether a heredoc body line terminates the heredoc: after optional
/// leading tabs (only honored with `<<-`/`strip_tabs`), the line must be the
/// delimiter itself, optionally followed by blanks.
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    };

    candidate
        .strip_prefix(delimiter)
        .is_some_and(|tail| tail.chars().all(|ch| ch == ' ' || ch == '\t'))
}
4018
/// Whether a `#` in the heredoc introducer tail begins a comment: true at the
/// very start of the tail, or directly after whitespace or a command
/// separator / redirection character.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(prev) => {
            prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')')
        }
    }
}
4024
/// Decodes the character beginning at byte offset `index`, returning it
/// together with the offset just past it. Yields `None` when `index` is out
/// of range or does not fall on a character boundary.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    input
        .get(index..)
        .and_then(|rest| rest.chars().next())
        .map(|ch| (ch, index + ch.len_utf8()))
}
4029
/// Returns `true` when `prefix` opens more unquoted, unescaped `((` pairs
/// than it closes with `))` — i.e. the end of `prefix` is inside an
/// arithmetic/double-paren construct.
fn line_has_unclosed_double_paren(prefix: &str) -> bool {
    let mut chars = prefix.chars().peekable();
    let mut open = 0usize;
    let mut single = false;
    let mut double = false;
    let mut backtick = false;
    let mut pending_escape = false;

    while let Some(ch) = chars.next() {
        let escaped = pending_escape;
        if ch == '\\' && !single {
            // Adjacent backslashes cancel pairwise.
            pending_escape = !pending_escape;
            continue;
        }
        pending_escape = false;

        let unquoted = !single && !double && !backtick && !escaped;
        match ch {
            '\'' if !double && !backtick && !escaped => single = !single,
            '"' if !single && !backtick && !escaped => double = !double,
            '`' if !single && !double && !escaped => backtick = !backtick,
            '(' if unquoted && chars.peek() == Some(&'(') => {
                open += 1;
                // Consume the second `(` so `(((` is not double-counted.
                chars.next();
            }
            ')' if unquoted && chars.peek() == Some(&')') => {
                open = open.saturating_sub(1);
                chars.next();
            }
            _ => {}
        }
    }

    open > 0
}
4079
4080fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4081 let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4082 let prefix = &input[line_start..index];
4083 line_has_unclosed_double_paren(prefix)
4084}
4085
4086fn hash_starts_comment(input: &str, index: usize) -> bool {
4087 if inside_unclosed_double_paren_on_line(input, index) {
4088 return false;
4089 }
4090
4091 let next = &input[index + '#'.len_utf8()..];
4092 input[..index]
4093 .chars()
4094 .next_back()
4095 .is_none_or(|prev| match prev {
4096 '(' => {
4097 let whitespace_index = next.find(char::is_whitespace);
4098 let close_index = next.find(')');
4099
4100 match (whitespace_index, close_index) {
4101 (Some(whitespace), Some(close)) => whitespace < close,
4102 (Some(_), None) | (None, None) => true,
4103 (None, Some(_)) => false,
4104 }
4105 }
4106 _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4107 })
4108}
4109
/// Whether `ch` terminates a heredoc delimiter word. Nothing terminates the
/// word while inside quotes or right after a backslash escape.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }

    ch.is_whitespace() || matches!(ch, '|' | '&' | ';' | '<' | '>' | '(' | ')')
}
4121
4122fn scan_double_quoted_command_substitution_segment(
4123 input: &str,
4124 mut index: usize,
4125 subst_depth: usize,
4126) -> Option<usize> {
4127 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4128 match ch {
4129 '"' => return Some(next_index),
4130 '\\' => {
4131 index = next_index;
4132 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4133 index = escaped_next;
4134 }
4135 }
4136 '$' if input[next_index..].starts_with('{') => {
4137 let consumed = scan_command_subst_parameter_expansion_len(
4138 &input[next_index + '{'.len_utf8()..],
4139 subst_depth,
4140 )?;
4141 index = next_index + '{'.len_utf8() + consumed;
4142 }
4143 '$' if input[next_index..].starts_with('(')
4144 && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4145 {
4146 let consumed = scan_command_substitution_body_len_inner(
4147 &input[next_index + '('.len_utf8()..],
4148 subst_depth + 1,
4149 )?;
4150 index = next_index + '('.len_utf8() + consumed;
4151 }
4152 _ => index = next_index,
4153 }
4154 }
4155
4156 None
4157}
4158
/// Scans a `${...}` parameter expansion starting just after the opening `{`,
/// returning the byte length up to and including the matching `}`, or `None`
/// if the expansion never closes.
///
/// Tracks single/double/backtick quoting, `$'...'` ANSI-C quoting, and
/// backslash escapes; nested `${...}`, `$(...)`, and `<(...)`/`>(...)` are
/// skipped as opaque units so their closing characters cannot terminate this
/// expansion.
fn scan_command_subst_parameter_expansion_len(input: &str, subst_depth: usize) -> Option<usize> {
    let mut index = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    // `$'...'` quoting; entered only when a quote immediately follows `$`
    // (tracked via `ansi_c_quote_pending`).
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        if ch == '\\' && !in_single {
            // Toggling handles runs of backslashes: pairs cancel out.
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        // Skip nested `${...}` / `$(...)` wholesale.
        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            if input[next_index..].starts_with('{')
                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )
            {
                index = next_index + '{'.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }

            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // Skip process substitutions `<(...)` / `>(...)` wholesale.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                // Unquoted, unescaped `}` closes the expansion.
                return Some(next_index);
            }
            _ => {}
        }

        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4258
4259fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
4260 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4261 if !matches!(ch, ' ' | '\t') {
4262 break;
4263 }
4264 index = next_index;
4265 }
4266
4267 let start = index;
4268 let mut cooked = String::new();
4269 let mut in_single = false;
4270 let mut in_double = false;
4271 let mut escaped = false;
4272
4273 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4274 if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
4275 break;
4276 }
4277
4278 index = next_index;
4279 if escaped {
4280 cooked.push(ch);
4281 escaped = false;
4282 continue;
4283 }
4284
4285 match ch {
4286 '\\' if !in_single => escaped = true,
4287 '\'' if !in_double => in_single = !in_single,
4288 '"' if !in_single => in_double = !in_double,
4289 _ => cooked.push(ch),
4290 }
4291 }
4292
4293 (index > start).then_some((index, cooked))
4294}
4295
4296fn skip_command_subst_pending_heredoc(
4297 input: &str,
4298 mut index: usize,
4299 delimiter: &str,
4300 strip_tabs: bool,
4301) -> usize {
4302 while index <= input.len() {
4303 let rest = &input[index..];
4304 let line_len = rest.find('\n').unwrap_or(rest.len());
4305 let line = &rest[..line_len];
4306 let has_newline = line_len < rest.len();
4307
4308 index += line_len;
4309 if has_newline {
4310 index += '\n'.len_utf8();
4311 }
4312
4313 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4314 return index;
4315 }
4316 }
4317
4318 index
4319}
4320
4321fn scan_command_subst_ansi_c_single_quoted_segment(
4322 input: &str,
4323 quote_index: usize,
4324) -> Option<usize> {
4325 let mut index = quote_index + '\''.len_utf8();
4326
4327 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4328 index = next_index;
4329 if ch == '\\' {
4330 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4331 index = escaped_next;
4332 }
4333 continue;
4334 }
4335
4336 if ch == '\'' {
4337 return Some(index);
4338 }
4339 }
4340
4341 None
4342}
4343
4344fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
4345 let mut index = start;
4346
4347 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4348 index = next_index;
4349 if ch == '\\' {
4350 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4351 index = escaped_next;
4352 }
4353 continue;
4354 }
4355
4356 if ch == '`' {
4357 return Some(index);
4358 }
4359 }
4360
4361 None
4362}
4363
4364fn flush_scanned_command_subst_keyword(
4365 current_word: &mut String,
4366 pending_case_headers: &mut usize,
4367 case_clause_depths: &mut SmallVec<[usize; 4]>,
4368 depth: usize,
4369 word_started_at_command_start: &mut bool,
4370) {
4371 if current_word.is_empty() {
4372 *word_started_at_command_start = false;
4373 return;
4374 }
4375
4376 match current_word.as_str() {
4377 "case" if *word_started_at_command_start => *pending_case_headers += 1,
4378 "in" if *pending_case_headers > 0 => {
4379 *pending_case_headers -= 1;
4380 case_clause_depths.push(depth);
4381 }
4382 "esac" if *word_started_at_command_start => {
4383 case_clause_depths.pop();
4384 }
4385 _ => {}
4386 }
4387
4388 current_word.clear();
4389 *word_started_at_command_start = false;
4390}
4391
/// Scans the body of a `$(...)` command substitution starting just after the
/// opening `(`, returning the byte length up to and including the matching
/// `)`, or `None` when the substitution never closes (or the recursion
/// budget `DEFAULT_MAX_SUBST_DEPTH` is exhausted).
///
/// Tracks just enough shell structure — quotes, comments, heredocs, nested
/// substitutions, redirections, and `case` pattern lists — so that a `)`
/// belonging to one of those constructs is not mistaken for the terminator.
fn scan_command_substitution_body_len_inner(input: &str, subst_depth: usize) -> Option<usize> {
    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
        return None;
    }

    let mut index = 0usize;
    // Paren nesting level; an unmatched `)` at depth 1 ends the substitution.
    let mut depth = 1;
    // Heredocs whose bodies begin after the next newline: (delimiter, strip_tabs).
    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
    // `case` keywords seen whose `in` has not yet been consumed.
    let mut pending_case_headers = 0usize;
    // Paren depths at which a `case ... in` clause list is open; a lone `)`
    // at such a depth closes a pattern, not a paren group.
    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
    let mut current_word = String::with_capacity(16);
    let mut at_command_start = true;
    let mut expecting_redirection_target = false;
    let mut current_word_started_at_command_start = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        match ch {
            // Comment: skip to end of line, then consume any heredoc bodies
            // queued on this line.
            '#' if hash_starts_comment(input, index) => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
                    index = comment_next;
                    if comment_ch == '\n' {
                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                            index = skip_command_subst_pending_heredoc(
                                input, index, &delimiter, strip_tabs,
                            );
                        }
                        at_command_start = true;
                        expecting_redirection_target = false;
                        break;
                    }
                }
            }
            '(' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                depth += 1;
                index = next_index;
                at_command_start = true;
                expecting_redirection_target = false;
            }
            ')' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                // A `)` that closes a `case` pattern does not change depth.
                if case_clause_depths
                    .last()
                    .is_some_and(|case_depth| *case_depth == depth)
                {
                    index = next_index;
                    at_command_start = true;
                    expecting_redirection_target = false;
                    continue;
                }
                depth -= 1;
                index = next_index;
                if depth == 0 {
                    return Some(index);
                }
                at_command_start = false;
                expecting_redirection_target = false;
            }
            '"' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_double_quoted_command_substitution_segment(
                    input,
                    next_index,
                    subst_depth,
                )?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '\'' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                // Single quotes admit no escapes: skip to the closing quote.
                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
                    index = quoted_next;
                    if quoted_ch == '\'' {
                        break;
                    }
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '`' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_backtick_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // `$'...'` ANSI-C quoting.
            '$' if input[next_index..].starts_with('\'') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '\\' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
                    index = escaped_next;
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '>' => {
                // A purely numeric word before `>` is a file descriptor, not
                // a command word (e.g. `2>`).
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                index = next_index;
                expecting_redirection_target = true;
            }
            // `<<` — heredoc, here-string (`<<<`), or `<<` inside `(( ... ))`.
            '<' if input[next_index..].starts_with('<') => {
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                // Inside arithmetic parens `<<` is a shift operator.
                if inside_unclosed_double_paren_on_line(input, index) {
                    index = next_index + '<'.len_utf8();
                    continue;
                }

                // `<<<` is a here-string, not a heredoc.
                if input[next_index + '<'.len_utf8()..].starts_with('<') {
                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
                    expecting_redirection_target = true;
                    continue;
                }

                let strip_tabs = input[next_index..].starts_with("<-");
                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
                if let Some((delimiter_index, delimiter)) =
                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
                {
                    pending_heredocs.push((delimiter, strip_tabs));
                    index = delimiter_index;
                    expecting_redirection_target = false;
                } else {
                    index = next_index;
                    expecting_redirection_target = true;
                }
            }
            '\n' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                index = next_index;
                // Heredoc bodies queued on the finished line start here.
                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                    index =
                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
                }
                at_command_start = true;
                expecting_redirection_target = false;
            }
            '$' if input[next_index..].starts_with('{') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )?;
                index = next_index + '{'.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Nested `$(` (but not arithmetic `$((`).
            '$' if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
            {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )?;
                index = next_index + '('.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            _ => {
                if ch.is_ascii_alphanumeric() || ch == '_' {
                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
                    {
                        current_word_started_at_command_start = true;
                        at_command_start = false;
                    }
                    current_word.push(ch);
                } else {
                    let had_word = !current_word.is_empty();
                    flush_scanned_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    match ch {
                        ' ' | '\t' => {}
                        ';' | '|' | '&' => {
                            at_command_start = true;
                            expecting_redirection_target = false;
                        }
                        _ => {
                            if !expecting_redirection_target {
                                at_command_start = false;
                            }
                        }
                    }
                }
                index = next_index;
            }
        }
    }

    None
}
4745
/// Returns the byte length of a `$(...)` command substitution body (starting
/// just after the opening `(`), including the closing `)`, or `None` when
/// the substitution never closes.
pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
    scan_command_substitution_body_len_inner(input, 0)
}
4749
4750#[cfg(test)]
4751mod tests {
4752 use super::*;
4753
    /// Extracts the comparable text of a token for assertions: word-like
    /// tokens yield their cooked word string, comments yield their text
    /// without the leading `#`, and error tokens yield their diagnostic
    /// message. Everything else yields `None`.
    fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
        match token.kind {
            kind if kind.is_word_like() => token.word_string(),
            TokenKind::Comment => token
                .span
                .slice(source)
                .strip_prefix('#')
                .map(str::to_string),
            TokenKind::Error => token
                .error_kind()
                .map(LexerErrorKind::message)
                .map(str::to_string),
            _ => None,
        }
    }

    /// Pulls the next token (comments skipped) and asserts its kind and text.
    fn assert_next_token(
        lexer: &mut Lexer<'_>,
        expected_kind: TokenKind,
        expected_text: Option<&str>,
    ) {
        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, expected_kind);
        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
    }

    /// Like `assert_next_token`, but comments are surfaced as tokens.
    fn assert_next_token_with_comments(
        lexer: &mut Lexer<'_>,
        expected_kind: TokenKind,
        expected_text: Option<&str>,
    ) {
        let token = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(token.kind, expected_kind);
        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
    }

    /// Lexes `input` to exhaustion and asserts that every non-newline token
    /// starts and ends on the same line.
    fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
        let mut lexer = Lexer::new(input);

        while let Some(token) = lexer.next_lexed_token() {
            if token.kind == TokenKind::Newline {
                continue;
            }

            assert_eq!(
                token.span.start.line, token.span.end.line,
                "token should stay on one line: {:?}",
                token
            );
        }
    }
4805
    // Bare words split on whitespace.
    #[test]
    fn test_simple_words() {
        let mut lexer = Lexer::new("echo hello world");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
        assert!(lexer.next_lexed_token().is_none());
    }

    // Single quotes produce a LiteralWord with the quotes stripped.
    #[test]
    fn test_single_quoted_string() {
        let mut lexer = Lexer::new("echo 'hello world'");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("hello world"));
        assert!(lexer.next_lexed_token().is_none());
    }

    // Double quotes produce a QuotedWord with the quotes stripped.
    #[test]
    fn test_double_quoted_string() {
        let mut lexer = Lexer::new("echo \"hello world\"");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("hello world"));
        assert!(lexer.next_lexed_token().is_none());
    }

    // A quoted expansion keeps a span back into the original source text.
    #[test]
    fn test_double_quoted_expansion_token_keeps_source_backing() {
        let source = r#""$bar""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(token.word_text(), Some("$bar"));

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
        assert_eq!(segment.span().unwrap().slice(source), "$bar");
    }

    // Inner quotes inside `$(...)` must survive inside a quoted word.
    #[test]
    fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
        let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(
            token.word_text(),
            Some(r#"$(echo "$line" | cut -d' ' -f2-)"#)
        );
    }

    // `${...}` inside `$(...)` inside quotes must also be kept verbatim.
    #[test]
    fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
        let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(
            token.word_text(),
            Some(r#"$(echo "${@}" | tr -d '[:space:]')"#)
        );
    }

    // Adjacent plain/double/single segments stay distinct but join into one
    // word.
    #[test]
    fn test_mixed_word_keeps_segment_kinds() {
        let source = r#"foo"bar"'baz'"#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();

        assert_eq!(
            segments,
            vec![
                (LexedWordSegmentKind::Plain, "foo".to_string()),
                (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
                (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
            ]
        );
        assert_eq!(word.joined_text(), "foobarbaz");
        assert_eq!(
            word.segments()
                .next()
                .and_then(LexedWordSegment::span)
                .unwrap()
                .slice(source),
            "foo"
        );
    }

    // `<<-` heredocs: tab-indented delimiter line still terminates the body.
    #[test]
    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("field, direction"));
        assert!(body.ends_with(')'));
    }

    // A `#` right after `;` starts a comment; the `)` inside it is literal.
    #[test]
    fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
        let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf '%s' y"));
        assert!(body.ends_with(')'));
    }

    // `(# ...` with whitespace before `)` is a comment inside a subshell.
    #[test]
    fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
        let source = " (# comment with )\nprintf %s 1,2\n) )\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // Heredoc delimiter followed directly by `|` (no space) still parses.
    #[test]
    fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
        let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("field, direction"));
        assert!(body.ends_with(')'));
    }

    // `)` inside `${x//foo/)}` must not close the substitution.
    #[test]
    fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
        let source = "printf %s ${x//foo/)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x//foo/)},1"));
        assert!(body.ends_with(')'));
    }

    // A comment after a case-pattern `)` hides the `esac )` text inside it.
    #[test]
    fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
        let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // zsh glob qualifier `(#b)` — the hash is literal, not a comment.
    #[test]
    fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
        let source = "[[ \"$buf\" == (#b)(*) ]]";
        let index = source.find('#').expect("expected hash");

        assert!(!hash_starts_comment(source, index));
    }

    // `(#comment with )` — whitespace precedes `)`, so it IS a comment.
    #[test]
    fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
        let source = "(#comment with )";
        let index = source.find('#').expect("expected hash");

        assert!(hash_starts_comment(source, index));
    }

    // Inside `(( ... ))` a `#` is arithmetic base syntax, not a comment.
    #[test]
    fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
        let source = "(( #c < 256 ))";
        let index = source.find('#').expect("expected hash");

        assert!(!hash_starts_comment(source, index));
    }

    // A quoted `((` does not open an arithmetic context.
    #[test]
    fn test_hash_starts_comment_respects_quoted_double_parens() {
        let source = "printf '((' # comment";
        let index = source.find('#').expect("expected hash");

        assert!(hash_starts_comment(source, index));
    }

    // Same property exercised through the full body scanner.
    #[test]
    fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
        let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }
5017
5018 #[test]
5019 fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
5020 let source = " (#comment with )\nprintf %s 1,2\n) )\"";
5021
5022 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5023 let body = &source[..consumed];
5024
5025 assert!(body.contains("printf %s 1,2"));
5026 assert!(body.ends_with(')'));
5027 }
5028
5029 #[test]
5030 fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
5031 let source = "((x<<2))\nprintf %s 1,2\n)\"";
5032
5033 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5034 let body = &source[..consumed];
5035
5036 assert!(body.contains("printf %s 1,2"));
5037 assert!(body.ends_with(')'));
5038 }
5039
5040 #[test]
5041 fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
5042 let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";
5043
5044 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5045 let body = &source[..consumed];
5046
5047 assert!(body.contains("printf %s 1,2"));
5048 assert!(body.ends_with("))"));
5049 }
5050
5051 #[test]
5052 fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
5053 let source = "printf %s 1,2; echo case in)\"";
5054
5055 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5056 let body = &source[..consumed];
5057
5058 assert!(body.contains("echo case in"));
5059 assert!(body.ends_with(')'));
5060 }
5061
5062 #[test]
5063 fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
5064 let source = "printf %s $'a\\'b'; printf %s 1,2)\"";
5065
5066 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5067 let body = &source[..consumed];
5068
5069 assert!(body.contains("$'a\\'b'"));
5070 assert!(body.contains("printf %s 1,2"));
5071 assert!(body.ends_with(')'));
5072 }
5073
5074 #[test]
5075 fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
5076 let source = "printf %s `echo foo)`; printf %s ok)\"";
5077
5078 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5079 let body = &source[..consumed];
5080
5081 assert!(body.contains("`echo foo)`"));
5082 assert!(body.contains("printf %s ok"));
5083 assert!(body.ends_with(')'));
5084 }
5085
5086 #[test]
5087 fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
5088 let source = "printf %s ${x/`echo }`/foo)},1)\"";
5089
5090 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5091 let body = &source[..consumed];
5092
5093 assert!(body.contains("${x/`echo }`/foo)},1"));
5094 assert!(body.ends_with(')'));
5095 }
5096
5097 #[test]
5098 fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
5099 {
5100 let source = "printf %s ${x/<(echo })/foo)},1)\"";
5101
5102 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5103 let body = &source[..consumed];
5104
5105 assert!(body.contains("${x/<(echo })/foo)},1"));
5106 assert!(body.ends_with(')'));
5107 }
5108
5109 #[test]
5110 fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
5111 let source = "printf %s 1,2; echo case in)";
5112
5113 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5114 let body = &source[..consumed];
5115
5116 assert_eq!(body, source);
5117 }
5118
5119 #[test]
5120 fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
5121 let source = "printf %s $'a\\'b'; printf %s 1,2)";
5122
5123 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5124 let body = &source[..consumed];
5125
5126 assert_eq!(body, source);
5127 }
5128
5129 #[test]
5130 fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
5131 let source = "printf %s `echo foo)`; printf %s ok)";
5132
5133 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5134 let body = &source[..consumed];
5135
5136 assert_eq!(body, source);
5137 }
5138
5139 #[test]
5140 fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
5141 let source = "echo \"$line\" | cut -d' ' -f2-)";
5142
5143 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5144 let body = &source[..consumed];
5145
5146 assert_eq!(body, source);
5147 }
5148
5149 #[test]
5150 fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
5151 let source = "echo \"${@}\" | tr -d '[:space:]')";
5152
5153 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5154 let body = &source[..consumed];
5155
5156 assert_eq!(body, source);
5157 }
5158
5159 #[test]
5160 fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
5161 let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";
5162
5163 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5164 let body = &source[..consumed];
5165
5166 assert_eq!(body, source);
5167 }
5168
5169 #[test]
5170 fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
5171 let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";
5172
5173 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5174 let body = &source[..consumed];
5175
5176 assert_eq!(body, source);
5177 }
5178
5179 #[test]
5180 fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
5181 let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
5182 let mut lexer = Lexer::new(source);
5183
5184 let first = lexer.next_lexed_token().expect("expected first token");
5185 assert!(first.kind.is_word_like(), "{:?}", first.kind);
5186 assert_eq!(first.word_string().as_deref(), Some("echo"));
5187
5188 let second = lexer.next_lexed_token().expect("expected second token");
5189 assert!(second.kind.is_word_like(), "{:?}", second.kind);
5190 assert_eq!(
5191 second.word_string().as_deref(),
5192 Some("$(echo \"$(( $(printf ')') + 1 ))\")")
5193 );
5194 }
5195
5196 #[test]
5197 fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
5198 let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
5199 let start = source.find("$(").expect("expected command substitution") + 2;
5200 let consumed =
5201 scan_command_substitution_body_len(&source[start..]).expect("expected match");
5202 assert_eq!(&source[start..start + consumed], "echo $var | tr A-Z a-z)");
5203 }
5204
5205 #[test]
5206 fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
5207 let source = "echo $(echo $(basename $filename .fuzz))";
5208 let start = source.find("$(").expect("expected command substitution") + 2;
5209 let consumed =
5210 scan_command_substitution_body_len(&source[start..]).expect("expected match");
5211 assert_eq!(
5212 &source[start..start + consumed],
5213 "echo $(basename $filename .fuzz))"
5214 );
5215 }
5216
5217 #[test]
5218 fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
5219 let source = "\n [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n )";
5220 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5221 assert_eq!(consumed, source.len());
5222 }
5223
5224 #[test]
5225 fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
5226 let source = "'foo'bar";
5227 let mut lexer = Lexer::new(source);
5228
5229 let token = lexer.next_lexed_token().unwrap();
5230 assert_eq!(token.kind, TokenKind::LiteralWord);
5231
5232 let word = token.word().unwrap();
5233 let segments: Vec<_> = word
5234 .segments()
5235 .map(|segment| (segment.kind(), segment.as_str().to_string()))
5236 .collect();
5237
5238 assert_eq!(
5239 segments,
5240 vec![
5241 (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
5242 (LexedWordSegmentKind::Plain, "bar".to_string()),
5243 ]
5244 );
5245 assert_eq!(word.joined_text(), "foobar");
5246 assert_eq!(
5247 word.segments()
5248 .nth(1)
5249 .and_then(LexedWordSegment::span)
5250 .unwrap()
5251 .slice(source),
5252 "bar"
5253 );
5254 }
5255
5256 #[test]
5257 fn test_unquoted_command_substitution_word_keeps_source_backing() {
5258 let source = "$(printf hi)";
5259 let mut lexer = Lexer::new(source);
5260
5261 let token = lexer.next_lexed_token().unwrap();
5262 assert_eq!(token.kind, TokenKind::Word);
5263
5264 let word = token.word().unwrap();
5265 let segment = word.single_segment().unwrap();
5266 assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5267 assert_eq!(segment.as_str(), source);
5268 assert_eq!(segment.span().unwrap().slice(source), source);
5269 }
5270
5271 #[test]
5272 fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
5273 let source = "${arr[$RANDOM % ${#arr[@]}]}";
5274 let mut lexer = Lexer::new(source);
5275
5276 let token = lexer.next_lexed_token().unwrap();
5277 assert_eq!(token.kind, TokenKind::Word);
5278
5279 let word = token.word().unwrap();
5280 let segment = word.single_segment().unwrap();
5281 assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5282 assert_eq!(segment.as_str(), source);
5283 assert_eq!(segment.span().unwrap().slice(source), source);
5284 }
5285
5286 #[test]
5287 fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
5288 let source = "\"foo\"$(printf hi)";
5289 let mut lexer = Lexer::new(source);
5290
5291 let token = lexer.next_lexed_token().unwrap();
5292 assert_eq!(token.kind, TokenKind::Word);
5293
5294 let word = token.word().unwrap();
5295 let continuation = word.segments().nth(1).unwrap();
5296 assert_eq!(continuation.kind(), LexedWordSegmentKind::Plain);
5297 assert_eq!(continuation.as_str(), "$(printf hi)");
5298 assert_eq!(continuation.span().unwrap().slice(source), "$(printf hi)");
5299 }
5300
5301 #[test]
5302 fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
5303 let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
5304 let mut lexer = Lexer::new(source);
5305
5306 let token = lexer.next_lexed_token().unwrap();
5307 assert_eq!(token.kind, TokenKind::QuotedWord);
5308
5309 let word = token.word().unwrap();
5310 let segment = word.single_segment().unwrap();
5311 assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
5312 assert_eq!(segment.as_str(), "${arr[$RANDOM % ${#arr[@]}]}");
5313 assert_eq!(
5314 segment.span().unwrap().slice(source),
5315 "${arr[$RANDOM % ${#arr[@]}]}"
5316 );
5317 }
5318
5319 #[test]
5320 fn test_ansi_c_control_escape_can_consume_quote() {
5321 let mut lexer = Lexer::new("echo $'\\c''");
5322
5323 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5324 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("\x07"));
5325 assert!(lexer.next_lexed_token().is_none());
5326 }
5327
    #[test]
    fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
        // The `${out_line//'"'/'\"'}` replacement embeds quoted double quotes;
        // the whole assignment must lex as one Word (outer quotes stripped and
        // the quoted `\"` cooked down to `"`), followed only by the newline.
        let source = r#"out_line="${out_line//'"'/'\"'}"
"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(r#"out_line=${out_line//'"'/'"'}"#),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5342
    #[test]
    fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
        // Regression guard: the `${out_line//'"'/'\"'}` replacement must end at
        // its closing brace so the echo/heredoc lines that follow still lex as
        // their own tokens.
        let source = r#"out_line="${out_line//'"'/'\"'}"
echo "Error: Missing python3!"
cat << 'EOF' > "${pywrapper}"
import os
EOF
"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(r#"out_line=${out_line//'"'/'"'}"#),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("Error: Missing python3!"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        // The quoted delimiter 'EOF' lexes as a LiteralWord.
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("EOF"));
        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("${pywrapper}"));
    }
5372
    #[test]
    fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
        // `${crypt//\\/\\\\}` (replace one backslash with two) must stay a
        // single Word; cooking halves each escaped backslash pair.
        let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);
        assert_eq!(token.span.slice(source), "crypt=${crypt//\\\\/\\\\\\\\}");
        // Cooked text differs from the raw span, so no direct source slice exists.
        assert!(token.source_slice(source).is_none());
        assert_eq!(
            token.word_string().as_deref(),
            Some("crypt=${crypt//\\/\\\\}")
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5389
    #[test]
    fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
        // The `${response#*{...}` trim pattern opens with a literal `{`; its
        // closing `}` must end the expansion so the `fi` line and the function's
        // closing brace still lex as their own tokens.
        let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n if true; then\n txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n fi\n}\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("true"));
        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        // Each escaped `\"` in the source cooks down to a plain `"` in the word.
        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(
                "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
            ),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("fi"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5426
5427 #[test]
5428 fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
5429 let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
5430 let mut lexer = Lexer::new(source);
5431
5432 assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5433 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5434 assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5435 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5436 assert_next_token(&mut lexer, TokenKind::And, None);
5437 assert_next_token(&mut lexer, TokenKind::Word, Some("!"));
5438 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5439 assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5440 assert_next_token(&mut lexer, TokenKind::Word, Some("=~"));
5441 assert_next_token(&mut lexer, TokenKind::Word, Some("^{"));
5442 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5443 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5444 assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5445 assert_next_token(&mut lexer, TokenKind::Newline, None);
5446 assert!(lexer.next_lexed_token().is_none());
5447 }
5448
5449 #[test]
5450 fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
5451 let source = "echo -{$(echo a),b}-\n";
5452 let mut lexer = Lexer::new(source);
5453
5454 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5455 assert_next_token(&mut lexer, TokenKind::Word, Some("-{$(echo a),b}-"));
5456 assert_next_token(&mut lexer, TokenKind::Newline, None);
5457 assert!(lexer.next_lexed_token().is_none());
5458 }
5459
5460 #[test]
5461 fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
5462 let source = "echo -{$((1 + 2)),b}-\n";
5463 let mut lexer = Lexer::new(source);
5464
5465 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5466 assert_next_token(&mut lexer, TokenKind::Word, Some("-{$((1 + 2)),b}-"));
5467 assert_next_token(&mut lexer, TokenKind::Newline, None);
5468 assert!(lexer.next_lexed_token().is_none());
5469 }
5470
5471 #[test]
5472 fn test_operators() {
5473 let mut lexer = Lexer::new("a |& b | c && d || e; f &");
5474
5475 assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5476 assert_next_token(&mut lexer, TokenKind::PipeBoth, None);
5477 assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5478 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5479 assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5480 assert_next_token(&mut lexer, TokenKind::And, None);
5481 assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5482 assert_next_token(&mut lexer, TokenKind::Or, None);
5483 assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5484 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5485 assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5486 assert_next_token(&mut lexer, TokenKind::Background, None);
5487 assert!(lexer.next_lexed_token().is_none());
5488 }
5489
5490 #[test]
5491 fn test_double_left_bracket_requires_separator() {
5492 let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");
5493
5494 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5495 assert_next_token(&mut lexer, TokenKind::Word, Some("foo"));
5496 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5497 assert_next_token(&mut lexer, TokenKind::Newline, None);
5498 assert_next_token(&mut lexer, TokenKind::Word, Some("[[z]"));
5499 assert_next_token(&mut lexer, TokenKind::Newline, None);
5500 assert!(lexer.next_lexed_token().is_none());
5501 }
5502
    #[test]
    fn test_redirects() {
        // Walks a command line exercising the redirect operators, separated by
        // single-letter words.
        let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");

        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
        // `>>|` lexes as a plain append here.
        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
        assert_next_token(&mut lexer, TokenKind::RedirectFdAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
        // `2>|` carries its fd on the token itself and has no token text.
        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Clobber);
        assert_eq!(token.fd_value(), Some(2));
        assert_eq!(token_text(&token, lexer.input), None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
        assert_next_token(&mut lexer, TokenKind::RedirectIn, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("g"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("h"));
        assert_next_token(&mut lexer, TokenKind::HereString, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("i"));
        assert_next_token(&mut lexer, TokenKind::RedirectBothAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("j"));
        assert_next_token(&mut lexer, TokenKind::RedirectReadWrite, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("k"));
    }
5532
5533 #[test]
5534 fn test_comment() {
5535 let mut lexer = Lexer::new("echo hello # this is a comment\necho world");
5536
5537 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5538 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5539 assert_next_token(&mut lexer, TokenKind::Newline, None);
5540 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5541 assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5542 }
5543
5544 #[test]
5545 fn test_comment_token_with_span() {
5546 let mut lexer = Lexer::new("# lead\necho hi # tail");
5547
5548 let comment = lexer.next_lexed_token_with_comments().unwrap();
5549 assert_eq!(comment.kind, TokenKind::Comment);
5550 assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
5551 assert_eq!(comment.span.start.line, 1);
5552 assert_eq!(comment.span.start.column, 1);
5553 assert_eq!(comment.span.end.line, 1);
5554 assert_eq!(comment.span.end.column, 7);
5555
5556 assert_next_token(&mut lexer, TokenKind::Newline, None);
5557 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5558 assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));
5559
5560 let inline = lexer.next_lexed_token_with_comments().unwrap();
5561 assert_eq!(inline.kind, TokenKind::Comment);
5562 assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
5563 assert_eq!(inline.span.start.line, 2);
5564 assert_eq!(inline.span.start.column, 9);
5565 }
5566
5567 #[test]
5568 fn test_comment_token_preserves_hash_boundaries() {
5569 let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");
5570
5571 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5572 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
5573 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
5574 assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
5575 assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
5576 assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
5577 assert!(lexer.next_lexed_token_with_comments().is_none());
5578 }
5579
5580 #[test]
5581 fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5582 let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5583
5584 let mut saw_comment = false;
5585 while let Some(token) = lexer.next_lexed_token_with_comments() {
5586 if token.kind == TokenKind::Comment {
5587 saw_comment = true;
5588 break;
5589 }
5590 }
5591
5592 assert!(
5593 !saw_comment,
5594 "zsh inline glob controls inside [[ ]] should not lex as comments"
5595 );
5596 }
5597
5598 #[test]
5599 fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5600 let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5601
5602 let mut saw_comment = false;
5603 while let Some(token) = lexer.next_lexed_token_with_comments() {
5604 if token.kind == TokenKind::Comment {
5605 saw_comment = true;
5606 break;
5607 }
5608 }
5609
5610 assert!(
5611 !saw_comment,
5612 "zsh arithmetic char literals inside (( )) should not lex as comments"
5613 );
5614 }
5615
    #[test]
    fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
        // A zsh-style `${1//(#m)...}` replacement whose pattern embeds `"`, `'`,
        // and nested `${(...)...}` / `$(( ))` must still lex as one QuotedWord.
        let mut lexer = Lexer::new(
            "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
        );

        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
        assert_next_token(
            &mut lexer,
            TokenKind::LiteralWord,
            Some("\\e]133;C;cmdline_url=%s\\a"),
        );
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
    }
5636
    #[test]
    fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
        // The quote-heavy replacement word inside the anonymous `() { ... }`
        // body must not swallow the function's closing `}` or the trailing
        // `"$1"` argument.
        let mut lexer = Lexer::new(
            "() {\n builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
        );

        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
        assert_next_token(
            &mut lexer,
            TokenKind::LiteralWord,
            Some("\\e]133;C;cmdline_url=%s\\a"),
        );
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
    }
5664
5665 #[test]
5666 fn test_variable_words() {
5667 let mut lexer = Lexer::new("echo $HOME $USER");
5668
5669 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5670 assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
5671 assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
5672 assert!(lexer.next_lexed_token().is_none());
5673 }
5674
5675 #[test]
5676 fn test_pipeline_tokens() {
5677 let mut lexer = Lexer::new("echo hello | cat");
5678
5679 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5680 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5681 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5682 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5683 assert!(lexer.next_lexed_token().is_none());
5684 }
5685
5686 #[test]
5687 fn test_read_heredoc() {
5688 let mut lexer = Lexer::new("\nhello\nworld\nEOF");
5690 let content = lexer.read_heredoc("EOF", false);
5691 assert_eq!(content.content, "hello\nworld\n");
5692 }
5693
5694 #[test]
5695 fn test_read_heredoc_single_line() {
5696 let mut lexer = Lexer::new("\ntest\nEOF");
5697 let content = lexer.read_heredoc("EOF", false);
5698 assert_eq!(content.content, "test\n");
5699 }
5700
5701 #[test]
5702 fn test_read_heredoc_full_scenario() {
5703 let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
5705
5706 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5708 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5709 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5710
5711 let content = lexer.read_heredoc("EOF", false);
5713 assert_eq!(content.content, "hello\nworld\n");
5714 }
5715
5716 #[test]
5717 fn test_read_heredoc_with_redirect() {
5718 let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
5720 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5721 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5722 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5723 let content = lexer.read_heredoc("EOF", false);
5724 assert_eq!(content.content, "hello\n");
5725 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5727 assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
5728 }
5729
5730 #[test]
5731 fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
5732 let source = "cat <<EOF | grep hello \\\n | sort \\\n > out.txt\nhello\nEOF\n";
5733 let mut lexer = Lexer::new(source);
5734
5735 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5736 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5737 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5738
5739 let heredoc = lexer.read_heredoc("EOF", false);
5740 assert_eq!(heredoc.content, "hello\n");
5741
5742 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5743 assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
5744 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5745 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5746 assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
5747 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5748 assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
5749 }
5750
5751 #[test]
5752 fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
5753 let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
5754 let mut lexer = Lexer::new(source);
5755
5756 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5757 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5758 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5759
5760 let heredoc = lexer.read_heredoc("EOF", false);
5761 assert_eq!(heredoc.content, "1\n2\n3\n");
5762 }
5763
5764 #[test]
5765 fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
5766 let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
5767 let mut lexer = Lexer::new(source);
5768
5769 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5770 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5771 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5772
5773 let heredoc = lexer.read_heredoc("EOF", false);
5774 assert_eq!(heredoc.content, "body\n");
5775 }
5776
5777 #[test]
5778 fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
5779 let source = "cat <<EOF # note \\\nbody\nEOF\n";
5780 let mut lexer = Lexer::new(source);
5781
5782 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5783 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5784 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5785
5786 let heredoc = lexer.read_heredoc("EOF", false);
5787 assert_eq!(heredoc.content, "body\n");
5788 }
5789
5790 #[test]
5791 fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
5792 let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
5793 let mut lexer = Lexer::new(source);
5794
5795 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5796 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5797 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5798 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5799
5800 let heredoc = lexer.read_heredoc("EOF", false);
5801 assert_eq!(heredoc.content, "body\n");
5802
5803 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5804 }
5805
5806 #[test]
5807 fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
5808 let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
5809 let mut lexer = Lexer::new(source);
5810
5811 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5812 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5813 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5814
5815 let heredoc = lexer.read_heredoc("EOF", false);
5816 assert_eq!(heredoc.content, "1\n");
5817
5818 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5819 assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
5820 }
5821
5822 #[test]
5823 fn test_read_heredoc_with_redirect_preserves_following_spans() {
5824 let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
5825 let mut lexer = Lexer::new(source);
5826
5827 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5828 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5829 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5830
5831 let heredoc = lexer.read_heredoc("EOF", false);
5832 assert_eq!(heredoc.content, "hello\n");
5833
5834 let redirect = lexer.next_lexed_token_with_comments().unwrap();
5835 assert_eq!(redirect.kind, TokenKind::RedirectOut);
5836 assert_eq!(redirect.span.slice(source), ">");
5837
5838 let target = lexer.next_lexed_token_with_comments().unwrap();
5839 assert_eq!(target.kind, TokenKind::Word);
5840 assert_eq!(
5841 token_text(&target, lexer.input).as_deref(),
5842 Some("file.txt")
5843 );
5844 assert_eq!(target.span.slice(source), "file.txt");
5845
5846 let newline = lexer.next_lexed_token_with_comments().unwrap();
5847 assert_eq!(newline.kind, TokenKind::Newline);
5848 assert_eq!(newline.span.slice(source), "\n");
5849
5850 let comment = lexer.next_lexed_token_with_comments().unwrap();
5851 assert_eq!(comment.kind, TokenKind::Comment);
5852 assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
5853 assert_eq!(comment.span.slice(source), "# done");
5854 }
5855
5856 #[test]
5857 fn test_comment_with_unicode() {
5858 let source = "# café résumé\necho ok";
5860 let mut lexer = Lexer::new(source);
5861
5862 let comment = lexer.next_lexed_token_with_comments().unwrap();
5863 assert_eq!(comment.kind, TokenKind::Comment);
5864 assert_eq!(
5865 token_text(&comment, lexer.input).as_deref(),
5866 Some(" café résumé")
5867 );
5868 let start = comment.span.start.offset;
5870 let end = comment.span.end.offset;
5871 assert_eq!(start, 0);
5872 assert_eq!(&source[start..end], "# café résumé");
5873 assert!(source.is_char_boundary(start));
5874 assert!(source.is_char_boundary(end));
5875
5876 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
5877 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5878 }
5879
5880 #[test]
5881 fn test_comment_with_cjk_characters() {
5882 let source = "# 你好世界\necho ok";
5884 let mut lexer = Lexer::new(source);
5885
5886 let comment = lexer.next_lexed_token_with_comments().unwrap();
5887 assert_eq!(comment.kind, TokenKind::Comment);
5888 assert_eq!(
5889 token_text(&comment, lexer.input).as_deref(),
5890 Some(" 你好世界")
5891 );
5892 let start = comment.span.start.offset;
5893 let end = comment.span.end.offset;
5894 assert_eq!(&source[start..end], "# 你好世界");
5895 assert!(source.is_char_boundary(start));
5896 assert!(source.is_char_boundary(end));
5897 }
5898
5899 #[test]
5900 fn test_heredoc_with_comments_inside() {
5901 let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
5903 let mut lexer = Lexer::new(source);
5904
5905 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
5906 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
5907 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
5908
5909 let heredoc = lexer.read_heredoc("EOF", false);
5910 assert_eq!(heredoc.content, "# not a comment\nreal line\n");
5911
5912 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
5915 let comment = lexer.next_lexed_token_with_comments().unwrap();
5916 assert_eq!(comment.kind, TokenKind::Comment);
5917 assert_eq!(
5918 token_text(&comment, lexer.input).as_deref(),
5919 Some(" real comment")
5920 );
5921 }
5922
5923 #[test]
5924 fn test_heredoc_with_hash_in_variable() {
5925 let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
5927 let mut lexer = Lexer::new(source);
5928
5929 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
5930 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
5931 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
5932
5933 let heredoc = lexer.read_heredoc("EOF", false);
5934 assert_eq!(heredoc.content, "val=${x#prefix}\n");
5935 }
5936
5937 #[test]
5938 fn test_heredoc_span_does_not_leak() {
5939 let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
5942 let mut lexer = Lexer::new(source);
5943
5944 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5945 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5946 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5947
5948 let heredoc = lexer.read_heredoc("EOF", false);
5949 let start = heredoc.content_span.start.offset;
5950 let end = heredoc.content_span.end.offset;
5951 assert!(
5952 end <= source.len(),
5953 "heredoc span end ({end}) exceeds source length ({})",
5954 source.len()
5955 );
5956 assert_eq!(&source[start..end], "hello\nworld\n");
5957
5958 assert_next_token(&mut lexer, TokenKind::Newline, None);
5960 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5961 assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
5962 }
5963
    // Exercises a configure-script-shaped input: a backslash-quoted heredoc
    // delimiter (`\_ACEOF`) whose body contains an unbalanced backtick,
    // followed by command lines that themselves contain backtick command
    // substitutions. The body's stray backtick must not leak quoting state
    // into the words lexed after the heredoc.
    #[test]
    fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
        let source = "\
cat <<\\_ACEOF
Use these variables to override the choices made by `configure' or to help
it to find libraries and programs with nonstandard names/locations.
_ACEOF
ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
";
        let mut lexer = Lexer::new(source);

        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
        // The quoted delimiter lexes as a single word including the backslash.
        let delimiter = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(delimiter.kind, TokenKind::Word);
        assert_eq!(delimiter.span.slice(source), "\\_ACEOF");

        // Body is taken verbatim up to (not including) the `_ACEOF` line,
        // backtick and all.
        let heredoc = lexer.read_heredoc("_ACEOF", false);
        assert_eq!(
            heredoc.content,
            "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
        );

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);

        // First post-heredoc line: the whole assignment (including the
        // backtick substitution) is one word whose span matches the source.
        let first = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(first.kind, TokenKind::Word);
        assert_eq!(
            first.span.slice(source),
            "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
        );
        // Collect (kind, text, span-text) per segment so the expectation can
        // check both the cooked text and the source-backed span together.
        let first_segments = first
            .word()
            .unwrap()
            .segments()
            .map(|segment| {
                (
                    segment.kind(),
                    segment.as_str().to_string(),
                    segment.span().map(|span| span.slice(source).to_string()),
                )
            })
            .collect::<Vec<_>>();
        assert_eq!(
            first_segments,
            vec![
                (
                    LexedWordSegmentKind::Plain,
                    "ac_dir_suffix=/".to_string(),
                    Some("ac_dir_suffix=/".to_string()),
                ),
                (
                    LexedWordSegmentKind::Plain,
                    "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
                    Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
                ),
            ]
        );

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);

        // Second post-heredoc line: same shape, different substitution.
        let second = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(second.kind, TokenKind::Word);
        assert_eq!(
            second.span.slice(source),
            "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
        );
        let second_segments = second
            .word()
            .unwrap()
            .segments()
            .map(|segment| {
                (
                    segment.kind(),
                    segment.as_str().to_string(),
                    segment.span().map(|span| span.slice(source).to_string()),
                )
            })
            .collect::<Vec<_>>();
        assert_eq!(
            second_segments,
            vec![
                (
                    LexedWordSegmentKind::Plain,
                    "ac_top_builddir_sub=".to_string(),
                    Some("ac_top_builddir_sub=".to_string()),
                ),
                (
                    LexedWordSegmentKind::Plain,
                    "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
                    Some(
                        "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
                            .to_string(),
                    ),
                ),
            ]
        );
    }
6063
6064 #[test]
6065 fn test_heredoc_with_unicode_content() {
6066 let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
6068 let mut lexer = Lexer::new(source);
6069
6070 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6071 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6072 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6073
6074 let heredoc = lexer.read_heredoc("EOF", false);
6075 assert_eq!(heredoc.content, "# 你好\ncafé\n");
6076 let start = heredoc.content_span.start.offset;
6077 let end = heredoc.content_span.end.offset;
6078 assert!(
6079 source.is_char_boundary(start),
6080 "heredoc span start ({start}) not on char boundary"
6081 );
6082 assert!(
6083 source.is_char_boundary(end),
6084 "heredoc span end ({end}) not on char boundary"
6085 );
6086 assert_eq!(&source[start..end], "# 你好\ncafé\n");
6087 }
6088
6089 #[test]
6090 fn test_assoc_compound_assignment() {
6091 let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6094 assert_next_token(
6095 &mut lexer,
6096 TokenKind::Word,
6097 Some(r#"m=([foo]="bar" [baz]="qux")"#),
6098 );
6099 assert!(lexer.next_lexed_token().is_none());
6100 }
6101
6102 #[test]
6103 fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6104 let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6105 let mut lexer = Lexer::new(source);
6106
6107 let token = lexer.next_lexed_token().unwrap();
6108 assert_eq!(token.kind, TokenKind::Word);
6109 assert_eq!(token.span.slice(source), source);
6110 assert!(lexer.next_lexed_token().is_none());
6111 }
6112
6113 #[test]
6114 fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6115 let source = r#"foo\_bar@(baz|qux)"#;
6116 let mut lexer = Lexer::new(source);
6117
6118 let token = lexer.next_lexed_token().unwrap();
6119 assert_eq!(token.kind, TokenKind::Word);
6120 assert_eq!(token.span.slice(source), source);
6121 assert!(lexer.next_lexed_token().is_none());
6122 }
6123
6124 #[test]
6125 fn test_indexed_array_not_collapsed() {
6126 let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6129 assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6130 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6131 }
6132
6133 #[test]
6134 fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
6135 let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
6136 let mut lexer = Lexer::new(source);
6137
6138 assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
6139 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6140
6141 let token = lexer.next_lexed_token().unwrap();
6142 assert_eq!(token.kind, TokenKind::Word);
6143 assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);
6144
6145 let word = token.word().unwrap();
6146 let segments: Vec<_> = word
6147 .segments()
6148 .map(|segment| (segment.kind(), segment.as_str().to_string()))
6149 .collect();
6150 assert_eq!(
6151 segments,
6152 vec![
6153 (
6154 LexedWordSegmentKind::DoubleQuoted,
6155 "$plugin_dir".to_string()
6156 ),
6157 (LexedWordSegmentKind::Plain, "/*".to_string()),
6158 (LexedWordSegmentKind::Plain, "(:t)".to_string()),
6159 ]
6160 );
6161
6162 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6163 assert!(lexer.next_lexed_token().is_none());
6164 }
6165
6166 #[test]
6167 fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
6168 let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
6169 let mut lexer = Lexer::new(source);
6170
6171 assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
6172 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6173
6174 let token = lexer.next_lexed_token().unwrap();
6175 assert_eq!(token.kind, TokenKind::Word);
6176 assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);
6177
6178 let word = token.word().unwrap();
6179 let segments: Vec<_> = word
6180 .segments()
6181 .map(|segment| (segment.kind(), segment.as_str().to_string()))
6182 .collect();
6183 assert_eq!(
6184 segments,
6185 vec![
6186 (
6187 LexedWordSegmentKind::DoubleQuoted,
6188 "$__GREP_CACHE_FILE".to_string()
6189 ),
6190 (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
6191 ]
6192 );
6193
6194 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6195 assert!(lexer.next_lexed_token().is_none());
6196 }
6197
6198 #[test]
6199 fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6200 let source = r#"$dir/${~pats}(N)"#;
6201 let mut lexer = Lexer::new(source);
6202
6203 let token = lexer.next_lexed_token().unwrap();
6204 assert_eq!(token.kind, TokenKind::Word);
6205 assert_eq!(token.span.slice(source), source);
6206 assert!(lexer.next_lexed_token().is_none());
6207 }
6208
6209 #[test]
6210 fn test_dollar_word_does_not_absorb_function_parens() {
6211 let mut lexer = Lexer::new(r#"foo$x()"#);
6212
6213 assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6214 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6215 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6216 assert!(lexer.next_lexed_token().is_none());
6217 }
6218
6219 #[test]
6220 fn test_command_substitution_word_does_not_absorb_function_parens() {
6221 let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6222
6223 assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6224 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6225 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6226 assert!(lexer.next_lexed_token().is_none());
6227 }
6228
6229 #[test]
6232 fn test_digit_at_eof_no_panic() {
6233 let mut lexer = Lexer::new("2");
6235 let token = lexer.next_lexed_token();
6236 assert!(token.is_some());
6237 }
6238
6239 #[test]
6241 fn test_nested_brace_expansion_single_token() {
6242 let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6244 assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6245 assert!(lexer.next_lexed_token().is_none());
6247 }
6248
6249 #[test]
6251 fn test_simple_brace_expansion_unchanged() {
6252 let mut lexer = Lexer::new("${foo}");
6253 assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6254 assert!(lexer.next_lexed_token().is_none());
6255 }
6256
6257 #[test]
6258 fn test_nvm_fixture_lexes_without_stalling() {
6259 let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6260 let mut lexer = Lexer::new(input);
6261 let mut tokens = 0usize;
6262
6263 while lexer.next_lexed_token().is_some() {
6264 tokens += 1;
6265 assert!(
6266 tokens < 100_000,
6267 "lexer should continue making progress on the nvm fixture"
6268 );
6269 }
6270
6271 assert!(tokens > 0, "nvm fixture should produce at least one token");
6272 }
6273
6274 #[test]
6275 fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
6276 let input = concat!(
6277 "case \"${_input_type:-}\" in\n",
6278 " html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
6279 " org) _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
6280 "esac\n",
6281 );
6282
6283 assert_non_newline_tokens_stay_on_one_line(input);
6284
6285 let mut lexer = Lexer::new(input);
6286 let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6287 .map(|token| (token.kind, token_text(&token, input)))
6288 .collect::<Vec<_>>();
6289 assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
6290 assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
6291 }
6292
6293 #[test]
6294 fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6295 let input = concat!(
6296 "case $2 in\n",
6297 " cygwin*) bin='cygwin32/bin' ;|\n",
6298 "esac\n",
6299 );
6300
6301 let mut lexer = Lexer::new(input);
6302 let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6303 .map(|token| (token.kind, token_text(&token, input)))
6304 .collect::<Vec<_>>();
6305
6306 assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6307 assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6308 assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6309 }
6310
6311 #[test]
6312 fn test_inline_if_with_array_append_stays_line_local() {
6313 let input = concat!(
6314 "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
6315 "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
6316 );
6317
6318 assert_non_newline_tokens_stay_on_one_line(input);
6319 }
6320
6321 #[test]
6322 fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6323 let source = "unsetopt interactive_comments\n#literal\n";
6324 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6325 let mut lexer = Lexer::with_profile(source, &profile);
6326
6327 assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6328 assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6329 assert_next_token(&mut lexer, TokenKind::Newline, None);
6330 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6331 }
6332
6333 #[test]
6334 fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
6335 let source = "setopt rc_quotes\nprint 'a''b'\n";
6336 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6337 let mut lexer = Lexer::with_profile(source, &profile);
6338
6339 assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6340 assert_next_token(&mut lexer, TokenKind::Word, Some("rc_quotes"));
6341 assert_next_token(&mut lexer, TokenKind::Newline, None);
6342 assert_next_token(&mut lexer, TokenKind::Word, Some("print"));
6343 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("a'b"));
6344 }
6345
6346 #[test]
6347 fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
6348 let source = "setopt ignore_braces\n{ echo }\n";
6349 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6350 let mut lexer = Lexer::with_profile(source, &profile);
6351
6352 assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6353 assert_next_token(&mut lexer, TokenKind::Word, Some("ignore_braces"));
6354 assert_next_token(&mut lexer, TokenKind::Newline, None);
6355 assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
6356 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6357 assert_next_token(&mut lexer, TokenKind::Word, Some("}"));
6358 }
6359
    // Regression test from a fuzzer-found crash: a heredoc-like `<<E` inside
    // nested arithmetic `$(( … ))` input. The exact byte sequence is what
    // triggered the original panic, so it must be preserved verbatim; the
    // only requirement here is that parsing completes without panicking.
    #[test]
    fn test_heredoc_in_arithmetic_fuzz_crash() {
        let data: &[u8] = &[
            35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
            40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
            33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
            119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
            0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
            119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
            122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
            49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
            119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
            122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
            61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
            32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
            122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
            39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
            122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
            32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
            0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
        ];
        // The fuzzer input happens to be valid UTF-8 (it contains control
        // bytes but no invalid sequences), so the unwrap is expected to hold.
        let input = std::str::from_utf8(data).unwrap();
        let script = format!("echo $(({input}))\n");
        // Only completion matters; the parse result itself is discarded.
        let _ = crate::parser::Parser::new(&script).parse();
    }
6391}