// shuck_parser/parser/lexer.rs
1//! Lexer for bash scripts
2//!
3//! Tokenizes input into a stream of tokens with source position tracking.
4
5use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Compact bit-flag set attached to every lexed token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Raised when any segment text had to be decoded into owned text.
    const COOKED_TEXT: u8 = 1 << 0;
    /// Raised for tokens fabricated rather than read from the source.
    const SYNTHETIC: u8 = 1 << 1;

    /// True when any bit of `mask` is present in this flag set.
    const fn contains(self, mask: u8) -> bool {
        self.0 & mask != 0
    }

    /// Flag set with no bits raised.
    const fn empty() -> Self {
        Self(0)
    }

    /// Flag set marking decoded (non-source-backed) text.
    const fn cooked_text() -> Self {
        Self(Self::COOKED_TEXT)
    }

    /// Copy of these flags with the synthetic bit raised.
    pub(crate) const fn with_synthetic(self) -> Self {
        Self(self.0 | Self::SYNTHETIC)
    }

    /// Whether the cooked-text bit is set.
    pub(crate) const fn has_cooked_text(self) -> bool {
        self.contains(Self::COOKED_TEXT)
    }

    /// Whether the synthetic bit is set.
    pub(crate) const fn is_synthetic(self) -> bool {
        self.contains(Self::SYNTHETIC)
    }
}
40
/// Backing storage for a token's (possibly decoded) text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenText<'a> {
    /// Zero-copy slice borrowed from the original input.
    Borrowed(&'a str),
    /// Range into a reference-counted copy of the source; carries no
    /// borrow of the input lifetime.
    Shared {
        source: Arc<str>,
        range: Range<usize>,
    },
    /// Heap-allocated text that no longer matches the raw source bytes.
    Owned(String),
}
50
impl TokenText<'_> {
    /// Borrow the text regardless of which storage backs it.
    pub(crate) fn as_str(&self) -> &str {
        match self {
            Self::Borrowed(text) => text,
            Self::Shared { source, range } => &source[range.clone()],
            Self::Owned(text) => text,
        }
    }

    /// Detach from the input lifetime by copying borrowed text.
    /// `Shared` and `Owned` already (co-)own their data and pass through.
    fn into_owned<'a>(self) -> TokenText<'a> {
        match self {
            Self::Borrowed(text) => TokenText::Owned(text.to_string()),
            Self::Shared { source, range } => TokenText::Shared { source, range },
            Self::Owned(text) => TokenText::Owned(text),
        }
    }

    /// Detach from the input lifetime, preferring a zero-copy `Shared`
    /// range into `source` when `span` is present and in bounds; falls
    /// back to an owned copy otherwise.
    ///
    /// NOTE(review): assumes `span` addresses the same bytes in `source`
    /// as the borrowed text — confirm at call sites.
    fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
        match self {
            Self::Borrowed(text) => span
                .filter(|span| span.end.offset <= source.len())
                .map_or_else(
                    || TokenText::Owned(text.to_string()),
                    |span| TokenText::Shared {
                        source: Arc::clone(source),
                        range: span.start.offset..span.end.offset,
                    },
                ),
            Self::Shared { source, range } => TokenText::Shared { source, range },
            Self::Owned(text) => TokenText::Owned(text),
        }
    }
}
84
/// Classification of one segment inside a lexed shell word.
///
/// Records which lexical/quoting form produced the segment's text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexedWordSegmentKind {
    /// Unquoted or otherwise plain text.
    Plain,
    /// Text from a single-quoted string.
    SingleQuoted,
    /// Text from a `$'...'` string.
    DollarSingleQuoted,
    /// Text from a double-quoted string.
    DoubleQuoted,
    /// Text from a `$"..."` string.
    DollarDoubleQuoted,
    /// Text composed from multiple lexical forms.
    Composite,
}
101
/// One segment of a lexed shell word, optionally backed by source text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedWordSegment<'a> {
    /// Lexical classification of the segment.
    kind: LexedWordSegmentKind,
    /// Cooked text of the segment.
    text: TokenText<'a>,
    /// Span of the inner text, when tracked.
    span: Option<Span>,
    /// Span including surrounding quoting syntax, when tracked.
    wrapper_span: Option<Span>,
}
110
impl<'a> LexedWordSegment<'a> {
    /// Source-backed segment; the wrapper span defaults to the text span.
    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
        Self {
            kind,
            text: TokenText::Borrowed(text),
            span,
            wrapper_span: span,
        }
    }

    /// Source-backed segment with distinct inner and wrapper spans
    /// (e.g. quoted content vs. content including the quotes).
    fn borrowed_with_spans(
        kind: LexedWordSegmentKind,
        text: &'a str,
        span: Option<Span>,
        wrapper_span: Option<Span>,
    ) -> Self {
        Self {
            kind,
            text: TokenText::Borrowed(text),
            span,
            wrapper_span,
        }
    }

    /// Decoded segment with no span information.
    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
        Self {
            kind,
            text: TokenText::Owned(text),
            span: None,
            wrapper_span: None,
        }
    }

    /// Decoded segment that still records where it came from.
    fn owned_with_spans(
        kind: LexedWordSegmentKind,
        text: String,
        span: Option<Span>,
        wrapper_span: Option<Span>,
    ) -> Self {
        Self {
            kind,
            text: TokenText::Owned(text),
            span,
            wrapper_span,
        }
    }

    /// Borrow this segment's cooked text.
    pub fn as_str(&self) -> &str {
        self.text.as_str()
    }

    /// Whether the text refers to the original source (directly or via a
    /// shared copy) rather than decoded owned text.
    pub(crate) const fn text_is_source_backed(&self) -> bool {
        matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
    }

    /// Return the lexical classification of this segment.
    pub const fn kind(&self) -> LexedWordSegmentKind {
        self.kind
    }

    /// Return the span of the inner text, if it is tracked.
    pub const fn span(&self) -> Option<Span> {
        self.span
    }

    /// Return the span including surrounding quoting syntax when available.
    pub fn wrapper_span(&self) -> Option<Span> {
        self.wrapper_span.or(self.span)
    }

    /// Shift both tracked spans by `base`.
    fn rebased(mut self, base: Position) -> Self {
        self.span = self.span.map(|span| span.rebased(base));
        self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
        self
    }

    /// Detach from the input lifetime by copying borrowed text.
    fn into_owned<'b>(self) -> LexedWordSegment<'b> {
        LexedWordSegment {
            kind: self.kind,
            text: self.text.into_owned(),
            span: self.span,
            wrapper_span: self.wrapper_span,
        }
    }

    /// Detach from the input lifetime, re-pointing borrowed text at a
    /// shared copy of the source when the span allows.
    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
        LexedWordSegment {
            kind: self.kind,
            text: self.text.into_shared(source, self.span),
            span: self.span,
            wrapper_span: self.wrapper_span,
        }
    }
}
206
/// Source-backed representation of a shell word produced by the lexer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedWord<'a> {
    /// First segment; always present.
    primary_segment: LexedWordSegment<'a>,
    /// Additional segments; empty (a non-allocating `Vec::new`) in the
    /// common single-segment case.
    trailing_segments: Vec<LexedWordSegment<'a>>,
}
213
impl<'a> LexedWord<'a> {
    /// Word consisting of exactly one segment.
    fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
        Self {
            primary_segment,
            trailing_segments: Vec::new(),
        }
    }

    /// Single-segment word backed by a slice of the source.
    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
        Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
    }

    /// Single-segment word with decoded (owned) text.
    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
        Self::from_segment(LexedWordSegment::owned(kind, text))
    }

    /// Append a further segment to this word.
    fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
        self.trailing_segments.push(segment);
    }

    /// Iterate over the segments that make up this word.
    pub fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
        std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
    }

    /// Return the word text when it is represented by a single segment.
    pub fn text(&self) -> Option<&str> {
        self.single_segment().map(LexedWordSegment::as_str)
    }

    /// Join all segments into an owned string.
    pub fn joined_text(&self) -> String {
        let mut text = String::new();
        for segment in self.segments() {
            text.push_str(segment.as_str());
        }
        text
    }

    /// Return the only segment when this word is not segmented.
    pub fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
        self.trailing_segments
            .is_empty()
            .then_some(&self.primary_segment)
    }

    /// Whether any segment carries decoded (owned) text; `Shared` text is
    /// still considered source-backed, not cooked.
    fn has_cooked_text(&self) -> bool {
        self.segments()
            .any(|segment| matches!(segment.text, TokenText::Owned(_)))
    }

    /// Shift the spans of every segment by `base`.
    fn rebased(mut self, base: Position) -> Self {
        self.primary_segment = self.primary_segment.rebased(base);
        self.trailing_segments = self
            .trailing_segments
            .into_iter()
            .map(|segment| segment.rebased(base))
            .collect();
        self
    }

    /// Detach from the input lifetime by copying borrowed text.
    fn into_owned<'b>(self) -> LexedWord<'b> {
        LexedWord {
            primary_segment: self.primary_segment.into_owned(),
            trailing_segments: self
                .trailing_segments
                .into_iter()
                .map(LexedWordSegment::into_owned)
                .collect(),
        }
    }

    /// Detach from the input lifetime via a shared copy of the source.
    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
        LexedWord {
            primary_segment: self.primary_segment.into_shared(source),
            trailing_segments: self
                .trailing_segments
                .into_iter()
                .map(|segment| segment.into_shared(source))
                .collect(),
        }
    }
}
297
/// Kinds of lexer error payloads attached to `TokenKind::Error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexerErrorKind {
    /// Unterminated `$()` command substitution.
    CommandSubstitution,
    /// Unterminated backtick command substitution.
    BacktickSubstitution,
    /// Unterminated single-quoted string.
    SingleQuote,
    /// Unterminated double-quoted string.
    DoubleQuote,
}

impl LexerErrorKind {
    /// Human-readable message for this lexer error kind.
    pub const fn message(self) -> &'static str {
        match self {
            Self::SingleQuote => "unterminated single quote",
            Self::DoubleQuote => "unterminated double quote",
            Self::BacktickSubstitution => "unterminated backtick substitution",
            Self::CommandSubstitution => "unterminated command substitution",
        }
    }
}
322
/// Kind-dependent data carried by a token.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenPayload<'a> {
    /// No payload (punctuation, comments).
    None,
    /// Word text, possibly segmented.
    Word(LexedWord<'a>),
    /// Single file descriptor for redirections.
    Fd(i32),
    /// `(source_fd, target_fd)` pair for descriptor redirections.
    FdPair(i32, i32),
    /// Lexer error payload attached to `TokenKind::Error`.
    Error(LexerErrorKind),
}
331
/// Token produced by the shell lexer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedToken<'a> {
    /// Token kind used by the parser.
    pub kind: TokenKind,
    /// Source span covered by the token.
    pub span: Span,
    /// Cooked-text / synthetic markers.
    pub(crate) flags: TokenFlags,
    /// Kind-dependent payload (word text, fds, or error).
    payload: TokenPayload<'a>,
}
342
343impl<'a> LexedToken<'a> {
344    fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
345        match kind {
346            TokenKind::Word => LexedWordSegmentKind::Plain,
347            TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
348            TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
349            _ => LexedWordSegmentKind::Composite,
350        }
351    }
352
353    pub(crate) fn punctuation(kind: TokenKind) -> Self {
354        Self {
355            kind,
356            span: Span::new(),
357            flags: TokenFlags::empty(),
358            payload: TokenPayload::None,
359        }
360    }
361
362    fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
363        let flags = if word.has_cooked_text() {
364            TokenFlags::cooked_text()
365        } else {
366            TokenFlags::empty()
367        };
368
369        Self {
370            kind,
371            span: Span::new(),
372            flags,
373            payload: TokenPayload::Word(word),
374        }
375    }
376
377    fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
378        Self::with_word_payload(
379            kind,
380            LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
381        )
382    }
383
384    fn owned_word(kind: TokenKind, text: String) -> Self {
385        Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
386    }
387
388    fn comment() -> Self {
389        Self {
390            kind: TokenKind::Comment,
391            span: Span::new(),
392            flags: TokenFlags::empty(),
393            payload: TokenPayload::None,
394        }
395    }
396
397    fn fd(kind: TokenKind, fd: i32) -> Self {
398        Self {
399            kind,
400            span: Span::new(),
401            flags: TokenFlags::empty(),
402            payload: TokenPayload::Fd(fd),
403        }
404    }
405
406    fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
407        Self {
408            kind,
409            span: Span::new(),
410            flags: TokenFlags::empty(),
411            payload: TokenPayload::FdPair(src_fd, dst_fd),
412        }
413    }
414
415    fn error(kind: LexerErrorKind) -> Self {
416        Self {
417            kind: TokenKind::Error,
418            span: Span::new(),
419            flags: TokenFlags::empty(),
420            payload: TokenPayload::Error(kind),
421        }
422    }
423
424    pub(crate) fn with_span(mut self, span: Span) -> Self {
425        self.span = span;
426        self
427    }
428
429    pub(crate) fn rebased(mut self, base: Position) -> Self {
430        self.span = self.span.rebased(base);
431        self.payload = match self.payload {
432            TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
433            payload => payload,
434        };
435        self
436    }
437
438    pub(crate) fn with_synthetic_flag(mut self) -> Self {
439        self.flags = self.flags.with_synthetic();
440        self
441    }
442
443    pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
444        let payload = match self.payload {
445            TokenPayload::None => TokenPayload::None,
446            TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
447            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
448            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
449            TokenPayload::Error(kind) => TokenPayload::Error(kind),
450        };
451
452        LexedToken {
453            kind: self.kind,
454            span: self.span,
455            flags: self.flags,
456            payload,
457        }
458    }
459
460    pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
461        let payload = match self.payload {
462            TokenPayload::None => TokenPayload::None,
463            TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
464            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
465            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
466            TokenPayload::Error(kind) => TokenPayload::Error(kind),
467        };
468
469        LexedToken {
470            kind: self.kind,
471            span: self.span,
472            flags: self.flags,
473            payload,
474        }
475    }
476
477    /// Borrow the token text when it is a single-segment word token.
478    pub fn word_text(&self) -> Option<&str> {
479        self.kind
480            .is_word_like()
481            .then_some(())
482            .and_then(|_| match &self.payload {
483                TokenPayload::Word(word) => word.text(),
484                _ => None,
485            })
486    }
487
488    /// Return an owned string containing the token's word text.
489    pub fn word_string(&self) -> Option<String> {
490        self.kind
491            .is_word_like()
492            .then_some(())
493            .and_then(|_| match &self.payload {
494                TokenPayload::Word(word) => Some(word.joined_text()),
495                _ => None,
496            })
497    }
498
499    /// Borrow the structured word payload for word-like tokens.
500    pub fn word(&self) -> Option<&LexedWord<'a>> {
501        match &self.payload {
502            TokenPayload::Word(word) => Some(word),
503            _ => None,
504        }
505    }
506
507    /// Borrow the original source slice when the token is source-backed and uncooked.
508    pub fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
509        if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
510            return None;
511        }
512
513        (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
514            .then(|| &source[self.span.start.offset..self.span.end.offset])
515    }
516
517    /// Return the file-descriptor payload for redirection tokens that carry one.
518    pub fn fd_value(&self) -> Option<i32> {
519        match self.payload {
520            TokenPayload::Fd(fd) => Some(fd),
521            _ => None,
522        }
523    }
524
525    /// Return the `(source_fd, target_fd)` payload for descriptor-pair redirections.
526    pub fn fd_pair_value(&self) -> Option<(i32, i32)> {
527        match self.payload {
528            TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
529            _ => None,
530        }
531    }
532
533    /// Return the lexer error payload when this token represents `TokenKind::Error`.
534    pub fn error_kind(&self) -> Option<LexerErrorKind> {
535        match self.payload {
536            TokenPayload::Error(kind) => Some(kind),
537            _ => None,
538        }
539    }
540}
541
542/// Result of reading a heredoc body from the source.
543#[derive(Debug, Clone, PartialEq)]
544pub struct HeredocRead {
545    /// Decoded heredoc content.
546    pub content: String,
547    /// Source span covering the heredoc body content.
548    pub content_span: Span,
549}
550
/// Maximum nesting depth for command substitution in the lexer.
/// Prevents stack overflow from deeply nested $() patterns.
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;

/// Lightweight scanning cursor over the unconsumed tail of the input.
#[derive(Clone, Debug)]
struct Cursor<'a> {
    /// Remaining (unconsumed) input text.
    rest: &'a str,
}
559
560impl<'a> Cursor<'a> {
561    fn new(source: &'a str) -> Self {
562        Self { rest: source }
563    }
564
565    fn first(&self) -> Option<char> {
566        self.rest.chars().next()
567    }
568
569    fn second(&self) -> Option<char> {
570        let mut chars = self.rest.chars();
571        chars.next()?;
572        chars.next()
573    }
574
575    fn third(&self) -> Option<char> {
576        let mut chars = self.rest.chars();
577        chars.next()?;
578        chars.next()?;
579        chars.next()
580    }
581
582    fn bump(&mut self) -> Option<char> {
583        let ch = self.first()?;
584        self.rest = &self.rest[ch.len_utf8()..];
585        Some(ch)
586    }
587
588    fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
589        let start = self.rest;
590        let mut end = 0;
591
592        for ch in start.chars() {
593            if !predicate(ch) {
594                break;
595            }
596            end += ch.len_utf8();
597        }
598
599        self.rest = &start[end..];
600        &start[..end]
601    }
602
603    fn rest(&self) -> &'a str {
604        self.rest
605    }
606
607    fn skip_bytes(&mut self, count: usize) {
608        self.rest = &self.rest[count..];
609    }
610
611    fn find_byte(&self, byte: u8) -> Option<usize> {
612        memchr(byte, self.rest.as_bytes())
613    }
614}
615
/// Maps byte offsets to line/column positions for a fixed source string.
#[derive(Clone, Debug)]
struct PositionMap<'a> {
    /// The full input the offsets index into.
    source: &'a str,
    /// Byte offset of the start of each line (entry 0 is always 0).
    line_starts: Arc<[usize]>,
    /// Most recently computed position; serves as a fast-path anchor for
    /// forward-moving lookups.
    cached: Position,
}
622
/// Instrumentation counters gathered when the `benchmarking` feature is on.
#[cfg(feature = "benchmarking")]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct LexerBenchmarkCounters {
    /// Number of times `Lexer::current_position` was invoked.
    pub(crate) current_position_calls: u64,
}
628
impl<'a> PositionMap<'a> {
    /// Build the line-start table for `source`.
    fn new(source: &'a str) -> Self {
        // Pre-size to newline count + 1 so the extend below never
        // reallocates (scans the bytes twice, once for the count).
        let mut line_starts =
            Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
        line_starts.push(0);
        line_starts.extend(
            source
                .bytes()
                .enumerate()
                .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
        );

        Self {
            source,
            line_starts: line_starts.into(),
            cached: Position::new(),
        }
    }

    /// Resolve `offset` to a position, reusing the cached anchor when the
    /// lookup moves forward (the common case during lexing).
    fn position(&mut self, offset: usize) -> Position {
        if offset == self.cached.offset {
            return self.cached;
        }

        let position = if offset > self.cached.offset && offset <= self.source.len() {
            // Forward scan from the cached anchor; avoids re-searching the
            // line table for monotonically increasing offsets.
            Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
        } else {
            self.position_uncached(offset)
        };
        self.cached = position;
        position
    }

    /// Resolve `offset` via binary search over the line-start table.
    /// Offsets past the end of the source are clamped to the end.
    fn position_uncached(&self, offset: usize) -> Position {
        let offset = offset.min(self.source.len());
        let line_index = self
            .line_starts
            .partition_point(|start| *start <= offset)
            .saturating_sub(1);
        let line_start = self.line_starts[line_index];
        let line_text = &self.source[line_start..offset];
        // Columns are 1-based and counted in chars; the ASCII fast path
        // skips the char walk.
        let column = if line_text.is_ascii() {
            line_text.len() + 1
        } else {
            line_text.chars().count() + 1
        };

        Position {
            line: line_index + 1,
            column,
            offset,
        }
    }

    /// Advance `position` across `text`, updating line, column and offset.
    fn advance_from(mut position: Position, text: &str) -> Position {
        position.offset += text.len();
        let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
        if newline_count == 0 {
            position.column += if text.is_ascii() {
                text.len()
            } else {
                text.chars().count()
            };
            return position;
        }

        position.line += newline_count;
        // The column restarts after the last newline in `text`.
        let tail_start = memrchr(b'\n', text.as_bytes())
            .map(|index| index + 1)
            .unwrap_or_default();
        let tail = &text[tail_start..];
        position.column = if tail.is_ascii() {
            tail.len() + 1
        } else {
            tail.chars().count() + 1
        };
        position
    }
}
708
/// Lexer for bash scripts.
#[derive(Clone)]
pub struct Lexer<'a> {
    #[allow(dead_code)] // Stored for error reporting in future
    input: &'a str,
    /// Current byte offset in the input/reinjected stream.
    offset: usize,
    /// Scanning cursor over the unconsumed source text.
    cursor: Cursor<'a>,
    /// Offset → line/column resolver for `input`.
    position_map: PositionMap<'a>,
    /// Buffer for re-injected characters (e.g., rest-of-line after heredoc delimiter).
    /// Consumed before `cursor`.
    reinject_buf: VecDeque<char>,
    /// Cursor byte offset to restore once a heredoc replay buffer is exhausted.
    reinject_resume_offset: Option<usize>,
    /// Maximum allowed nesting depth for command substitution
    max_subst_depth: usize,
    /// Zsh option state in effect at the start of the input, if any.
    initial_zsh_options: Option<ZshOptionState>,
    /// Precomputed timeline of zsh option changes, keyed by byte offset.
    zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    /// Index of the next unconsumed timeline entry.
    zsh_timeline_index: usize,
    #[cfg(feature = "benchmarking")]
    benchmark_counters: Option<LexerBenchmarkCounters>,
}
731
732impl<'a> Lexer<'a> {
733    /// Create a new lexer for the given input.
734    pub fn new(input: &'a str) -> Self {
735        Self::with_max_subst_depth_and_profile(
736            input,
737            DEFAULT_MAX_SUBST_DEPTH,
738            &ShellProfile::native(super::ShellDialect::Bash),
739            None,
740        )
741    }
742
743    /// Create a new lexer with a custom max substitution nesting depth.
744    /// Limits recursion in read_command_subst_into().
745    pub fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
746        Self::with_max_subst_depth_and_profile(
747            input,
748            max_depth,
749            &ShellProfile::native(super::ShellDialect::Bash),
750            None,
751        )
752    }
753
754    /// Create a new lexer using the provided shell profile.
755    pub fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
756        let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
757            .then(|| ZshOptionTimeline::build(input, shell_profile))
758            .flatten()
759            .map(Arc::new);
760        Self::with_max_subst_depth_and_profile(
761            input,
762            DEFAULT_MAX_SUBST_DEPTH,
763            shell_profile,
764            zsh_timeline,
765        )
766    }
767
768    pub(crate) fn with_max_subst_depth_and_profile(
769        input: &'a str,
770        max_depth: usize,
771        shell_profile: &ShellProfile,
772        zsh_timeline: Option<Arc<ZshOptionTimeline>>,
773    ) -> Self {
774        Self {
775            input,
776            offset: 0,
777            cursor: Cursor::new(input),
778            position_map: PositionMap::new(input),
779            reinject_buf: VecDeque::new(),
780            reinject_resume_offset: None,
781            max_subst_depth: max_depth,
782            initial_zsh_options: shell_profile.zsh_options().cloned(),
783            zsh_timeline,
784            zsh_timeline_index: 0,
785            #[cfg(feature = "benchmarking")]
786            benchmark_counters: None,
787        }
788    }
789
    /// Get the current position in the input.
    ///
    /// Uses the uncached line-table lookup so only `&self` is required;
    /// the cached fast path lives in `current_position`.
    pub fn position(&self) -> Position {
        self.position_map.position_uncached(self.offset)
    }

    /// Resolve an arbitrary byte offset to a line/column position.
    pub(super) fn position_at_offset(&self, offset: usize) -> Position {
        self.position_map.position_uncached(offset)
    }

    /// Position of the current offset via the position map's cache
    /// (hot path during tokenization).
    fn current_position(&mut self) -> Position {
        #[cfg(feature = "benchmarking")]
        self.maybe_record_current_position_call();
        self.position_map.position(self.offset)
    }

    /// Turn on call counting for benchmark instrumentation.
    #[cfg(feature = "benchmarking")]
    pub(crate) fn enable_benchmark_counters(&mut self) {
        self.benchmark_counters = Some(LexerBenchmarkCounters::default());
    }

    /// Snapshot of the recorded counters (zeroed when never enabled).
    #[cfg(feature = "benchmarking")]
    pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
        self.benchmark_counters.unwrap_or_default()
    }

    /// Count one `current_position` call; no-op unless counters were
    /// explicitly enabled.
    #[cfg(feature = "benchmarking")]
    fn maybe_record_current_position_call(&mut self) {
        if let Some(counters) = &mut self.benchmark_counters {
            counters.current_position_calls += 1;
        }
    }
821
822    fn sync_offset_to_cursor(&mut self) {
823        if self.reinject_buf.is_empty()
824            && let Some(offset) = self.reinject_resume_offset.take()
825        {
826            self.offset = offset;
827        }
828    }
829
830    /// Get the next token kind from the input without decoding or materializing
831    /// any payload text.
832    pub fn next_token_kind(&mut self) -> Option<TokenKind> {
833        self.next_lexed_token().map(|token| token.kind)
834    }
835
836    fn peek_char(&mut self) -> Option<char> {
837        self.sync_offset_to_cursor();
838        if let Some(&ch) = self.reinject_buf.front() {
839            Some(ch)
840        } else {
841            self.cursor.first()
842        }
843    }
844
    /// Consume one character, draining the re-inject buffer before the
    /// source cursor, and keep `offset` in sync.
    fn advance(&mut self) -> Option<char> {
        self.sync_offset_to_cursor();
        let ch = if !self.reinject_buf.is_empty() {
            self.reinject_buf.pop_front()
        } else {
            self.cursor.bump()
        };
        if let Some(c) = ch {
            self.offset += c.len_utf8();
        }
        ch
    }

    /// Iterate over upcoming characters without consuming anything:
    /// re-injected characters first, then the remaining source.
    fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
        self.reinject_buf
            .iter()
            .copied()
            .chain(self.cursor.rest().chars())
    }

    // The fixed-distance peeks below are manual specializations of
    // `lookahead_chars().nth(..)` — presumably kept unrolled for the hot
    // lexer path; confirm before consolidating.

    /// Peek one character past the next one.
    fn second_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.second(),
            1 => self.cursor.first(),
            _ => self.reinject_buf.get(1).copied(),
        }
    }

    /// Peek two characters past the next one.
    fn third_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.third(),
            1 => self.cursor.second(),
            2 => self.cursor.first(),
            _ => self.reinject_buf.get(2).copied(),
        }
    }

    /// Peek three characters past the next one.
    fn fourth_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.rest().chars().nth(3),
            1 => self.cursor.third(),
            2 => self.cursor.second(),
            3 => self.cursor.first(),
            _ => self.reinject_buf.get(3).copied(),
        }
    }
891
892    fn consume_source_bytes(&mut self, byte_len: usize) {
893        debug_assert!(self.reinject_buf.is_empty());
894        self.sync_offset_to_cursor();
895        self.offset += byte_len;
896        self.cursor.skip_bytes(byte_len);
897    }
898
899    fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
900        debug_assert!(self.reinject_buf.is_empty());
901        self.offset += byte_len;
902    }
903
904    fn consume_ascii_chars(&mut self, count: usize) {
905        if self.reinject_buf.is_empty() {
906            self.consume_source_bytes(count);
907            return;
908        }
909
910        for _ in 0..count {
911            self.advance();
912        }
913    }
914
915    fn source_horizontal_whitespace_len(&self) -> usize {
916        self.cursor
917            .rest()
918            .as_bytes()
919            .iter()
920            .take_while(|byte| matches!(**byte, b' ' | b'\t'))
921            .count()
922    }
923
924    fn source_ascii_plain_word_len(&self) -> usize {
925        self.cursor
926            .rest()
927            .as_bytes()
928            .iter()
929            .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
930            .count()
931    }
932
933    fn find_double_quote_special(source: &str) -> Option<usize> {
934        source
935            .as_bytes()
936            .iter()
937            .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
938    }
939
940    fn ensure_capture_from_source(
941        &self,
942        capture: &mut Option<String>,
943        start: Position,
944        end: Position,
945    ) {
946        if capture.is_none() {
947            *capture = Some(self.input[start.offset..end.offset].to_string());
948        }
949    }
950
951    fn push_capture_char(capture: &mut Option<String>, ch: char) {
952        if let Some(text) = capture.as_mut() {
953            text.push(ch);
954        }
955    }
956
957    fn push_capture_str(capture: &mut Option<String>, text: &str) {
958        if let Some(current) = capture.as_mut() {
959            current.push_str(text);
960        }
961    }
962
    /// Zsh option state in effect at the current byte offset.
    ///
    /// Advances the timeline index monotonically past every entry at or
    /// before `offset`; before the first entry the initial options apply.
    fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
        if let Some(timeline) = self.zsh_timeline.as_ref() {
            while self.zsh_timeline_index < timeline.entries.len()
                && timeline.entries[self.zsh_timeline_index].offset <= self.offset
            {
                self.zsh_timeline_index += 1;
            }
            return if self.zsh_timeline_index == 0 {
                self.initial_zsh_options.as_ref()
            } else {
                Some(&timeline.entries[self.zsh_timeline_index - 1].state)
            };
        }

        self.initial_zsh_options.as_ref()
    }

    /// Whether `#` starts a comment here (true unless interactive_comments
    /// is known to be off).
    fn comments_enabled(&mut self) -> bool {
        !self
            .current_zsh_options()
            .is_some_and(|options| options.interactive_comments.is_definitely_off())
    }

    /// Whether zsh RC_QUOTES (`''` escaping in single quotes) is known on.
    fn rc_quotes_enabled(&mut self) -> bool {
        self.current_zsh_options()
            .is_some_and(|options| options.rc_quotes.is_definitely_on())
    }

    /// Whether zsh IGNORE_BRACES is known on.
    fn ignore_braces_enabled(&mut self) -> bool {
        self.current_zsh_options()
            .is_some_and(|options| options.ignore_braces.is_definitely_on())
    }

    /// Whether `}` should be ignored: IGNORE_BRACES implies the
    /// close-brace behavior as well.
    fn ignore_close_braces_enabled(&mut self) -> bool {
        self.current_zsh_options().is_some_and(|options| {
            options.ignore_braces.is_definitely_on()
                || options.ignore_close_braces.is_definitely_on()
        })
    }

    /// Decide whether a `#` at the current position is word material
    /// rather than a comment starter: comments are disabled, or the `#`
    /// is not at the start of a word (the previous source char is neither
    /// whitespace nor a command separator), or we are inside an unclosed
    /// `((` on this line.
    fn should_treat_hash_as_word_char(&mut self) -> bool {
        if !self.comments_enabled() {
            return true;
        }
        self.reinject_buf.is_empty()
            && (self
                .input
                .get(..self.offset)
                .and_then(|prefix| prefix.chars().next_back())
                .is_some_and(|prev| {
                    !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
                })
                || self.is_inside_unclosed_double_paren_on_line())
    }
1017
1018    fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
1019        capture
1020            .as_deref()
1021            .unwrap_or(&self.input[start.offset..self.offset])
1022    }
1023
1024    fn current_word_surface_is_single_char(
1025        &self,
1026        start: Position,
1027        capture: &Option<String>,
1028        target: char,
1029    ) -> bool {
1030        let text = self.current_word_text(start, capture);
1031        if !text.contains('\x00') {
1032            let mut encoded = [0; 4];
1033            return text == target.encode_utf8(&mut encoded);
1034        }
1035
1036        let mut chars = text.chars().filter(|&ch| ch != '\x00');
1037        matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
1038    }
1039
1040    fn current_word_surface_last_char<'b>(
1041        &'b self,
1042        start: Position,
1043        capture: &'b Option<String>,
1044    ) -> Option<char> {
1045        self.current_word_text(start, capture)
1046            .chars()
1047            .rev()
1048            .find(|&ch| ch != '\x00')
1049    }
1050
1051    fn current_word_surface_ends_with_char(
1052        &self,
1053        start: Position,
1054        capture: &Option<String>,
1055        target: char,
1056    ) -> bool {
1057        self.current_word_surface_last_char(start, capture) == Some(target)
1058    }
1059
1060    fn current_word_surface_ends_with_extglob_prefix(
1061        &self,
1062        start: Position,
1063        capture: &Option<String>,
1064    ) -> bool {
1065        self.current_word_surface_last_char(start, capture)
1066            .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
1067    }
1068
1069    /// Get the next source-backed token from the input, skipping line comments.
1070    pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
1071        self.skip_whitespace();
1072        let start = self.current_position();
1073        let token = self.next_lexed_token_inner(false)?;
1074        let end = self.current_position();
1075        Some(token.with_span(Span::from_positions(start, end)))
1076    }
1077
1078    /// Get the next source-backed token from the input, preserving line comments.
1079    pub fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
1080        self.skip_whitespace();
1081        let start = self.current_position();
1082        let token = self.next_lexed_token_inner(true)?;
1083        let end = self.current_position();
1084        Some(token.with_span(Span::from_positions(start, end)))
1085    }
1086
    /// Internal: get next token without recording position (called after whitespace skip)
    ///
    /// Dispatches on the first character. Operator families (`;`, `|`, `&`,
    /// `>`, `<`, parens, braces, brackets) match their longest form first
    /// using one-/two-character lookahead; quotes, `#`, digits, and anything
    /// else fall through to the dedicated readers.
    fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
        let ch = self.peek_char()?;

        match ch {
            '\n' => {
                self.consume_ascii_chars(1);
                Some(LexedToken::punctuation(TokenKind::Newline))
            }
            // Case/list terminators: ;;& before ;; before ;| / ;& before ;
            ';' => {
                if self.second_char() == Some(';') {
                    if self.third_char() == Some('&') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp)) // ;;&
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemicolon)) // ;;
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiPipe)) // ;|
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiAmp)) // ;&
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Semicolon))
                }
            }
            // || (or-list), |& (pipe stdout+stderr), | (pipe)
            '|' => {
                if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Or))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::PipeBoth))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Pipe))
                }
            }
            // && , &>> , &> , &| , &! , &
            '&' => {
                if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::And))
                } else if self.second_char() == Some('>') {
                    if self.third_char() == Some('>') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::RedirectBoth))
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
                } else if self.second_char() == Some('!') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundBang))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Background))
                }
            }
            // >>(|) , >| , >( , >& , >
            '>' => {
                if self.second_char() == Some('>') {
                    // `>>|` consumes the trailing `|` but is still an append.
                    if self.third_char() == Some('|') {
                        self.consume_ascii_chars(3);
                    } else {
                        self.consume_ascii_chars(2);
                    }
                    Some(LexedToken::punctuation(TokenKind::RedirectAppend))
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Clobber))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupOutput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectOut))
                }
            }
            // <<< , <<- , << , <> , <( , <& , <
            '<' => {
                if self.second_char() == Some('<') {
                    if self.third_char() == Some('<') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereString))
                    } else if self.third_char() == Some('-') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereDocStrip))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::HereDoc))
                    }
                } else if self.second_char() == Some('>') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupInput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectIn))
                }
            }
            '(' => {
                if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::LeftParen))
                }
            }
            ')' => {
                if self.second_char() == Some(')') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RightParen))
                }
            }
            // `{` is ambiguous: literal word (zsh ignore_braces), brace
            // expansion, brace group opener, or literal brace text.
            '{' => {
                let start = self.current_position();
                if self.ignore_braces_enabled() {
                    self.consume_ascii_chars(1);
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
                        }
                        _ => self.read_word_starting_with("{", start),
                    }
                } else if self.looks_like_brace_expansion() {
                    // Look ahead to see if this is a brace expansion like {a,b,c} or {1..5}
                    // vs a brace group like { cmd; }
                    // Note: { must be followed by space/newline to be a brace group
                    self.read_brace_expansion_word()
                } else if self.is_brace_group_start() {
                    self.advance();
                    Some(LexedToken::punctuation(TokenKind::LeftBrace))
                } else if self.brace_literal_starts_case_pattern_delimiter() {
                    self.read_word_starting_with("{", start)
                } else {
                    self.read_brace_literal_word()
                }
            }
            '}' => {
                self.consume_ascii_chars(1);
                // With zsh ignore(_close)_braces on, `}` is ordinary word text.
                if self.ignore_close_braces_enabled() {
                    Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
                } else {
                    Some(LexedToken::punctuation(TokenKind::RightBrace))
                }
            }
            '[' => {
                let start = self.current_position();
                self.consume_ascii_chars(1);
                // `[[` only opens a conditional when followed by whitespace/EOF.
                if self.peek_char() == Some('[')
                    && matches!(
                        self.second_char(),
                        Some(' ') | Some('\t') | Some('\n') | None
                    )
                {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
                } else {
                    // `[` can start the test command when followed by whitespace, or it can be
                    // ordinary word text such as a glob bracket expression.
                    //
                    // Read the whole token with the normal word scanner so forms like `[[z]`,
                    // `[hello"]"`, and `[+(])` stay attached to one word instead of producing
                    // structural tokens mid-word.
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
                        }
                        _ => self.read_word_starting_with("[", start),
                    }
                }
            }
            ']' => {
                if self.second_char() == Some(']') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
                } else {
                    // A lone `]` is emitted as word text (e.g. end of `test` args).
                    self.consume_ascii_chars(1);
                    Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
                }
            }
            '\'' => self.read_single_quoted_string(),
            '"' => self.read_double_quoted_string(),
            '#' => {
                // `#` attached to preceding word text (or inside `((`) is literal.
                if self.should_treat_hash_as_word_char() {
                    let start = self.current_position();
                    return self.read_word_starting_with("#", start);
                }
                if preserve_comments {
                    self.read_comment();
                    Some(LexedToken::comment())
                } else {
                    // Discard the comment and lex whatever follows it.
                    self.skip_comment();
                    self.next_lexed_token_inner(false)
                }
            }
            // Handle file descriptor redirects like 2> or 2>&1
            '0'..='9' => self.read_word_or_fd_redirect(),
            _ => self.read_word(),
        }
    }
1304
    /// Skip horizontal whitespace (spaces/tabs) and backslash-newline line
    /// continuations between tokens. Never consumes a bare newline — that is
    /// a token in its own right.
    fn skip_whitespace(&mut self) {
        while let Some(ch) = self.peek_char() {
            if self.reinject_buf.is_empty() {
                // Fast path over the raw source: consume a whole run of
                // spaces/tabs at once, then check for a line continuation.
                let whitespace_len = self.source_horizontal_whitespace_len();
                if whitespace_len > 0 {
                    self.consume_source_bytes(whitespace_len);
                    continue;
                }

                if self.cursor.rest().starts_with("\\\n") {
                    self.consume_source_bytes(2);
                    continue;
                }
            }

            // Char-at-a-time handling (needed while reinjected text is
            // pending; on the source path these checks only confirm the
            // next char ends the whitespace run).
            if ch == ' ' || ch == '\t' {
                self.consume_ascii_chars(1);
            } else if ch == '\\' {
                // Check for backslash-newline (line continuation) between tokens
                if self.second_char() == Some('\n') {
                    self.consume_ascii_chars(2);
                } else {
                    break;
                }
            } else {
                break;
            }
        }
    }
1334
1335    fn skip_comment(&mut self) {
1336        if self.reinject_buf.is_empty() {
1337            let end = self
1338                .cursor
1339                .find_byte(b'\n')
1340                .unwrap_or(self.cursor.rest().len());
1341            self.consume_source_bytes(end);
1342            return;
1343        }
1344
1345        while let Some(ch) = self.peek_char() {
1346            if ch == '\n' {
1347                break;
1348            }
1349            self.advance();
1350        }
1351    }
1352
1353    fn read_comment(&mut self) {
1354        debug_assert_eq!(self.peek_char(), Some('#'));
1355
1356        if self.reinject_buf.is_empty() {
1357            let rest = self.cursor.rest();
1358            let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1359            self.consume_source_bytes(end);
1360            return;
1361        }
1362
1363        self.advance(); // consume '#'
1364
1365        while let Some(ch) = self.peek_char() {
1366            if ch == '\n' {
1367                break;
1368            }
1369            self.advance();
1370        }
1371    }
1372
1373    fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1374        if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1375            return false;
1376        }
1377
1378        let line_start = self.input[..self.offset]
1379            .rfind('\n')
1380            .map_or(0, |index| index + 1);
1381        let prefix = &self.input[line_start..self.offset];
1382        line_has_unclosed_double_paren(prefix)
1383    }
1384
    /// Check if this is a file descriptor redirect (e.g., 2>, 2>>, 2>&1)
    /// or just a regular word starting with a digit
    ///
    /// Only a single leading digit is treated as the fd number; if the
    /// lookahead does not match a redirect operator, the whole token falls
    /// back to the normal word reader.
    fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
        if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
            let Some(fd) = first_digit.to_digit(10) else {
                unreachable!("peeked ASCII digit should convert to a base-10 digit");
            };
            let fd = fd as i32;

            match (self.second_char(), self.third_char()) {
                // N>> — append; a trailing `|` (N>>|) is consumed but the
                // token is still an append redirect.
                (Some('>'), Some('>')) => {
                    if self.fourth_char() == Some('|') {
                        self.consume_ascii_chars(4);
                    } else {
                        self.consume_ascii_chars(3);
                    }
                    return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
                }
                // N>| — clobber.
                (Some('>'), Some('|')) => {
                    self.consume_ascii_chars(3);
                    return Some(LexedToken::fd(TokenKind::Clobber, fd));
                }
                // N>&M — duplicate an output fd; scan the digits of M.
                (Some('>'), Some('&')) => {
                    self.consume_ascii_chars(3);

                    let mut target_str = String::with_capacity(4);
                    while let Some(c) = self.peek_char() {
                        if c.is_ascii_digit() {
                            target_str.push(c);
                            self.advance();
                        } else {
                            break;
                        }
                    }

                    // Bare `N>&` with no target digits: fall back to a plain
                    // fd output redirect.
                    if target_str.is_empty() {
                        return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
                    }

                    // target_str is all ASCII digits, so parse only fails on
                    // overflow; the fallback of 1 keeps lexing going.
                    let target_fd: i32 = target_str.parse().unwrap_or(1);
                    return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
                }
                // N> — plain output redirect.
                (Some('>'), _) => {
                    self.consume_ascii_chars(2);
                    return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
                }
                // N<&M or N<&- — duplicate or close an input fd. The scan
                // stops right after a `-`.
                (Some('<'), Some('&')) => {
                    self.consume_ascii_chars(3);

                    let mut target_str = String::with_capacity(4);
                    while let Some(c) = self.peek_char() {
                        if c.is_ascii_digit() || c == '-' {
                            target_str.push(c);
                            self.advance();
                            if c == '-' {
                                break;
                            }
                        } else {
                            break;
                        }
                    }

                    if target_str == "-" {
                        return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
                    }
                    // NOTE(review): a mixed target like `2-` fails to parse and
                    // falls back to 0 here — confirm that is intended.
                    let target_fd: i32 = target_str.parse().unwrap_or(0);
                    return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
                }
                // N<> — open read/write.
                (Some('<'), Some('>')) => {
                    self.consume_ascii_chars(3);
                    return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
                }
                // N<< is a heredoc, not an fd redirect: fall through so the
                // digit becomes a word and `<<` is lexed separately.
                (Some('<'), Some('<')) => {}
                // N< — plain input redirect.
                (Some('<'), _) => {
                    self.consume_ascii_chars(2);
                    return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
                }
                _ => {}
            }
        }

        // Not a fd redirect pattern, read as regular word
        self.read_word()
    }
1469
1470    fn read_word_starting_with(
1471        &mut self,
1472        _prefix: &str,
1473        start: Position,
1474    ) -> Option<LexedToken<'a>> {
1475        let segment = match self.read_unquoted_segment(start) {
1476            Ok(segment) => segment,
1477            Err(kind) => return Some(LexedToken::error(kind)),
1478        };
1479        if segment.as_str().is_empty() {
1480            return None;
1481        }
1482        let mut lexed_word = LexedWord::from_segment(segment);
1483        if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1484            return Some(LexedToken::error(kind));
1485        }
1486        Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1487    }
1488
    /// Read a word token starting at the cursor.
    ///
    /// Fast path (no reinjected text pending): take the longest run of plain
    /// word characters straight from the source. If nothing after the run can
    /// extend the word, return it as a borrowed token; if a quote, `$`, `{`,
    /// line continuation, or eligible `(` follows, hand off to the segmented
    /// or complex scanners instead.
    fn read_word(&mut self) -> Option<LexedToken<'a>> {
        let start = self.current_position();

        if self.reinject_buf.is_empty() {
            let ascii_len = self.source_ascii_plain_word_len();
            // Use the ASCII shortcut only when the byte after the run is
            // ASCII (or absent); otherwise rescan char-wise so multi-byte
            // word characters stay attached.
            let chunk = if ascii_len > 0
                && self
                    .cursor
                    .rest()
                    .as_bytes()
                    .get(ascii_len)
                    .is_none_or(|byte| byte.is_ascii())
            {
                self.consume_source_bytes(ascii_len);
                &self.input[start.offset..self.offset]
            } else {
                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                self.advance_scanned_source_bytes(chunk.len());
                chunk
            };
            if !chunk.is_empty() {
                // Does the next character keep this word going (quotes,
                // expansion, braces, continuation, or a `(` suffix after
                // `name=` / an eligible word)?
                let continues = matches!(
                    self.peek_char(),
                    Some(next)
                        if Self::is_word_char(next)
                            || next == '$'
                            || matches!(next, '\'' | '"')
                            || next == '{'
                            || (next == '\\' && self.second_char() == Some('\n'))
                            || (next == '('
                                && (chunk.ends_with('=')
                                    || Self::word_can_take_parenthesized_suffix(chunk)))
                );

                if !continues {
                    // Whole word came straight from the source: borrow it.
                    let end = self.current_position();
                    return Some(LexedToken::borrowed_word(
                        TokenKind::Word,
                        &self.input[start.offset..self.offset],
                        Some(Span::from_positions(start, end)),
                    ));
                }

                // Parenthesized suffixes (e.g. after `name=`) need the full
                // scanner restarted from the word's beginning.
                if self.peek_char() == Some('(')
                    && (chunk.ends_with('=') || Self::word_can_take_parenthesized_suffix(chunk))
                {
                    return self.read_complex_word(start);
                }

                // Plain prefix plus a continuation: start a segmented word
                // from the borrowed chunk and append the rest.
                let end = self.current_position();
                return self.finish_segmented_word(LexedWord::borrowed(
                    LexedWordSegmentKind::Plain,
                    &self.input[start.offset..self.offset],
                    Some(Span::from_positions(start, end)),
                ));
            }
        }

        self.read_complex_word(start)
    }
1549
1550    fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1551        if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1552            return Some(LexedToken::error(kind));
1553        }
1554
1555        Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1556    }
1557
1558    fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1559        if self.peek_char() == Some('$') {
1560            match self.second_char() {
1561                Some('\'') => return self.read_dollar_single_quoted_string(),
1562                Some('"') => return self.read_dollar_double_quoted_string(),
1563                _ => {}
1564            }
1565        }
1566
1567        let segment = match self.read_unquoted_segment(start) {
1568            Ok(segment) => segment,
1569            Err(kind) => return Some(LexedToken::error(kind)),
1570        };
1571
1572        if segment.as_str().is_empty() {
1573            return None;
1574        }
1575
1576        self.finish_segmented_word(LexedWord::from_segment(segment))
1577    }
1578
1579    fn read_unquoted_segment(
1580        &mut self,
1581        start: Position,
1582    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1583        let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
1584        while let Some(ch) = self.peek_char() {
1585            if ch == '"' || ch == '\'' {
1586                break;
1587            } else if ch == '$' {
1588                if matches!(self.second_char(), Some('\'') | Some('"'))
1589                    && (self.current_position().offset > start.offset
1590                        || word.as_ref().is_some_and(|word| !word.is_empty()))
1591                {
1592                    break;
1593                }
1594
1595                // Handle variable references and command substitution
1596                self.advance();
1597
1598                Self::push_capture_char(&mut word, ch); // push the '$'
1599
1600                // Check for $[ / $( / ${ forms before falling back to variable text.
1601                if self.peek_char() == Some('[') {
1602                    Self::push_capture_char(&mut word, '[');
1603                    self.advance();
1604                    if !self.read_legacy_arithmetic_into(&mut word, start) {
1605                        return Err(LexerErrorKind::CommandSubstitution);
1606                    }
1607                } else if self.peek_char() == Some('(') {
1608                    if self.second_char() == Some('(') {
1609                        if !self.read_arithmetic_expansion_into(&mut word) {
1610                            return Err(LexerErrorKind::CommandSubstitution);
1611                        }
1612                    } else {
1613                        Self::push_capture_char(&mut word, '(');
1614                        self.advance();
1615                        if !self.read_command_subst_into(&mut word) {
1616                            return Err(LexerErrorKind::CommandSubstitution);
1617                        }
1618                    }
1619                } else if self.peek_char() == Some('{') {
1620                    // ${VAR} format — track nested braces so ${a[${#b[@]}]}
1621                    // doesn't stop at the inner }.
1622                    Self::push_capture_char(&mut word, '{');
1623                    self.advance();
1624                    let _ = self.read_param_expansion_into(&mut word, start);
1625                } else {
1626                    // Check for special single-character variables ($?, $#, $@, $*, $!, $$, $-, $0-$9)
1627                    if let Some(c) = self.peek_char() {
1628                        if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
1629                            || c.is_ascii_digit()
1630                        {
1631                            Self::push_capture_char(&mut word, c);
1632                            self.advance();
1633                        } else {
1634                            // Read variable name (alphanumeric + _)
1635                            while let Some(c) = self.peek_char() {
1636                                if c.is_ascii_alphanumeric() || c == '_' {
1637                                    Self::push_capture_char(&mut word, c);
1638                                    self.advance();
1639                                } else {
1640                                    break;
1641                                }
1642                            }
1643                        }
1644                    }
1645                }
1646            } else if ch == '{' {
1647                if self.looks_like_mid_word_brace_segment() {
1648                    // Keep balanced {...} forms attached to the current word so
1649                    // plain literals like foo{bar} and brace expansions stay intact.
1650                    Self::push_capture_char(&mut word, ch);
1651                    self.advance();
1652                    self.consume_mid_word_brace_segment(&mut word);
1653                } else {
1654                    // Unmatched literal braces in regexes like ^{ should not swallow
1655                    // trailing delimiters such as ]] or then.
1656                    Self::push_capture_char(&mut word, ch);
1657                    self.advance();
1658                }
1659            } else if ch == '`' {
1660                // Preserve legacy backticks verbatim so the parser can keep the
1661                // original syntax form.
1662                let capture_end = self.current_position();
1663                self.ensure_capture_from_source(&mut word, start, capture_end);
1664                Self::push_capture_char(&mut word, ch);
1665                self.advance(); // consume opening `
1666                let mut closed = false;
1667                while let Some(c) = self.peek_char() {
1668                    Self::push_capture_char(&mut word, c);
1669                    self.advance();
1670                    if c == '`' {
1671                        closed = true;
1672                        break;
1673                    }
1674                    if c == '\\'
1675                        && let Some(next) = self.peek_char()
1676                    {
1677                        Self::push_capture_char(&mut word, next);
1678                        self.advance();
1679                    }
1680                }
1681                if !closed {
1682                    return Err(LexerErrorKind::BacktickSubstitution);
1683                }
1684            } else if ch == '\\' {
1685                let capture_end = self.current_position();
1686                self.ensure_capture_from_source(&mut word, start, capture_end);
1687                self.advance();
1688                if let Some(next) = self.peek_char() {
1689                    if next == '\n' {
1690                        // Line continuation: skip backslash + newline
1691                        self.advance();
1692                    } else {
1693                        // Escaped character: backslash quotes the next char
1694                        // (quote removal — only the literal char survives).
1695                        // Preserve source/decoded alignment with a sentinel so
1696                        // downstream word decoding keeps later spans anchored.
1697                        Self::push_capture_char(&mut word, '\x00');
1698                        Self::push_capture_char(&mut word, next);
1699                        self.advance();
1700                        if next == '{'
1701                            && self.current_word_surface_is_single_char(start, &word, '{')
1702                            && self.escaped_brace_sequence_looks_like_brace_expansion()
1703                        {
1704                            let mut depth = 1;
1705                            while let Some(c) = self.peek_char() {
1706                                Self::push_capture_char(&mut word, c);
1707                                self.advance();
1708                                match c {
1709                                    '{' => depth += 1,
1710                                    '}' => {
1711                                        depth -= 1;
1712                                        if depth == 0 {
1713                                            break;
1714                                        }
1715                                    }
1716                                    _ => {}
1717                                }
1718                            }
1719                        }
1720                    }
1721                } else {
1722                    Self::push_capture_char(&mut word, '\\');
1723                }
1724            } else if ch == '('
1725                && self.current_word_surface_ends_with_char(start, &word, '=')
1726                && self.looks_like_assoc_assign()
1727            {
1728                // Associative compound assignment: var=([k]="v" ...) — keep entire
1729                // (...) as part of word so declare -A m=([k]="v") stays one token.
1730                Self::push_capture_char(&mut word, ch);
1731                self.advance();
1732                let mut depth = 1;
1733                while let Some(c) = self.peek_char() {
1734                    Self::push_capture_char(&mut word, c);
1735                    self.advance();
1736                    match c {
1737                        '(' => depth += 1,
1738                        ')' => {
1739                            depth -= 1;
1740                            if depth == 0 {
1741                                break;
1742                            }
1743                        }
1744                        '"' => {
1745                            while let Some(qc) = self.peek_char() {
1746                                Self::push_capture_char(&mut word, qc);
1747                                self.advance();
1748                                if qc == '"' {
1749                                    break;
1750                                }
1751                                if qc == '\\'
1752                                    && let Some(esc) = self.peek_char()
1753                                {
1754                                    Self::push_capture_char(&mut word, esc);
1755                                    self.advance();
1756                                }
1757                            }
1758                        }
1759                        '\'' => {
1760                            while let Some(qc) = self.peek_char() {
1761                                Self::push_capture_char(&mut word, qc);
1762                                self.advance();
1763                                if qc == '\'' {
1764                                    break;
1765                                }
1766                            }
1767                        }
1768                        '\\' => {
1769                            if let Some(esc) = self.peek_char() {
1770                                Self::push_capture_char(&mut word, esc);
1771                                self.advance();
1772                            }
1773                        }
1774                        _ => {}
1775                    }
1776                }
1777            } else if ch == '(' && self.current_word_surface_ends_with_extglob_prefix(start, &word)
1778            {
1779                // Extglob: @(...), ?(...), *(...), +(...), !(...)
1780                // Consume through matching ) including nested parens
1781                Self::push_capture_char(&mut word, ch);
1782                self.advance();
1783                let mut depth = 1;
1784                while let Some(c) = self.peek_char() {
1785                    Self::push_capture_char(&mut word, c);
1786                    self.advance();
1787                    match c {
1788                        '(' => depth += 1,
1789                        ')' => {
1790                            depth -= 1;
1791                            if depth == 0 {
1792                                break;
1793                            }
1794                        }
1795                        '\\' => {
1796                            if let Some(esc) = self.peek_char() {
1797                                Self::push_capture_char(&mut word, esc);
1798                                self.advance();
1799                            }
1800                        }
1801                        _ => {}
1802                    }
1803                }
1804            } else if Self::is_plain_word_char(ch) {
1805                if self.reinject_buf.is_empty() {
1806                    let ascii_len = self.source_ascii_plain_word_len();
1807                    let chunk = if ascii_len > 0
1808                        && self
1809                            .cursor
1810                            .rest()
1811                            .as_bytes()
1812                            .get(ascii_len)
1813                            .is_none_or(|byte| byte.is_ascii())
1814                    {
1815                        self.consume_source_bytes(ascii_len);
1816                        &self.input[self.offset - ascii_len..self.offset]
1817                    } else {
1818                        let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1819                        self.advance_scanned_source_bytes(chunk.len());
1820                        chunk
1821                    };
1822                    Self::push_capture_str(&mut word, chunk);
1823                } else {
1824                    Self::push_capture_char(&mut word, ch);
1825                    self.advance();
1826                }
1827            } else {
1828                break;
1829            }
1830        }
1831
1832        if let Some(word) = word {
1833            let span = Some(Span::from_positions(start, self.current_position()));
1834            Ok(LexedWordSegment::owned_with_spans(
1835                LexedWordSegmentKind::Plain,
1836                word,
1837                span,
1838                span,
1839            ))
1840        } else {
1841            let end = self.current_position();
1842            Ok(LexedWordSegment::borrowed(
1843                LexedWordSegmentKind::Plain,
1844                &self.input[start.offset..self.offset],
1845                Some(Span::from_positions(start, end)),
1846            ))
1847        }
1848    }
1849
1850    fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1851        let segment = match self.read_single_quoted_segment() {
1852            Ok(segment) => segment,
1853            Err(kind) => return Some(LexedToken::error(kind)),
1854        };
1855        let mut word = LexedWord::from_segment(segment);
1856        if let Err(kind) = self.append_segmented_continuation(&mut word) {
1857            return Some(LexedToken::error(kind));
1858        }
1859
1860        Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1861    }
1862
    /// Read one `'...'` segment, starting at the opening quote.
    ///
    /// Fast path: when no re-injected characters are pending and rc-quotes
    /// mode is off, the closing quote is found with a single `memchr` scan
    /// and the content is borrowed zero-copy from the input. Otherwise the
    /// content is copied char-by-char into an owned buffer so that doubled
    /// `''` pairs can collapse into a single literal quote.
    ///
    /// Returns the content segment (quotes excluded; the wrapper span covers
    /// them) or `LexerErrorKind::SingleQuote` if the closing quote is missing.
    fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        debug_assert_eq!(self.peek_char(), Some('\''));

        let wrapper_start = self.current_position();
        self.consume_ascii_chars(1); // consume opening '
        let content_start = self.current_position();
        // Borrowing is only safe when reading straight from the source and
        // no rc-quote collapsing can rewrite the content.
        let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
        let mut content_end = content_start;
        let mut content = String::with_capacity(16);
        let mut closed = false;

        if can_borrow {
            let rest = self.cursor.rest();
            if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
                self.consume_source_bytes(quote_index);
                content_end = self.current_position();
                self.consume_ascii_chars(1); // consume closing '
                closed = true;
            } else {
                // No closing quote in the remaining input; consume it all so
                // the error below is reported at EOF.
                self.consume_source_bytes(rest.len());
            }
        }

        // Slow path; also a no-op tail for the fast path once `closed`.
        while let Some(ch) = self.peek_char() {
            if closed {
                break;
            }
            if ch == '\'' {
                // rc-quotes: a doubled '' inside the string is a literal '.
                if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
                    if !can_borrow {
                        content.push('\'');
                    }
                    self.advance();
                    self.advance();
                    continue;
                }
                content_end = self.current_position();
                self.consume_ascii_chars(1); // consume closing '
                closed = true;
                break;
            }
            if !can_borrow {
                content.push(ch);
            }
            self.advance();
        }

        if !closed {
            return Err(LexerErrorKind::SingleQuote);
        }

        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
        let content_span = Some(Span::from_positions(content_start, content_end));

        if can_borrow {
            Ok(LexedWordSegment::borrowed_with_spans(
                LexedWordSegmentKind::SingleQuoted,
                &self.input[content_start.offset..content_end.offset],
                content_span,
                wrapper_span,
            ))
        } else {
            Ok(LexedWordSegment::owned_with_spans(
                LexedWordSegmentKind::SingleQuoted,
                content,
                content_span,
                wrapper_span,
            ))
        }
    }
1933
1934    fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1935        let segment = match self.read_dollar_single_quoted_segment() {
1936            Ok(segment) => segment,
1937            Err(kind) => return Some(LexedToken::error(kind)),
1938        };
1939        let mut word = LexedWord::from_segment(segment);
1940        if let Err(kind) = self.append_segmented_continuation(&mut word) {
1941            return Some(LexedToken::error(kind));
1942        }
1943
1944        let kind = if word.single_segment().is_some() {
1945            TokenKind::LiteralWord
1946        } else {
1947            TokenKind::Word
1948        };
1949
1950        Some(LexedToken::with_word_payload(kind, word))
1951    }
1952
1953    fn read_dollar_single_quoted_segment(
1954        &mut self,
1955    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1956        debug_assert_eq!(self.peek_char(), Some('$'));
1957        debug_assert_eq!(self.second_char(), Some('\''));
1958
1959        let wrapper_start = self.current_position();
1960        self.consume_ascii_chars(2); // consume $'
1961        let content_start = self.current_position();
1962        let mut out = String::with_capacity(16);
1963
1964        while let Some(ch) = self.peek_char() {
1965            if ch == '\'' {
1966                let content_end = self.current_position();
1967                self.advance();
1968                let wrapper_span =
1969                    Some(Span::from_positions(wrapper_start, self.current_position()));
1970                let content_span = Some(Span::from_positions(content_start, content_end));
1971                return Ok(LexedWordSegment::owned_with_spans(
1972                    LexedWordSegmentKind::DollarSingleQuoted,
1973                    out,
1974                    content_span,
1975                    wrapper_span,
1976                ));
1977            }
1978
1979            if ch == '\\' {
1980                self.advance();
1981                if let Some(esc) = self.peek_char() {
1982                    self.advance();
1983                    match esc {
1984                        'n' => out.push('\n'),
1985                        't' => out.push('\t'),
1986                        'r' => out.push('\r'),
1987                        'a' => out.push('\x07'),
1988                        'b' => out.push('\x08'),
1989                        'f' => out.push('\x0C'),
1990                        'v' => out.push('\x0B'),
1991                        'e' | 'E' => out.push('\x1B'),
1992                        '\\' => out.push('\\'),
1993                        '\'' => out.push('\''),
1994                        '"' => out.push('"'),
1995                        '?' => out.push('?'),
1996                        'c' => {
1997                            if let Some(control) = self.peek_char() {
1998                                self.advance();
1999                                out.push(((control as u32 & 0x1F) as u8) as char);
2000                            } else {
2001                                out.push('\\');
2002                                out.push('c');
2003                            }
2004                        }
2005                        'x' => {
2006                            let mut hex = String::new();
2007                            for _ in 0..2 {
2008                                if let Some(h) = self.peek_char() {
2009                                    if h.is_ascii_hexdigit() {
2010                                        hex.push(h);
2011                                        self.advance();
2012                                    } else {
2013                                        break;
2014                                    }
2015                                }
2016                            }
2017                            if let Ok(val) = u8::from_str_radix(&hex, 16) {
2018                                out.push(val as char);
2019                            }
2020                        }
2021                        'u' => {
2022                            let mut hex = String::new();
2023                            for _ in 0..4 {
2024                                if let Some(h) = self.peek_char() {
2025                                    if h.is_ascii_hexdigit() {
2026                                        hex.push(h);
2027                                        self.advance();
2028                                    } else {
2029                                        break;
2030                                    }
2031                                }
2032                            }
2033                            if let Ok(val) = u32::from_str_radix(&hex, 16)
2034                                && let Some(c) = char::from_u32(val)
2035                            {
2036                                out.push(c);
2037                            }
2038                        }
2039                        'U' => {
2040                            let mut hex = String::new();
2041                            for _ in 0..8 {
2042                                if let Some(h) = self.peek_char() {
2043                                    if h.is_ascii_hexdigit() {
2044                                        hex.push(h);
2045                                        self.advance();
2046                                    } else {
2047                                        break;
2048                                    }
2049                                }
2050                            }
2051                            if let Ok(val) = u32::from_str_radix(&hex, 16)
2052                                && let Some(c) = char::from_u32(val)
2053                            {
2054                                out.push(c);
2055                            }
2056                        }
2057                        '0'..='7' => {
2058                            let mut oct = String::new();
2059                            oct.push(esc);
2060                            for _ in 0..2 {
2061                                if let Some(o) = self.peek_char() {
2062                                    if o.is_ascii_digit() && o < '8' {
2063                                        oct.push(o);
2064                                        self.advance();
2065                                    } else {
2066                                        break;
2067                                    }
2068                                }
2069                            }
2070                            if let Ok(val) = u8::from_str_radix(&oct, 8) {
2071                                out.push(val as char);
2072                            }
2073                        }
2074                        _ => {
2075                            out.push('\\');
2076                            out.push(esc);
2077                        }
2078                    }
2079                } else {
2080                    out.push('\\');
2081                }
2082                continue;
2083            }
2084
2085            out.push(ch);
2086            self.advance();
2087        }
2088
2089        Err(LexerErrorKind::SingleQuote)
2090    }
2091
    /// Read a run of plain (unquoted, non-special) word characters as a
    /// continuation segment, or `None` if no such run starts here.
    ///
    /// Fast path: with no re-injected characters pending, the run is taken
    /// straight from the raw source and borrowed zero-copy — byte-wise when
    /// it is pure ASCII, otherwise via a char-wise scan.
    fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
        let start = self.current_position();

        if self.reinject_buf.is_empty() {
            let ascii_len = self.source_ascii_plain_word_len();
            // Take the byte-wise shortcut only when the byte after the ASCII
            // run is itself ASCII (or EOF); a non-ASCII follower may be a
            // multi-byte word char that must stay in the same chunk, so fall
            // back to the char-wise scan in that case.
            let chunk = if ascii_len > 0
                && self
                    .cursor
                    .rest()
                    .as_bytes()
                    .get(ascii_len)
                    .is_none_or(|byte| byte.is_ascii())
            {
                self.consume_source_bytes(ascii_len);
                &self.input[start.offset..self.offset]
            } else {
                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                self.advance_scanned_source_bytes(chunk.len());
                chunk
            };
            if chunk.is_empty() {
                return None;
            }

            let end = self.current_position();
            return Some(LexedWordSegment::borrowed(
                LexedWordSegmentKind::Plain,
                &self.input[start.offset..self.offset],
                Some(Span::from_positions(start, end)),
            ));
        }

        // Re-injected characters are not backed by the source slice, so
        // collect the run into an owned buffer (no span attached).
        let ch = self.peek_char()?;
        if !Self::is_plain_word_char(ch) {
            return None;
        }

        let mut text = String::with_capacity(16);
        while let Some(ch) = self.peek_char() {
            if !Self::is_plain_word_char(ch) {
                break;
            }
            text.push(ch);
            self.advance();
        }

        Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
    }
2140
    /// After a closing quote, read any adjacent quoted or unquoted word chars
    /// into `word`. Handles concatenation like `'foo'"bar"baz`.
    ///
    /// Loops until nothing more can be glued on, dispatching on the next
    /// character: backslash-newline line continuations are skipped, the four
    /// quoted forms (`'…'`, `"…"`, `$'…'`, `$"…"`) append their decoded
    /// segment, an eligible word may take a parenthesized suffix, and
    /// anything else is tried as a plain/unquoted run.
    fn append_segmented_continuation(
        &mut self,
        word: &mut LexedWord<'a>,
    ) -> Result<(), LexerErrorKind> {
        loop {
            match self.peek_char() {
                // Line continuation: joins physical lines, contributes
                // nothing to the word.
                Some('\\') if self.second_char() == Some('\n') => {
                    self.advance();
                    self.advance();
                    continue;
                }
                Some('\'') => {
                    word.push_segment(self.read_single_quoted_segment()?);
                }
                Some('"') => {
                    word.push_segment(self.read_double_quoted_segment()?);
                }
                Some('$') if self.second_char() == Some('\'') => {
                    word.push_segment(self.read_dollar_single_quoted_segment()?);
                }
                Some('$') if self.second_char() == Some('"') => {
                    word.push_segment(self.read_dollar_double_quoted_segment()?);
                }
                Some('(') if Self::lexed_word_can_take_parenthesized_suffix(word) => {
                    let Some(segment) = self.read_parenthesized_word_suffix_segment() else {
                        unreachable!("peeked '(' should produce a suffix segment");
                    };
                    word.push_segment(segment);
                }
                _ => {
                    // Try the cheap plain-run reader first; fall back to the
                    // full unquoted-segment reader for anything richer.
                    if let Some(segment) = self.read_plain_continuation_segment() {
                        word.push_segment(segment);
                        continue;
                    }

                    let start = self.current_position();
                    let plain = self.read_unquoted_segment(start)?;
                    // An empty unquoted segment means the word has ended —
                    // this is the loop's only exit besides an error.
                    if plain.as_str().is_empty() {
                        break;
                    }
                    word.push_segment(plain);
                }
            }
        }

        Ok(())
    }
2190
2191    fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2192        debug_assert_eq!(self.peek_char(), Some('('));
2193
2194        let start = self.current_position();
2195        let mut depth = 0usize;
2196        let mut escaped = false;
2197        let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2198
2199        while let Some(ch) = self.peek_char() {
2200            if let Some(text) = text.as_mut() {
2201                text.push(ch);
2202            }
2203            self.advance();
2204
2205            if escaped {
2206                escaped = false;
2207                continue;
2208            }
2209
2210            match ch {
2211                '\\' => escaped = true,
2212                '(' => depth += 1,
2213                ')' => {
2214                    depth = depth.saturating_sub(1);
2215                    if depth == 0 {
2216                        break;
2217                    }
2218                }
2219                _ => {}
2220            }
2221        }
2222
2223        let end = self.current_position();
2224        let span = Some(Span::from_positions(start, end));
2225        if let Some(text) = text {
2226            Some(LexedWordSegment::owned_with_spans(
2227                LexedWordSegmentKind::Plain,
2228                text,
2229                span,
2230                span,
2231            ))
2232        } else {
2233            Some(LexedWordSegment::borrowed_with_spans(
2234                LexedWordSegmentKind::Plain,
2235                &self.input[start.offset..end.offset],
2236                span,
2237                span,
2238            ))
2239        }
2240    }
2241
    /// Lex a `"..."` word starting at the opening double quote.
    fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(false)
    }
2245
    /// Lex a `$"..."` word starting at the `$` (the `dollar` variant of the
    /// double-quoted reader).
    fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(true)
    }
2249
2250    fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2251        let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2252            Ok(segment) => segment,
2253            Err(kind) => return Some(LexedToken::error(kind)),
2254        };
2255        let mut word = LexedWord::from_segment(segment);
2256        if let Err(kind) = self.append_segmented_continuation(&mut word) {
2257            return Some(LexedToken::error(kind));
2258        }
2259
2260        let kind = if word.single_segment().is_some() {
2261            TokenKind::QuotedWord
2262        } else {
2263            TokenKind::Word
2264        };
2265
2266        Some(LexedToken::with_word_payload(kind, word))
2267    }
2268
    /// Read one `"..."` segment (no `$` prefix) starting at the opening quote.
    fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(false)
    }
2272
    /// Read one `$"..."` segment starting at the `$`.
    fn read_dollar_double_quoted_segment(
        &mut self,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(true)
    }
2278
2279    fn read_double_quoted_segment_with_dollar(
2280        &mut self,
2281        dollar: bool,
2282    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2283        if dollar {
2284            debug_assert_eq!(self.peek_char(), Some('$'));
2285            debug_assert_eq!(self.second_char(), Some('"'));
2286        } else {
2287            debug_assert_eq!(self.peek_char(), Some('"'));
2288        }
2289
2290        let wrapper_start = self.current_position();
2291        if dollar {
2292            self.consume_ascii_chars(2); // consume $"
2293        } else {
2294            self.consume_ascii_chars(1); // consume opening "
2295        }
2296        let content_start = self.current_position();
2297        let mut content_end = content_start;
2298        let mut simple = self.reinject_buf.is_empty();
2299        let mut borrowable = self.reinject_buf.is_empty();
2300        let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2301        let mut closed = false;
2302
2303        while let Some(ch) = self.peek_char() {
2304            if simple {
2305                if self.reinject_buf.is_empty() {
2306                    let rest = self.cursor.rest();
2307                    match Self::find_double_quote_special(rest) {
2308                        Some(index) if index > 0 => {
2309                            self.consume_source_bytes(index);
2310                            continue;
2311                        }
2312                        None => {
2313                            self.consume_source_bytes(rest.len());
2314                            return Err(LexerErrorKind::DoubleQuote);
2315                        }
2316                        _ => {}
2317                    }
2318                }
2319
2320                match ch {
2321                    '"' => {
2322                        content_end = self.current_position();
2323                        self.consume_ascii_chars(1); // consume closing "
2324                        closed = true;
2325                        break;
2326                    }
2327                    '\\' | '$' | '`' => {
2328                        simple = false;
2329                        if ch == '`' {
2330                            borrowable = false;
2331                            let capture_end = self.current_position();
2332                            self.ensure_capture_from_source(
2333                                &mut content,
2334                                content_start,
2335                                capture_end,
2336                            );
2337                        }
2338                    }
2339                    _ => {
2340                        self.advance();
2341                    }
2342                }
2343                if simple {
2344                    continue;
2345                }
2346            }
2347
2348            match ch {
2349                '"' => {
2350                    if borrowable {
2351                        content_end = self.current_position();
2352                    }
2353                    self.consume_ascii_chars(1); // consume closing "
2354                    closed = true;
2355                    break;
2356                }
2357                '\\' => {
2358                    let escape_start = self.current_position();
2359                    self.advance();
2360                    if let Some(next) = self.peek_char() {
2361                        match next {
2362                            '\n' => {
2363                                borrowable = false;
2364                                self.ensure_capture_from_source(
2365                                    &mut content,
2366                                    content_start,
2367                                    escape_start,
2368                                );
2369                                self.advance();
2370                            }
2371                            '$' => {
2372                                borrowable = false;
2373                                self.ensure_capture_from_source(
2374                                    &mut content,
2375                                    content_start,
2376                                    escape_start,
2377                                );
2378                                Self::push_capture_char(&mut content, '\x00');
2379                                Self::push_capture_char(&mut content, '$');
2380                                self.advance();
2381                            }
2382                            '"' | '\\' | '`' => {
2383                                borrowable = false;
2384                                self.ensure_capture_from_source(
2385                                    &mut content,
2386                                    content_start,
2387                                    escape_start,
2388                                );
2389                                if next == '\\' {
2390                                    Self::push_capture_char(&mut content, '\x00');
2391                                }
2392                                if next == '`' {
2393                                    Self::push_capture_char(&mut content, '\x00');
2394                                }
2395                                Self::push_capture_char(&mut content, next);
2396                                self.advance();
2397                                content_end = self.current_position();
2398                            }
2399                            _ => {
2400                                Self::push_capture_char(&mut content, '\\');
2401                                Self::push_capture_char(&mut content, next);
2402                                self.advance();
2403                                content_end = self.current_position();
2404                            }
2405                        }
2406                    }
2407                }
2408                '$' => {
2409                    Self::push_capture_char(&mut content, '$');
2410                    self.advance();
2411                    if self.peek_char() == Some('(') {
2412                        if self.second_char() == Some('(') {
2413                            self.read_arithmetic_expansion_into(&mut content);
2414                        } else {
2415                            Self::push_capture_char(&mut content, '(');
2416                            self.advance();
2417                            self.read_command_subst_into(&mut content);
2418                        }
2419                    } else if self.peek_char() == Some('{') {
2420                        Self::push_capture_char(&mut content, '{');
2421                        self.advance();
2422                        borrowable &= self.read_param_expansion_into(&mut content, content_start);
2423                    }
2424                    content_end = self.current_position();
2425                }
2426                '`' => {
2427                    borrowable = false;
2428                    let capture_end = self.current_position();
2429                    self.ensure_capture_from_source(&mut content, content_start, capture_end);
2430                    Self::push_capture_char(&mut content, '`');
2431                    self.advance(); // consume opening `
2432                    while let Some(c) = self.peek_char() {
2433                        Self::push_capture_char(&mut content, c);
2434                        self.advance();
2435                        if c == '`' {
2436                            break;
2437                        }
2438                        if c == '\\'
2439                            && let Some(next) = self.peek_char()
2440                        {
2441                            Self::push_capture_char(&mut content, next);
2442                            self.advance();
2443                        }
2444                    }
2445                    content_end = self.current_position();
2446                }
2447                _ => {
2448                    Self::push_capture_char(&mut content, ch);
2449                    self.advance();
2450                    content_end = self.current_position();
2451                }
2452            }
2453        }
2454
2455        if !closed {
2456            return Err(LexerErrorKind::DoubleQuote);
2457        }
2458
2459        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
2460        let content_span = Some(Span::from_positions(content_start, content_end));
2461
2462        if borrowable {
2463            Ok(LexedWordSegment::borrowed_with_spans(
2464                if dollar {
2465                    LexedWordSegmentKind::DollarDoubleQuoted
2466                } else {
2467                    LexedWordSegmentKind::DoubleQuoted
2468                },
2469                &self.input[content_start.offset..content_end.offset],
2470                content_span,
2471                wrapper_span,
2472            ))
2473        } else {
2474            Ok(LexedWordSegment::owned_with_spans(
2475                if dollar {
2476                    LexedWordSegmentKind::DollarDoubleQuoted
2477                } else {
2478                    LexedWordSegmentKind::DoubleQuoted
2479                },
2480                content.unwrap_or_default(),
2481                content_span,
2482                wrapper_span,
2483            ))
2484        }
2485    }
2486
2487    fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
2488        debug_assert_eq!(self.peek_char(), Some('('));
2489        debug_assert_eq!(self.second_char(), Some('('));
2490
2491        Self::push_capture_char(content, '(');
2492        self.advance();
2493        Self::push_capture_char(content, '(');
2494        self.advance();
2495
2496        let mut depth = 2;
2497        while let Some(c) = self.peek_char() {
2498            match c {
2499                '\\' => {
2500                    Self::push_capture_char(content, c);
2501                    self.advance();
2502                    if let Some(next) = self.peek_char() {
2503                        Self::push_capture_char(content, next);
2504                        self.advance();
2505                    }
2506                }
2507                '\'' => {
2508                    Self::push_capture_char(content, c);
2509                    self.advance();
2510                    while let Some(quoted) = self.peek_char() {
2511                        Self::push_capture_char(content, quoted);
2512                        self.advance();
2513                        if quoted == '\'' {
2514                            break;
2515                        }
2516                    }
2517                }
2518                '"' => {
2519                    let mut escaped = false;
2520                    Self::push_capture_char(content, c);
2521                    self.advance();
2522                    while let Some(quoted) = self.peek_char() {
2523                        Self::push_capture_char(content, quoted);
2524                        self.advance();
2525                        if escaped {
2526                            escaped = false;
2527                            continue;
2528                        }
2529                        match quoted {
2530                            '\\' => escaped = true,
2531                            '"' => break,
2532                            _ => {}
2533                        }
2534                    }
2535                }
2536                '`' => {
2537                    let mut escaped = false;
2538                    Self::push_capture_char(content, c);
2539                    self.advance();
2540                    while let Some(quoted) = self.peek_char() {
2541                        Self::push_capture_char(content, quoted);
2542                        self.advance();
2543                        if escaped {
2544                            escaped = false;
2545                            continue;
2546                        }
2547                        match quoted {
2548                            '\\' => escaped = true,
2549                            '`' => break,
2550                            _ => {}
2551                        }
2552                    }
2553                }
2554                '(' => {
2555                    Self::push_capture_char(content, c);
2556                    self.advance();
2557                    depth += 1;
2558                }
2559                ')' => {
2560                    Self::push_capture_char(content, c);
2561                    self.advance();
2562                    depth -= 1;
2563                    if depth == 0 {
2564                        return true;
2565                    }
2566                }
2567                _ => {
2568                    Self::push_capture_char(content, c);
2569                    self.advance();
2570                }
2571            }
2572        }
2573
2574        false
2575    }
2576
2577    fn read_legacy_arithmetic_into(
2578        &mut self,
2579        content: &mut Option<String>,
2580        segment_start: Position,
2581    ) -> bool {
2582        let mut bracket_depth = 1;
2583
2584        while let Some(c) = self.peek_char() {
2585            match c {
2586                '\\' => {
2587                    Self::push_capture_char(content, c);
2588                    self.advance();
2589                    if let Some(next) = self.peek_char() {
2590                        Self::push_capture_char(content, next);
2591                        self.advance();
2592                    }
2593                }
2594                '\'' => {
2595                    Self::push_capture_char(content, c);
2596                    self.advance();
2597                    while let Some(quoted) = self.peek_char() {
2598                        Self::push_capture_char(content, quoted);
2599                        self.advance();
2600                        if quoted == '\'' {
2601                            break;
2602                        }
2603                    }
2604                }
2605                '"' => {
2606                    let mut escaped = false;
2607                    Self::push_capture_char(content, c);
2608                    self.advance();
2609                    while let Some(quoted) = self.peek_char() {
2610                        Self::push_capture_char(content, quoted);
2611                        self.advance();
2612                        if escaped {
2613                            escaped = false;
2614                            continue;
2615                        }
2616                        match quoted {
2617                            '\\' => escaped = true,
2618                            '"' => break,
2619                            _ => {}
2620                        }
2621                    }
2622                }
2623                '`' => {
2624                    let mut escaped = false;
2625                    Self::push_capture_char(content, c);
2626                    self.advance();
2627                    while let Some(quoted) = self.peek_char() {
2628                        Self::push_capture_char(content, quoted);
2629                        self.advance();
2630                        if escaped {
2631                            escaped = false;
2632                            continue;
2633                        }
2634                        match quoted {
2635                            '\\' => escaped = true,
2636                            '`' => break,
2637                            _ => {}
2638                        }
2639                    }
2640                }
2641                '[' => {
2642                    Self::push_capture_char(content, c);
2643                    self.advance();
2644                    bracket_depth += 1;
2645                }
2646                ']' => {
2647                    Self::push_capture_char(content, c);
2648                    self.advance();
2649                    bracket_depth -= 1;
2650                    if bracket_depth == 0 {
2651                        return true;
2652                    }
2653                }
2654                '$' => {
2655                    Self::push_capture_char(content, c);
2656                    self.advance();
2657                    if self.peek_char() == Some('(') {
2658                        if self.second_char() == Some('(') {
2659                            if !self.read_arithmetic_expansion_into(content) {
2660                                return false;
2661                            }
2662                        } else {
2663                            Self::push_capture_char(content, '(');
2664                            self.advance();
2665                            if !self.read_command_subst_into(content) {
2666                                return false;
2667                            }
2668                        }
2669                    } else if self.peek_char() == Some('{') {
2670                        Self::push_capture_char(content, '{');
2671                        self.advance();
2672                        if !self.read_param_expansion_into(content, segment_start) {
2673                            return false;
2674                        }
2675                    } else if self.peek_char() == Some('[') {
2676                        Self::push_capture_char(content, '[');
2677                        self.advance();
2678                        if !self.read_legacy_arithmetic_into(content, segment_start) {
2679                            return false;
2680                        }
2681                    }
2682                }
2683                _ => {
2684                    Self::push_capture_char(content, c);
2685                    self.advance();
2686                }
2687            }
2688        }
2689
2690        false
2691    }
2692
    /// Read command substitution content after `$(`, handling nested parens and quotes.
    /// Appends chars to `content` and adds the closing `)`.
    ///
    /// Delegates to [`Self::read_command_subst_into_depth`] starting at depth 0;
    /// that depth counter tracks `$( … $( … ) … )` nesting to prevent stack
    /// overflow on deeply nested input.
    fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
        self.read_command_subst_into_depth(content, 0)
    }
2699
2700    fn flush_command_subst_keyword(
2701        current_word: &mut String,
2702        pending_case_headers: &mut usize,
2703        case_clause_depths: &mut SmallVec<[usize; 4]>,
2704        depth: usize,
2705        word_started_at_command_start: &mut bool,
2706    ) {
2707        if current_word.is_empty() {
2708            *word_started_at_command_start = false;
2709            return;
2710        }
2711
2712        match current_word.as_str() {
2713            "case" if *word_started_at_command_start => *pending_case_headers += 1,
2714            "in" if *pending_case_headers > 0 => {
2715                *pending_case_headers -= 1;
2716                case_clause_depths.push(depth);
2717            }
2718            "esac" if *word_started_at_command_start => {
2719                case_clause_depths.pop();
2720            }
2721            _ => {}
2722        }
2723
2724        current_word.clear();
2725        *word_started_at_command_start = false;
2726    }
2727
2728    fn read_command_subst_heredoc_delimiter_into(
2729        &mut self,
2730        content: &mut Option<String>,
2731    ) -> Option<String> {
2732        while let Some(ch) = self.peek_char() {
2733            if !matches!(ch, ' ' | '\t') {
2734                break;
2735            }
2736            Self::push_capture_char(content, ch);
2737            self.advance();
2738        }
2739
2740        let mut cooked = String::new();
2741        let mut in_single = false;
2742        let mut in_double = false;
2743        let mut escaped = false;
2744        let mut saw_any = false;
2745
2746        while let Some(ch) = self.peek_char() {
2747            if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
2748                break;
2749            }
2750
2751            saw_any = true;
2752            Self::push_capture_char(content, ch);
2753            self.advance();
2754
2755            if escaped {
2756                cooked.push(ch);
2757                escaped = false;
2758                continue;
2759            }
2760
2761            match ch {
2762                '\\' if !in_single => escaped = true,
2763                '\'' if !in_double => in_single = !in_single,
2764                '"' if !in_single => in_double = !in_double,
2765                _ => cooked.push(ch),
2766            }
2767        }
2768
2769        saw_any.then_some(cooked)
2770    }
2771
2772    fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2773        Self::push_capture_char(content, '`');
2774        self.advance();
2775        while let Some(ch) = self.peek_char() {
2776            Self::push_capture_char(content, ch);
2777            self.advance();
2778            if ch == '\\' {
2779                if let Some(esc) = self.peek_char() {
2780                    Self::push_capture_char(content, esc);
2781                    self.advance();
2782                }
2783                continue;
2784            }
2785            if ch == '`' {
2786                break;
2787            }
2788        }
2789    }
2790
2791    fn read_command_subst_pending_heredoc_into(
2792        &mut self,
2793        content: &mut Option<String>,
2794        delimiter: &str,
2795        strip_tabs: bool,
2796    ) -> bool {
2797        loop {
2798            let mut line = String::new();
2799            let mut saw_newline = false;
2800
2801            while let Some(ch) = self.peek_char() {
2802                self.advance();
2803                if ch == '\n' {
2804                    saw_newline = true;
2805                    break;
2806                }
2807                line.push(ch);
2808            }
2809
2810            Self::push_capture_str(content, &line);
2811            if saw_newline {
2812                Self::push_capture_char(content, '\n');
2813            }
2814
2815            if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2816                return true;
2817            }
2818        }
2819    }
2820
2821    fn read_command_subst_into_depth(
2822        &mut self,
2823        content: &mut Option<String>,
2824        subst_depth: usize,
2825    ) -> bool {
2826        if subst_depth >= self.max_subst_depth {
2827            // Depth limit exceeded — consume until matching ')' and emit error token
2828            let mut depth = 1;
2829            while let Some(c) = self.peek_char() {
2830                self.advance();
2831                match c {
2832                    '(' => depth += 1,
2833                    ')' => {
2834                        depth -= 1;
2835                        if depth == 0 {
2836                            Self::push_capture_char(content, ')');
2837                            return true;
2838                        }
2839                    }
2840                    _ => {}
2841                }
2842            }
2843            return false;
2844        }
2845
2846        let mut depth = 1;
2847        let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
2848        let mut pending_case_headers = 0usize;
2849        let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
2850        let mut current_word = String::with_capacity(16);
2851        let mut at_command_start = true;
2852        let mut expecting_redirection_target = false;
2853        let mut current_word_started_at_command_start = false;
2854        while let Some(c) = self.peek_char() {
2855            match c {
2856                '#' if !self.should_treat_hash_as_word_char() => {
2857                    let had_word = !current_word.is_empty();
2858                    Self::flush_command_subst_keyword(
2859                        &mut current_word,
2860                        &mut pending_case_headers,
2861                        &mut case_clause_depths,
2862                        depth,
2863                        &mut current_word_started_at_command_start,
2864                    );
2865                    if had_word && expecting_redirection_target {
2866                        expecting_redirection_target = false;
2867                    }
2868                    Self::push_capture_char(content, '#');
2869                    self.advance();
2870                    while let Some(comment_ch) = self.peek_char() {
2871                        Self::push_capture_char(content, comment_ch);
2872                        self.advance();
2873                        if comment_ch == '\n' {
2874                            for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
2875                                if !self.read_command_subst_pending_heredoc_into(
2876                                    content, &delimiter, strip_tabs,
2877                                ) {
2878                                    return false;
2879                                }
2880                            }
2881                            at_command_start = true;
2882                            expecting_redirection_target = false;
2883                            break;
2884                        }
2885                    }
2886                }
2887                '(' => {
2888                    Self::flush_command_subst_keyword(
2889                        &mut current_word,
2890                        &mut pending_case_headers,
2891                        &mut case_clause_depths,
2892                        depth,
2893                        &mut current_word_started_at_command_start,
2894                    );
2895                    depth += 1;
2896                    Self::push_capture_char(content, c);
2897                    self.advance();
2898                    at_command_start = true;
2899                    expecting_redirection_target = false;
2900                }
2901                ')' => {
2902                    Self::flush_command_subst_keyword(
2903                        &mut current_word,
2904                        &mut pending_case_headers,
2905                        &mut case_clause_depths,
2906                        depth,
2907                        &mut current_word_started_at_command_start,
2908                    );
2909                    if case_clause_depths
2910                        .last()
2911                        .is_some_and(|case_depth| *case_depth == depth)
2912                    {
2913                        Self::push_capture_char(content, ')');
2914                        self.advance();
2915                        at_command_start = true;
2916                        expecting_redirection_target = false;
2917                        continue;
2918                    }
2919                    depth -= 1;
2920                    self.advance();
2921                    if depth == 0 {
2922                        Self::push_capture_char(content, ')');
2923                        return true;
2924                    }
2925                    Self::push_capture_char(content, c);
2926                    at_command_start = false;
2927                    expecting_redirection_target = false;
2928                }
2929                '"' => {
2930                    let had_word = !current_word.is_empty();
2931                    Self::flush_command_subst_keyword(
2932                        &mut current_word,
2933                        &mut pending_case_headers,
2934                        &mut case_clause_depths,
2935                        depth,
2936                        &mut current_word_started_at_command_start,
2937                    );
2938                    if had_word && expecting_redirection_target {
2939                        expecting_redirection_target = false;
2940                    }
2941                    // Nested double-quoted string inside $()
2942                    Self::push_capture_char(content, '"');
2943                    self.advance();
2944                    while let Some(qc) = self.peek_char() {
2945                        match qc {
2946                            '"' => {
2947                                Self::push_capture_char(content, '"');
2948                                self.advance();
2949                                break;
2950                            }
2951                            '\\' => {
2952                                Self::push_capture_char(content, '\\');
2953                                self.advance();
2954                                if let Some(esc) = self.peek_char() {
2955                                    Self::push_capture_char(content, esc);
2956                                    self.advance();
2957                                }
2958                            }
2959                            '$' => {
2960                                Self::push_capture_char(content, '$');
2961                                self.advance();
2962                                if self.peek_char() == Some('(') {
2963                                    if self.second_char() == Some('(') {
2964                                        if !self.read_arithmetic_expansion_into(content) {
2965                                            return false;
2966                                        }
2967                                    } else {
2968                                        Self::push_capture_char(content, '(');
2969                                        self.advance();
2970                                        if !self
2971                                            .read_command_subst_into_depth(content, subst_depth + 1)
2972                                        {
2973                                            return false;
2974                                        }
2975                                    }
2976                                }
2977                            }
2978                            _ => {
2979                                Self::push_capture_char(content, qc);
2980                                self.advance();
2981                            }
2982                        }
2983                    }
2984                    if expecting_redirection_target {
2985                        expecting_redirection_target = false;
2986                    } else {
2987                        at_command_start = false;
2988                    }
2989                }
2990                '\'' => {
2991                    let had_word = !current_word.is_empty();
2992                    Self::flush_command_subst_keyword(
2993                        &mut current_word,
2994                        &mut pending_case_headers,
2995                        &mut case_clause_depths,
2996                        depth,
2997                        &mut current_word_started_at_command_start,
2998                    );
2999                    if had_word && expecting_redirection_target {
3000                        expecting_redirection_target = false;
3001                    }
3002                    // Single-quoted string inside $()
3003                    Self::push_capture_char(content, '\'');
3004                    self.advance();
3005                    while let Some(qc) = self.peek_char() {
3006                        Self::push_capture_char(content, qc);
3007                        self.advance();
3008                        if qc == '\'' {
3009                            break;
3010                        }
3011                    }
3012                    if expecting_redirection_target {
3013                        expecting_redirection_target = false;
3014                    } else {
3015                        at_command_start = false;
3016                    }
3017                }
3018                '`' => {
3019                    let had_word = !current_word.is_empty();
3020                    Self::flush_command_subst_keyword(
3021                        &mut current_word,
3022                        &mut pending_case_headers,
3023                        &mut case_clause_depths,
3024                        depth,
3025                        &mut current_word_started_at_command_start,
3026                    );
3027                    if had_word && expecting_redirection_target {
3028                        expecting_redirection_target = false;
3029                    }
3030                    self.read_command_subst_backtick_segment_into(content);
3031                    if expecting_redirection_target {
3032                        expecting_redirection_target = false;
3033                    } else {
3034                        at_command_start = false;
3035                    }
3036                }
3037                '$' if self.second_char() == Some('\'') => {
3038                    let had_word = !current_word.is_empty();
3039                    Self::flush_command_subst_keyword(
3040                        &mut current_word,
3041                        &mut pending_case_headers,
3042                        &mut case_clause_depths,
3043                        depth,
3044                        &mut current_word_started_at_command_start,
3045                    );
3046                    if had_word && expecting_redirection_target {
3047                        expecting_redirection_target = false;
3048                    }
3049                    Self::push_capture_char(content, '$');
3050                    self.advance();
3051                    Self::push_capture_char(content, '\'');
3052                    self.advance();
3053                    while let Some(qc) = self.peek_char() {
3054                        Self::push_capture_char(content, qc);
3055                        self.advance();
3056                        if qc == '\\' {
3057                            if let Some(esc) = self.peek_char() {
3058                                Self::push_capture_char(content, esc);
3059                                self.advance();
3060                            }
3061                            continue;
3062                        }
3063                        if qc == '\'' {
3064                            break;
3065                        }
3066                    }
3067                    if expecting_redirection_target {
3068                        expecting_redirection_target = false;
3069                    } else {
3070                        at_command_start = false;
3071                    }
3072                }
3073                '\\' => {
3074                    let had_word = !current_word.is_empty();
3075                    Self::flush_command_subst_keyword(
3076                        &mut current_word,
3077                        &mut pending_case_headers,
3078                        &mut case_clause_depths,
3079                        depth,
3080                        &mut current_word_started_at_command_start,
3081                    );
3082                    if had_word && expecting_redirection_target {
3083                        expecting_redirection_target = false;
3084                    }
3085                    Self::push_capture_char(content, '\\');
3086                    self.advance();
3087                    if let Some(esc) = self.peek_char() {
3088                        Self::push_capture_char(content, esc);
3089                        self.advance();
3090                    }
3091                    if expecting_redirection_target {
3092                        expecting_redirection_target = false;
3093                    } else {
3094                        at_command_start = false;
3095                    }
3096                }
3097                '<' if self.second_char() == Some('<') => {
3098                    let word_was_redirection_fd = current_word_started_at_command_start
3099                        && !current_word.is_empty()
3100                        && current_word.chars().all(|current| current.is_ascii_digit());
3101                    Self::flush_command_subst_keyword(
3102                        &mut current_word,
3103                        &mut pending_case_headers,
3104                        &mut case_clause_depths,
3105                        depth,
3106                        &mut current_word_started_at_command_start,
3107                    );
3108                    if word_was_redirection_fd {
3109                        at_command_start = true;
3110                    }
3111
3112                    Self::push_capture_char(content, '<');
3113                    self.advance();
3114                    Self::push_capture_char(content, '<');
3115                    self.advance();
3116
3117                    if self.peek_char() == Some('<') {
3118                        Self::push_capture_char(content, '<');
3119                        self.advance();
3120                        expecting_redirection_target = true;
3121                        continue;
3122                    }
3123
3124                    let strip_tabs = if self.peek_char() == Some('-') {
3125                        Self::push_capture_char(content, '-');
3126                        self.advance();
3127                        true
3128                    } else {
3129                        false
3130                    };
3131
3132                    if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
3133                    {
3134                        pending_heredocs.push((delimiter, strip_tabs));
3135                        expecting_redirection_target = false;
3136                    } else {
3137                        expecting_redirection_target = true;
3138                    }
3139                }
3140                '>' | '<' => {
3141                    let word_was_redirection_fd = current_word_started_at_command_start
3142                        && !current_word.is_empty()
3143                        && current_word.chars().all(|current| current.is_ascii_digit());
3144                    Self::flush_command_subst_keyword(
3145                        &mut current_word,
3146                        &mut pending_case_headers,
3147                        &mut case_clause_depths,
3148                        depth,
3149                        &mut current_word_started_at_command_start,
3150                    );
3151                    if word_was_redirection_fd {
3152                        at_command_start = true;
3153                    }
3154                    Self::push_capture_char(content, c);
3155                    self.advance();
3156                    expecting_redirection_target = true;
3157                }
3158                '\n' => {
3159                    Self::flush_command_subst_keyword(
3160                        &mut current_word,
3161                        &mut pending_case_headers,
3162                        &mut case_clause_depths,
3163                        depth,
3164                        &mut current_word_started_at_command_start,
3165                    );
3166                    Self::push_capture_char(content, '\n');
3167                    self.advance();
3168                    for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
3169                        if !self.read_command_subst_pending_heredoc_into(
3170                            content, &delimiter, strip_tabs,
3171                        ) {
3172                            return false;
3173                        }
3174                    }
3175                    at_command_start = true;
3176                    expecting_redirection_target = false;
3177                }
3178                _ => {
3179                    if c.is_ascii_alphanumeric() || c == '_' {
3180                        if current_word.is_empty()
3181                            && !expecting_redirection_target
3182                            && at_command_start
3183                        {
3184                            current_word_started_at_command_start = true;
3185                            at_command_start = false;
3186                        }
3187                        current_word.push(c);
3188                    } else {
3189                        let had_word = !current_word.is_empty();
3190                        Self::flush_command_subst_keyword(
3191                            &mut current_word,
3192                            &mut pending_case_headers,
3193                            &mut case_clause_depths,
3194                            depth,
3195                            &mut current_word_started_at_command_start,
3196                        );
3197                        if had_word && expecting_redirection_target {
3198                            expecting_redirection_target = false;
3199                        }
3200                        match c {
3201                            ' ' | '\t' => {}
3202                            ';' | '|' | '&' => {
3203                                at_command_start = true;
3204                                expecting_redirection_target = false;
3205                            }
3206                            _ => {
3207                                if !expecting_redirection_target {
3208                                    at_command_start = false;
3209                                }
3210                            }
3211                        }
3212                    }
3213                    Self::push_capture_char(content, c);
3214                    self.advance();
3215                }
3216            }
3217        }
3218
3219        false
3220    }
3221
    /// Read parameter expansion content after `${`, handling nested braces and quotes.
    /// In bash, quotes inside `${...}` (e.g. `${arr["key"]}`) don't terminate the
    /// outer double-quoted string. Appends chars including closing `}` to `content`.
    ///
    /// Returns `true` while the captured text is still byte-identical to the
    /// source (so a plain borrow would suffice); returns `false` once an escape
    /// rewrite forced a "cooked" copy via `ensure_capture_from_source`.
    fn read_param_expansion_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        // True until an escape rewrite makes `content` differ from the raw bytes.
        let mut borrowable = true;
        // Nesting depth of `${` ... `}` pairs; we already entered one level.
        let mut depth = 1;
        // Bare `{` characters (no `$` prefix) seen so far; a later `}` may pair
        // with one of these instead of closing the expansion.
        let mut literal_brace_depth = 0usize;
        let mut in_single = false;
        let mut in_double = false;
        // Depth at which the current `"` was opened, so we know which `}`
        // belong to expansions nested inside the quoted region.
        let mut double_quote_depth = 0usize;
        while let Some(c) = self.peek_char() {
            if in_single {
                match c {
                    '\\' => {
                        let escape_start = self.current_position();
                        if self.second_char() == Some('"') {
                            // `\"` inside single quotes: emit a bare `"`; the
                            // output now differs from the source, so switch to
                            // a cooked copy.
                            self.advance();
                            borrowable = false;
                            self.ensure_capture_from_source(content, segment_start, escape_start);
                            Self::push_capture_char(content, '"');
                            self.advance();
                        } else {
                            // Any other backslash is literal inside single quotes.
                            Self::push_capture_char(content, '\\');
                            self.advance();
                        }
                    }
                    '\'' => {
                        Self::push_capture_char(content, c);
                        self.advance();
                        in_single = false;
                    }
                    _ => {
                        Self::push_capture_char(content, c);
                        self.advance();
                    }
                }
                continue;
            }

            match c {
                '}' if !in_single && (!in_double || depth > double_quote_depth) => {
                    self.advance();
                    Self::push_capture_char(content, '}');
                    // A bare literal `{` may claim this `}` as its partner, but
                    // only when a later `}` can still close the expansion;
                    // otherwise this one must terminate us.
                    if depth == 1
                        && literal_brace_depth > 0
                        && self.has_later_top_level_param_expansion_closer(depth)
                    {
                        literal_brace_depth -= 1;
                        continue;
                    }
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                '{' if !in_single && !in_double => {
                    // Bare `{` (no `$` prefix): track separately so it does not
                    // inflate the expansion nesting depth.
                    literal_brace_depth += 1;
                    Self::push_capture_char(content, '{');
                    self.advance();
                }
                '"' => {
                    // Quotes inside ${...} are part of the expansion, not string delimiters
                    Self::push_capture_char(content, '"');
                    self.advance();
                    in_double = !in_double;
                    double_quote_depth = if in_double { depth } else { 0 };
                }
                '\'' => {
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    if !in_double {
                        in_single = true;
                    }
                }
                '\\' => {
                    // Inside ${...} within double quotes, same escape rules apply:
                    // \", \\, \$, \` produce the escaped char; others keep backslash
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        match esc {
                            '$' => {
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                // NOTE(review): the '\x00' looks like an internal
                                // marker flagging an escaped `$` for downstream
                                // consumers — confirm against the parser side.
                                Self::push_capture_char(content, '\x00');
                                Self::push_capture_char(content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            '}' => {
                                // \} should be a literal } without closing the expansion
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, '}');
                                self.advance();
                                literal_brace_depth = literal_brace_depth.saturating_sub(1);
                            }
                            _ => {
                                // Unknown escape: keep the backslash verbatim.
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                        }
                    } else {
                        // Trailing backslash at end of input: keep it literally.
                        Self::push_capture_char(content, '\\');
                    }
                }
                '$' => {
                    Self::push_capture_char(content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            // `$((` — arithmetic expansion.
                            if !self.read_arithmetic_expansion_into(content) {
                                borrowable = false;
                            }
                        } else {
                            // `$(` — command substitution.
                            Self::push_capture_char(content, '(');
                            self.advance();
                            self.read_command_subst_into(content);
                        }
                    } else if self.peek_char() == Some('{') {
                        // Nested `${...}` — recurse; nesting may also force a
                        // cooked copy, which clears `borrowable`.
                        Self::push_capture_char(content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(content, segment_start);
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }
        borrowable
    }
3372
    /// Lookahead (non-consuming): does a later `}` exist at `target_depth`,
    /// outside quotes, before an unquoted newline at that depth?
    ///
    /// `read_param_expansion_into` uses this to decide whether the `}` it just
    /// consumed may be paired with a bare literal `{` instead of terminating
    /// the `${...}` expansion: pairing is only safe if another closer is still
    /// coming on the same logical line.
    fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
        let mut chars = self.lookahead_chars().peekable();
        // Mirror the quote/nesting state machine of `read_param_expansion_into`.
        let mut depth = target_depth;
        let mut in_single = false;
        let mut in_double = false;
        // Depth at which the current `"` opened; `}` above it belongs to
        // expansions nested inside the quoted region.
        let mut double_quote_depth = 0usize;

        while let Some(ch) = chars.next() {
            if in_single {
                match ch {
                    '\'' => in_single = false,
                    // `\"` inside single quotes is rewritten by the main
                    // scanner, so skip the quote here as well to stay in sync.
                    '\\' if chars.peek() == Some(&'"') => {
                        chars.next();
                    }
                    '\\' => {}
                    _ => {}
                }
                continue;
            }

            if in_double {
                match ch {
                    '"' => {
                        in_double = false;
                        double_quote_depth = 0;
                    }
                    '\\' => {
                        // Escaped char inside double quotes: skip it.
                        chars.next();
                    }
                    '$' if chars.peek() == Some(&'{') => {
                        chars.next();
                        depth += 1;
                    }
                    '}' if depth > double_quote_depth => {
                        depth -= 1;
                    }
                    _ => {}
                }
                continue;
            }

            match ch {
                // An unquoted newline at the target depth means no closer is
                // left on this logical line.
                '\n' if depth == target_depth => return false,
                '\'' => in_single = true,
                '"' => {
                    in_double = true;
                    double_quote_depth = depth;
                }
                '\\' => {
                    chars.next();
                }
                '$' if chars.peek() == Some(&'{') => {
                    chars.next();
                    depth += 1;
                }
                '}' => {
                    if depth == target_depth {
                        // Found another closer for the enclosing expansion.
                        return true;
                    }
                    depth -= 1;
                }
                _ => {}
            }
        }

        false
    }
3440
3441    /// Check if the content starting with { looks like a brace expansion
3442    /// Brace expansion: {a,b,c} or {1..5} (contains , or ..)
3443    /// Brace group: { cmd; } (contains spaces, semicolons, newlines)
3444    /// Caps lookahead to prevent O(n^2) scanning when input
3445    /// contains many unmatched `{` characters (issue #997).
3446    fn looks_like_brace_expansion(&self) -> bool {
3447        const MAX_LOOKAHEAD: usize = 10_000;
3448
3449        let mut chars = self.lookahead_chars();
3450
3451        // Skip the opening {
3452        if chars.next() != Some('{') {
3453            return false;
3454        }
3455
3456        let mut depth = 1;
3457        let mut paren_depth = 0usize;
3458        let mut has_comma = false;
3459        let mut has_dot_dot = false;
3460        let mut escaped = false;
3461        let mut in_single = false;
3462        let mut in_double = false;
3463        let mut in_backtick = false;
3464        let mut prev_char = None;
3465        let mut scanned = 0usize;
3466
3467        for ch in chars {
3468            scanned += 1;
3469            if scanned > MAX_LOOKAHEAD {
3470                return false;
3471            }
3472
3473            let brace_surface_active = !in_single && !in_double && !in_backtick;
3474            let at_top_level = depth == 1 && paren_depth == 0 && brace_surface_active;
3475
3476            match ch {
3477                _ if escaped => {
3478                    escaped = false;
3479                }
3480                '\\' if !in_single => escaped = true,
3481                '\'' if !in_double && !in_backtick => in_single = !in_single,
3482                '"' if !in_single && !in_backtick => in_double = !in_double,
3483                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3484                '(' if brace_surface_active && (paren_depth > 0 || prev_char == Some('$')) => {
3485                    paren_depth += 1
3486                }
3487                ')' if brace_surface_active && paren_depth > 0 => paren_depth -= 1,
3488                '{' if !in_single && !in_double && !in_backtick => depth += 1,
3489                '}' if !in_single && !in_double && !in_backtick => {
3490                    depth -= 1;
3491                    if depth == 0 {
3492                        // Found matching }, check if we have brace expansion markers
3493                        return has_comma || has_dot_dot;
3494                    }
3495                }
3496                ',' if at_top_level => has_comma = true,
3497                '.' if at_top_level && prev_char == Some('.') => has_dot_dot = true,
3498                // Brace groups have whitespace/newlines/semicolons at depth 1
3499                ' ' | '\t' | '\n' | ';' if at_top_level => return false,
3500                _ => {}
3501            }
3502            prev_char = Some(ch);
3503        }
3504
3505        false
3506    }
3507
3508    fn consume_mid_word_brace_segment(&mut self, word: &mut Option<String>) {
3509        let mut brace_depth = 1usize;
3510        let mut paren_depth = 0usize;
3511        let mut escaped = false;
3512        let mut in_single = false;
3513        let mut in_double = false;
3514        let mut in_backtick = false;
3515        let mut prev_char = None;
3516
3517        while let Some(ch) = self.peek_char() {
3518            Self::push_capture_char(word, ch);
3519            self.advance();
3520
3521            if escaped {
3522                escaped = false;
3523                prev_char = Some(ch);
3524                continue;
3525            }
3526
3527            match ch {
3528                '\\' if !in_single => escaped = true,
3529                '\'' if !in_double && !in_backtick => in_single = !in_single,
3530                '"' if !in_single && !in_backtick => in_double = !in_double,
3531                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3532                '(' if !in_single
3533                    && !in_double
3534                    && !in_backtick
3535                    && (paren_depth > 0 || prev_char == Some('$')) =>
3536                {
3537                    paren_depth += 1
3538                }
3539                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3540                    paren_depth -= 1
3541                }
3542                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3543                '}' if !in_single && !in_double && !in_backtick => {
3544                    brace_depth -= 1;
3545                    if brace_depth == 0 {
3546                        break;
3547                    }
3548                }
3549                _ => {}
3550            }
3551
3552            prev_char = Some(ch);
3553        }
3554    }
3555
3556    fn consume_brace_word_body(&mut self, word: &mut String) {
3557        let mut brace_depth = 1usize;
3558        let mut paren_depth = 0usize;
3559        let mut escaped = false;
3560        let mut in_single = false;
3561        let mut in_double = false;
3562        let mut in_backtick = false;
3563        let mut prev_char = None;
3564
3565        while let Some(ch) = self.peek_char() {
3566            word.push(ch);
3567            self.advance();
3568
3569            if escaped {
3570                escaped = false;
3571                prev_char = Some(ch);
3572                continue;
3573            }
3574
3575            match ch {
3576                '\\' if !in_single => escaped = true,
3577                '\'' if !in_double && !in_backtick => in_single = !in_single,
3578                '"' if !in_single && !in_backtick => in_double = !in_double,
3579                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3580                '(' if !in_single
3581                    && !in_double
3582                    && !in_backtick
3583                    && (paren_depth > 0 || prev_char == Some('$')) =>
3584                {
3585                    paren_depth += 1
3586                }
3587                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3588                    paren_depth -= 1
3589                }
3590                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3591                '}' if !in_single && !in_double && !in_backtick => {
3592                    brace_depth -= 1;
3593                    if brace_depth == 0 {
3594                        break;
3595                    }
3596                }
3597                _ => {}
3598            }
3599
3600            prev_char = Some(ch);
3601        }
3602    }
3603
3604    /// Check whether a mid-word `{...}` segment can stay attached to the current
3605    /// word without crossing a top-level word boundary.
3606    fn looks_like_mid_word_brace_segment(&self) -> bool {
3607        const MAX_LOOKAHEAD: usize = 10_000;
3608
3609        let mut chars = self.lookahead_chars();
3610        if chars.next() != Some('{') {
3611            return false;
3612        }
3613
3614        let mut brace_depth = 1;
3615        let mut paren_depth = 0usize;
3616        let mut escaped = false;
3617        let mut in_single = false;
3618        let mut in_double = false;
3619        let mut in_backtick = false;
3620        let mut prev_char = None;
3621        let mut scanned = 0usize;
3622
3623        for ch in chars {
3624            scanned += 1;
3625            if scanned > MAX_LOOKAHEAD {
3626                return false;
3627            }
3628
3629            if !in_single
3630                && !in_double
3631                && !in_backtick
3632                && !escaped
3633                && brace_depth == 1
3634                && paren_depth == 0
3635                && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
3636            {
3637                return false;
3638            }
3639
3640            if escaped {
3641                escaped = false;
3642                prev_char = Some(ch);
3643                continue;
3644            }
3645
3646            match ch {
3647                '\\' => escaped = true,
3648                '\'' if !in_double && !in_backtick => in_single = !in_single,
3649                '"' if !in_single && !in_backtick => in_double = !in_double,
3650                '`' if !in_single && !in_double => in_backtick = !in_backtick,
3651                '(' if !in_single
3652                    && !in_double
3653                    && !in_backtick
3654                    && (paren_depth > 0 || prev_char == Some('$')) =>
3655                {
3656                    paren_depth += 1
3657                }
3658                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3659                    paren_depth -= 1
3660                }
3661                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3662                '}' if !in_single && !in_double && !in_backtick => {
3663                    brace_depth -= 1;
3664                    if brace_depth == 0 {
3665                        return true;
3666                    }
3667                }
3668                _ => {}
3669            }
3670
3671            prev_char = Some(ch);
3672        }
3673
3674        false
3675    }
3676
3677    /// Check if { is followed by whitespace (brace group start)
3678    fn is_brace_group_start(&self) -> bool {
3679        let mut chars = self.lookahead_chars();
3680        // Skip the opening {
3681        if chars.next() != Some('{') {
3682            return false;
3683        }
3684        // If next char is whitespace or newline, it's a brace group
3685        matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3686    }
3687
3688    /// Check whether the text after an escaped `{` looks like a brace-expansion
3689    /// surface that should stay attached to the current word, e.g. `\{a,b}`.
3690    fn escaped_brace_sequence_looks_like_brace_expansion(&self) -> bool {
3691        const MAX_LOOKAHEAD: usize = 10_000;
3692
3693        let mut chars = self.lookahead_chars();
3694        let mut depth = 1;
3695        let mut has_comma = false;
3696        let mut has_dot_dot = false;
3697        let mut prev_char = None;
3698        let mut scanned = 0usize;
3699
3700        for ch in chars.by_ref() {
3701            scanned += 1;
3702            if scanned > MAX_LOOKAHEAD {
3703                return false;
3704            }
3705            match ch {
3706                '{' => depth += 1,
3707                '}' => {
3708                    depth -= 1;
3709                    if depth == 0 {
3710                        return has_comma || has_dot_dot;
3711                    }
3712                }
3713                ',' if depth == 1 => has_comma = true,
3714                '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3715                ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3716                _ => {}
3717            }
3718            prev_char = Some(ch);
3719        }
3720
3721        false
3722    }
3723
3724    fn brace_literal_starts_case_pattern_delimiter(&self) -> bool {
3725        let mut chars = self.lookahead_chars();
3726        if chars.next() != Some('{') {
3727            return false;
3728        }
3729        chars.next() == Some(')')
3730    }
3731
3732    /// Read a {literal} pattern without comma/dot-dot as a word
3733    fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
3734        let mut word = String::with_capacity(16);
3735
3736        if let Some('{') = self.peek_char() {
3737            word.push('{');
3738            self.advance();
3739        } else {
3740            return None;
3741        }
3742
3743        self.consume_brace_word_body(&mut word);
3744
3745        while let Some(ch) = self.peek_char() {
3746            if Self::is_word_char(ch) {
3747                if self.reinject_buf.is_empty() {
3748                    let chunk = self.cursor.eat_while(Self::is_word_char);
3749                    word.push_str(chunk);
3750                    self.advance_scanned_source_bytes(chunk.len());
3751                } else {
3752                    word.push(ch);
3753                    self.advance();
3754                }
3755            } else {
3756                break;
3757            }
3758        }
3759
3760        Some(LexedToken::owned_word(TokenKind::Word, word))
3761    }
3762
3763    /// Read a brace expansion pattern as a word
3764    fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3765        let mut word = String::with_capacity(16);
3766
3767        // Read the opening {
3768        if let Some('{') = self.peek_char() {
3769            word.push('{');
3770            self.advance();
3771        } else {
3772            return None;
3773        }
3774
3775        // Read until matching }
3776        self.consume_brace_word_body(&mut word);
3777
3778        // Continue reading any suffix after the brace pattern
3779        while let Some(ch) = self.peek_char() {
3780            if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3781                if ch == '{' {
3782                    // Another brace pattern - include it
3783                    word.push(ch);
3784                    self.advance();
3785                    self.consume_brace_word_body(&mut word);
3786                } else {
3787                    word.push(ch);
3788                    self.advance();
3789                }
3790            } else {
3791                break;
3792            }
3793        }
3794
3795        Some(LexedToken::owned_word(TokenKind::Word, word))
3796    }
3797
3798    /// Peek ahead (without consuming) to see if `=(` starts an associative
3799    /// compound assignment like `([key]=val ...)`.  Returns true when the
3800    /// first non-whitespace char after `(` is `[`.
3801    fn looks_like_assoc_assign(&self) -> bool {
3802        let mut chars = self.lookahead_chars();
3803        // Skip the `(` we haven't consumed yet
3804        if chars.next() != Some('(') {
3805            return false;
3806        }
3807        // Skip optional whitespace
3808        for ch in chars {
3809            match ch {
3810                ' ' | '\t' => continue,
3811                '[' => return true,
3812                _ => return false,
3813            }
3814        }
3815        false
3816    }
3817
3818    fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3819        text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3820    }
3821
3822    fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3823        word.segments().any(|segment| {
3824            matches!(
3825                segment.kind(),
3826                LexedWordSegmentKind::SingleQuoted
3827                    | LexedWordSegmentKind::DollarSingleQuoted
3828                    | LexedWordSegmentKind::DoubleQuoted
3829                    | LexedWordSegmentKind::DollarDoubleQuoted
3830            )
3831        }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3832    }
3833
3834    fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3835        text.contains(['*', '?'])
3836            || text.ends_with('}') && text.contains("${")
3837            || text.ends_with(']')
3838                && text
3839                    .rfind('[')
3840                    .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3841    }
3842
3843    fn is_word_char(ch: char) -> bool {
3844        !matches!(
3845            ch,
3846            ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3847        )
3848    }
3849
3850    const fn is_ascii_word_byte(byte: u8) -> bool {
3851        !matches!(
3852            byte,
3853            b' ' | b'\t'
3854                | b'\n'
3855                | b';'
3856                | b'|'
3857                | b'&'
3858                | b'>'
3859                | b'<'
3860                | b'('
3861                | b')'
3862                | b'{'
3863                | b'}'
3864                | b'\''
3865                | b'"'
3866        )
3867    }
3868
3869    const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3870        Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3871    }
3872
3873    fn is_plain_word_char(ch: char) -> bool {
3874        Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3875    }
3876
3877    /// Read here document content until the delimiter line is found
3878    pub fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
3879        let mut content = String::with_capacity(64);
3880        let mut current_line = String::with_capacity(64);
3881
3882        // Save rest of current line (after the delimiter token on the command line).
3883        // For `cat <<EOF | sort`, this captures ` | sort` so the parser can
3884        // tokenize the pipe and subsequent command after the heredoc body.
3885        //
3886        // Quoted strings may span multiple lines (e.g., `cat <<EOF; echo "two\nthree"`),
3887        // so we track quoting state and continue across newlines until quotes close.
3888        let mut rest_of_line = String::with_capacity(32);
3889        let rest_of_line_start = self.current_position();
3890        let mut in_double_quote = false;
3891        let mut in_single_quote = false;
3892        let mut in_comment = false;
3893        let mut saw_non_whitespace_tail = false;
3894        let mut consecutive_backslashes = 0usize;
3895        let mut previous_tail_char = None;
3896        while let Some(ch) = self.peek_char() {
3897            self.advance();
3898            if in_comment {
3899                if ch == '\n' {
3900                    break;
3901                }
3902                rest_of_line.push(ch);
3903                previous_tail_char = Some(ch);
3904                continue;
3905            }
3906            if ch == '#'
3907                && !in_single_quote
3908                && !in_double_quote
3909                && self.comments_enabled()
3910                && heredoc_tail_hash_starts_comment(previous_tail_char)
3911            {
3912                in_comment = true;
3913                rest_of_line.push(ch);
3914                previous_tail_char = Some(ch);
3915                consecutive_backslashes = 0;
3916                continue;
3917            }
3918            let backslash_continues_line = ch == '\\'
3919                && !in_single_quote
3920                && self.peek_char() == Some('\n')
3921                && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
3922                && consecutive_backslashes.is_multiple_of(2);
3923            if backslash_continues_line {
3924                rest_of_line.push(ch);
3925                rest_of_line.push('\n');
3926                self.advance();
3927                consecutive_backslashes = 0;
3928                continue;
3929            }
3930            if ch == '\n' && !in_double_quote && !in_single_quote {
3931                break;
3932            }
3933            if ch == '"' && !in_single_quote {
3934                in_double_quote = !in_double_quote;
3935            } else if ch == '\'' && !in_double_quote {
3936                in_single_quote = !in_single_quote;
3937            } else if ch == '\\' && in_double_quote {
3938                // Escaped char inside double quotes — skip the next char too
3939                rest_of_line.push(ch);
3940                if let Some(next) = self.peek_char() {
3941                    rest_of_line.push(next);
3942                    self.advance();
3943                }
3944                continue;
3945            }
3946            rest_of_line.push(ch);
3947            if !ch.is_whitespace() {
3948                saw_non_whitespace_tail = true;
3949            }
3950            if ch == '\\' && !in_single_quote {
3951                consecutive_backslashes += 1;
3952            } else {
3953                consecutive_backslashes = 0;
3954            }
3955            previous_tail_char = Some(ch);
3956        }
3957
3958        // If we just drained a heredoc replay buffer (for example when multiple
3959        // heredocs share one command line), resume tracking from the true cursor
3960        // position before we measure the body span.
3961        self.sync_offset_to_cursor();
3962        let content_start = self.current_position();
3963        let mut current_line_start = content_start;
3964        let content_end;
3965
3966        // Read lines until we find the delimiter
3967        loop {
3968            if self.reinject_buf.is_empty() {
3969                // When the body reading drains a reinject buffer (from a
3970                // previous heredoc on the same command line), the virtual
3971                // offset drifts away from the cursor. Snap it back before
3972                // any source-based work so spans and `post_heredoc_offset`
3973                // stay within bounds.
3974                self.sync_offset_to_cursor();
3975                let rest = self.cursor.rest();
3976                if rest.is_empty() {
3977                    content_end = self.current_position();
3978                    break;
3979                }
3980
3981                let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
3982                let line = &rest[..line_len];
3983                let has_newline = line_len < rest.len();
3984
3985                if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
3986                    content_end = current_line_start;
3987                    self.consume_source_bytes(line_len);
3988                    if has_newline {
3989                        self.consume_ascii_chars(1);
3990                    }
3991                    break;
3992                }
3993
3994                content.push_str(line);
3995                self.consume_source_bytes(line_len);
3996
3997                if has_newline {
3998                    self.consume_ascii_chars(1);
3999                    content.push('\n');
4000                    current_line_start = self.current_position();
4001                    continue;
4002                }
4003
4004                content_end = self.current_position();
4005                break;
4006            }
4007
4008            match self.peek_char() {
4009                Some('\n') => {
4010                    self.advance();
4011                    // Check if current line matches delimiter
4012                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
4013                        content_end = current_line_start;
4014                        break;
4015                    }
4016                    content.push_str(&current_line);
4017                    content.push('\n');
4018                    current_line.clear();
4019                    current_line_start = self.current_position();
4020                }
4021                Some(ch) => {
4022                    current_line.push(ch);
4023                    self.advance();
4024                }
4025                None => {
4026                    // End of input - check last line
4027                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
4028                        content_end = current_line_start;
4029                        break;
4030                    }
4031                    if !current_line.is_empty() {
4032                        content.push_str(&current_line);
4033                    }
4034                    content_end = self.current_position();
4035                    break;
4036                }
4037            }
4038        }
4039
4040        // Re-inject the command-line tail so subsequent same-line tokens (pipes,
4041        // redirects, command words, additional heredocs) stay visible to the
4042        // parser. Always replay a terminating newline so parsing stops before
4043        // tokens that originally lived on later source lines, like `}` or `do`.
4044        let post_heredoc_offset = self.offset;
4045        self.offset = rest_of_line_start.offset;
4046        for ch in rest_of_line.chars() {
4047            self.reinject_buf.push_back(ch);
4048        }
4049        self.reinject_buf.push_back('\n');
4050        self.reinject_resume_offset = Some(post_heredoc_offset);
4051
4052        HeredocRead {
4053            content,
4054            content_span: Span::from_positions(content_start, content_end),
4055        }
4056    }
4057
4058    fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
4059        let mut chars = self.cursor.rest().chars();
4060        if chars.next() != Some('\n') {
4061            return false;
4062        }
4063
4064        for ch in chars {
4065            if matches!(ch, ' ' | '\t') {
4066                continue;
4067            }
4068            if ch == '\n' {
4069                return false;
4070            }
4071            return matches!(ch, '|' | '&' | ';' | '<' | '>')
4072                || (ch == '#' && self.comments_enabled());
4073        }
4074
4075        false
4076    }
4077}
4078
/// Does `line` terminate a heredoc with the given `delimiter`?
///
/// With `strip_tabs` (the `<<-` form), leading tab characters are ignored.
/// After optional tab stripping, the line matches when it consists of the
/// delimiter followed by nothing but spaces and tabs — this lexer is
/// deliberately lenient about trailing blanks after the delimiter.
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    };

    // An exact match is just the `strip_prefix` case with an empty remainder,
    // so one check covers both.
    candidate
        .strip_prefix(delimiter)
        .is_some_and(|tail| tail.chars().all(|ch| matches!(ch, ' ' | '\t')))
}
4096
/// Can `#` open a comment in a heredoc command tail, given the character that
/// precedes it? True at the start of the tail, after whitespace, or after one
/// of the shell operator characters `; | & < > )`.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(prev) => {
            prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')')
        }
    }
}
4102
/// Decodes the character starting at byte `index`, returning it together with
/// the byte offset just past it. Yields `None` when `index` is out of bounds,
/// not on a UTF-8 boundary, or at the end of `input`.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    let rest = input.get(index..)?;
    let ch = rest.chars().next()?;
    Some((ch, index + ch.len_utf8()))
}
4107
/// Reports whether `prefix` (a single line of shell text) contains more `((`
/// openers than `))` closers outside of single quotes, double quotes,
/// backticks, and backslash escapes — i.e. the line is still inside an
/// unclosed arithmetic-style `(( ... ))` construct.
fn line_has_unclosed_double_paren(prefix: &str) -> bool {
    let mut open_pairs = 0usize;
    let mut single = false;
    let mut double = false;
    let mut backtick = false;
    let mut pending_escape = false;
    let mut chars = prefix.chars().peekable();

    while let Some(ch) = chars.next() {
        let escaped_now = pending_escape;
        // Backslash toggles escape state everywhere except inside single
        // quotes, where it is literal.
        if ch == '\\' && !single {
            pending_escape = !pending_escape;
            continue;
        }
        pending_escape = false;

        let bare = !single && !double && !backtick && !escaped_now;
        match ch {
            '\'' if !double && !backtick && !escaped_now => single = !single,
            '"' if !single && !backtick && !escaped_now => double = !double,
            '`' if !single && !double && !escaped_now => backtick = !backtick,
            // Only the two-character sequences `((` / `))` adjust the depth;
            // the second character is consumed along with the first.
            '(' if bare && chars.peek() == Some(&'(') => {
                chars.next();
                open_pairs += 1;
            }
            ')' if bare && chars.peek() == Some(&')') => {
                chars.next();
                open_pairs = open_pairs.saturating_sub(1);
            }
            _ => {}
        }
    }

    open_pairs > 0
}
4157
4158fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4159    let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4160    let prefix = &input[line_start..index];
4161    line_has_unclosed_double_paren(prefix)
4162}
4163
4164fn hash_starts_comment(input: &str, index: usize) -> bool {
4165    if inside_unclosed_double_paren_on_line(input, index) {
4166        return false;
4167    }
4168
4169    let next = &input[index + '#'.len_utf8()..];
4170    input[..index]
4171        .chars()
4172        .next_back()
4173        .is_none_or(|prev| match prev {
4174            '(' => {
4175                let whitespace_index = next.find(char::is_whitespace);
4176                let close_index = next.find(')');
4177
4178                match (whitespace_index, close_index) {
4179                    (Some(whitespace), Some(close)) => whitespace < close,
4180                    (Some(_), None) | (None, None) => true,
4181                    (None, Some(_)) => false,
4182                }
4183            }
4184            _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4185        })
4186}
4187
/// Returns whether `ch` terminates a heredoc delimiter word. Nothing
/// terminates the word while inside single quotes, double quotes, or a
/// backslash escape; otherwise whitespace and the shell metacharacters
/// `| & ; < > ( )` do.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }
    ch.is_whitespace() || matches!(ch, '|' | '&' | ';' | '<' | '>' | '(' | ')')
}
4199
/// Scans a double-quoted segment inside a scanned command substitution.
///
/// `index` points just past the opening `"`. Returns the byte offset just
/// past the matching closing `"`. Backslash escapes consume the following
/// character, and nested `${...}` / `$(...)` expansions are skipped via the
/// dedicated scanners (propagating `subst_depth` so recursion stays bounded).
/// Returns `None` when the quote is never closed or a nested scan fails.
fn scan_double_quoted_command_substitution_segment(
    input: &str,
    mut index: usize,
    subst_depth: usize,
) -> Option<usize> {
    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        match ch {
            // Closing quote: report the offset just past it.
            '"' => return Some(next_index),
            // Backslash escapes the next character (if any).
            '\\' => {
                index = next_index;
                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
                    index = escaped_next;
                }
            }
            // `${...}`: skip the whole parameter expansion at the same depth.
            '$' if input[next_index..].starts_with('{') => {
                let consumed = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )?;
                index = next_index + '{'.len_utf8() + consumed;
            }
            // `$(...)` (but not `$((`, which is arithmetic): skip the nested
            // command substitution one level deeper.
            '$' if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
            {
                let consumed = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )?;
                index = next_index + '('.len_utf8() + consumed;
            }
            _ => index = next_index,
        }
    }

    // Ran out of input with the quote still open.
    None
}
4236
/// Scans the body of a `${...}` parameter expansion inside a scanned command
/// substitution, starting just past the `{`, and returns the byte offset
/// just past the matching unquoted `}`.
///
/// Tracks single quotes, double quotes, ANSI-C `$'...'` quotes, backticks,
/// and backslash escapes, and recurses into nested `${...}`, `$(...)`, and
/// `<(...)` / `>(...)` constructs (bumping `subst_depth` for command
/// substitutions). Returns `None` when the expansion is never closed or a
/// nested scan fails.
fn scan_command_subst_parameter_expansion_len(input: &str, subst_depth: usize) -> Option<usize> {
    let mut index = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    // `$'...'` quoting, entered only when a `'` directly follows a bare `$`.
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // True when the previous character was an unquoted, unescaped `$`.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        // Capture the escape state that applies to *this* character before
        // clearing it for the next one.
        let was_escaped = escaped;
        if ch == '\\' && !in_single {
            // Two consecutive backslashes cancel each other out.
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        // Nested `$`-introduced constructs, skipped whole when they parse.
        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            // `${...}`: recurse at the same substitution depth.
            if input[next_index..].starts_with('{')
                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )
            {
                index = next_index + '{'.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }

            // `$(...)` (but not `$((` arithmetic): one level deeper.
            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // Process substitutions `<(...)` / `>(...)` outside all quoting.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    // `$'` opens ANSI-C quoting rather than a plain quote.
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            // Unquoted, unescaped `}` closes the expansion.
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                return Some(next_index);
            }
            _ => {}
        }

        // Remember whether this character was a bare `$`, so a following `'`
        // is recognized as the start of `$'...'`.
        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    // Input ended before the closing `}`.
    None
}
4336
/// Reads a heredoc delimiter word starting at (or after blanks following)
/// byte `index`, returning the offset just past the word together with its
/// "cooked" form (quotes removed, backslash escapes resolved). Returns `None`
/// when no delimiter characters are present.
fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
    // Skip horizontal whitespace before the delimiter word.
    while let Some(ch) = input.get(index..).and_then(|rest| rest.chars().next()) {
        if !matches!(ch, ' ' | '\t') {
            break;
        }
        index += ch.len_utf8();
    }

    let word_start = index;
    let mut cooked = String::new();
    let mut single = false;
    let mut double = false;
    let mut escape = false;

    while let Some(ch) = input.get(index..).and_then(|rest| rest.chars().next()) {
        // An unquoted, unescaped metacharacter or whitespace ends the word.
        let terminates = !single
            && !double
            && !escape
            && (ch.is_whitespace() || matches!(ch, '|' | '&' | ';' | '<' | '>' | '(' | ')'));
        if terminates {
            break;
        }

        index += ch.len_utf8();
        if escape {
            // The escaped character is taken literally.
            cooked.push(ch);
            escape = false;
        } else if ch == '\\' && !single {
            escape = true;
        } else if ch == '\'' && !double {
            single = !single;
        } else if ch == '"' && !single {
            double = !double;
        } else {
            cooked.push(ch);
        }
    }

    if index == word_start {
        None
    } else {
        Some((index, cooked))
    }
}
4373
/// Advances `index` past the body of a pending heredoc inside a scanned
/// command substitution, consuming whole lines until one terminates the
/// heredoc or the input runs out. Returns the offset just past the
/// terminating line (including its newline, when present).
fn skip_command_subst_pending_heredoc(
    input: &str,
    mut index: usize,
    delimiter: &str,
    strip_tabs: bool,
) -> usize {
    while index <= input.len() {
        let rest = &input[index..];
        let line_len = rest.find('\n').unwrap_or(rest.len());
        let line = &rest[..line_len];
        let has_newline = line_len < rest.len();

        // Step past the line (and its newline, when present) up front.
        index += line_len + usize::from(has_newline);

        // Same leniency as the main lexer: optional tab stripping for `<<-`,
        // then the delimiter followed only by trailing blanks.
        let body = if strip_tabs {
            line.trim_start_matches('\t')
        } else {
            line
        };
        let terminated = body
            .strip_prefix(delimiter)
            .is_some_and(|tail| tail.chars().all(|ch| matches!(ch, ' ' | '\t')));

        if terminated || !has_newline {
            return index;
        }
    }

    index
}
4398
/// Scans an ANSI-C quoted string `$'...'`, with `quote_index` pointing at the
/// opening `'`. Backslash escapes the following character (so `\'` does not
/// close the string). Returns the byte offset just past the closing quote, or
/// `None` if the string is unterminated.
fn scan_command_subst_ansi_c_single_quoted_segment(
    input: &str,
    quote_index: usize,
) -> Option<usize> {
    // Start just past the opening quote.
    let mut index = quote_index + '\''.len_utf8();

    while let Some(ch) = input.get(index..).and_then(|rest| rest.chars().next()) {
        index += ch.len_utf8();
        match ch {
            '\\' => {
                // Consume the escaped character, if any.
                if let Some(skipped) = input.get(index..).and_then(|rest| rest.chars().next()) {
                    index += skipped.len_utf8();
                }
            }
            '\'' => return Some(index),
            _ => {}
        }
    }

    None
}
4421
/// Scans a backtick command substitution with `start` pointing just past the
/// opening `` ` ``. Backslash escapes the next character. Returns the byte
/// offset just past the closing backtick, or `None` if unterminated.
fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
    let mut index = start;

    while let Some(ch) = input.get(index..).and_then(|rest| rest.chars().next()) {
        index += ch.len_utf8();
        match ch {
            '\\' => {
                // Consume the escaped character, if any.
                if let Some(skipped) = input.get(index..).and_then(|rest| rest.chars().next()) {
                    index += skipped.len_utf8();
                }
            }
            '`' => return Some(index),
            _ => {}
        }
    }

    None
}
4441
4442fn flush_scanned_command_subst_keyword(
4443    current_word: &mut String,
4444    pending_case_headers: &mut usize,
4445    case_clause_depths: &mut SmallVec<[usize; 4]>,
4446    depth: usize,
4447    word_started_at_command_start: &mut bool,
4448) {
4449    if current_word.is_empty() {
4450        *word_started_at_command_start = false;
4451        return;
4452    }
4453
4454    match current_word.as_str() {
4455        "case" if *word_started_at_command_start => *pending_case_headers += 1,
4456        "in" if *pending_case_headers > 0 => {
4457            *pending_case_headers -= 1;
4458            case_clause_depths.push(depth);
4459        }
4460        "esac" if *word_started_at_command_start => {
4461            case_clause_depths.pop();
4462        }
4463        _ => {}
4464    }
4465
4466    current_word.clear();
4467    *word_started_at_command_start = false;
4468}
4469
4470fn scan_command_substitution_body_len_inner(input: &str, subst_depth: usize) -> Option<usize> {
4471    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
4472        return None;
4473    }
4474
4475    let mut index = 0usize;
4476    let mut depth = 1;
4477    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
4478    let mut pending_case_headers = 0usize;
4479    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
4480    let mut current_word = String::with_capacity(16);
4481    let mut at_command_start = true;
4482    let mut expecting_redirection_target = false;
4483    let mut current_word_started_at_command_start = false;
4484
4485    while let Some((ch, next_index)) = next_char_boundary(input, index) {
4486        match ch {
4487            '#' if hash_starts_comment(input, index) => {
4488                let had_word = !current_word.is_empty();
4489                flush_scanned_command_subst_keyword(
4490                    &mut current_word,
4491                    &mut pending_case_headers,
4492                    &mut case_clause_depths,
4493                    depth,
4494                    &mut current_word_started_at_command_start,
4495                );
4496                if had_word && expecting_redirection_target {
4497                    expecting_redirection_target = false;
4498                }
4499                index = next_index;
4500                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
4501                    index = comment_next;
4502                    if comment_ch == '\n' {
4503                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
4504                            index = skip_command_subst_pending_heredoc(
4505                                input, index, &delimiter, strip_tabs,
4506                            );
4507                        }
4508                        at_command_start = true;
4509                        expecting_redirection_target = false;
4510                        break;
4511                    }
4512                }
4513            }
4514            '(' => {
4515                flush_scanned_command_subst_keyword(
4516                    &mut current_word,
4517                    &mut pending_case_headers,
4518                    &mut case_clause_depths,
4519                    depth,
4520                    &mut current_word_started_at_command_start,
4521                );
4522                depth += 1;
4523                index = next_index;
4524                at_command_start = true;
4525                expecting_redirection_target = false;
4526            }
4527            ')' => {
4528                flush_scanned_command_subst_keyword(
4529                    &mut current_word,
4530                    &mut pending_case_headers,
4531                    &mut case_clause_depths,
4532                    depth,
4533                    &mut current_word_started_at_command_start,
4534                );
4535                if case_clause_depths
4536                    .last()
4537                    .is_some_and(|case_depth| *case_depth == depth)
4538                {
4539                    index = next_index;
4540                    at_command_start = true;
4541                    expecting_redirection_target = false;
4542                    continue;
4543                }
4544                depth -= 1;
4545                index = next_index;
4546                if depth == 0 {
4547                    return Some(index);
4548                }
4549                at_command_start = false;
4550                expecting_redirection_target = false;
4551            }
4552            '"' => {
4553                let had_word = !current_word.is_empty();
4554                flush_scanned_command_subst_keyword(
4555                    &mut current_word,
4556                    &mut pending_case_headers,
4557                    &mut case_clause_depths,
4558                    depth,
4559                    &mut current_word_started_at_command_start,
4560                );
4561                if had_word && expecting_redirection_target {
4562                    expecting_redirection_target = false;
4563                }
4564                index = scan_double_quoted_command_substitution_segment(
4565                    input,
4566                    next_index,
4567                    subst_depth,
4568                )?;
4569                if expecting_redirection_target {
4570                    expecting_redirection_target = false;
4571                } else {
4572                    at_command_start = false;
4573                }
4574            }
4575            '\'' => {
4576                let had_word = !current_word.is_empty();
4577                flush_scanned_command_subst_keyword(
4578                    &mut current_word,
4579                    &mut pending_case_headers,
4580                    &mut case_clause_depths,
4581                    depth,
4582                    &mut current_word_started_at_command_start,
4583                );
4584                if had_word && expecting_redirection_target {
4585                    expecting_redirection_target = false;
4586                }
4587                index = next_index;
4588                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
4589                    index = quoted_next;
4590                    if quoted_ch == '\'' {
4591                        break;
4592                    }
4593                }
4594                if expecting_redirection_target {
4595                    expecting_redirection_target = false;
4596                } else {
4597                    at_command_start = false;
4598                }
4599            }
4600            '`' => {
4601                let had_word = !current_word.is_empty();
4602                flush_scanned_command_subst_keyword(
4603                    &mut current_word,
4604                    &mut pending_case_headers,
4605                    &mut case_clause_depths,
4606                    depth,
4607                    &mut current_word_started_at_command_start,
4608                );
4609                if had_word && expecting_redirection_target {
4610                    expecting_redirection_target = false;
4611                }
4612                index = scan_command_subst_backtick_segment(input, next_index)?;
4613                if expecting_redirection_target {
4614                    expecting_redirection_target = false;
4615                } else {
4616                    at_command_start = false;
4617                }
4618            }
4619            '$' if input[next_index..].starts_with('\'') => {
4620                let had_word = !current_word.is_empty();
4621                flush_scanned_command_subst_keyword(
4622                    &mut current_word,
4623                    &mut pending_case_headers,
4624                    &mut case_clause_depths,
4625                    depth,
4626                    &mut current_word_started_at_command_start,
4627                );
4628                if had_word && expecting_redirection_target {
4629                    expecting_redirection_target = false;
4630                }
4631                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
4632                if expecting_redirection_target {
4633                    expecting_redirection_target = false;
4634                } else {
4635                    at_command_start = false;
4636                }
4637            }
4638            '\\' => {
4639                let had_word = !current_word.is_empty();
4640                flush_scanned_command_subst_keyword(
4641                    &mut current_word,
4642                    &mut pending_case_headers,
4643                    &mut case_clause_depths,
4644                    depth,
4645                    &mut current_word_started_at_command_start,
4646                );
4647                if had_word && expecting_redirection_target {
4648                    expecting_redirection_target = false;
4649                }
4650                index = next_index;
4651                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4652                    index = escaped_next;
4653                }
4654                if expecting_redirection_target {
4655                    expecting_redirection_target = false;
4656                } else {
4657                    at_command_start = false;
4658                }
4659            }
4660            '>' => {
4661                let word_was_redirection_fd = current_word_started_at_command_start
4662                    && !current_word.is_empty()
4663                    && current_word.chars().all(|current| current.is_ascii_digit());
4664                flush_scanned_command_subst_keyword(
4665                    &mut current_word,
4666                    &mut pending_case_headers,
4667                    &mut case_clause_depths,
4668                    depth,
4669                    &mut current_word_started_at_command_start,
4670                );
4671                if word_was_redirection_fd {
4672                    at_command_start = true;
4673                }
4674                index = next_index;
4675                expecting_redirection_target = true;
4676            }
4677            '<' if input[next_index..].starts_with('<') => {
4678                let word_was_redirection_fd = current_word_started_at_command_start
4679                    && !current_word.is_empty()
4680                    && current_word.chars().all(|current| current.is_ascii_digit());
4681                let had_word = !current_word.is_empty();
4682                flush_scanned_command_subst_keyword(
4683                    &mut current_word,
4684                    &mut pending_case_headers,
4685                    &mut case_clause_depths,
4686                    depth,
4687                    &mut current_word_started_at_command_start,
4688                );
4689                if had_word && expecting_redirection_target {
4690                    expecting_redirection_target = false;
4691                }
4692                if word_was_redirection_fd {
4693                    at_command_start = true;
4694                }
4695                if inside_unclosed_double_paren_on_line(input, index) {
4696                    index = next_index + '<'.len_utf8();
4697                    continue;
4698                }
4699
4700                if input[next_index + '<'.len_utf8()..].starts_with('<') {
4701                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
4702                    expecting_redirection_target = true;
4703                    continue;
4704                }
4705
4706                let strip_tabs = input[next_index..].starts_with("<-");
4707                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
4708                if let Some((delimiter_index, delimiter)) =
4709                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
4710                {
4711                    pending_heredocs.push((delimiter, strip_tabs));
4712                    index = delimiter_index;
4713                    expecting_redirection_target = false;
4714                } else {
4715                    index = next_index;
4716                    expecting_redirection_target = true;
4717                }
4718            }
4719            '\n' => {
4720                flush_scanned_command_subst_keyword(
4721                    &mut current_word,
4722                    &mut pending_case_headers,
4723                    &mut case_clause_depths,
4724                    depth,
4725                    &mut current_word_started_at_command_start,
4726                );
4727                index = next_index;
4728                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
4729                    index =
4730                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
4731                }
4732                at_command_start = true;
4733                expecting_redirection_target = false;
4734            }
4735            '$' if input[next_index..].starts_with('{') => {
4736                let had_word = !current_word.is_empty();
4737                flush_scanned_command_subst_keyword(
4738                    &mut current_word,
4739                    &mut pending_case_headers,
4740                    &mut case_clause_depths,
4741                    depth,
4742                    &mut current_word_started_at_command_start,
4743                );
4744                if had_word && expecting_redirection_target {
4745                    expecting_redirection_target = false;
4746                }
4747                let consumed = scan_command_subst_parameter_expansion_len(
4748                    &input[next_index + '{'.len_utf8()..],
4749                    subst_depth,
4750                )?;
4751                index = next_index + '{'.len_utf8() + consumed;
4752                if expecting_redirection_target {
4753                    expecting_redirection_target = false;
4754                } else {
4755                    at_command_start = false;
4756                }
4757            }
4758            '$' if input[next_index..].starts_with('(')
4759                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4760            {
4761                let had_word = !current_word.is_empty();
4762                flush_scanned_command_subst_keyword(
4763                    &mut current_word,
4764                    &mut pending_case_headers,
4765                    &mut case_clause_depths,
4766                    depth,
4767                    &mut current_word_started_at_command_start,
4768                );
4769                if had_word && expecting_redirection_target {
4770                    expecting_redirection_target = false;
4771                }
4772                let consumed = scan_command_substitution_body_len_inner(
4773                    &input[next_index + '('.len_utf8()..],
4774                    subst_depth + 1,
4775                )?;
4776                index = next_index + '('.len_utf8() + consumed;
4777                if expecting_redirection_target {
4778                    expecting_redirection_target = false;
4779                } else {
4780                    at_command_start = false;
4781                }
4782            }
4783            _ => {
4784                if ch.is_ascii_alphanumeric() || ch == '_' {
4785                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
4786                    {
4787                        current_word_started_at_command_start = true;
4788                        at_command_start = false;
4789                    }
4790                    current_word.push(ch);
4791                } else {
4792                    let had_word = !current_word.is_empty();
4793                    flush_scanned_command_subst_keyword(
4794                        &mut current_word,
4795                        &mut pending_case_headers,
4796                        &mut case_clause_depths,
4797                        depth,
4798                        &mut current_word_started_at_command_start,
4799                    );
4800                    if had_word && expecting_redirection_target {
4801                        expecting_redirection_target = false;
4802                    }
4803                    match ch {
4804                        ' ' | '\t' => {}
4805                        ';' | '|' | '&' => {
4806                            at_command_start = true;
4807                            expecting_redirection_target = false;
4808                        }
4809                        _ => {
4810                            if !expecting_redirection_target {
4811                                at_command_start = false;
4812                            }
4813                        }
4814                    }
4815                }
4816                index = next_index;
4817            }
4818        }
4819    }
4820
4821    None
4822}
4823
4824pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
4825    scan_command_substitution_body_len_inner(input, 0)
4826}
4827
4828#[cfg(test)]
4829mod tests {
4830    use super::*;
4831
4832    fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
4833        match token.kind {
4834            kind if kind.is_word_like() => token.word_string(),
4835            TokenKind::Comment => token
4836                .span
4837                .slice(source)
4838                .strip_prefix('#')
4839                .map(str::to_string),
4840            TokenKind::Error => token
4841                .error_kind()
4842                .map(LexerErrorKind::message)
4843                .map(str::to_string),
4844            _ => None,
4845        }
4846    }
4847
4848    fn assert_next_token(
4849        lexer: &mut Lexer<'_>,
4850        expected_kind: TokenKind,
4851        expected_text: Option<&str>,
4852    ) {
4853        let token = lexer.next_lexed_token().unwrap();
4854        assert_eq!(token.kind, expected_kind);
4855        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4856    }
4857
4858    fn assert_next_token_with_comments(
4859        lexer: &mut Lexer<'_>,
4860        expected_kind: TokenKind,
4861        expected_text: Option<&str>,
4862    ) {
4863        let token = lexer.next_lexed_token_with_comments().unwrap();
4864        assert_eq!(token.kind, expected_kind);
4865        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4866    }
4867
4868    fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
4869        let mut lexer = Lexer::new(input);
4870
4871        while let Some(token) = lexer.next_lexed_token() {
4872            if token.kind == TokenKind::Newline {
4873                continue;
4874            }
4875
4876            assert_eq!(
4877                token.span.start.line, token.span.end.line,
4878                "token should stay on one line: {:?}",
4879                token
4880            );
4881        }
4882    }
4883
    // Whitespace-separated plain words each lex as `Word`, and the token
    // stream ends with `None` at end of input.
    #[test]
    fn test_simple_words() {
        let mut lexer = Lexer::new("echo hello world");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
        assert!(lexer.next_lexed_token().is_none());
    }

    // A single-quoted string lexes as one `LiteralWord` with the quotes
    // stripped.
    #[test]
    fn test_single_quoted_string() {
        let mut lexer = Lexer::new("echo 'hello world'");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        // Single-quoted strings return LiteralWord (no variable expansion)
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("hello world"));
        assert!(lexer.next_lexed_token().is_none());
    }

    // A double-quoted string lexes as one `QuotedWord` with the quotes
    // stripped.
    #[test]
    fn test_double_quoted_string() {
        let mut lexer = Lexer::new("echo \"hello world\"");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("hello world"));
        assert!(lexer.next_lexed_token().is_none());
    }

    // A `}` inside double quotes must not close the surrounding brace
    // expansion; the whole `{"}",a}` stays a single word.
    #[test]
    fn test_brace_expansion_token_ignores_quoted_closers() {
        let mut lexer = Lexer::new("echo {\"}\",a}\n");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{"}",a}"#));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }

    // A backslash inside single quotes is literal, so the brace word ends
    // at the unquoted `}` and the following word lexes separately.
    #[test]
    fn test_brace_expansion_token_preserves_single_quoted_backslash_member_boundary() {
        let mut lexer = Lexer::new("echo {'a\\',b} next\n");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{'a\',b}"#));
        assert_next_token(&mut lexer, TokenKind::Word, Some("next"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
4933
    // A double-quoted expansion token keeps a span into the original
    // source, so the segment can be sliced back out of `source`.
    #[test]
    fn test_double_quoted_expansion_token_keeps_source_backing() {
        let source = r#""$bar""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(token.word_text(), Some("$bar"));

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
        assert_eq!(segment.span().unwrap().slice(source), "$bar");
    }

    // Inner double quotes and single quotes inside a `$( ... )` pipeline
    // must survive verbatim inside the outer quoted word.
    #[test]
    fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
        let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(
            token.word_text(),
            Some(r#"$(echo "$line" | cut -d' ' -f2-)"#)
        );
    }

    // Same as above, with a braced parameter (`${@}`) feeding the pipe.
    #[test]
    fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
        let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(
            token.word_text(),
            Some(r#"$(echo "${@}" | tr -d '[:space:]')"#)
        );
    }

    // Adjacent plain/double-quoted/single-quoted pieces form one word
    // whose segments keep their individual kinds and source spans.
    #[test]
    fn test_mixed_word_keeps_segment_kinds() {
        let source = r#"foo"bar"'baz'"#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();

        assert_eq!(
            segments,
            vec![
                (LexedWordSegmentKind::Plain, "foo".to_string()),
                (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
                (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
            ]
        );
        assert_eq!(word.joined_text(), "foobarbaz");
        assert_eq!(
            word.segments()
                .next()
                .and_then(LexedWordSegment::span)
                .unwrap()
                .slice(source),
            "foo"
        );
    }
5007
    // `<<-` heredocs with tab-indented delimiters must be skipped so the
    // scan still finds the real closing `)`.
    #[test]
    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("field, direction"));
        assert!(body.ends_with(')'));
    }

    // A comment started right after `;` may contain `)` without ending
    // the substitution body.
    #[test]
    fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
        let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf '%s' y"));
        assert!(body.ends_with(')'));
    }

    // `(# ...` opens a subshell followed by a comment; the `)` inside the
    // comment must not close the group.
    #[test]
    fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
        let source = " (# comment with )\nprintf %s 1,2\n) )\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // Heredoc delimiter detection must work when `|` follows `<<EOF`
    // without a space.
    #[test]
    fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
        let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("field, direction"));
        assert!(body.ends_with(')'));
    }

    // A literal `)` inside a `${x//foo/)}` replacement must not close the
    // substitution body.
    #[test]
    fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
        let source = "printf %s ${x//foo/)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x//foo/)},1"));
        assert!(body.ends_with(')'));
    }

    // A comment directly after a case-pattern `)` (even one containing
    // `esac` and `)`) must not confuse case-clause tracking.
    #[test]
    fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
        let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }
5073
    // `(#b)` after `[[ ... ==` is a zsh inline glob control, not a
    // comment opener.
    #[test]
    fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
        let source = "[[ \"$buf\" == (#b)(*) ]]";
        let index = source.find('#').expect("expected hash");

        assert!(!hash_starts_comment(source, index));
    }

    // `(#comment` (no space after the hash) still counts as a comment
    // inside a grouping paren.
    #[test]
    fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
        let source = "(#comment with )";
        let index = source.find('#').expect("expected hash");

        assert!(hash_starts_comment(source, index));
    }

    // `#c` inside `(( ... ))` arithmetic is an operand, not a comment.
    #[test]
    fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
        let source = "(( #c < 256 ))";
        let index = source.find('#').expect("expected hash");

        assert!(!hash_starts_comment(source, index));
    }

    // A quoted `'(('` must not be mistaken for an arithmetic opener, so
    // the later `#` really starts a comment.
    #[test]
    fn test_hash_starts_comment_respects_quoted_double_parens() {
        let source = "printf '((' # comment";
        let index = source.find('#').expect("expected hash");

        assert!(hash_starts_comment(source, index));
    }
5105
    // A quoted `'(('` must not start arithmetic mode, so the following
    // comment (containing `)`) is skipped correctly.
    #[test]
    fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
        let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // `(#comment` with no space after the hash is still a comment inside
    // a grouping paren.
    #[test]
    fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
        let source = " (#comment with )\nprintf %s 1,2\n) )\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // `<<` inside `(( ... ))` is an arithmetic shift, not a heredoc
    // operator.
    #[test]
    fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
        let source = "((x<<2))\nprintf %s 1,2\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // A case-pattern `)` inside a nested subshell must not close the
    // subshell or the outer substitution.
    #[test]
    fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
        let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with("))"));
    }

    // The words `case` / `in` used as plain command arguments must not
    // start case-clause tracking.
    #[test]
    fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
        let source = "printf %s 1,2; echo case in)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("echo case in"));
        assert!(body.ends_with(')'));
    }

    // `$'...'` ANSI-C quoting allows `\'`; the escaped quote must not end
    // the string early.
    #[test]
    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
        let source = "printf %s $'a\\'b'; printf %s 1,2)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("$'a\\'b'"));
        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // A `)` inside backticks belongs to the backtick command, not the
    // surrounding substitution.
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
        let source = "printf %s `echo foo)`; printf %s ok)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("`echo foo)`"));
        assert!(body.contains("printf %s ok"));
        assert!(body.ends_with(')'));
    }

    // Backticks nested inside `${x/.../...}` may contain `}` and `)`
    // without closing the expansion or the substitution.
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
        let source = "printf %s ${x/`echo }`/foo)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x/`echo }`/foo)},1"));
        assert!(body.ends_with(')'));
    }

    // A `<(...)` process substitution inside a parameter expansion may
    // contain `}` without closing the expansion.
    #[test]
    fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
     {
        let source = "printf %s ${x/<(echo })/foo)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x/<(echo })/foo)},1"));
        assert!(body.ends_with(')'));
    }
5207
    // The next group mirrors earlier cases but with the closing `)` as
    // the very last byte of input, so `consumed == source.len()`.

    // Plain `case` / `in` argument words, closer at EOF.
    #[test]
    fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
        let source = "printf %s 1,2; echo case in)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // ANSI-C quotes with an escaped `'`, closer at EOF.
    #[test]
    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
        let source = "printf %s $'a\\'b'; printf %s 1,2)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // Backticks containing `)`, closer at EOF.
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
        let source = "printf %s `echo foo)`; printf %s ok)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // Mixed double/single quotes in a pipeline, closer at EOF.
    #[test]
    fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
        let source = "echo \"$line\" | cut -d' ' -f2-)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // Braced parameter in a pipeline, closer at EOF.
    #[test]
    fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
        let source = "echo \"${@}\" | tr -d '[:space:]')";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // Tab-stripped `<<-` heredoc, closer at EOF.
    #[test]
    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // Heredoc piped without a space (`<<EOF|`), closer at EOF.
    #[test]
    fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
        let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }
5277
    // A quoted `)` produced by `printf` inside `$(( ... ))` inside a
    // quoted `$( ... )` must not terminate any enclosing construct.
    #[test]
    fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
        let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
        let mut lexer = Lexer::new(source);

        let first = lexer.next_lexed_token().expect("expected first token");
        assert!(first.kind.is_word_like(), "{:?}", first.kind);
        assert_eq!(first.word_string().as_deref(), Some("echo"));

        let second = lexer.next_lexed_token().expect("expected second token");
        assert!(second.kind.is_word_like(), "{:?}", second.kind);
        assert_eq!(
            second.word_string().as_deref(),
            Some("$(echo \"$(( $(printf ')') + 1 ))\")")
        );
    }

    // Escaped double quotes (`\"`) before `$(` must not throw off the
    // scan; the body ends at the substitution's own `)`.
    #[test]
    fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
        let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
        let start = source.find("$(").expect("expected command substitution") + 2;
        let consumed =
            scan_command_substitution_body_len(&source[start..]).expect("expected match");
        assert_eq!(&source[start..start + consumed], "echo $var | tr A-Z a-z)");
    }

    // A nested `$( ... )` is consumed whole; only the outermost closer
    // ends the scan.
    #[test]
    fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
        let source = "echo $(echo $(basename $filename .fuzz))";
        let start = source.find("$(").expect("expected command substitution") + 2;
        let consumed =
            scan_command_substitution_body_len(&source[start..]).expect("expected match");
        assert_eq!(
            &source[start..start + consumed],
            "echo $(basename $filename .fuzz))"
        );
    }

    // `[[ ... ]] && echo "$(...)"` with nested quotes consumes the whole
    // input up to and including the final `)`.
    #[test]
    fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
        let source = "\n       [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n    )";
        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        assert_eq!(consumed, source.len());
    }
5322
    // A single-quoted prefix followed by bare text yields two segments,
    // and the plain continuation keeps its source span.
    #[test]
    fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
        let source = "'foo'bar";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::LiteralWord);

        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();

        assert_eq!(
            segments,
            vec![
                (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
                (LexedWordSegmentKind::Plain, "bar".to_string()),
            ]
        );
        assert_eq!(word.joined_text(), "foobar");
        assert_eq!(
            word.segments()
                .nth(1)
                .and_then(LexedWordSegment::span)
                .unwrap()
                .slice(source),
            "bar"
        );
    }

    // An unquoted `$( ... )` word is one plain segment whose span covers
    // the whole source.
    #[test]
    fn test_unquoted_command_substitution_word_keeps_source_backing() {
        let source = "$(printf hi)";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(segment.as_str(), source);
        assert_eq!(segment.span().unwrap().slice(source), source);
    }

    // A `${...}` with a nested `${#...}` inside its subscript stays one
    // plain segment backed by the source.
    #[test]
    fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
        let source = "${arr[$RANDOM % ${#arr[@]}]}";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(segment.as_str(), source);
        assert_eq!(segment.span().unwrap().slice(source), source);
    }

    // A quoted prefix followed by `$( ... )` keeps the continuation as a
    // plain segment with its own source span.
    #[test]
    fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
        let source = "\"foo\"$(printf hi)";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let continuation = word.segments().nth(1).unwrap();
        assert_eq!(continuation.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(continuation.as_str(), "$(printf hi)");
        assert_eq!(continuation.span().unwrap().slice(source), "$(printf hi)");
    }

    // The double-quoted variant of the nested `${...}` case: one
    // double-quoted segment, quotes stripped, span preserved.
    #[test]
    fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
        let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
        assert_eq!(segment.as_str(), "${arr[$RANDOM % ${#arr[@]}]}");
        assert_eq!(
            segment.span().unwrap().slice(source),
            "${arr[$RANDOM % ${#arr[@]}]}"
        );
    }

    // `$'\c''` — the ANSI-C control escape consumes the following quote,
    // producing the BEL character (0x07) as the word's content.
    #[test]
    fn test_ansi_c_control_escape_can_consume_quote() {
        let mut lexer = Lexer::new("echo $'\\c''");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("\x07"));
        assert!(lexer.next_lexed_token().is_none());
    }
5426
    // `${out_line//'"'/'\"'}` inside double quotes lexes as one word on
    // one line (the cooked text drops the quoting layers).
    #[test]
    fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
        let source = r#"out_line="${out_line//'"'/'\"'}"
"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(r#"out_line=${out_line//'"'/'"'}"#),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }

    // After the tricky replacement above, subsequent commands (including
    // a quoted heredoc) must still lex normally.
    #[test]
    fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
        let source = r#"out_line="${out_line//'"'/'\"'}"
echo "Error: Missing python3!"
cat << 'EOF' > "${pywrapper}"
import os
EOF
"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(r#"out_line=${out_line//'"'/'"'}"#),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("Error: Missing python3!"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("EOF"));
        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("${pywrapper}"));
    }

    // `${crypt//\\/\\\\}` stays one token: the raw span keeps the source
    // escapes while the cooked word string halves the backslashes.
    #[test]
    fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
        let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);
        assert_eq!(token.span.slice(source), "crypt=${crypt//\\\\/\\\\\\\\}");
        assert!(token.source_slice(source).is_none());
        assert_eq!(
            token.word_string().as_deref(),
            Some("crypt=${crypt//\\/\\\\}")
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }

    // A literal `{` inside a `${response#*{...}` trim pattern must not
    // open a brace scope that swallows the rest of the function body.
    #[test]
    fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
        let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n  if true; then\n    txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n  fi\n}\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("true"));
        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(
                "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
            ),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("fi"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }

    // A bare `{` as a case pattern is a word, not a brace-group opener,
    // so the remaining arms and `esac` still lex.
    #[test]
    fn test_case_pattern_literal_left_brace_does_not_swallow_following_arms() {
        let source = "case \"$word\" in\n  {) : ;;\n  :) : ;;\nesac\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("case"));
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$word"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("in"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("esac"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5549
5550    #[test]
5551    fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
5552        let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
5553        let mut lexer = Lexer::new(source);
5554
5555        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5556        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5557        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5558        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5559        assert_next_token(&mut lexer, TokenKind::And, None);
5560        assert_next_token(&mut lexer, TokenKind::Word, Some("!"));
5561        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5562        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5563        assert_next_token(&mut lexer, TokenKind::Word, Some("=~"));
5564        assert_next_token(&mut lexer, TokenKind::Word, Some("^{"));
5565        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5566        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5567        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5568        assert_next_token(&mut lexer, TokenKind::Newline, None);
5569        assert!(lexer.next_lexed_token().is_none());
5570    }
5571
5572    #[test]
5573    fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
5574        let source = "echo -{$(echo a),b}-\n";
5575        let mut lexer = Lexer::new(source);
5576
5577        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5578        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$(echo a),b}-"));
5579        assert_next_token(&mut lexer, TokenKind::Newline, None);
5580        assert!(lexer.next_lexed_token().is_none());
5581    }
5582
5583    #[test]
5584    fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
5585        let source = "echo -{$((1 + 2)),b}-\n";
5586        let mut lexer = Lexer::new(source);
5587
5588        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5589        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$((1 + 2)),b}-"));
5590        assert_next_token(&mut lexer, TokenKind::Newline, None);
5591        assert!(lexer.next_lexed_token().is_none());
5592    }
5593
5594    #[test]
5595    fn test_operators() {
5596        let mut lexer = Lexer::new("a |& b | c && d || e; f &");
5597
5598        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5599        assert_next_token(&mut lexer, TokenKind::PipeBoth, None);
5600        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5601        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5602        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5603        assert_next_token(&mut lexer, TokenKind::And, None);
5604        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5605        assert_next_token(&mut lexer, TokenKind::Or, None);
5606        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5607        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5608        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5609        assert_next_token(&mut lexer, TokenKind::Background, None);
5610        assert!(lexer.next_lexed_token().is_none());
5611    }
5612
5613    #[test]
5614    fn test_double_left_bracket_requires_separator() {
5615        let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");
5616
5617        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5618        assert_next_token(&mut lexer, TokenKind::Word, Some("foo"));
5619        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5620        assert_next_token(&mut lexer, TokenKind::Newline, None);
5621        assert_next_token(&mut lexer, TokenKind::Word, Some("[[z]"));
5622        assert_next_token(&mut lexer, TokenKind::Newline, None);
5623        assert!(lexer.next_lexed_token().is_none());
5624    }
5625
    #[test]
    fn test_redirects() {
        // Covers the full redirect operator set, including the zsh-style
        // `>>|` / `2>>|` forms and a numbered clobber (`2>|`).
        let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");

        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
        // `>>|` lexes as a plain append.
        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
        // `2>>|` carries the fd in a dedicated kind.
        assert_next_token(&mut lexer, TokenKind::RedirectFdAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
        // `2>|`: the fd prefix rides on the Clobber token, not in its text.
        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Clobber);
        assert_eq!(token.fd_value(), Some(2));
        assert_eq!(token_text(&token, lexer.input), None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
        assert_next_token(&mut lexer, TokenKind::RedirectIn, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("g"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("h"));
        assert_next_token(&mut lexer, TokenKind::HereString, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("i"));
        assert_next_token(&mut lexer, TokenKind::RedirectBothAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("j"));
        assert_next_token(&mut lexer, TokenKind::RedirectReadWrite, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("k"));
    }
5655
5656    #[test]
5657    fn test_comment() {
5658        let mut lexer = Lexer::new("echo hello # this is a comment\necho world");
5659
5660        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5661        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5662        assert_next_token(&mut lexer, TokenKind::Newline, None);
5663        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5664        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5665    }
5666
    #[test]
    fn test_comment_token_with_span() {
        // Comment tokens (via the with-comments stream) carry 1-based
        // line/column spans covering the `#` through end of line.
        let mut lexer = Lexer::new("# lead\necho hi # tail");

        let comment = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(comment.kind, TokenKind::Comment);
        // Token text excludes the leading `#` but keeps the space after it.
        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
        assert_eq!(comment.span.start.line, 1);
        assert_eq!(comment.span.start.column, 1);
        assert_eq!(comment.span.end.line, 1);
        // End column is exclusive: "# lead" is six characters wide.
        assert_eq!(comment.span.end.column, 7);

        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));

        // Inline comment: `#` is the ninth character of line 2.
        let inline = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(inline.kind, TokenKind::Comment);
        assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
        assert_eq!(inline.span.start.line, 2);
        assert_eq!(inline.span.start.column, 9);
    }
5689
5690    #[test]
5691    fn test_comment_token_preserves_hash_boundaries() {
5692        let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");
5693
5694        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5695        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
5696        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
5697        assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
5698        assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
5699        assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
5700        assert!(lexer.next_lexed_token_with_comments().is_none());
5701    }
5702
5703    #[test]
5704    fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5705        let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5706
5707        let mut saw_comment = false;
5708        while let Some(token) = lexer.next_lexed_token_with_comments() {
5709            if token.kind == TokenKind::Comment {
5710                saw_comment = true;
5711                break;
5712            }
5713        }
5714
5715        assert!(
5716            !saw_comment,
5717            "zsh inline glob controls inside [[ ]] should not lex as comments"
5718        );
5719    }
5720
5721    #[test]
5722    fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5723        let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5724
5725        let mut saw_comment = false;
5726        while let Some(token) = lexer.next_lexed_token_with_comments() {
5727            if token.kind == TokenKind::Comment {
5728                saw_comment = true;
5729                break;
5730            }
5731        }
5732
5733        assert!(
5734            !saw_comment,
5735            "zsh arithmetic char literals inside (( )) should not lex as comments"
5736        );
5737    }
5738
    #[test]
    fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
        // Regression from a zsh prompt hook: a double-quoted `${1//…/…}`
        // replacement whose pattern embeds `"` and `'` characters must lex as
        // one QuotedWord instead of breaking at the inner quotes.
        let mut lexer = Lexer::new(
            "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
        );

        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
        assert_next_token(
            &mut lexer,
            TokenKind::LiteralWord,
            Some("\\e]133;C;cmdline_url=%s\\a"),
        );
        // The entire replacement expression, nested expansion and arithmetic
        // substitution included, is a single QuotedWord token.
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
    }
5759
    #[test]
    fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
        // Same replacement word as the test above, but inside a zsh anonymous
        // function `() { … } "$1"`: the tricky word must not consume the
        // function's closing `}` or the trailing argument.
        let mut lexer = Lexer::new(
            "() {\n  builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
        );

        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
        assert_next_token(
            &mut lexer,
            TokenKind::LiteralWord,
            Some("\\e]133;C;cmdline_url=%s\\a"),
        );
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        // Closing brace and the function's argument must still be produced.
        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
    }
5787
5788    #[test]
5789    fn test_variable_words() {
5790        let mut lexer = Lexer::new("echo $HOME $USER");
5791
5792        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5793        assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
5794        assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
5795        assert!(lexer.next_lexed_token().is_none());
5796    }
5797
5798    #[test]
5799    fn test_pipeline_tokens() {
5800        let mut lexer = Lexer::new("echo hello | cat");
5801
5802        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5803        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5804        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5805        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5806        assert!(lexer.next_lexed_token().is_none());
5807    }
5808
5809    #[test]
5810    fn test_read_heredoc() {
5811        // Simulate state after reading "cat <<EOF" - positioned at newline before content
5812        let mut lexer = Lexer::new("\nhello\nworld\nEOF");
5813        let content = lexer.read_heredoc("EOF", false);
5814        assert_eq!(content.content, "hello\nworld\n");
5815    }
5816
5817    #[test]
5818    fn test_read_heredoc_single_line() {
5819        let mut lexer = Lexer::new("\ntest\nEOF");
5820        let content = lexer.read_heredoc("EOF", false);
5821        assert_eq!(content.content, "test\n");
5822    }
5823
5824    #[test]
5825    fn test_read_heredoc_full_scenario() {
5826        // Full scenario: "cat <<EOF\nhello\nworld\nEOF"
5827        let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
5828
5829        // Parser would read these tokens
5830        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5831        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5832        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5833
5834        // Now read heredoc content
5835        let content = lexer.read_heredoc("EOF", false);
5836        assert_eq!(content.content, "hello\nworld\n");
5837    }
5838
5839    #[test]
5840    fn test_read_heredoc_with_redirect() {
5841        // Rest-of-line (> file.txt) is re-injected into the lexer buffer
5842        let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
5843        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5844        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5845        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5846        let content = lexer.read_heredoc("EOF", false);
5847        assert_eq!(content.content, "hello\n");
5848        // The redirect tokens are now available from the lexer
5849        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5850        assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
5851    }
5852
5853    #[test]
5854    fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
5855        let source = "cat <<EOF | grep hello \\\n  | sort \\\n  > out.txt\nhello\nEOF\n";
5856        let mut lexer = Lexer::new(source);
5857
5858        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5859        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5860        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5861
5862        let heredoc = lexer.read_heredoc("EOF", false);
5863        assert_eq!(heredoc.content, "hello\n");
5864
5865        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5866        assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
5867        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5868        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5869        assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
5870        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5871        assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
5872    }
5873
5874    #[test]
5875    fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
5876        let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
5877        let mut lexer = Lexer::new(source);
5878
5879        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5880        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5881        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5882
5883        let heredoc = lexer.read_heredoc("EOF", false);
5884        assert_eq!(heredoc.content, "1\n2\n3\n");
5885    }
5886
5887    #[test]
5888    fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
5889        let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
5890        let mut lexer = Lexer::new(source);
5891
5892        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5893        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5894        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5895
5896        let heredoc = lexer.read_heredoc("EOF", false);
5897        assert_eq!(heredoc.content, "body\n");
5898    }
5899
5900    #[test]
5901    fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
5902        let source = "cat <<EOF # note \\\nbody\nEOF\n";
5903        let mut lexer = Lexer::new(source);
5904
5905        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5906        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5907        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5908
5909        let heredoc = lexer.read_heredoc("EOF", false);
5910        assert_eq!(heredoc.content, "body\n");
5911    }
5912
5913    #[test]
5914    fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
5915        let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
5916        let mut lexer = Lexer::new(source);
5917
5918        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5919        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5920        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5921        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5922
5923        let heredoc = lexer.read_heredoc("EOF", false);
5924        assert_eq!(heredoc.content, "body\n");
5925
5926        assert_next_token(&mut lexer, TokenKind::RightParen, None);
5927    }
5928
5929    #[test]
5930    fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
5931        let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
5932        let mut lexer = Lexer::new(source);
5933
5934        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5935        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5936        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5937
5938        let heredoc = lexer.read_heredoc("EOF", false);
5939        assert_eq!(heredoc.content, "1\n");
5940
5941        assert_next_token(&mut lexer, TokenKind::Pipe, None);
5942        assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
5943    }
5944
    #[test]
    fn test_read_heredoc_with_redirect_preserves_following_spans() {
        // Replayed tokens after a heredoc must keep spans that slice the
        // ORIGINAL source correctly — the re-injection must not shift offsets.
        let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));

        let heredoc = lexer.read_heredoc("EOF", false);
        assert_eq!(heredoc.content, "hello\n");

        // Replayed `>` still points at its original source bytes.
        let redirect = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(redirect.kind, TokenKind::RedirectOut);
        assert_eq!(redirect.span.slice(source), ">");

        let target = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(target.kind, TokenKind::Word);
        assert_eq!(
            token_text(&target, lexer.input).as_deref(),
            Some("file.txt")
        );
        assert_eq!(target.span.slice(source), "file.txt");

        let newline = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(newline.kind, TokenKind::Newline);
        assert_eq!(newline.span.slice(source), "\n");

        // Tokens from lines after the heredoc also keep accurate spans.
        let comment = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(comment.kind, TokenKind::Comment);
        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
        assert_eq!(comment.span.slice(source), "# done");
    }
5978
5979    #[test]
5980    fn test_comment_with_unicode() {
5981        // Comment containing multi-byte UTF-8 characters
5982        let source = "# café résumé\necho ok";
5983        let mut lexer = Lexer::new(source);
5984
5985        let comment = lexer.next_lexed_token_with_comments().unwrap();
5986        assert_eq!(comment.kind, TokenKind::Comment);
5987        assert_eq!(
5988            token_text(&comment, lexer.input).as_deref(),
5989            Some(" café résumé")
5990        );
5991        // Span should cover exactly the comment bytes (including #)
5992        let start = comment.span.start.offset;
5993        let end = comment.span.end.offset;
5994        assert_eq!(start, 0);
5995        assert_eq!(&source[start..end], "# café résumé");
5996        assert!(source.is_char_boundary(start));
5997        assert!(source.is_char_boundary(end));
5998
5999        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6000        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
6001    }
6002
6003    #[test]
6004    fn test_comment_with_cjk_characters() {
6005        // CJK characters are 3-byte UTF-8; offsets must land on char boundaries
6006        let source = "# 你好世界\necho ok";
6007        let mut lexer = Lexer::new(source);
6008
6009        let comment = lexer.next_lexed_token_with_comments().unwrap();
6010        assert_eq!(comment.kind, TokenKind::Comment);
6011        assert_eq!(
6012            token_text(&comment, lexer.input).as_deref(),
6013            Some(" 你好世界")
6014        );
6015        let start = comment.span.start.offset;
6016        let end = comment.span.end.offset;
6017        assert_eq!(&source[start..end], "# 你好世界");
6018        assert!(source.is_char_boundary(start));
6019        assert!(source.is_char_boundary(end));
6020    }
6021
6022    #[test]
6023    fn test_heredoc_with_comments_inside() {
6024        // Comments inside heredoc body should NOT appear as comment tokens
6025        let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
6026        let mut lexer = Lexer::new(source);
6027
6028        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6029        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6030        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6031
6032        let heredoc = lexer.read_heredoc("EOF", false);
6033        assert_eq!(heredoc.content, "# not a comment\nreal line\n");
6034
6035        // After heredoc, replayed line termination should appear before
6036        // tokens from following source lines.
6037        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6038        let comment = lexer.next_lexed_token_with_comments().unwrap();
6039        assert_eq!(comment.kind, TokenKind::Comment);
6040        assert_eq!(
6041            token_text(&comment, lexer.input).as_deref(),
6042            Some(" real comment")
6043        );
6044    }
6045
6046    #[test]
6047    fn test_heredoc_with_hash_in_variable() {
6048        // ${var#pattern} inside heredoc should not produce comment tokens
6049        let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
6050        let mut lexer = Lexer::new(source);
6051
6052        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6053        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6054        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6055
6056        let heredoc = lexer.read_heredoc("EOF", false);
6057        assert_eq!(heredoc.content, "val=${x#prefix}\n");
6058    }
6059
6060    #[test]
6061    fn test_heredoc_span_does_not_leak() {
6062        // Heredoc content span must be within source bounds and must not
6063        // overlap with content before or after.
6064        let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
6065        let mut lexer = Lexer::new(source);
6066
6067        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6068        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6069        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6070
6071        let heredoc = lexer.read_heredoc("EOF", false);
6072        let start = heredoc.content_span.start.offset;
6073        let end = heredoc.content_span.end.offset;
6074        assert!(
6075            end <= source.len(),
6076            "heredoc span end ({end}) exceeds source length ({})",
6077            source.len()
6078        );
6079        assert_eq!(&source[start..end], "hello\nworld\n");
6080
6081        // Tokens after heredoc should still parse correctly
6082        assert_next_token(&mut lexer, TokenKind::Newline, None);
6083        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6084        assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
6085    }
6086
    #[test]
    fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
        // Autoconf regression: after a backslash-quoted heredoc delimiter
        // (`<<\_ACEOF`) whose body contains an unbalanced backtick, the words
        // on the following lines — which themselves contain backtick command
        // substitutions — must keep accurate spans and segment boundaries.
        let source = "\
cat <<\\_ACEOF
Use these variables to override the choices made by `configure' or to help
it to find libraries and programs with nonstandard names/locations.
_ACEOF
ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
";
        let mut lexer = Lexer::new(source);

        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
        // The delimiter token's span includes the quoting backslash.
        let delimiter = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(delimiter.kind, TokenKind::Word);
        assert_eq!(delimiter.span.slice(source), "\\_ACEOF");

        // Body is taken verbatim — the stray backtick stays literal text.
        let heredoc = lexer.read_heredoc("_ACEOF", false);
        assert_eq!(
            heredoc.content,
            "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
        );

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);

        let first = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(first.kind, TokenKind::Word);
        assert_eq!(
            first.span.slice(source),
            "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
        );
        // Each segment's text must equal the source slice of its own span —
        // i.e. the spans are not shifted by the heredoc replay.
        let first_segments = first
            .word()
            .unwrap()
            .segments()
            .map(|segment| {
                (
                    segment.kind(),
                    segment.as_str().to_string(),
                    segment.span().map(|span| span.slice(source).to_string()),
                )
            })
            .collect::<Vec<_>>();
        assert_eq!(
            first_segments,
            vec![
                (
                    LexedWordSegmentKind::Plain,
                    "ac_dir_suffix=/".to_string(),
                    Some("ac_dir_suffix=/".to_string()),
                ),
                (
                    LexedWordSegmentKind::Plain,
                    "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
                    Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
                ),
            ]
        );

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);

        // Second backtick word: same invariants one line further on.
        let second = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(second.kind, TokenKind::Word);
        assert_eq!(
            second.span.slice(source),
            "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
        );
        let second_segments = second
            .word()
            .unwrap()
            .segments()
            .map(|segment| {
                (
                    segment.kind(),
                    segment.as_str().to_string(),
                    segment.span().map(|span| span.slice(source).to_string()),
                )
            })
            .collect::<Vec<_>>();
        assert_eq!(
            second_segments,
            vec![
                (
                    LexedWordSegmentKind::Plain,
                    "ac_top_builddir_sub=".to_string(),
                    Some("ac_top_builddir_sub=".to_string()),
                ),
                (
                    LexedWordSegmentKind::Plain,
                    "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
                    Some(
                        "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
                            .to_string(),
                    ),
                ),
            ]
        );
    }
6186
6187    #[test]
6188    fn test_heredoc_with_unicode_content() {
6189        // Heredoc containing multi-byte characters; spans must be on char boundaries
6190        let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
6191        let mut lexer = Lexer::new(source);
6192
6193        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6194        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6195        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6196
6197        let heredoc = lexer.read_heredoc("EOF", false);
6198        assert_eq!(heredoc.content, "# 你好\ncafé\n");
6199        let start = heredoc.content_span.start.offset;
6200        let end = heredoc.content_span.end.offset;
6201        assert!(
6202            source.is_char_boundary(start),
6203            "heredoc span start ({start}) not on char boundary"
6204        );
6205        assert!(
6206            source.is_char_boundary(end),
6207            "heredoc span end ({end}) not on char boundary"
6208        );
6209        assert_eq!(&source[start..end], "# 你好\ncafé\n");
6210    }
6211
6212    #[test]
6213    fn test_assoc_compound_assignment() {
6214        // declare -A m=([foo]="bar" [baz]="qux") should keep the compound
6215        // assignment as a single Word token
6216        let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6217        assert_next_token(
6218            &mut lexer,
6219            TokenKind::Word,
6220            Some(r#"m=([foo]="bar" [baz]="qux")"#),
6221        );
6222        assert!(lexer.next_lexed_token().is_none());
6223    }
6224
6225    #[test]
6226    fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6227        let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6228        let mut lexer = Lexer::new(source);
6229
6230        let token = lexer.next_lexed_token().unwrap();
6231        assert_eq!(token.kind, TokenKind::Word);
6232        assert_eq!(token.span.slice(source), source);
6233        assert!(lexer.next_lexed_token().is_none());
6234    }
6235
6236    #[test]
6237    fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6238        let source = r#"foo\_bar@(baz|qux)"#;
6239        let mut lexer = Lexer::new(source);
6240
6241        let token = lexer.next_lexed_token().unwrap();
6242        assert_eq!(token.kind, TokenKind::Word);
6243        assert_eq!(token.span.slice(source), source);
6244        assert!(lexer.next_lexed_token().is_none());
6245    }
6246
6247    #[test]
6248    fn test_indexed_array_not_collapsed() {
6249        // arr=("hello world") should NOT be collapsed — parser handles
6250        // quoted elements token-by-token via the LeftParen path
6251        let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6252        assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6253        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6254    }
6255
6256    #[test]
6257    fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
6258        let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
6259        let mut lexer = Lexer::new(source);
6260
6261        assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
6262        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6263
6264        let token = lexer.next_lexed_token().unwrap();
6265        assert_eq!(token.kind, TokenKind::Word);
6266        assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);
6267
6268        let word = token.word().unwrap();
6269        let segments: Vec<_> = word
6270            .segments()
6271            .map(|segment| (segment.kind(), segment.as_str().to_string()))
6272            .collect();
6273        assert_eq!(
6274            segments,
6275            vec![
6276                (
6277                    LexedWordSegmentKind::DoubleQuoted,
6278                    "$plugin_dir".to_string()
6279                ),
6280                (LexedWordSegmentKind::Plain, "/*".to_string()),
6281                (LexedWordSegmentKind::Plain, "(:t)".to_string()),
6282            ]
6283        );
6284
6285        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6286        assert!(lexer.next_lexed_token().is_none());
6287    }
6288
6289    #[test]
6290    fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
6291        let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
6292        let mut lexer = Lexer::new(source);
6293
6294        assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
6295        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6296
6297        let token = lexer.next_lexed_token().unwrap();
6298        assert_eq!(token.kind, TokenKind::Word);
6299        assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);
6300
6301        let word = token.word().unwrap();
6302        let segments: Vec<_> = word
6303            .segments()
6304            .map(|segment| (segment.kind(), segment.as_str().to_string()))
6305            .collect();
6306        assert_eq!(
6307            segments,
6308            vec![
6309                (
6310                    LexedWordSegmentKind::DoubleQuoted,
6311                    "$__GREP_CACHE_FILE".to_string()
6312                ),
6313                (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
6314            ]
6315        );
6316
6317        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6318        assert!(lexer.next_lexed_token().is_none());
6319    }
6320
6321    #[test]
6322    fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6323        let source = r#"$dir/${~pats}(N)"#;
6324        let mut lexer = Lexer::new(source);
6325
6326        let token = lexer.next_lexed_token().unwrap();
6327        assert_eq!(token.kind, TokenKind::Word);
6328        assert_eq!(token.span.slice(source), source);
6329        assert!(lexer.next_lexed_token().is_none());
6330    }
6331
6332    #[test]
6333    fn test_dollar_word_does_not_absorb_function_parens() {
6334        let mut lexer = Lexer::new(r#"foo$x()"#);
6335
6336        assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6337        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6338        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6339        assert!(lexer.next_lexed_token().is_none());
6340    }
6341
6342    #[test]
6343    fn test_command_substitution_word_does_not_absorb_function_parens() {
6344        let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6345
6346        assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6347        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6348        assert_next_token(&mut lexer, TokenKind::RightParen, None);
6349        assert!(lexer.next_lexed_token().is_none());
6350    }
6351
6352    /// Regression test for fuzz crash: single digit at EOF should not panic
6353    /// (crash-13c5f6f887a11b2296d67f9857975d63b205ac4b)
6354    #[test]
6355    fn test_digit_at_eof_no_panic() {
6356        // A lone digit with no following redirect operator must not panic
6357        let mut lexer = Lexer::new("2");
6358        let token = lexer.next_lexed_token();
6359        assert!(token.is_some());
6360    }
6361
6362    /// Issue #599: Nested ${...} inside unquoted ${...} must be a single token.
6363    #[test]
6364    fn test_nested_brace_expansion_single_token() {
6365        // ${arr[${#arr[@]} - 1]} should be ONE word token, not split at inner }
6366        let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6367        assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6368        // No more tokens — everything was consumed
6369        assert!(lexer.next_lexed_token().is_none());
6370    }
6371
6372    /// Simple ${var} still works after brace depth change.
6373    #[test]
6374    fn test_simple_brace_expansion_unchanged() {
6375        let mut lexer = Lexer::new("${foo}");
6376        assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6377        assert!(lexer.next_lexed_token().is_none());
6378    }
6379
6380    #[test]
6381    fn test_nvm_fixture_lexes_without_stalling() {
6382        let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6383        let mut lexer = Lexer::new(input);
6384        let mut tokens = 0usize;
6385
6386        while lexer.next_lexed_token().is_some() {
6387            tokens += 1;
6388            assert!(
6389                tokens < 100_000,
6390                "lexer should continue making progress on the nvm fixture"
6391            );
6392        }
6393
6394        assert!(tokens > 0, "nvm fixture should produce at least one token");
6395    }
6396
6397    #[test]
6398    fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
6399        let input = concat!(
6400            "case \"${_input_type:-}\" in\n",
6401            "  html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
6402            "  org)  _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
6403            "esac\n",
6404        );
6405
6406        assert_non_newline_tokens_stay_on_one_line(input);
6407
6408        let mut lexer = Lexer::new(input);
6409        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6410            .map(|token| (token.kind, token_text(&token, input)))
6411            .collect::<Vec<_>>();
6412        assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
6413        assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
6414    }
6415
6416    #[test]
6417    fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6418        let input = concat!(
6419            "case $2 in\n",
6420            "  cygwin*) bin='cygwin32/bin' ;|\n",
6421            "esac\n",
6422        );
6423
6424        let mut lexer = Lexer::new(input);
6425        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6426            .map(|token| (token.kind, token_text(&token, input)))
6427            .collect::<Vec<_>>();
6428
6429        assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6430        assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6431        assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6432    }
6433
6434    #[test]
6435    fn test_inline_if_with_array_append_stays_line_local() {
6436        let input = concat!(
6437            "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
6438            "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
6439        );
6440
6441        assert_non_newline_tokens_stay_on_one_line(input);
6442    }
6443
6444    #[test]
6445    fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6446        let source = "unsetopt interactive_comments\n#literal\n";
6447        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6448        let mut lexer = Lexer::with_profile(source, &profile);
6449
6450        assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6451        assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6452        assert_next_token(&mut lexer, TokenKind::Newline, None);
6453        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6454    }
6455
6456    #[test]
6457    fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
6458        let source = "setopt rc_quotes\nprint 'a''b'\n";
6459        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6460        let mut lexer = Lexer::with_profile(source, &profile);
6461
6462        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6463        assert_next_token(&mut lexer, TokenKind::Word, Some("rc_quotes"));
6464        assert_next_token(&mut lexer, TokenKind::Newline, None);
6465        assert_next_token(&mut lexer, TokenKind::Word, Some("print"));
6466        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("a'b"));
6467    }
6468
6469    #[test]
6470    fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
6471        let source = "setopt ignore_braces\n{ echo }\n";
6472        let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6473        let mut lexer = Lexer::with_profile(source, &profile);
6474
6475        assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6476        assert_next_token(&mut lexer, TokenKind::Word, Some("ignore_braces"));
6477        assert_next_token(&mut lexer, TokenKind::Newline, None);
6478        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
6479        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6480        assert_next_token(&mut lexer, TokenKind::Word, Some("}"));
6481    }
6482
    #[test]
    fn test_heredoc_in_arithmetic_fuzz_crash() {
        // Regression test: the fuzzer found that heredoc re-injection inside
        // arithmetic context can push self.offset past self.input.len(),
        // causing a panic in read_unquoted_segment's borrowed-slice path.
        //
        // The byte blob below is the minimized fuzzer input, reproduced
        // verbatim — do not reformat or "clean up" these bytes, or the test
        // may stop reproducing the original crash. It decodes as valid UTF-8
        // (the unwrap below relies on that) but is intentionally nonsensical
        // shell containing nested `(( ... ))` and `<<E`-style heredoc starts.
        let data: &[u8] = &[
            35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
            40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
            33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
            119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
            0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
            119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
            122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
            49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
            119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
            122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
            61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
            32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
            122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
            39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
            122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
            32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
            0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
        ];
        // The fuzz harness fed &str input, so the blob is expected to decode.
        let input = std::str::from_utf8(data).unwrap();
        // Wrap the payload in $(( )) to force arithmetic context.
        let script = format!("echo $(({input}))\n");
        // Must not panic.
        let _ = crate::parser::Parser::new(&script).parse();
    }
6514}