1use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Compact bit set attached to every lexed token.
///
/// Two independent bits are tracked: whether the token carries cooked text
/// and whether the token has been marked synthetic.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Bit raised by [`TokenFlags::cooked_text`].
    const COOKED_TEXT: u8 = 1 << 0;
    /// Bit raised by [`TokenFlags::with_synthetic`].
    const SYNTHETIC: u8 = 1 << 1;

    /// Flag set with no bits raised.
    const fn empty() -> Self {
        Self(0)
    }

    /// Flag set with only the cooked-text bit raised.
    const fn cooked_text() -> Self {
        let bits = Self::COOKED_TEXT;
        Self(bits)
    }

    /// Returns a copy of `self` with the synthetic bit raised as well.
    pub(crate) const fn with_synthetic(self) -> Self {
        let bits = self.0 | Self::SYNTHETIC;
        Self(bits)
    }

    /// Whether the cooked-text bit is raised.
    pub(crate) const fn has_cooked_text(self) -> bool {
        (self.0 & Self::COOKED_TEXT) == Self::COOKED_TEXT
    }

    /// Whether the synthetic bit is raised.
    pub(crate) const fn is_synthetic(self) -> bool {
        (self.0 & Self::SYNTHETIC) == Self::SYNTHETIC
    }
}
40
41#[derive(Debug, Clone, PartialEq, Eq)]
42pub(crate) enum TokenText<'a> {
43 Borrowed(&'a str),
44 Shared {
45 source: Arc<str>,
46 range: Range<usize>,
47 },
48 Owned(String),
49}
50
51impl TokenText<'_> {
52 pub(crate) fn as_str(&self) -> &str {
53 match self {
54 Self::Borrowed(text) => text,
55 Self::Shared { source, range } => &source[range.clone()],
56 Self::Owned(text) => text,
57 }
58 }
59
60 fn into_owned<'a>(self) -> TokenText<'a> {
61 match self {
62 Self::Borrowed(text) => TokenText::Owned(text.to_string()),
63 Self::Shared { source, range } => TokenText::Shared { source, range },
64 Self::Owned(text) => TokenText::Owned(text),
65 }
66 }
67
68 fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
69 match self {
70 Self::Borrowed(text) => span
71 .filter(|span| span.end.offset <= source.len())
72 .map_or_else(
73 || TokenText::Owned(text.to_string()),
74 |span| TokenText::Shared {
75 source: Arc::clone(source),
76 range: span.start.offset..span.end.offset,
77 },
78 ),
79 Self::Shared { source, range } => TokenText::Shared { source, range },
80 Self::Owned(text) => TokenText::Owned(text),
81 }
82 }
83}
84
/// Classification of a single segment inside a lexed word.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LexedWordSegmentKind {
    /// Kind chosen for `TokenKind::Word` tokens.
    Plain,
    /// Kind chosen for `TokenKind::LiteralWord` tokens.
    SingleQuoted,
    /// NOTE(review): presumably the `$'...'` quoting form; confirm against the producer.
    DollarSingleQuoted,
    /// Kind chosen for `TokenKind::QuotedWord` tokens.
    DoubleQuoted,
    /// NOTE(review): presumably the `$"..."` quoting form; confirm against the producer.
    DollarDoubleQuoted,
    /// Fallback kind for every other token kind.
    Composite,
}
101
102#[derive(Debug, Clone, PartialEq, Eq)]
104pub(crate) struct LexedWordSegment<'a> {
105 kind: LexedWordSegmentKind,
106 text: TokenText<'a>,
107 span: Option<Span>,
108 wrapper_span: Option<Span>,
109}
110
111impl<'a> LexedWordSegment<'a> {
112 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
113 Self {
114 kind,
115 text: TokenText::Borrowed(text),
116 span,
117 wrapper_span: span,
118 }
119 }
120
121 fn borrowed_with_spans(
122 kind: LexedWordSegmentKind,
123 text: &'a str,
124 span: Option<Span>,
125 wrapper_span: Option<Span>,
126 ) -> Self {
127 Self {
128 kind,
129 text: TokenText::Borrowed(text),
130 span,
131 wrapper_span,
132 }
133 }
134
135 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
136 Self {
137 kind,
138 text: TokenText::Owned(text),
139 span: None,
140 wrapper_span: None,
141 }
142 }
143
144 fn owned_with_spans(
145 kind: LexedWordSegmentKind,
146 text: String,
147 span: Option<Span>,
148 wrapper_span: Option<Span>,
149 ) -> Self {
150 Self {
151 kind,
152 text: TokenText::Owned(text),
153 span,
154 wrapper_span,
155 }
156 }
157
158 pub(crate) fn as_str(&self) -> &str {
160 self.text.as_str()
161 }
162
163 pub(crate) const fn text_is_source_backed(&self) -> bool {
164 matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
165 }
166
167 pub(crate) const fn kind(&self) -> LexedWordSegmentKind {
169 self.kind
170 }
171
172 pub(crate) const fn span(&self) -> Option<Span> {
174 self.span
175 }
176
177 pub(crate) fn wrapper_span(&self) -> Option<Span> {
179 self.wrapper_span.or(self.span)
180 }
181
182 fn rebased(mut self, base: Position) -> Self {
183 self.span = self.span.map(|span| span.rebased(base));
184 self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
185 self
186 }
187
188 fn into_owned<'b>(self) -> LexedWordSegment<'b> {
189 LexedWordSegment {
190 kind: self.kind,
191 text: self.text.into_owned(),
192 span: self.span,
193 wrapper_span: self.wrapper_span,
194 }
195 }
196
197 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
198 LexedWordSegment {
199 kind: self.kind,
200 text: self.text.into_shared(source, self.span),
201 span: self.span,
202 wrapper_span: self.wrapper_span,
203 }
204 }
205}
206
207#[derive(Debug, Clone, PartialEq, Eq)]
209pub(crate) struct LexedWord<'a> {
210 primary_segment: LexedWordSegment<'a>,
211 trailing_segments: Vec<LexedWordSegment<'a>>,
212}
213
214impl<'a> LexedWord<'a> {
215 fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
216 Self {
217 primary_segment,
218 trailing_segments: Vec::new(),
219 }
220 }
221
222 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
223 Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
224 }
225
226 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
227 Self::from_segment(LexedWordSegment::owned(kind, text))
228 }
229
230 fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
231 self.trailing_segments.push(segment);
232 }
233
234 pub(crate) fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
236 std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
237 }
238
239 pub(crate) fn text(&self) -> Option<&str> {
241 self.single_segment().map(LexedWordSegment::as_str)
242 }
243
244 pub(crate) fn joined_text(&self) -> String {
246 let mut text = String::new();
247 for segment in self.segments() {
248 text.push_str(segment.as_str());
249 }
250 text
251 }
252
253 pub(crate) fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
255 self.trailing_segments
256 .is_empty()
257 .then_some(&self.primary_segment)
258 }
259
260 fn has_cooked_text(&self) -> bool {
261 self.segments()
262 .any(|segment| matches!(segment.text, TokenText::Owned(_)))
263 }
264
265 fn rebased(mut self, base: Position) -> Self {
266 self.primary_segment = self.primary_segment.rebased(base);
267 self.trailing_segments = self
268 .trailing_segments
269 .into_iter()
270 .map(|segment| segment.rebased(base))
271 .collect();
272 self
273 }
274
275 fn into_owned<'b>(self) -> LexedWord<'b> {
276 LexedWord {
277 primary_segment: self.primary_segment.into_owned(),
278 trailing_segments: self
279 .trailing_segments
280 .into_iter()
281 .map(LexedWordSegment::into_owned)
282 .collect(),
283 }
284 }
285
286 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
287 LexedWord {
288 primary_segment: self.primary_segment.into_shared(source),
289 trailing_segments: self
290 .trailing_segments
291 .into_iter()
292 .map(|segment| segment.into_shared(source))
293 .collect(),
294 }
295 }
296}
297
/// Kinds of unterminated construct reported through error tokens.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LexerErrorKind {
    CommandSubstitution,
    BacktickSubstitution,
    SingleQuote,
    DoubleQuote,
}

impl LexerErrorKind {
    /// Fixed human-readable message for this error kind.
    pub(crate) const fn message(self) -> &'static str {
        match self {
            Self::DoubleQuote => "unterminated double quote",
            Self::SingleQuote => "unterminated single quote",
            Self::BacktickSubstitution => "unterminated backtick substitution",
            Self::CommandSubstitution => "unterminated command substitution",
        }
    }
}
322
323#[derive(Debug, Clone, PartialEq, Eq)]
324pub(crate) enum TokenPayload<'a> {
325 None,
326 Word(LexedWord<'a>),
327 Fd(i32),
328 FdPair(i32, i32),
329 Error(LexerErrorKind),
330}
331
332#[derive(Debug, Clone, PartialEq, Eq)]
338pub struct LexedToken<'a> {
339 pub kind: TokenKind,
341 pub span: Span,
343 pub(crate) flags: TokenFlags,
344 payload: TokenPayload<'a>,
345}
346
347impl<'a> LexedToken<'a> {
348 fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
349 match kind {
350 TokenKind::Word => LexedWordSegmentKind::Plain,
351 TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
352 TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
353 _ => LexedWordSegmentKind::Composite,
354 }
355 }
356
357 pub(crate) fn punctuation(kind: TokenKind) -> Self {
358 Self {
359 kind,
360 span: Span::new(),
361 flags: TokenFlags::empty(),
362 payload: TokenPayload::None,
363 }
364 }
365
366 fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
367 let flags = if word.has_cooked_text() {
368 TokenFlags::cooked_text()
369 } else {
370 TokenFlags::empty()
371 };
372
373 Self {
374 kind,
375 span: Span::new(),
376 flags,
377 payload: TokenPayload::Word(word),
378 }
379 }
380
381 fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
382 Self::with_word_payload(
383 kind,
384 LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
385 )
386 }
387
388 fn owned_word(kind: TokenKind, text: String) -> Self {
389 Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
390 }
391
392 fn comment() -> Self {
393 Self {
394 kind: TokenKind::Comment,
395 span: Span::new(),
396 flags: TokenFlags::empty(),
397 payload: TokenPayload::None,
398 }
399 }
400
401 fn fd(kind: TokenKind, fd: i32) -> Self {
402 Self {
403 kind,
404 span: Span::new(),
405 flags: TokenFlags::empty(),
406 payload: TokenPayload::Fd(fd),
407 }
408 }
409
410 fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
411 Self {
412 kind,
413 span: Span::new(),
414 flags: TokenFlags::empty(),
415 payload: TokenPayload::FdPair(src_fd, dst_fd),
416 }
417 }
418
419 fn error(kind: LexerErrorKind) -> Self {
420 Self {
421 kind: TokenKind::Error,
422 span: Span::new(),
423 flags: TokenFlags::empty(),
424 payload: TokenPayload::Error(kind),
425 }
426 }
427
428 pub(crate) fn with_span(mut self, span: Span) -> Self {
429 self.span = span;
430 self
431 }
432
433 pub(crate) fn rebased(mut self, base: Position) -> Self {
434 self.span = self.span.rebased(base);
435 self.payload = match self.payload {
436 TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
437 payload => payload,
438 };
439 self
440 }
441
442 pub(crate) fn with_synthetic_flag(mut self) -> Self {
443 self.flags = self.flags.with_synthetic();
444 self
445 }
446
447 pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
448 let payload = match self.payload {
449 TokenPayload::None => TokenPayload::None,
450 TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
451 TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
452 TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
453 TokenPayload::Error(kind) => TokenPayload::Error(kind),
454 };
455
456 LexedToken {
457 kind: self.kind,
458 span: self.span,
459 flags: self.flags,
460 payload,
461 }
462 }
463
464 pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
465 let payload = match self.payload {
466 TokenPayload::None => TokenPayload::None,
467 TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
468 TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
469 TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
470 TokenPayload::Error(kind) => TokenPayload::Error(kind),
471 };
472
473 LexedToken {
474 kind: self.kind,
475 span: self.span,
476 flags: self.flags,
477 payload,
478 }
479 }
480
481 pub(crate) fn word_text(&self) -> Option<&str> {
483 self.kind
484 .is_word_like()
485 .then_some(())
486 .and_then(|_| match &self.payload {
487 TokenPayload::Word(word) => word.text(),
488 _ => None,
489 })
490 }
491
492 pub(crate) fn word_string(&self) -> Option<String> {
494 self.kind
495 .is_word_like()
496 .then_some(())
497 .and_then(|_| match &self.payload {
498 TokenPayload::Word(word) => Some(word.joined_text()),
499 _ => None,
500 })
501 }
502
503 pub(crate) fn word(&self) -> Option<&LexedWord<'a>> {
505 match &self.payload {
506 TokenPayload::Word(word) => Some(word),
507 _ => None,
508 }
509 }
510
511 pub(crate) fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
513 if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
514 return None;
515 }
516
517 (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
518 .then(|| &source[self.span.start.offset..self.span.end.offset])
519 }
520
521 pub(crate) fn fd_value(&self) -> Option<i32> {
523 match self.payload {
524 TokenPayload::Fd(fd) => Some(fd),
525 _ => None,
526 }
527 }
528
529 pub(crate) fn fd_pair_value(&self) -> Option<(i32, i32)> {
531 match self.payload {
532 TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
533 _ => None,
534 }
535 }
536
537 pub(crate) fn error_kind(&self) -> Option<LexerErrorKind> {
539 match self.payload {
540 TokenPayload::Error(kind) => Some(kind),
541 _ => None,
542 }
543 }
544}
545
546#[derive(Debug, Clone, PartialEq)]
548pub(crate) struct HeredocRead {
549 pub content: String,
551 pub content_span: Span,
553}
554
/// Substitution depth limit used by `Lexer::new` and the test-only
/// `Lexer::with_profile` constructor.
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
/// NOTE(review): presumably bounds nested parameter-expansion scanning; the
/// use site is outside this part of the file.
const MAX_PARAMETER_EXPANSION_SCAN_DEPTH: usize = 4;
559
560#[derive(Clone, Debug)]
561struct Cursor<'a> {
562 rest: &'a str,
563}
564
565impl<'a> Cursor<'a> {
566 fn new(source: &'a str) -> Self {
567 Self { rest: source }
568 }
569
570 fn first(&self) -> Option<char> {
571 self.rest.chars().next()
572 }
573
574 fn second(&self) -> Option<char> {
575 let mut chars = self.rest.chars();
576 chars.next()?;
577 chars.next()
578 }
579
580 fn third(&self) -> Option<char> {
581 let mut chars = self.rest.chars();
582 chars.next()?;
583 chars.next()?;
584 chars.next()
585 }
586
587 fn bump(&mut self) -> Option<char> {
588 let ch = self.first()?;
589 self.rest = &self.rest[ch.len_utf8()..];
590 Some(ch)
591 }
592
593 fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
594 let start = self.rest;
595 let mut end = 0;
596
597 for ch in start.chars() {
598 if !predicate(ch) {
599 break;
600 }
601 end += ch.len_utf8();
602 }
603
604 self.rest = &start[end..];
605 &start[..end]
606 }
607
608 fn rest(&self) -> &'a str {
609 self.rest
610 }
611
612 fn skip_bytes(&mut self, count: usize) {
613 self.rest = &self.rest[count..];
614 }
615
616 fn find_byte(&self, byte: u8) -> Option<usize> {
617 memchr(byte, self.rest.as_bytes())
618 }
619}
620
621#[derive(Clone, Debug)]
622struct PositionMap<'a> {
623 source: &'a str,
624 line_starts: Arc<[usize]>,
625 cached: Position,
626}
627
/// Counters collected by the lexer when benchmarking is enabled.
#[cfg(feature = "benchmarking")]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct LexerBenchmarkCounters {
    /// Number of recorded `Lexer::current_position` calls.
    pub(crate) current_position_calls: u64,
}
633
634impl<'a> PositionMap<'a> {
635 fn new(source: &'a str) -> Self {
636 let mut line_starts =
637 Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
638 line_starts.push(0);
639 line_starts.extend(
640 source
641 .bytes()
642 .enumerate()
643 .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
644 );
645
646 Self {
647 source,
648 line_starts: line_starts.into(),
649 cached: Position::new(),
650 }
651 }
652
653 fn position(&mut self, offset: usize) -> Position {
654 if offset == self.cached.offset {
655 return self.cached;
656 }
657
658 let position = if offset > self.cached.offset && offset <= self.source.len() {
659 Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
660 } else {
661 self.position_uncached(offset)
662 };
663 self.cached = position;
664 position
665 }
666
667 fn position_uncached(&self, offset: usize) -> Position {
668 let offset = offset.min(self.source.len());
669 let line_index = self
670 .line_starts
671 .partition_point(|start| *start <= offset)
672 .saturating_sub(1);
673 let line_start = self.line_starts[line_index];
674 let line_text = &self.source[line_start..offset];
675 let column = if line_text.is_ascii() {
676 line_text.len() + 1
677 } else {
678 line_text.chars().count() + 1
679 };
680
681 Position {
682 line: line_index + 1,
683 column,
684 offset,
685 }
686 }
687
688 fn advance_from(mut position: Position, text: &str) -> Position {
689 position.offset += text.len();
690 let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
691 if newline_count == 0 {
692 position.column += if text.is_ascii() {
693 text.len()
694 } else {
695 text.chars().count()
696 };
697 return position;
698 }
699
700 position.line += newline_count;
701 let tail_start = memrchr(b'\n', text.as_bytes())
702 .map(|index| index + 1)
703 .unwrap_or_default();
704 let tail = &text[tail_start..];
705 position.column = if tail.is_ascii() {
706 tail.len() + 1
707 } else {
708 tail.chars().count() + 1
709 };
710 position
711 }
712}
713
714#[derive(Clone)]
720pub struct Lexer<'a> {
721 input: &'a str,
722 offset: usize,
724 cursor: Cursor<'a>,
725 position_map: PositionMap<'a>,
726 reinject_buf: VecDeque<char>,
729 reinject_resume_offset: Option<usize>,
731 max_subst_depth: usize,
733 initial_zsh_options: Option<ZshOptionState>,
734 zsh_timeline: Option<Arc<ZshOptionTimeline>>,
735 zsh_timeline_index: usize,
736 #[cfg(feature = "benchmarking")]
737 benchmark_counters: Option<LexerBenchmarkCounters>,
738}
739
740impl<'a> Lexer<'a> {
741 pub fn new(input: &'a str) -> Self {
743 Self::with_max_subst_depth_and_profile(
744 input,
745 DEFAULT_MAX_SUBST_DEPTH,
746 &ShellProfile::native(super::ShellDialect::Bash),
747 None,
748 )
749 }
750
751 pub(super) fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
754 Self::with_max_subst_depth_and_profile(
755 input,
756 max_depth,
757 &ShellProfile::native(super::ShellDialect::Bash),
758 None,
759 )
760 }
761
/// Test-only constructor: uses the default depth limit and, for the zsh
/// dialect, builds the option timeline from `input`.
#[cfg(test)]
fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
    let zsh_timeline = if shell_profile.dialect == super::ShellDialect::Zsh {
        ZshOptionTimeline::build(input, shell_profile).map(Arc::new)
    } else {
        None
    };
    Self::with_max_subst_depth_and_profile(
        input,
        DEFAULT_MAX_SUBST_DEPTH,
        shell_profile,
        zsh_timeline,
    )
}
776
777 pub(crate) fn with_max_subst_depth_and_profile(
778 input: &'a str,
779 max_depth: usize,
780 shell_profile: &ShellProfile,
781 zsh_timeline: Option<Arc<ZshOptionTimeline>>,
782 ) -> Self {
783 Self {
784 input,
785 offset: 0,
786 cursor: Cursor::new(input),
787 position_map: PositionMap::new(input),
788 reinject_buf: VecDeque::new(),
789 reinject_resume_offset: None,
790 max_subst_depth: max_depth,
791 initial_zsh_options: shell_profile.zsh_options().cloned(),
792 zsh_timeline,
793 zsh_timeline_index: 0,
794 #[cfg(feature = "benchmarking")]
795 benchmark_counters: None,
796 }
797 }
798
799 pub(super) fn position_at_offset(&self, offset: usize) -> Position {
800 self.position_map.position_uncached(offset)
801 }
802
803 fn current_position(&mut self) -> Position {
804 #[cfg(feature = "benchmarking")]
805 self.maybe_record_current_position_call();
806 self.position_map.position(self.offset)
807 }
808
/// Starts counting with freshly zeroed benchmark counters.
#[cfg(feature = "benchmarking")]
pub(crate) fn enable_benchmark_counters(&mut self) {
    let fresh = LexerBenchmarkCounters::default();
    self.benchmark_counters = Some(fresh);
}
813
/// Current counters, or zeroed counters when counting was never enabled.
#[cfg(feature = "benchmarking")]
pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
    match self.benchmark_counters {
        Some(counters) => counters,
        None => LexerBenchmarkCounters::default(),
    }
}
818
/// Bumps the `current_position` call counter when counting is enabled.
#[cfg(feature = "benchmarking")]
fn maybe_record_current_position_call(&mut self) {
    if let Some(counters) = self.benchmark_counters.as_mut() {
        counters.current_position_calls += 1;
    }
}
825
826 fn sync_offset_to_cursor(&mut self) {
827 if self.reinject_buf.is_empty()
828 && let Some(offset) = self.reinject_resume_offset.take()
829 {
830 self.offset = offset;
831 }
832 }
833
834 pub fn next_token_kind(&mut self) -> Option<TokenKind> {
840 self.next_lexed_token().map(|token| token.kind)
841 }
842
843 fn peek_char(&mut self) -> Option<char> {
844 self.sync_offset_to_cursor();
845 if let Some(&ch) = self.reinject_buf.front() {
846 Some(ch)
847 } else {
848 self.cursor.first()
849 }
850 }
851
852 fn advance(&mut self) -> Option<char> {
853 self.sync_offset_to_cursor();
854 let ch = if !self.reinject_buf.is_empty() {
855 self.reinject_buf.pop_front()
856 } else {
857 self.cursor.bump()
858 };
859 if let Some(c) = ch {
860 self.offset += c.len_utf8();
861 }
862 ch
863 }
864
865 fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
866 self.reinject_buf
867 .iter()
868 .copied()
869 .chain(self.cursor.rest().chars())
870 }
871
872 fn second_char(&self) -> Option<char> {
873 match self.reinject_buf.len() {
874 0 => self.cursor.second(),
875 1 => self.cursor.first(),
876 _ => self.reinject_buf.get(1).copied(),
877 }
878 }
879
880 fn third_char(&self) -> Option<char> {
881 match self.reinject_buf.len() {
882 0 => self.cursor.third(),
883 1 => self.cursor.second(),
884 2 => self.cursor.first(),
885 _ => self.reinject_buf.get(2).copied(),
886 }
887 }
888
889 fn fourth_char(&self) -> Option<char> {
890 match self.reinject_buf.len() {
891 0 => self.cursor.rest().chars().nth(3),
892 1 => self.cursor.third(),
893 2 => self.cursor.second(),
894 3 => self.cursor.first(),
895 _ => self.reinject_buf.get(3).copied(),
896 }
897 }
898
899 fn consume_source_bytes(&mut self, byte_len: usize) {
900 debug_assert!(self.reinject_buf.is_empty());
901 self.sync_offset_to_cursor();
902 self.offset += byte_len;
903 self.cursor.skip_bytes(byte_len);
904 }
905
906 fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
907 debug_assert!(self.reinject_buf.is_empty());
908 self.offset += byte_len;
909 }
910
911 fn consume_ascii_chars(&mut self, count: usize) {
912 if self.reinject_buf.is_empty() {
913 self.consume_source_bytes(count);
914 return;
915 }
916
917 for _ in 0..count {
918 self.advance();
919 }
920 }
921
922 fn source_horizontal_whitespace_len(&self) -> usize {
923 self.cursor
924 .rest()
925 .as_bytes()
926 .iter()
927 .take_while(|byte| matches!(**byte, b' ' | b'\t'))
928 .count()
929 }
930
931 fn source_ascii_plain_word_len(&self) -> usize {
932 self.cursor
933 .rest()
934 .as_bytes()
935 .iter()
936 .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
937 .count()
938 }
939
/// Byte index of the first `"`, `\`, `$` or `` ` `` in `source`, if any.
fn find_double_quote_special(source: &str) -> Option<usize> {
    // All four characters are ASCII, so a char search and a byte search
    // yield the same index.
    source.find(|ch: char| matches!(ch, '"' | '\\' | '$' | '`'))
}
946
947 fn ensure_capture_from_source(
948 &self,
949 capture: &mut Option<String>,
950 start: Position,
951 end: Position,
952 ) {
953 if capture.is_none() {
954 *capture = Some(self.input[start.offset..end.offset].to_string());
955 }
956 }
957
/// Appends `ch` to the capture when one exists; otherwise does nothing.
fn push_capture_char(capture: &mut Option<String>, ch: char) {
    if let Some(text) = capture {
        text.push(ch);
    }
}
963
/// Appends `text` to the capture when one exists; otherwise does nothing.
fn push_capture_str(capture: &mut Option<String>, text: &str) {
    if let Some(current) = capture {
        current.push_str(text);
    }
}
969
970 fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
971 if let Some(timeline) = self.zsh_timeline.as_ref() {
972 while self.zsh_timeline_index < timeline.entries.len()
973 && timeline.entries[self.zsh_timeline_index].offset <= self.offset
974 {
975 self.zsh_timeline_index += 1;
976 }
977 return if self.zsh_timeline_index == 0 {
978 self.initial_zsh_options.as_ref()
979 } else {
980 Some(&timeline.entries[self.zsh_timeline_index - 1].state)
981 };
982 }
983
984 self.initial_zsh_options.as_ref()
985 }
986
987 fn comments_enabled(&mut self) -> bool {
988 !self
989 .current_zsh_options()
990 .is_some_and(|options| options.interactive_comments.is_definitely_off())
991 }
992
993 fn rc_quotes_enabled(&mut self) -> bool {
994 self.current_zsh_options()
995 .is_some_and(|options| options.rc_quotes.is_definitely_on())
996 }
997
998 fn ignore_braces_enabled(&mut self) -> bool {
999 self.current_zsh_options()
1000 .is_some_and(|options| options.ignore_braces.is_definitely_on())
1001 }
1002
1003 fn ignore_close_braces_enabled(&mut self) -> bool {
1004 self.current_zsh_options().is_some_and(|options| {
1005 options.ignore_braces.is_definitely_on()
1006 || options.ignore_close_braces.is_definitely_on()
1007 })
1008 }
1009
1010 fn brace_ccl_enabled(&mut self) -> bool {
1011 self.current_zsh_options()
1012 .is_some_and(|options| options.brace_ccl.is_definitely_on())
1013 }
1014
1015 fn should_treat_hash_as_word_char(&mut self) -> bool {
1016 if !self.comments_enabled() {
1017 return true;
1018 }
1019 self.reinject_buf.is_empty()
1020 && (self
1021 .input
1022 .get(..self.offset)
1023 .and_then(|prefix| prefix.chars().next_back())
1024 .is_some_and(|prev| {
1025 !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
1026 })
1027 || self.is_inside_unclosed_double_paren_on_line())
1028 }
1029
1030 fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
1031 capture
1032 .as_deref()
1033 .unwrap_or(&self.input[start.offset..self.offset])
1034 }
1035
1036 fn current_word_surface_is_single_char(
1037 &self,
1038 start: Position,
1039 capture: &Option<String>,
1040 target: char,
1041 ) -> bool {
1042 let text = self.current_word_text(start, capture);
1043 if !text.contains('\x00') {
1044 let mut encoded = [0; 4];
1045 return text == target.encode_utf8(&mut encoded);
1046 }
1047
1048 let mut chars = text.chars().filter(|&ch| ch != '\x00');
1049 matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
1050 }
1051
1052 fn current_word_surface_last_char<'b>(
1053 &'b self,
1054 start: Position,
1055 capture: &'b Option<String>,
1056 ) -> Option<char> {
1057 self.current_word_text(start, capture)
1058 .chars()
1059 .rev()
1060 .find(|&ch| ch != '\x00')
1061 }
1062
1063 fn current_word_surface_ends_with_char(
1064 &self,
1065 start: Position,
1066 capture: &Option<String>,
1067 target: char,
1068 ) -> bool {
1069 self.current_word_surface_last_char(start, capture) == Some(target)
1070 }
1071
1072 fn current_word_surface_ends_with_extglob_prefix(
1073 &self,
1074 start: Position,
1075 capture: &Option<String>,
1076 ) -> bool {
1077 self.current_word_surface_last_char(start, capture)
1078 .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
1079 }
1080
1081 fn current_word_surface_can_take_zsh_glob_modifier_suffix(
1082 &mut self,
1083 start: Position,
1084 capture: &Option<String>,
1085 ) -> bool {
1086 if self.current_zsh_options().is_none() || self.peek_char() != Some('(') {
1087 return false;
1088 }
1089
1090 let text = self.current_word_text(start, capture);
1091 if !text.contains('/') {
1092 return false;
1093 }
1094
1095 let mut chars = self.lookahead_chars();
1096 matches!((chars.next(), chars.next()), (Some('('), Some(':')))
1097 }
1098
1099 pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
1105 self.skip_whitespace();
1106 let start = self.current_position();
1107 let token = self.next_lexed_token_inner(false)?;
1108 let end = self.current_position();
1109 Some(token.with_span(Span::from_positions(start, end)))
1110 }
1111
1112 pub(super) fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
1114 self.skip_whitespace();
1115 let start = self.current_position();
1116 let token = self.next_lexed_token_inner(true)?;
1117 let end = self.current_position();
1118 Some(token.with_span(Span::from_positions(start, end)))
1119 }
1120
1121 fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
1123 let ch = self.peek_char()?;
1124
1125 match ch {
1126 '\n' => {
1127 self.consume_ascii_chars(1);
1128 Some(LexedToken::punctuation(TokenKind::Newline))
1129 }
1130 ';' => {
1131 if self.second_char() == Some(';') {
1132 if self.third_char() == Some('&') {
1133 self.consume_ascii_chars(3);
1134 Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp)) } else {
1136 self.consume_ascii_chars(2);
1137 Some(LexedToken::punctuation(TokenKind::DoubleSemicolon)) }
1139 } else if self.second_char() == Some('|') {
1140 self.consume_ascii_chars(2);
1141 Some(LexedToken::punctuation(TokenKind::SemiPipe)) } else if self.second_char() == Some('&') {
1143 self.consume_ascii_chars(2);
1144 Some(LexedToken::punctuation(TokenKind::SemiAmp)) } else {
1146 self.consume_ascii_chars(1);
1147 Some(LexedToken::punctuation(TokenKind::Semicolon))
1148 }
1149 }
1150 '|' => {
1151 if self.second_char() == Some('|') {
1152 self.consume_ascii_chars(2);
1153 Some(LexedToken::punctuation(TokenKind::Or))
1154 } else if self.second_char() == Some('&') {
1155 self.consume_ascii_chars(2);
1156 Some(LexedToken::punctuation(TokenKind::PipeBoth))
1157 } else {
1158 self.consume_ascii_chars(1);
1159 Some(LexedToken::punctuation(TokenKind::Pipe))
1160 }
1161 }
1162 '&' => {
1163 if self.second_char() == Some('&') {
1164 self.consume_ascii_chars(2);
1165 Some(LexedToken::punctuation(TokenKind::And))
1166 } else if self.second_char() == Some('>') {
1167 if self.third_char() == Some('>') {
1168 self.consume_ascii_chars(3);
1169 Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
1170 } else {
1171 self.consume_ascii_chars(2);
1172 Some(LexedToken::punctuation(TokenKind::RedirectBoth))
1173 }
1174 } else if self.second_char() == Some('|') {
1175 self.consume_ascii_chars(2);
1176 Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
1177 } else if self.second_char() == Some('!') {
1178 self.consume_ascii_chars(2);
1179 Some(LexedToken::punctuation(TokenKind::BackgroundBang))
1180 } else {
1181 self.consume_ascii_chars(1);
1182 Some(LexedToken::punctuation(TokenKind::Background))
1183 }
1184 }
1185 '>' => {
1186 if self.second_char() == Some('>') {
1187 if self.third_char() == Some('|') {
1188 self.consume_ascii_chars(3);
1189 } else {
1190 self.consume_ascii_chars(2);
1191 }
1192 Some(LexedToken::punctuation(TokenKind::RedirectAppend))
1193 } else if self.second_char() == Some('|') {
1194 self.consume_ascii_chars(2);
1195 Some(LexedToken::punctuation(TokenKind::Clobber))
1196 } else if self.second_char() == Some('(') {
1197 self.consume_ascii_chars(2);
1198 Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
1199 } else if self.second_char() == Some('&') {
1200 self.consume_ascii_chars(2);
1201 Some(LexedToken::punctuation(TokenKind::DupOutput))
1202 } else {
1203 self.consume_ascii_chars(1);
1204 Some(LexedToken::punctuation(TokenKind::RedirectOut))
1205 }
1206 }
1207 '<' => {
1208 if self.second_char() == Some('<') {
1209 if self.third_char() == Some('<') {
1210 self.consume_ascii_chars(3);
1211 Some(LexedToken::punctuation(TokenKind::HereString))
1212 } else if self.third_char() == Some('-') {
1213 self.consume_ascii_chars(3);
1214 Some(LexedToken::punctuation(TokenKind::HereDocStrip))
1215 } else {
1216 self.consume_ascii_chars(2);
1217 Some(LexedToken::punctuation(TokenKind::HereDoc))
1218 }
1219 } else if self.second_char() == Some('>') {
1220 self.consume_ascii_chars(2);
1221 Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
1222 } else if self.second_char() == Some('(') {
1223 self.consume_ascii_chars(2);
1224 Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
1225 } else if self.second_char() == Some('&') {
1226 self.consume_ascii_chars(2);
1227 Some(LexedToken::punctuation(TokenKind::DupInput))
1228 } else {
1229 self.consume_ascii_chars(1);
1230 Some(LexedToken::punctuation(TokenKind::RedirectIn))
1231 }
1232 }
1233 '(' => {
1234 if self.second_char() == Some('(') {
1235 self.consume_ascii_chars(2);
1236 Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
1237 } else {
1238 self.consume_ascii_chars(1);
1239 Some(LexedToken::punctuation(TokenKind::LeftParen))
1240 }
1241 }
1242 ')' => {
1243 if self.second_char() == Some(')') {
1244 self.consume_ascii_chars(2);
1245 Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
1246 } else {
1247 self.consume_ascii_chars(1);
1248 Some(LexedToken::punctuation(TokenKind::RightParen))
1249 }
1250 }
1251 '{' => {
1252 let start = self.current_position();
1253 if self.ignore_braces_enabled() {
1254 self.consume_ascii_chars(1);
1255 match self.peek_char() {
1256 Some(' ') | Some('\t') | Some('\n') | None => {
1257 Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
1258 }
1259 _ => self.read_word_starting_with("{", start),
1260 }
1261 } else if self.looks_like_brace_expansion() {
1262 self.read_brace_expansion_word()
1266 } else if self.is_brace_group_start() {
1267 self.advance();
1268 Some(LexedToken::punctuation(TokenKind::LeftBrace))
1269 } else if self.brace_literal_starts_case_pattern_delimiter() {
1270 self.read_word_starting_with("{", start)
1271 } else {
1272 self.read_brace_literal_word()
1273 }
1274 }
1275 '}' => {
1276 self.consume_ascii_chars(1);
1277 if self.ignore_close_braces_enabled() {
1278 Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
1279 } else {
1280 Some(LexedToken::punctuation(TokenKind::RightBrace))
1281 }
1282 }
1283 '[' => {
1284 let start = self.current_position();
1285 self.consume_ascii_chars(1);
1286 if self.peek_char() == Some('[')
1287 && matches!(
1288 self.second_char(),
1289 Some(' ') | Some('\t') | Some('\n') | None
1290 )
1291 {
1292 self.consume_ascii_chars(1);
1293 Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
1294 } else {
1295 match self.peek_char() {
1302 Some(' ') | Some('\t') | Some('\n') | None => {
1303 Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
1304 }
1305 _ => self.read_word_starting_with("[", start),
1306 }
1307 }
1308 }
1309 ']' => {
1310 if self.second_char() == Some(']') {
1311 self.consume_ascii_chars(2);
1312 Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
1313 } else {
1314 self.consume_ascii_chars(1);
1315 Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
1316 }
1317 }
1318 '\'' => self.read_single_quoted_string(),
1319 '"' => self.read_double_quoted_string(),
1320 '#' => {
1321 if self.should_treat_hash_as_word_char() {
1322 let start = self.current_position();
1323 return self.read_word_starting_with("#", start);
1324 }
1325 if preserve_comments {
1326 self.read_comment();
1327 Some(LexedToken::comment())
1328 } else {
1329 self.skip_comment();
1330 self.next_lexed_token_inner(false)
1331 }
1332 }
1333 '0'..='9' => self.read_word_or_fd_redirect(),
1335 _ => self.read_word(),
1336 }
1337 }
1338
1339 fn skip_whitespace(&mut self) {
1340 while let Some(ch) = self.peek_char() {
1341 if self.reinject_buf.is_empty() {
1342 let whitespace_len = self.source_horizontal_whitespace_len();
1343 if whitespace_len > 0 {
1344 self.consume_source_bytes(whitespace_len);
1345 continue;
1346 }
1347
1348 if self.cursor.rest().starts_with("\\\n") {
1349 self.consume_source_bytes(2);
1350 continue;
1351 }
1352 }
1353
1354 if ch == ' ' || ch == '\t' {
1355 self.consume_ascii_chars(1);
1356 } else if ch == '\\' {
1357 if self.second_char() == Some('\n') {
1359 self.consume_ascii_chars(2);
1360 } else {
1361 break;
1362 }
1363 } else {
1364 break;
1365 }
1366 }
1367 }
1368
1369 fn skip_comment(&mut self) {
1370 if self.reinject_buf.is_empty() {
1371 let end = self
1372 .cursor
1373 .find_byte(b'\n')
1374 .unwrap_or(self.cursor.rest().len());
1375 self.consume_source_bytes(end);
1376 return;
1377 }
1378
1379 while let Some(ch) = self.peek_char() {
1380 if ch == '\n' {
1381 break;
1382 }
1383 self.advance();
1384 }
1385 }
1386
1387 fn read_comment(&mut self) {
1388 debug_assert_eq!(self.peek_char(), Some('#'));
1389
1390 if self.reinject_buf.is_empty() {
1391 let rest = self.cursor.rest();
1392 let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1393 self.consume_source_bytes(end);
1394 return;
1395 }
1396
1397 self.advance(); while let Some(ch) = self.peek_char() {
1400 if ch == '\n' {
1401 break;
1402 }
1403 self.advance();
1404 }
1405 }
1406
1407 fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1408 if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1409 return false;
1410 }
1411
1412 let line_start = self.input[..self.offset]
1413 .rfind('\n')
1414 .map_or(0, |index| index + 1);
1415 let prefix = &self.input[line_start..self.offset];
1416 line_has_unclosed_double_paren(prefix)
1417 }
1418
1419 fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
1422 if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
1423 let Some(fd) = first_digit.to_digit(10) else {
1424 unreachable!("peeked ASCII digit should convert to a base-10 digit");
1425 };
1426 let fd = fd as i32;
1427
1428 match (self.second_char(), self.third_char()) {
1429 (Some('>'), Some('>')) => {
1430 if self.fourth_char() == Some('|') {
1431 self.consume_ascii_chars(4);
1432 } else {
1433 self.consume_ascii_chars(3);
1434 }
1435 return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
1436 }
1437 (Some('>'), Some('|')) => {
1438 self.consume_ascii_chars(3);
1439 return Some(LexedToken::fd(TokenKind::Clobber, fd));
1440 }
1441 (Some('>'), Some('&')) => {
1442 self.consume_ascii_chars(3);
1443
1444 let mut target_str = String::with_capacity(4);
1445 while let Some(c) = self.peek_char() {
1446 if c.is_ascii_digit() {
1447 target_str.push(c);
1448 self.advance();
1449 } else {
1450 break;
1451 }
1452 }
1453
1454 if target_str.is_empty() {
1455 return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1456 }
1457
1458 let target_fd: i32 = target_str.parse().unwrap_or(1);
1459 return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
1460 }
1461 (Some('>'), _) => {
1462 self.consume_ascii_chars(2);
1463 return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1464 }
1465 (Some('<'), Some('&')) => {
1466 self.consume_ascii_chars(3);
1467
1468 let mut target_str = String::with_capacity(4);
1469 while let Some(c) = self.peek_char() {
1470 if c.is_ascii_digit() || c == '-' {
1471 target_str.push(c);
1472 self.advance();
1473 if c == '-' {
1474 break;
1475 }
1476 } else {
1477 break;
1478 }
1479 }
1480
1481 if target_str == "-" {
1482 return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
1483 }
1484 let target_fd: i32 = target_str.parse().unwrap_or(0);
1485 return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
1486 }
1487 (Some('<'), Some('>')) => {
1488 self.consume_ascii_chars(3);
1489 return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
1490 }
1491 (Some('<'), Some('<')) => {}
1492 (Some('<'), _) => {
1493 self.consume_ascii_chars(2);
1494 return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
1495 }
1496 _ => {}
1497 }
1498 }
1499
1500 self.read_word()
1502 }
1503
1504 fn read_word_starting_with(
1505 &mut self,
1506 _prefix: &str,
1507 start: Position,
1508 ) -> Option<LexedToken<'a>> {
1509 let segment = match self.read_unquoted_segment(start) {
1510 Ok(segment) => segment,
1511 Err(kind) => return Some(LexedToken::error(kind)),
1512 };
1513 if segment.as_str().is_empty() {
1514 return None;
1515 }
1516 let mut lexed_word = LexedWord::from_segment(segment);
1517 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1518 return Some(LexedToken::error(kind));
1519 }
1520 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1521 }
1522
1523 fn read_word(&mut self) -> Option<LexedToken<'a>> {
1524 let start = self.current_position();
1525
1526 if self.reinject_buf.is_empty() {
1527 let ascii_len = self.source_ascii_plain_word_len();
1528 let chunk = if ascii_len > 0
1529 && self
1530 .cursor
1531 .rest()
1532 .as_bytes()
1533 .get(ascii_len)
1534 .is_none_or(|byte| byte.is_ascii())
1535 {
1536 self.consume_source_bytes(ascii_len);
1537 &self.input[start.offset..self.offset]
1538 } else {
1539 let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1540 self.advance_scanned_source_bytes(chunk.len());
1541 chunk
1542 };
1543 if !chunk.is_empty() {
1544 let continues = matches!(
1545 self.peek_char(),
1546 Some(next)
1547 if Self::is_word_char(next)
1548 || next == '$'
1549 || matches!(next, '\'' | '"')
1550 || next == '{'
1551 || (next == '\\' && self.second_char() == Some('\n'))
1552 || (next == '('
1553 && (chunk.ends_with('=')
1554 || Self::word_can_take_parenthesized_suffix(chunk)))
1555 );
1556 let continues = continues
1557 || (self.peek_char() == Some('(')
1558 && (self.looks_like_zsh_alternative_glob_suffix(chunk)
1559 || self.looks_like_zsh_glob_modifier_suffix(chunk)));
1560
1561 if !continues {
1562 let end = self.current_position();
1563 return Some(LexedToken::borrowed_word(
1564 TokenKind::Word,
1565 &self.input[start.offset..self.offset],
1566 Some(Span::from_positions(start, end)),
1567 ));
1568 }
1569
1570 if self.peek_char() == Some('(')
1571 && (chunk.ends_with('=')
1572 || Self::word_can_take_parenthesized_suffix(chunk)
1573 || self.looks_like_zsh_alternative_glob_suffix(chunk)
1574 || self.looks_like_zsh_glob_modifier_suffix(chunk))
1575 {
1576 return self.read_complex_word(start);
1577 }
1578
1579 let end = self.current_position();
1580 return self.finish_segmented_word(LexedWord::borrowed(
1581 LexedWordSegmentKind::Plain,
1582 &self.input[start.offset..self.offset],
1583 Some(Span::from_positions(start, end)),
1584 ));
1585 }
1586 }
1587
1588 self.read_complex_word(start)
1589 }
1590
1591 fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1592 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1593 return Some(LexedToken::error(kind));
1594 }
1595
1596 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1597 }
1598
1599 fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1600 if self.peek_char() == Some('$') {
1601 match self.second_char() {
1602 Some('\'') => return self.read_dollar_single_quoted_string(),
1603 Some('"') => return self.read_dollar_double_quoted_string(),
1604 _ => {}
1605 }
1606 }
1607
1608 let segment = match self.read_unquoted_segment(start) {
1609 Ok(segment) => segment,
1610 Err(kind) => return Some(LexedToken::error(kind)),
1611 };
1612
1613 if segment.as_str().is_empty() {
1614 return None;
1615 }
1616
1617 self.finish_segmented_word(LexedWord::from_segment(segment))
1618 }
1619
1620 fn read_unquoted_segment(
1621 &mut self,
1622 start: Position,
1623 ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1624 let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
1625 while let Some(ch) = self.peek_char() {
1626 if ch == '"' || ch == '\'' {
1627 break;
1628 } else if ch == '$' {
1629 if matches!(self.second_char(), Some('\'') | Some('"'))
1630 && (self.current_position().offset > start.offset
1631 || word.as_ref().is_some_and(|word| !word.is_empty()))
1632 {
1633 break;
1634 }
1635
1636 self.advance();
1638
1639 Self::push_capture_char(&mut word, ch); if self.peek_char() == Some('[') {
1643 Self::push_capture_char(&mut word, '[');
1644 self.advance();
1645 if !self.read_legacy_arithmetic_into(&mut word, start) {
1646 return Err(LexerErrorKind::CommandSubstitution);
1647 }
1648 } else if self.peek_char() == Some('(') {
1649 if self.second_char() == Some('(') {
1650 if !self.read_arithmetic_expansion_into(&mut word) {
1651 return Err(LexerErrorKind::CommandSubstitution);
1652 }
1653 } else {
1654 Self::push_capture_char(&mut word, '(');
1655 self.advance();
1656 if !self.read_command_subst_into(&mut word) {
1657 return Err(LexerErrorKind::CommandSubstitution);
1658 }
1659 }
1660 } else if self.peek_char() == Some('{') {
1661 Self::push_capture_char(&mut word, '{');
1664 self.advance();
1665 let _ = self.read_param_expansion_into(&mut word, start);
1666 } else {
1667 if let Some(c) = self.peek_char() {
1669 if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
1670 || c.is_ascii_digit()
1671 {
1672 Self::push_capture_char(&mut word, c);
1673 self.advance();
1674 } else {
1675 while let Some(c) = self.peek_char() {
1677 if c.is_ascii_alphanumeric() || c == '_' {
1678 Self::push_capture_char(&mut word, c);
1679 self.advance();
1680 } else {
1681 break;
1682 }
1683 }
1684 }
1685 }
1686 }
1687 } else if ch == '{' {
1688 if self.looks_like_mid_word_brace_segment() {
1689 Self::push_capture_char(&mut word, ch);
1692 self.advance();
1693 self.consume_mid_word_brace_segment(&mut word);
1694 } else {
1695 Self::push_capture_char(&mut word, ch);
1698 self.advance();
1699 }
1700 } else if ch == '`' {
1701 let capture_end = self.current_position();
1704 self.ensure_capture_from_source(&mut word, start, capture_end);
1705 Self::push_capture_char(&mut word, ch);
1706 self.advance(); let mut closed = false;
1708 while let Some(c) = self.peek_char() {
1709 Self::push_capture_char(&mut word, c);
1710 self.advance();
1711 if c == '`' {
1712 closed = true;
1713 break;
1714 }
1715 if c == '\\'
1716 && let Some(next) = self.peek_char()
1717 {
1718 Self::push_capture_char(&mut word, next);
1719 self.advance();
1720 }
1721 }
1722 if !closed {
1723 return Err(LexerErrorKind::BacktickSubstitution);
1724 }
1725 } else if ch == '\\' {
1726 let capture_end = self.current_position();
1727 self.ensure_capture_from_source(&mut word, start, capture_end);
1728 self.advance();
1729 if let Some(next) = self.peek_char() {
1730 if next == '\n' {
1731 self.advance();
1733 } else {
1734 Self::push_capture_char(&mut word, '\x00');
1739 Self::push_capture_char(&mut word, next);
1740 self.advance();
1741 if next == '{'
1742 && self.current_word_surface_is_single_char(start, &word, '{')
1743 && self.escaped_brace_sequence_looks_like_brace_expansion()
1744 {
1745 let mut depth = 1;
1746 while let Some(c) = self.peek_char() {
1747 Self::push_capture_char(&mut word, c);
1748 self.advance();
1749 match c {
1750 '{' => depth += 1,
1751 '}' => {
1752 depth -= 1;
1753 if depth == 0 {
1754 break;
1755 }
1756 }
1757 _ => {}
1758 }
1759 }
1760 }
1761 }
1762 } else {
1763 Self::push_capture_char(&mut word, '\\');
1764 }
1765 } else if ch == '('
1766 && self.current_word_surface_ends_with_char(start, &word, '=')
1767 && self.looks_like_assoc_assign()
1768 {
1769 Self::push_capture_char(&mut word, ch);
1772 self.advance();
1773 let mut depth = 1;
1774 while let Some(c) = self.peek_char() {
1775 Self::push_capture_char(&mut word, c);
1776 self.advance();
1777 match c {
1778 '(' => depth += 1,
1779 ')' => {
1780 depth -= 1;
1781 if depth == 0 {
1782 break;
1783 }
1784 }
1785 '"' => {
1786 while let Some(qc) = self.peek_char() {
1787 Self::push_capture_char(&mut word, qc);
1788 self.advance();
1789 if qc == '"' {
1790 break;
1791 }
1792 if qc == '\\'
1793 && let Some(esc) = self.peek_char()
1794 {
1795 Self::push_capture_char(&mut word, esc);
1796 self.advance();
1797 }
1798 }
1799 }
1800 '\'' => {
1801 while let Some(qc) = self.peek_char() {
1802 Self::push_capture_char(&mut word, qc);
1803 self.advance();
1804 if qc == '\'' {
1805 break;
1806 }
1807 }
1808 }
1809 '\\' => {
1810 if let Some(esc) = self.peek_char() {
1811 Self::push_capture_char(&mut word, esc);
1812 self.advance();
1813 }
1814 }
1815 _ => {}
1816 }
1817 }
1818 } else if ch == '('
1819 && (self.current_word_surface_ends_with_extglob_prefix(start, &word)
1820 || self.current_word_surface_can_take_zsh_glob_modifier_suffix(start, &word))
1821 {
1822 Self::push_capture_char(&mut word, ch);
1825 self.advance();
1826 let mut depth = 1;
1827 while let Some(c) = self.peek_char() {
1828 Self::push_capture_char(&mut word, c);
1829 self.advance();
1830 match c {
1831 '(' => depth += 1,
1832 ')' => {
1833 depth -= 1;
1834 if depth == 0 {
1835 break;
1836 }
1837 }
1838 '\\' => {
1839 if let Some(esc) = self.peek_char() {
1840 Self::push_capture_char(&mut word, esc);
1841 self.advance();
1842 }
1843 }
1844 _ => {}
1845 }
1846 }
1847 } else if Self::is_plain_word_char(ch) {
1848 if self.reinject_buf.is_empty() {
1849 let ascii_len = self.source_ascii_plain_word_len();
1850 let chunk = if ascii_len > 0
1851 && self
1852 .cursor
1853 .rest()
1854 .as_bytes()
1855 .get(ascii_len)
1856 .is_none_or(|byte| byte.is_ascii())
1857 {
1858 self.consume_source_bytes(ascii_len);
1859 &self.input[self.offset - ascii_len..self.offset]
1860 } else {
1861 let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1862 self.advance_scanned_source_bytes(chunk.len());
1863 chunk
1864 };
1865 Self::push_capture_str(&mut word, chunk);
1866 } else {
1867 Self::push_capture_char(&mut word, ch);
1868 self.advance();
1869 }
1870 } else {
1871 break;
1872 }
1873 }
1874
1875 if let Some(word) = word {
1876 let span = Some(Span::from_positions(start, self.current_position()));
1877 Ok(LexedWordSegment::owned_with_spans(
1878 LexedWordSegmentKind::Plain,
1879 word,
1880 span,
1881 span,
1882 ))
1883 } else {
1884 let end = self.current_position();
1885 Ok(LexedWordSegment::borrowed(
1886 LexedWordSegmentKind::Plain,
1887 &self.input[start.offset..self.offset],
1888 Some(Span::from_positions(start, end)),
1889 ))
1890 }
1891 }
1892
1893 fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1894 let segment = match self.read_single_quoted_segment() {
1895 Ok(segment) => segment,
1896 Err(kind) => return Some(LexedToken::error(kind)),
1897 };
1898 let mut word = LexedWord::from_segment(segment);
1899 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1900 return Some(LexedToken::error(kind));
1901 }
1902
1903 Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1904 }
1905
1906 fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1907 debug_assert_eq!(self.peek_char(), Some('\''));
1908
1909 let wrapper_start = self.current_position();
1910 self.consume_ascii_chars(1); let content_start = self.current_position();
1912 let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
1913 let mut content_end = content_start;
1914 let mut content = String::with_capacity(16);
1915 let mut closed = false;
1916
1917 if can_borrow {
1918 let rest = self.cursor.rest();
1919 if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
1920 self.consume_source_bytes(quote_index);
1921 content_end = self.current_position();
1922 self.consume_ascii_chars(1); closed = true;
1924 } else {
1925 self.consume_source_bytes(rest.len());
1926 }
1927 }
1928
1929 while let Some(ch) = self.peek_char() {
1930 if closed {
1931 break;
1932 }
1933 if ch == '\'' {
1934 if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
1935 if !can_borrow {
1936 content.push('\'');
1937 }
1938 self.advance();
1939 self.advance();
1940 continue;
1941 }
1942 content_end = self.current_position();
1943 self.consume_ascii_chars(1); closed = true;
1945 break;
1946 }
1947 if !can_borrow {
1948 content.push(ch);
1949 }
1950 self.advance();
1951 }
1952
1953 if !closed {
1954 return Err(LexerErrorKind::SingleQuote);
1955 }
1956
1957 let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
1958 let content_span = Some(Span::from_positions(content_start, content_end));
1959
1960 if can_borrow {
1961 Ok(LexedWordSegment::borrowed_with_spans(
1962 LexedWordSegmentKind::SingleQuoted,
1963 &self.input[content_start.offset..content_end.offset],
1964 content_span,
1965 wrapper_span,
1966 ))
1967 } else {
1968 Ok(LexedWordSegment::owned_with_spans(
1969 LexedWordSegmentKind::SingleQuoted,
1970 content,
1971 content_span,
1972 wrapper_span,
1973 ))
1974 }
1975 }
1976
1977 fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1978 let segment = match self.read_dollar_single_quoted_segment() {
1979 Ok(segment) => segment,
1980 Err(kind) => return Some(LexedToken::error(kind)),
1981 };
1982 let mut word = LexedWord::from_segment(segment);
1983 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1984 return Some(LexedToken::error(kind));
1985 }
1986
1987 let kind = if word.single_segment().is_some() {
1988 TokenKind::LiteralWord
1989 } else {
1990 TokenKind::Word
1991 };
1992
1993 Some(LexedToken::with_word_payload(kind, word))
1994 }
1995
1996 fn read_dollar_single_quoted_segment(
1997 &mut self,
1998 ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1999 debug_assert_eq!(self.peek_char(), Some('$'));
2000 debug_assert_eq!(self.second_char(), Some('\''));
2001
2002 let wrapper_start = self.current_position();
2003 self.consume_ascii_chars(2); let content_start = self.current_position();
2005 let mut out = String::with_capacity(16);
2006
2007 while let Some(ch) = self.peek_char() {
2008 if ch == '\'' {
2009 let content_end = self.current_position();
2010 self.advance();
2011 let wrapper_span =
2012 Some(Span::from_positions(wrapper_start, self.current_position()));
2013 let content_span = Some(Span::from_positions(content_start, content_end));
2014 return Ok(LexedWordSegment::owned_with_spans(
2015 LexedWordSegmentKind::DollarSingleQuoted,
2016 out,
2017 content_span,
2018 wrapper_span,
2019 ));
2020 }
2021
2022 if ch == '\\' {
2023 self.advance();
2024 if let Some(esc) = self.peek_char() {
2025 self.advance();
2026 match esc {
2027 'n' => out.push('\n'),
2028 't' => out.push('\t'),
2029 'r' => out.push('\r'),
2030 'a' => out.push('\x07'),
2031 'b' => out.push('\x08'),
2032 'f' => out.push('\x0C'),
2033 'v' => out.push('\x0B'),
2034 'e' | 'E' => out.push('\x1B'),
2035 '\\' => out.push('\\'),
2036 '\'' => out.push('\''),
2037 '"' => out.push('"'),
2038 '?' => out.push('?'),
2039 'c' => {
2040 if let Some(control) = self.peek_char() {
2041 self.advance();
2042 out.push(((control as u32 & 0x1F) as u8) as char);
2043 } else {
2044 out.push('\\');
2045 out.push('c');
2046 }
2047 }
2048 'x' => {
2049 let mut hex = String::new();
2050 for _ in 0..2 {
2051 if let Some(h) = self.peek_char() {
2052 if h.is_ascii_hexdigit() {
2053 hex.push(h);
2054 self.advance();
2055 } else {
2056 break;
2057 }
2058 }
2059 }
2060 if let Ok(val) = u8::from_str_radix(&hex, 16) {
2061 out.push(val as char);
2062 }
2063 }
2064 'u' => {
2065 let mut hex = String::new();
2066 for _ in 0..4 {
2067 if let Some(h) = self.peek_char() {
2068 if h.is_ascii_hexdigit() {
2069 hex.push(h);
2070 self.advance();
2071 } else {
2072 break;
2073 }
2074 }
2075 }
2076 if let Ok(val) = u32::from_str_radix(&hex, 16)
2077 && let Some(c) = char::from_u32(val)
2078 {
2079 out.push(c);
2080 }
2081 }
2082 'U' => {
2083 let mut hex = String::new();
2084 for _ in 0..8 {
2085 if let Some(h) = self.peek_char() {
2086 if h.is_ascii_hexdigit() {
2087 hex.push(h);
2088 self.advance();
2089 } else {
2090 break;
2091 }
2092 }
2093 }
2094 if let Ok(val) = u32::from_str_radix(&hex, 16)
2095 && let Some(c) = char::from_u32(val)
2096 {
2097 out.push(c);
2098 }
2099 }
2100 '0'..='7' => {
2101 let mut oct = String::new();
2102 oct.push(esc);
2103 for _ in 0..2 {
2104 if let Some(o) = self.peek_char() {
2105 if o.is_ascii_digit() && o < '8' {
2106 oct.push(o);
2107 self.advance();
2108 } else {
2109 break;
2110 }
2111 }
2112 }
2113 if let Ok(val) = u8::from_str_radix(&oct, 8) {
2114 out.push(val as char);
2115 }
2116 }
2117 _ => {
2118 out.push('\\');
2119 out.push(esc);
2120 }
2121 }
2122 } else {
2123 out.push('\\');
2124 }
2125 continue;
2126 }
2127
2128 out.push(ch);
2129 self.advance();
2130 }
2131
2132 Err(LexerErrorKind::SingleQuote)
2133 }
2134
2135 fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2136 let start = self.current_position();
2137
2138 if self.reinject_buf.is_empty() {
2139 let ascii_len = self.source_ascii_plain_word_len();
2140 let chunk = if ascii_len > 0
2141 && self
2142 .cursor
2143 .rest()
2144 .as_bytes()
2145 .get(ascii_len)
2146 .is_none_or(|byte| byte.is_ascii())
2147 {
2148 self.consume_source_bytes(ascii_len);
2149 &self.input[start.offset..self.offset]
2150 } else {
2151 let chunk = self.cursor.eat_while(Self::is_plain_word_char);
2152 self.advance_scanned_source_bytes(chunk.len());
2153 chunk
2154 };
2155 if chunk.is_empty() {
2156 return None;
2157 }
2158
2159 let end = self.current_position();
2160 return Some(LexedWordSegment::borrowed(
2161 LexedWordSegmentKind::Plain,
2162 &self.input[start.offset..self.offset],
2163 Some(Span::from_positions(start, end)),
2164 ));
2165 }
2166
2167 let ch = self.peek_char()?;
2168 if !Self::is_plain_word_char(ch) {
2169 return None;
2170 }
2171
2172 let mut text = String::with_capacity(16);
2173 while let Some(ch) = self.peek_char() {
2174 if !Self::is_plain_word_char(ch) {
2175 break;
2176 }
2177 text.push(ch);
2178 self.advance();
2179 }
2180
2181 Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
2182 }
2183
2184 fn append_segmented_continuation(
2187 &mut self,
2188 word: &mut LexedWord<'a>,
2189 ) -> Result<(), LexerErrorKind> {
2190 loop {
2191 match self.peek_char() {
2192 Some('\\') if self.second_char() == Some('\n') => {
2193 self.advance();
2194 self.advance();
2195 continue;
2196 }
2197 Some('\'') => {
2198 word.push_segment(self.read_single_quoted_segment()?);
2199 }
2200 Some('"') => {
2201 word.push_segment(self.read_double_quoted_segment()?);
2202 }
2203 Some('$') if self.second_char() == Some('\'') => {
2204 word.push_segment(self.read_dollar_single_quoted_segment()?);
2205 }
2206 Some('$') if self.second_char() == Some('"') => {
2207 word.push_segment(self.read_dollar_double_quoted_segment()?);
2208 }
2209 Some('(')
2210 if Self::lexed_word_can_take_parenthesized_suffix(word)
2211 || self.looks_like_zsh_alternative_glob_suffix(&word.joined_text())
2212 || self.looks_like_zsh_glob_modifier_suffix(&word.joined_text()) =>
2213 {
2214 let Some(segment) = self.read_parenthesized_word_suffix_segment() else {
2215 unreachable!("peeked '(' should produce a suffix segment");
2216 };
2217 word.push_segment(segment);
2218 }
2219 _ => {
2220 if let Some(segment) = self.read_plain_continuation_segment() {
2221 word.push_segment(segment);
2222 continue;
2223 }
2224
2225 let start = self.current_position();
2226 let plain = self.read_unquoted_segment(start)?;
2227 if plain.as_str().is_empty() {
2228 break;
2229 }
2230 word.push_segment(plain);
2231 }
2232 }
2233 }
2234
2235 Ok(())
2236 }
2237
2238 fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2239 debug_assert_eq!(self.peek_char(), Some('('));
2240
2241 let start = self.current_position();
2242 let mut depth = 0usize;
2243 let mut escaped = false;
2244 let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2245
2246 while let Some(ch) = self.peek_char() {
2247 if let Some(text) = text.as_mut() {
2248 text.push(ch);
2249 }
2250 self.advance();
2251
2252 if escaped {
2253 escaped = false;
2254 continue;
2255 }
2256
2257 match ch {
2258 '\\' => escaped = true,
2259 '(' => depth += 1,
2260 ')' => {
2261 depth = depth.saturating_sub(1);
2262 if depth == 0 {
2263 break;
2264 }
2265 }
2266 _ => {}
2267 }
2268 }
2269
2270 let end = self.current_position();
2271 let span = Some(Span::from_positions(start, end));
2272 if let Some(text) = text {
2273 Some(LexedWordSegment::owned_with_spans(
2274 LexedWordSegmentKind::Plain,
2275 text,
2276 span,
2277 span,
2278 ))
2279 } else {
2280 Some(LexedWordSegment::borrowed_with_spans(
2281 LexedWordSegmentKind::Plain,
2282 &self.input[start.offset..end.offset],
2283 span,
2284 span,
2285 ))
2286 }
2287 }
2288
2289 fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2290 self.read_double_quoted_word(false)
2291 }
2292
2293 fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2294 self.read_double_quoted_word(true)
2295 }
2296
2297 fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2298 let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2299 Ok(segment) => segment,
2300 Err(kind) => return Some(LexedToken::error(kind)),
2301 };
2302 let mut word = LexedWord::from_segment(segment);
2303 if let Err(kind) = self.append_segmented_continuation(&mut word) {
2304 return Some(LexedToken::error(kind));
2305 }
2306
2307 let kind = if word.single_segment().is_some() {
2308 TokenKind::QuotedWord
2309 } else {
2310 TokenKind::Word
2311 };
2312
2313 Some(LexedToken::with_word_payload(kind, word))
2314 }
2315
2316 fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2317 self.read_double_quoted_segment_with_dollar(false)
2318 }
2319
2320 fn read_dollar_double_quoted_segment(
2321 &mut self,
2322 ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2323 self.read_double_quoted_segment_with_dollar(true)
2324 }
2325
2326 fn read_double_quoted_segment_with_dollar(
2327 &mut self,
2328 dollar: bool,
2329 ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2330 if dollar {
2331 debug_assert_eq!(self.peek_char(), Some('$'));
2332 debug_assert_eq!(self.second_char(), Some('"'));
2333 } else {
2334 debug_assert_eq!(self.peek_char(), Some('"'));
2335 }
2336
2337 let wrapper_start = self.current_position();
2338 if dollar {
2339 self.consume_ascii_chars(2); } else {
2341 self.consume_ascii_chars(1); }
2343 let content_start = self.current_position();
2344 let mut content_end = content_start;
2345 let mut simple = self.reinject_buf.is_empty();
2346 let mut borrowable = self.reinject_buf.is_empty();
2347 let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2348 let mut closed = false;
2349
2350 while let Some(ch) = self.peek_char() {
2351 if simple {
2352 if self.reinject_buf.is_empty() {
2353 let rest = self.cursor.rest();
2354 match Self::find_double_quote_special(rest) {
2355 Some(index) if index > 0 => {
2356 self.consume_source_bytes(index);
2357 continue;
2358 }
2359 None => {
2360 self.consume_source_bytes(rest.len());
2361 return Err(LexerErrorKind::DoubleQuote);
2362 }
2363 _ => {}
2364 }
2365 }
2366
2367 match ch {
2368 '"' => {
2369 content_end = self.current_position();
2370 self.consume_ascii_chars(1); closed = true;
2372 break;
2373 }
2374 '\\' | '$' | '`' => {
2375 simple = false;
2376 if ch == '`' {
2377 borrowable = false;
2378 let capture_end = self.current_position();
2379 self.ensure_capture_from_source(
2380 &mut content,
2381 content_start,
2382 capture_end,
2383 );
2384 }
2385 }
2386 _ => {
2387 self.advance();
2388 }
2389 }
2390 if simple {
2391 continue;
2392 }
2393 }
2394
2395 match ch {
2396 '"' => {
2397 if borrowable {
2398 content_end = self.current_position();
2399 }
2400 self.consume_ascii_chars(1); closed = true;
2402 break;
2403 }
2404 '\\' => {
2405 let escape_start = self.current_position();
2406 self.advance();
2407 if let Some(next) = self.peek_char() {
2408 match next {
2409 '\n' => {
2410 borrowable = false;
2411 self.ensure_capture_from_source(
2412 &mut content,
2413 content_start,
2414 escape_start,
2415 );
2416 self.advance();
2417 }
2418 '$' => {
2419 borrowable = false;
2420 self.ensure_capture_from_source(
2421 &mut content,
2422 content_start,
2423 escape_start,
2424 );
2425 Self::push_capture_char(&mut content, '\x00');
2426 Self::push_capture_char(&mut content, '$');
2427 self.advance();
2428 }
2429 '"' | '\\' | '`' => {
2430 borrowable = false;
2431 self.ensure_capture_from_source(
2432 &mut content,
2433 content_start,
2434 escape_start,
2435 );
2436 if next == '\\' {
2437 Self::push_capture_char(&mut content, '\x00');
2438 }
2439 if next == '`' {
2440 Self::push_capture_char(&mut content, '\x00');
2441 }
2442 Self::push_capture_char(&mut content, next);
2443 self.advance();
2444 content_end = self.current_position();
2445 }
2446 _ => {
2447 Self::push_capture_char(&mut content, '\\');
2448 Self::push_capture_char(&mut content, next);
2449 self.advance();
2450 content_end = self.current_position();
2451 }
2452 }
2453 }
2454 }
2455 '$' => {
2456 Self::push_capture_char(&mut content, '$');
2457 self.advance();
2458 if self.peek_char() == Some('(') {
2459 if self.second_char() == Some('(') {
2460 self.read_arithmetic_expansion_into(&mut content);
2461 } else {
2462 Self::push_capture_char(&mut content, '(');
2463 self.advance();
2464 self.read_command_subst_into(&mut content);
2465 }
2466 } else if self.peek_char() == Some('{') {
2467 Self::push_capture_char(&mut content, '{');
2468 self.advance();
2469 borrowable &= self.read_param_expansion_into(&mut content, content_start);
2470 }
2471 content_end = self.current_position();
2472 }
2473 '`' => {
2474 borrowable = false;
2475 let capture_end = self.current_position();
2476 self.ensure_capture_from_source(&mut content, content_start, capture_end);
2477 Self::push_capture_char(&mut content, '`');
2478 self.advance(); while let Some(c) = self.peek_char() {
2480 Self::push_capture_char(&mut content, c);
2481 self.advance();
2482 if c == '`' {
2483 break;
2484 }
2485 if c == '\\'
2486 && let Some(next) = self.peek_char()
2487 {
2488 Self::push_capture_char(&mut content, next);
2489 self.advance();
2490 }
2491 }
2492 content_end = self.current_position();
2493 }
2494 _ => {
2495 Self::push_capture_char(&mut content, ch);
2496 self.advance();
2497 content_end = self.current_position();
2498 }
2499 }
2500 }
2501
2502 if !closed {
2503 return Err(LexerErrorKind::DoubleQuote);
2504 }
2505
2506 let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
2507 let content_span = Some(Span::from_positions(content_start, content_end));
2508
2509 if borrowable {
2510 Ok(LexedWordSegment::borrowed_with_spans(
2511 if dollar {
2512 LexedWordSegmentKind::DollarDoubleQuoted
2513 } else {
2514 LexedWordSegmentKind::DoubleQuoted
2515 },
2516 &self.input[content_start.offset..content_end.offset],
2517 content_span,
2518 wrapper_span,
2519 ))
2520 } else {
2521 Ok(LexedWordSegment::owned_with_spans(
2522 if dollar {
2523 LexedWordSegmentKind::DollarDoubleQuoted
2524 } else {
2525 LexedWordSegmentKind::DoubleQuoted
2526 },
2527 content.unwrap_or_default(),
2528 content_span,
2529 wrapper_span,
2530 ))
2531 }
2532 }
2533
2534 fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
2535 debug_assert_eq!(self.peek_char(), Some('('));
2536 debug_assert_eq!(self.second_char(), Some('('));
2537
2538 Self::push_capture_char(content, '(');
2539 self.advance();
2540 Self::push_capture_char(content, '(');
2541 self.advance();
2542
2543 let mut depth = 2;
2544 while let Some(c) = self.peek_char() {
2545 match c {
2546 '\\' => {
2547 Self::push_capture_char(content, c);
2548 self.advance();
2549 if let Some(next) = self.peek_char() {
2550 Self::push_capture_char(content, next);
2551 self.advance();
2552 }
2553 }
2554 '\'' => {
2555 Self::push_capture_char(content, c);
2556 self.advance();
2557 while let Some(quoted) = self.peek_char() {
2558 Self::push_capture_char(content, quoted);
2559 self.advance();
2560 if quoted == '\'' {
2561 break;
2562 }
2563 }
2564 }
2565 '"' => {
2566 let mut escaped = false;
2567 Self::push_capture_char(content, c);
2568 self.advance();
2569 while let Some(quoted) = self.peek_char() {
2570 Self::push_capture_char(content, quoted);
2571 self.advance();
2572 if escaped {
2573 escaped = false;
2574 continue;
2575 }
2576 match quoted {
2577 '\\' => escaped = true,
2578 '"' => break,
2579 _ => {}
2580 }
2581 }
2582 }
2583 '`' => {
2584 let mut escaped = false;
2585 Self::push_capture_char(content, c);
2586 self.advance();
2587 while let Some(quoted) = self.peek_char() {
2588 Self::push_capture_char(content, quoted);
2589 self.advance();
2590 if escaped {
2591 escaped = false;
2592 continue;
2593 }
2594 match quoted {
2595 '\\' => escaped = true,
2596 '`' => break,
2597 _ => {}
2598 }
2599 }
2600 }
2601 '(' => {
2602 Self::push_capture_char(content, c);
2603 self.advance();
2604 depth += 1;
2605 }
2606 ')' => {
2607 Self::push_capture_char(content, c);
2608 self.advance();
2609 depth -= 1;
2610 if depth == 0 {
2611 return true;
2612 }
2613 }
2614 _ => {
2615 Self::push_capture_char(content, c);
2616 self.advance();
2617 }
2618 }
2619 }
2620
2621 false
2622 }
2623
2624 fn read_legacy_arithmetic_into(
2625 &mut self,
2626 content: &mut Option<String>,
2627 segment_start: Position,
2628 ) -> bool {
2629 let mut bracket_depth = 1;
2630
2631 while let Some(c) = self.peek_char() {
2632 match c {
2633 '\\' => {
2634 Self::push_capture_char(content, c);
2635 self.advance();
2636 if let Some(next) = self.peek_char() {
2637 Self::push_capture_char(content, next);
2638 self.advance();
2639 }
2640 }
2641 '\'' => {
2642 Self::push_capture_char(content, c);
2643 self.advance();
2644 while let Some(quoted) = self.peek_char() {
2645 Self::push_capture_char(content, quoted);
2646 self.advance();
2647 if quoted == '\'' {
2648 break;
2649 }
2650 }
2651 }
2652 '"' => {
2653 let mut escaped = false;
2654 Self::push_capture_char(content, c);
2655 self.advance();
2656 while let Some(quoted) = self.peek_char() {
2657 Self::push_capture_char(content, quoted);
2658 self.advance();
2659 if escaped {
2660 escaped = false;
2661 continue;
2662 }
2663 match quoted {
2664 '\\' => escaped = true,
2665 '"' => break,
2666 _ => {}
2667 }
2668 }
2669 }
2670 '`' => {
2671 let mut escaped = false;
2672 Self::push_capture_char(content, c);
2673 self.advance();
2674 while let Some(quoted) = self.peek_char() {
2675 Self::push_capture_char(content, quoted);
2676 self.advance();
2677 if escaped {
2678 escaped = false;
2679 continue;
2680 }
2681 match quoted {
2682 '\\' => escaped = true,
2683 '`' => break,
2684 _ => {}
2685 }
2686 }
2687 }
2688 '[' => {
2689 Self::push_capture_char(content, c);
2690 self.advance();
2691 bracket_depth += 1;
2692 }
2693 ']' => {
2694 Self::push_capture_char(content, c);
2695 self.advance();
2696 bracket_depth -= 1;
2697 if bracket_depth == 0 {
2698 return true;
2699 }
2700 }
2701 '$' => {
2702 Self::push_capture_char(content, c);
2703 self.advance();
2704 if self.peek_char() == Some('(') {
2705 if self.second_char() == Some('(') {
2706 if !self.read_arithmetic_expansion_into(content) {
2707 return false;
2708 }
2709 } else {
2710 Self::push_capture_char(content, '(');
2711 self.advance();
2712 if !self.read_command_subst_into(content) {
2713 return false;
2714 }
2715 }
2716 } else if self.peek_char() == Some('{') {
2717 Self::push_capture_char(content, '{');
2718 self.advance();
2719 if !self.read_param_expansion_into(content, segment_start) {
2720 return false;
2721 }
2722 } else if self.peek_char() == Some('[') {
2723 Self::push_capture_char(content, '[');
2724 self.advance();
2725 if !self.read_legacy_arithmetic_into(content, segment_start) {
2726 return false;
2727 }
2728 }
2729 }
2730 _ => {
2731 Self::push_capture_char(content, c);
2732 self.advance();
2733 }
2734 }
2735 }
2736
2737 false
2738 }
2739
2740 fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
2744 self.read_command_subst_into_depth(content, 0)
2745 }
2746
2747 fn flush_command_subst_keyword(
2748 current_word: &mut String,
2749 pending_case_headers: &mut usize,
2750 case_clause_depths: &mut SmallVec<[usize; 4]>,
2751 depth: usize,
2752 word_started_at_command_start: &mut bool,
2753 ) {
2754 if current_word.is_empty() {
2755 *word_started_at_command_start = false;
2756 return;
2757 }
2758
2759 match current_word.as_str() {
2760 "case" if *word_started_at_command_start => *pending_case_headers += 1,
2761 "in" if *pending_case_headers > 0 => {
2762 *pending_case_headers -= 1;
2763 case_clause_depths.push(depth);
2764 }
2765 "esac" if *word_started_at_command_start => {
2766 case_clause_depths.pop();
2767 }
2768 _ => {}
2769 }
2770
2771 current_word.clear();
2772 *word_started_at_command_start = false;
2773 }
2774
2775 fn read_command_subst_heredoc_delimiter_into(
2776 &mut self,
2777 content: &mut Option<String>,
2778 ) -> Option<String> {
2779 while let Some(ch) = self.peek_char() {
2780 if !matches!(ch, ' ' | '\t') {
2781 break;
2782 }
2783 Self::push_capture_char(content, ch);
2784 self.advance();
2785 }
2786
2787 let mut cooked = String::new();
2788 let mut in_single = false;
2789 let mut in_double = false;
2790 let mut escaped = false;
2791 let mut saw_any = false;
2792
2793 while let Some(ch) = self.peek_char() {
2794 if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
2795 break;
2796 }
2797
2798 saw_any = true;
2799 Self::push_capture_char(content, ch);
2800 self.advance();
2801
2802 if escaped {
2803 cooked.push(ch);
2804 escaped = false;
2805 continue;
2806 }
2807
2808 match ch {
2809 '\\' if !in_single => escaped = true,
2810 '\'' if !in_double => in_single = !in_single,
2811 '"' if !in_single => in_double = !in_double,
2812 _ => cooked.push(ch),
2813 }
2814 }
2815
2816 saw_any.then_some(cooked)
2817 }
2818
2819 fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2820 Self::push_capture_char(content, '`');
2821 self.advance();
2822 while let Some(ch) = self.peek_char() {
2823 Self::push_capture_char(content, ch);
2824 self.advance();
2825 if ch == '\\' {
2826 if let Some(esc) = self.peek_char() {
2827 Self::push_capture_char(content, esc);
2828 self.advance();
2829 }
2830 continue;
2831 }
2832 if ch == '`' {
2833 break;
2834 }
2835 }
2836 }
2837
2838 fn read_command_subst_pending_heredoc_into(
2839 &mut self,
2840 content: &mut Option<String>,
2841 delimiter: &str,
2842 strip_tabs: bool,
2843 ) -> bool {
2844 loop {
2845 let mut line = String::new();
2846 let mut saw_newline = false;
2847
2848 while let Some(ch) = self.peek_char() {
2849 self.advance();
2850 if ch == '\n' {
2851 saw_newline = true;
2852 break;
2853 }
2854 line.push(ch);
2855 }
2856
2857 Self::push_capture_str(content, &line);
2858 if saw_newline {
2859 Self::push_capture_char(content, '\n');
2860 }
2861
2862 if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2863 return true;
2864 }
2865 }
2866 }
2867
2868 fn read_command_subst_into_depth(
2869 &mut self,
2870 content: &mut Option<String>,
2871 subst_depth: usize,
2872 ) -> bool {
2873 if subst_depth >= self.max_subst_depth {
2874 let mut depth = 1;
2876 while let Some(c) = self.peek_char() {
2877 self.advance();
2878 match c {
2879 '(' => depth += 1,
2880 ')' => {
2881 depth -= 1;
2882 if depth == 0 {
2883 Self::push_capture_char(content, ')');
2884 return true;
2885 }
2886 }
2887 _ => {}
2888 }
2889 }
2890 return false;
2891 }
2892
2893 let mut depth = 1;
2894 let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
2895 let mut pending_case_headers = 0usize;
2896 let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
2897 let mut current_word = String::with_capacity(16);
2898 let mut at_command_start = true;
2899 let mut expecting_redirection_target = false;
2900 let mut current_word_started_at_command_start = false;
2901 while let Some(c) = self.peek_char() {
2902 match c {
2903 '#' if !self.should_treat_hash_as_word_char() => {
2904 let had_word = !current_word.is_empty();
2905 Self::flush_command_subst_keyword(
2906 &mut current_word,
2907 &mut pending_case_headers,
2908 &mut case_clause_depths,
2909 depth,
2910 &mut current_word_started_at_command_start,
2911 );
2912 if had_word && expecting_redirection_target {
2913 expecting_redirection_target = false;
2914 }
2915 Self::push_capture_char(content, '#');
2916 self.advance();
2917 while let Some(comment_ch) = self.peek_char() {
2918 Self::push_capture_char(content, comment_ch);
2919 self.advance();
2920 if comment_ch == '\n' {
2921 for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
2922 if !self.read_command_subst_pending_heredoc_into(
2923 content, &delimiter, strip_tabs,
2924 ) {
2925 return false;
2926 }
2927 }
2928 at_command_start = true;
2929 expecting_redirection_target = false;
2930 break;
2931 }
2932 }
2933 }
2934 '(' => {
2935 Self::flush_command_subst_keyword(
2936 &mut current_word,
2937 &mut pending_case_headers,
2938 &mut case_clause_depths,
2939 depth,
2940 &mut current_word_started_at_command_start,
2941 );
2942 depth += 1;
2943 Self::push_capture_char(content, c);
2944 self.advance();
2945 at_command_start = true;
2946 expecting_redirection_target = false;
2947 }
2948 ')' => {
2949 Self::flush_command_subst_keyword(
2950 &mut current_word,
2951 &mut pending_case_headers,
2952 &mut case_clause_depths,
2953 depth,
2954 &mut current_word_started_at_command_start,
2955 );
2956 if case_clause_depths
2957 .last()
2958 .is_some_and(|case_depth| *case_depth == depth)
2959 {
2960 Self::push_capture_char(content, ')');
2961 self.advance();
2962 at_command_start = true;
2963 expecting_redirection_target = false;
2964 continue;
2965 }
2966 depth -= 1;
2967 self.advance();
2968 if depth == 0 {
2969 Self::push_capture_char(content, ')');
2970 return true;
2971 }
2972 Self::push_capture_char(content, c);
2973 at_command_start = false;
2974 expecting_redirection_target = false;
2975 }
2976 '"' => {
2977 let had_word = !current_word.is_empty();
2978 Self::flush_command_subst_keyword(
2979 &mut current_word,
2980 &mut pending_case_headers,
2981 &mut case_clause_depths,
2982 depth,
2983 &mut current_word_started_at_command_start,
2984 );
2985 if had_word && expecting_redirection_target {
2986 expecting_redirection_target = false;
2987 }
2988 Self::push_capture_char(content, '"');
2990 self.advance();
2991 while let Some(qc) = self.peek_char() {
2992 match qc {
2993 '"' => {
2994 Self::push_capture_char(content, '"');
2995 self.advance();
2996 break;
2997 }
2998 '\\' => {
2999 Self::push_capture_char(content, '\\');
3000 self.advance();
3001 if let Some(esc) = self.peek_char() {
3002 Self::push_capture_char(content, esc);
3003 self.advance();
3004 }
3005 }
3006 '$' => {
3007 Self::push_capture_char(content, '$');
3008 self.advance();
3009 if self.peek_char() == Some('(') {
3010 if self.second_char() == Some('(') {
3011 if !self.read_arithmetic_expansion_into(content) {
3012 return false;
3013 }
3014 } else {
3015 Self::push_capture_char(content, '(');
3016 self.advance();
3017 if !self
3018 .read_command_subst_into_depth(content, subst_depth + 1)
3019 {
3020 return false;
3021 }
3022 }
3023 }
3024 }
3025 _ => {
3026 Self::push_capture_char(content, qc);
3027 self.advance();
3028 }
3029 }
3030 }
3031 if expecting_redirection_target {
3032 expecting_redirection_target = false;
3033 } else {
3034 at_command_start = false;
3035 }
3036 }
3037 '\'' => {
3038 let had_word = !current_word.is_empty();
3039 Self::flush_command_subst_keyword(
3040 &mut current_word,
3041 &mut pending_case_headers,
3042 &mut case_clause_depths,
3043 depth,
3044 &mut current_word_started_at_command_start,
3045 );
3046 if had_word && expecting_redirection_target {
3047 expecting_redirection_target = false;
3048 }
3049 Self::push_capture_char(content, '\'');
3051 self.advance();
3052 while let Some(qc) = self.peek_char() {
3053 Self::push_capture_char(content, qc);
3054 self.advance();
3055 if qc == '\'' {
3056 break;
3057 }
3058 }
3059 if expecting_redirection_target {
3060 expecting_redirection_target = false;
3061 } else {
3062 at_command_start = false;
3063 }
3064 }
3065 '`' => {
3066 let had_word = !current_word.is_empty();
3067 Self::flush_command_subst_keyword(
3068 &mut current_word,
3069 &mut pending_case_headers,
3070 &mut case_clause_depths,
3071 depth,
3072 &mut current_word_started_at_command_start,
3073 );
3074 if had_word && expecting_redirection_target {
3075 expecting_redirection_target = false;
3076 }
3077 self.read_command_subst_backtick_segment_into(content);
3078 if expecting_redirection_target {
3079 expecting_redirection_target = false;
3080 } else {
3081 at_command_start = false;
3082 }
3083 }
3084 '$' if self.second_char() == Some('\'') => {
3085 let had_word = !current_word.is_empty();
3086 Self::flush_command_subst_keyword(
3087 &mut current_word,
3088 &mut pending_case_headers,
3089 &mut case_clause_depths,
3090 depth,
3091 &mut current_word_started_at_command_start,
3092 );
3093 if had_word && expecting_redirection_target {
3094 expecting_redirection_target = false;
3095 }
3096 Self::push_capture_char(content, '$');
3097 self.advance();
3098 Self::push_capture_char(content, '\'');
3099 self.advance();
3100 while let Some(qc) = self.peek_char() {
3101 Self::push_capture_char(content, qc);
3102 self.advance();
3103 if qc == '\\' {
3104 if let Some(esc) = self.peek_char() {
3105 Self::push_capture_char(content, esc);
3106 self.advance();
3107 }
3108 continue;
3109 }
3110 if qc == '\'' {
3111 break;
3112 }
3113 }
3114 if expecting_redirection_target {
3115 expecting_redirection_target = false;
3116 } else {
3117 at_command_start = false;
3118 }
3119 }
3120 '\\' => {
3121 let had_word = !current_word.is_empty();
3122 Self::flush_command_subst_keyword(
3123 &mut current_word,
3124 &mut pending_case_headers,
3125 &mut case_clause_depths,
3126 depth,
3127 &mut current_word_started_at_command_start,
3128 );
3129 if had_word && expecting_redirection_target {
3130 expecting_redirection_target = false;
3131 }
3132 Self::push_capture_char(content, '\\');
3133 self.advance();
3134 if let Some(esc) = self.peek_char() {
3135 Self::push_capture_char(content, esc);
3136 self.advance();
3137 }
3138 if expecting_redirection_target {
3139 expecting_redirection_target = false;
3140 } else {
3141 at_command_start = false;
3142 }
3143 }
3144 '<' if self.second_char() == Some('<') => {
3145 let word_was_redirection_fd = current_word_started_at_command_start
3146 && !current_word.is_empty()
3147 && current_word.chars().all(|current| current.is_ascii_digit());
3148 Self::flush_command_subst_keyword(
3149 &mut current_word,
3150 &mut pending_case_headers,
3151 &mut case_clause_depths,
3152 depth,
3153 &mut current_word_started_at_command_start,
3154 );
3155 if word_was_redirection_fd {
3156 at_command_start = true;
3157 }
3158
3159 Self::push_capture_char(content, '<');
3160 self.advance();
3161 Self::push_capture_char(content, '<');
3162 self.advance();
3163
3164 if self.peek_char() == Some('<') {
3165 Self::push_capture_char(content, '<');
3166 self.advance();
3167 expecting_redirection_target = true;
3168 continue;
3169 }
3170
3171 let strip_tabs = if self.peek_char() == Some('-') {
3172 Self::push_capture_char(content, '-');
3173 self.advance();
3174 true
3175 } else {
3176 false
3177 };
3178
3179 if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
3180 {
3181 pending_heredocs.push((delimiter, strip_tabs));
3182 expecting_redirection_target = false;
3183 } else {
3184 expecting_redirection_target = true;
3185 }
3186 }
3187 '>' | '<' => {
3188 let word_was_redirection_fd = current_word_started_at_command_start
3189 && !current_word.is_empty()
3190 && current_word.chars().all(|current| current.is_ascii_digit());
3191 Self::flush_command_subst_keyword(
3192 &mut current_word,
3193 &mut pending_case_headers,
3194 &mut case_clause_depths,
3195 depth,
3196 &mut current_word_started_at_command_start,
3197 );
3198 if word_was_redirection_fd {
3199 at_command_start = true;
3200 }
3201 Self::push_capture_char(content, c);
3202 self.advance();
3203 expecting_redirection_target = true;
3204 }
3205 '\n' => {
3206 Self::flush_command_subst_keyword(
3207 &mut current_word,
3208 &mut pending_case_headers,
3209 &mut case_clause_depths,
3210 depth,
3211 &mut current_word_started_at_command_start,
3212 );
3213 Self::push_capture_char(content, '\n');
3214 self.advance();
3215 for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
3216 if !self.read_command_subst_pending_heredoc_into(
3217 content, &delimiter, strip_tabs,
3218 ) {
3219 return false;
3220 }
3221 }
3222 at_command_start = true;
3223 expecting_redirection_target = false;
3224 }
3225 _ => {
3226 if c.is_ascii_alphanumeric() || c == '_' {
3227 if current_word.is_empty()
3228 && !expecting_redirection_target
3229 && at_command_start
3230 {
3231 current_word_started_at_command_start = true;
3232 at_command_start = false;
3233 }
3234 current_word.push(c);
3235 } else {
3236 let had_word = !current_word.is_empty();
3237 Self::flush_command_subst_keyword(
3238 &mut current_word,
3239 &mut pending_case_headers,
3240 &mut case_clause_depths,
3241 depth,
3242 &mut current_word_started_at_command_start,
3243 );
3244 if had_word && expecting_redirection_target {
3245 expecting_redirection_target = false;
3246 }
3247 match c {
3248 ' ' | '\t' => {}
3249 ';' | '|' | '&' => {
3250 at_command_start = true;
3251 expecting_redirection_target = false;
3252 }
3253 _ => {
3254 if !expecting_redirection_target {
3255 at_command_start = false;
3256 }
3257 }
3258 }
3259 }
3260 Self::push_capture_char(content, c);
3261 self.advance();
3262 }
3263 }
3264 }
3265
3266 false
3267 }
3268
3269 fn read_param_expansion_into(
3273 &mut self,
3274 content: &mut Option<String>,
3275 segment_start: Position,
3276 ) -> bool {
3277 let mut borrowable = true;
3278 let mut depth = 1;
3279 let mut literal_brace_depth = 0usize;
3280 let mut in_single = false;
3281 let mut in_double = false;
3282 let mut double_quote_depth = 0usize;
3283 while let Some(c) = self.peek_char() {
3284 if in_single {
3285 match c {
3286 '\\' => {
3287 let escape_start = self.current_position();
3288 if self.second_char() == Some('"') {
3289 self.advance();
3290 borrowable = false;
3291 self.ensure_capture_from_source(content, segment_start, escape_start);
3292 Self::push_capture_char(content, '"');
3293 self.advance();
3294 } else {
3295 Self::push_capture_char(content, '\\');
3296 self.advance();
3297 }
3298 }
3299 '\'' => {
3300 Self::push_capture_char(content, c);
3301 self.advance();
3302 in_single = false;
3303 }
3304 _ => {
3305 Self::push_capture_char(content, c);
3306 self.advance();
3307 }
3308 }
3309 continue;
3310 }
3311
3312 match c {
3313 '}' if !in_single && (!in_double || depth > double_quote_depth) => {
3314 self.advance();
3315 Self::push_capture_char(content, '}');
3316 if depth == 1
3317 && literal_brace_depth > 0
3318 && self.has_later_top_level_param_expansion_closer(depth)
3319 {
3320 literal_brace_depth -= 1;
3321 continue;
3322 }
3323 depth -= 1;
3324 if depth == 0 {
3325 break;
3326 }
3327 }
3328 '{' if !in_single && !in_double => {
3329 literal_brace_depth += 1;
3330 Self::push_capture_char(content, '{');
3331 self.advance();
3332 }
3333 '"' => {
3334 Self::push_capture_char(content, '"');
3336 self.advance();
3337 in_double = !in_double;
3338 double_quote_depth = if in_double { depth } else { 0 };
3339 }
3340 '\'' => {
3341 Self::push_capture_char(content, '\'');
3342 self.advance();
3343 if !in_double {
3344 in_single = true;
3345 }
3346 }
3347 '\\' => {
3348 let escape_start = self.current_position();
3351 self.advance();
3352 if let Some(esc) = self.peek_char() {
3353 match esc {
3354 '$' => {
3355 borrowable = false;
3356 self.ensure_capture_from_source(
3357 content,
3358 segment_start,
3359 escape_start,
3360 );
3361 Self::push_capture_char(content, '\x00');
3362 Self::push_capture_char(content, '$');
3363 self.advance();
3364 }
3365 '"' | '\\' | '`' => {
3366 borrowable = false;
3367 self.ensure_capture_from_source(
3368 content,
3369 segment_start,
3370 escape_start,
3371 );
3372 Self::push_capture_char(content, esc);
3373 self.advance();
3374 }
3375 '}' => {
3376 Self::push_capture_char(content, '\\');
3378 Self::push_capture_char(content, '}');
3379 self.advance();
3380 literal_brace_depth = literal_brace_depth.saturating_sub(1);
3381 }
3382 _ => {
3383 Self::push_capture_char(content, '\\');
3384 Self::push_capture_char(content, esc);
3385 self.advance();
3386 }
3387 }
3388 } else {
3389 Self::push_capture_char(content, '\\');
3390 }
3391 }
3392 '$' => {
3393 Self::push_capture_char(content, '$');
3394 self.advance();
3395 if self.peek_char() == Some('(') {
3396 if self.second_char() == Some('(') {
3397 if !self.read_arithmetic_expansion_into(content) {
3398 borrowable = false;
3399 }
3400 } else {
3401 Self::push_capture_char(content, '(');
3402 self.advance();
3403 self.read_command_subst_into(content);
3404 }
3405 } else if self.peek_char() == Some('{') {
3406 Self::push_capture_char(content, '{');
3407 self.advance();
3408 borrowable &= self.read_param_expansion_into(content, segment_start);
3409 }
3410 }
3411 _ => {
3412 Self::push_capture_char(content, c);
3413 self.advance();
3414 }
3415 }
3416 }
3417 borrowable
3418 }
3419
3420 fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
3421 let mut chars = self.lookahead_chars().peekable();
3422 let mut depth = target_depth;
3423 let mut in_single = false;
3424 let mut in_double = false;
3425 let mut double_quote_depth = 0usize;
3426
3427 while let Some(ch) = chars.next() {
3428 if in_single {
3429 match ch {
3430 '\'' => in_single = false,
3431 '\\' if chars.peek() == Some(&'"') => {
3432 chars.next();
3433 }
3434 '\\' => {}
3435 _ => {}
3436 }
3437 continue;
3438 }
3439
3440 if in_double {
3441 match ch {
3442 '"' => {
3443 in_double = false;
3444 double_quote_depth = 0;
3445 }
3446 '\\' => {
3447 chars.next();
3448 }
3449 '$' if chars.peek() == Some(&'{') => {
3450 chars.next();
3451 depth += 1;
3452 }
3453 '}' if depth > double_quote_depth => {
3454 depth -= 1;
3455 }
3456 _ => {}
3457 }
3458 continue;
3459 }
3460
3461 match ch {
3462 '\n' if depth == target_depth => return false,
3463 '\'' => in_single = true,
3464 '"' => {
3465 in_double = true;
3466 double_quote_depth = depth;
3467 }
3468 '\\' => {
3469 chars.next();
3470 }
3471 '$' if chars.peek() == Some(&'{') => {
3472 chars.next();
3473 depth += 1;
3474 }
3475 '}' => {
3476 if depth == target_depth {
3477 return true;
3478 }
3479 depth -= 1;
3480 }
3481 _ => {}
3482 }
3483 }
3484
3485 false
3486 }
3487
3488 fn looks_like_brace_expansion(&mut self) -> bool {
3494 const MAX_LOOKAHEAD: usize = 10_000;
3495 let brace_ccl_enabled = self.brace_ccl_enabled();
3496
3497 let mut chars = self.lookahead_chars();
3498
3499 if chars.next() != Some('{') {
3501 return false;
3502 }
3503
3504 let mut depth = 1;
3505 let mut paren_depth = 0usize;
3506 let mut has_comma = false;
3507 let mut has_dot_dot = false;
3508 let mut escaped = false;
3509 let mut in_single = false;
3510 let mut in_double = false;
3511 let mut in_backtick = false;
3512 let mut prev_char = None;
3513 let mut scanned = 0usize;
3514
3515 for ch in chars {
3516 scanned += 1;
3517 if scanned > MAX_LOOKAHEAD {
3518 return false;
3519 }
3520
3521 let brace_surface_active = !in_single && !in_double && !in_backtick;
3522 let at_top_level = depth == 1 && paren_depth == 0 && brace_surface_active;
3523
3524 match ch {
3525 _ if escaped => {
3526 escaped = false;
3527 }
3528 '\\' if !in_single => escaped = true,
3529 '\'' if !in_double && !in_backtick => in_single = !in_single,
3530 '"' if !in_single && !in_backtick => in_double = !in_double,
3531 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3532 '(' if brace_surface_active && (paren_depth > 0 || prev_char == Some('$')) => {
3533 paren_depth += 1
3534 }
3535 ')' if brace_surface_active && paren_depth > 0 => paren_depth -= 1,
3536 '{' if !in_single && !in_double && !in_backtick => depth += 1,
3537 '}' if !in_single && !in_double && !in_backtick => {
3538 depth -= 1;
3539 if depth == 0 {
3540 return has_comma || has_dot_dot || (brace_ccl_enabled && scanned > 1);
3542 }
3543 }
3544 ',' if at_top_level => has_comma = true,
3545 '.' if at_top_level && prev_char == Some('.') => has_dot_dot = true,
3546 ' ' | '\t' | '\n' | ';' if at_top_level => return false,
3548 _ => {}
3549 }
3550 prev_char = Some(ch);
3551 }
3552
3553 false
3554 }
3555
3556 fn consume_mid_word_brace_segment(&mut self, word: &mut Option<String>) {
3557 let mut brace_depth = 1usize;
3558 let mut paren_depth = 0usize;
3559 let mut escaped = false;
3560 let mut in_single = false;
3561 let mut in_double = false;
3562 let mut in_backtick = false;
3563 let mut prev_char = None;
3564
3565 while let Some(ch) = self.peek_char() {
3566 Self::push_capture_char(word, ch);
3567 self.advance();
3568
3569 if escaped {
3570 escaped = false;
3571 prev_char = Some(ch);
3572 continue;
3573 }
3574
3575 match ch {
3576 '\\' if !in_single => escaped = true,
3577 '\'' if !in_double && !in_backtick => in_single = !in_single,
3578 '"' if !in_single && !in_backtick => in_double = !in_double,
3579 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3580 '(' if !in_single
3581 && !in_double
3582 && !in_backtick
3583 && (paren_depth > 0 || prev_char == Some('$')) =>
3584 {
3585 paren_depth += 1
3586 }
3587 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3588 paren_depth -= 1
3589 }
3590 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3591 '}' if !in_single && !in_double && !in_backtick => {
3592 brace_depth -= 1;
3593 if brace_depth == 0 {
3594 break;
3595 }
3596 }
3597 _ => {}
3598 }
3599
3600 prev_char = Some(ch);
3601 }
3602 }
3603
3604 fn consume_brace_word_body(&mut self, word: &mut String) {
3605 let mut brace_depth = 1usize;
3606 let mut paren_depth = 0usize;
3607 let mut escaped = false;
3608 let mut in_single = false;
3609 let mut in_double = false;
3610 let mut in_backtick = false;
3611 let mut prev_char = None;
3612
3613 while let Some(ch) = self.peek_char() {
3614 word.push(ch);
3615 self.advance();
3616
3617 if escaped {
3618 escaped = false;
3619 prev_char = Some(ch);
3620 continue;
3621 }
3622
3623 match ch {
3624 '\\' if !in_single => escaped = true,
3625 '\'' if !in_double && !in_backtick => in_single = !in_single,
3626 '"' if !in_single && !in_backtick => in_double = !in_double,
3627 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3628 '(' if !in_single
3629 && !in_double
3630 && !in_backtick
3631 && (paren_depth > 0 || prev_char == Some('$')) =>
3632 {
3633 paren_depth += 1
3634 }
3635 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3636 paren_depth -= 1
3637 }
3638 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3639 '}' if !in_single && !in_double && !in_backtick => {
3640 brace_depth -= 1;
3641 if brace_depth == 0 {
3642 break;
3643 }
3644 }
3645 _ => {}
3646 }
3647
3648 prev_char = Some(ch);
3649 }
3650 }
3651
3652 fn looks_like_mid_word_brace_segment(&self) -> bool {
3655 const MAX_LOOKAHEAD: usize = 10_000;
3656
3657 let mut chars = self.lookahead_chars();
3658 if chars.next() != Some('{') {
3659 return false;
3660 }
3661
3662 let mut brace_depth = 1;
3663 let mut paren_depth = 0usize;
3664 let mut escaped = false;
3665 let mut in_single = false;
3666 let mut in_double = false;
3667 let mut in_backtick = false;
3668 let mut prev_char = None;
3669 let mut scanned = 0usize;
3670
3671 for ch in chars {
3672 scanned += 1;
3673 if scanned > MAX_LOOKAHEAD {
3674 return false;
3675 }
3676
3677 if !in_single
3678 && !in_double
3679 && !in_backtick
3680 && !escaped
3681 && brace_depth == 1
3682 && paren_depth == 0
3683 && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
3684 {
3685 return false;
3686 }
3687
3688 if escaped {
3689 escaped = false;
3690 prev_char = Some(ch);
3691 continue;
3692 }
3693
3694 match ch {
3695 '\\' => escaped = true,
3696 '\'' if !in_double && !in_backtick => in_single = !in_single,
3697 '"' if !in_single && !in_backtick => in_double = !in_double,
3698 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3699 '(' if !in_single
3700 && !in_double
3701 && !in_backtick
3702 && (paren_depth > 0 || prev_char == Some('$')) =>
3703 {
3704 paren_depth += 1
3705 }
3706 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3707 paren_depth -= 1
3708 }
3709 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3710 '}' if !in_single && !in_double && !in_backtick => {
3711 brace_depth -= 1;
3712 if brace_depth == 0 {
3713 return true;
3714 }
3715 }
3716 _ => {}
3717 }
3718
3719 prev_char = Some(ch);
3720 }
3721
3722 false
3723 }
3724
3725 fn is_brace_group_start(&self) -> bool {
3727 let mut chars = self.lookahead_chars();
3728 if chars.next() != Some('{') {
3730 return false;
3731 }
3732 matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3734 }
3735
3736 fn escaped_brace_sequence_looks_like_brace_expansion(&mut self) -> bool {
3739 const MAX_LOOKAHEAD: usize = 10_000;
3740 let brace_ccl_enabled = self.brace_ccl_enabled();
3741
3742 let mut chars = self.lookahead_chars();
3743 let mut depth = 1;
3744 let mut has_comma = false;
3745 let mut has_dot_dot = false;
3746 let mut prev_char = None;
3747 let mut scanned = 0usize;
3748
3749 for ch in chars.by_ref() {
3750 scanned += 1;
3751 if scanned > MAX_LOOKAHEAD {
3752 return false;
3753 }
3754 match ch {
3755 '{' => depth += 1,
3756 '}' => {
3757 depth -= 1;
3758 if depth == 0 {
3759 return has_comma || has_dot_dot || (brace_ccl_enabled && scanned > 1);
3760 }
3761 }
3762 ',' if depth == 1 => has_comma = true,
3763 '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3764 ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3765 _ => {}
3766 }
3767 prev_char = Some(ch);
3768 }
3769
3770 false
3771 }
3772
3773 fn brace_literal_starts_case_pattern_delimiter(&self) -> bool {
3774 let mut chars = self.lookahead_chars();
3775 if chars.next() != Some('{') {
3776 return false;
3777 }
3778 chars.next() == Some(')')
3779 }
3780
3781 fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
3783 let mut word = String::with_capacity(16);
3784
3785 if let Some('{') = self.peek_char() {
3786 word.push('{');
3787 self.advance();
3788 } else {
3789 return None;
3790 }
3791
3792 self.consume_brace_word_body(&mut word);
3793
3794 while let Some(ch) = self.peek_char() {
3795 if Self::is_word_char(ch) {
3796 if self.reinject_buf.is_empty() {
3797 let chunk = self.cursor.eat_while(Self::is_word_char);
3798 word.push_str(chunk);
3799 self.advance_scanned_source_bytes(chunk.len());
3800 } else {
3801 word.push(ch);
3802 self.advance();
3803 }
3804 } else {
3805 break;
3806 }
3807 }
3808
3809 Some(LexedToken::owned_word(TokenKind::Word, word))
3810 }
3811
3812 fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3814 let mut word = String::with_capacity(16);
3815
3816 if let Some('{') = self.peek_char() {
3818 word.push('{');
3819 self.advance();
3820 } else {
3821 return None;
3822 }
3823
3824 self.consume_brace_word_body(&mut word);
3826
3827 while let Some(ch) = self.peek_char() {
3829 if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3830 if ch == '{' {
3831 word.push(ch);
3833 self.advance();
3834 self.consume_brace_word_body(&mut word);
3835 } else {
3836 word.push(ch);
3837 self.advance();
3838 }
3839 } else {
3840 break;
3841 }
3842 }
3843
3844 Some(LexedToken::owned_word(TokenKind::Word, word))
3845 }
3846
3847 fn looks_like_assoc_assign(&self) -> bool {
3851 let mut chars = self.lookahead_chars();
3852 if chars.next() != Some('(') {
3854 return false;
3855 }
3856 for ch in chars {
3858 match ch {
3859 ' ' | '\t' => continue,
3860 '[' => return true,
3861 _ => return false,
3862 }
3863 }
3864 false
3865 }
3866
3867 fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3868 text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3869 }
3870
3871 fn looks_like_zsh_alternative_glob_suffix(&mut self, prefix: &str) -> bool {
3872 if self.current_zsh_options().is_none()
3873 || self.peek_char() != Some('(')
3874 || !prefix.ends_with('.')
3875 {
3876 return false;
3877 }
3878
3879 let mut chars = self.lookahead_chars();
3880 if chars.next() != Some('(') {
3881 return false;
3882 }
3883
3884 let mut depth = 1usize;
3885 let mut escaped = false;
3886 let mut saw_glob_marker = false;
3887
3888 for ch in chars {
3889 if escaped {
3890 escaped = false;
3891 continue;
3892 }
3893
3894 match ch {
3895 '\\' => escaped = true,
3896 '(' => depth += 1,
3897 ')' => {
3898 depth = depth.saturating_sub(1);
3899 if depth == 0 {
3900 return saw_glob_marker;
3901 }
3902 }
3903 '|' if depth == 1 => {
3904 saw_glob_marker = true;
3905 }
3906 _ => {}
3907 }
3908 }
3909
3910 false
3911 }
3912
3913 fn looks_like_zsh_glob_modifier_suffix(&mut self, prefix: &str) -> bool {
3914 if self.current_zsh_options().is_none()
3915 || self.peek_char() != Some('(')
3916 || !prefix.contains('/')
3917 {
3918 return false;
3919 }
3920
3921 let mut chars = self.lookahead_chars();
3922 matches!((chars.next(), chars.next()), (Some('('), Some(':')))
3923 }
3924
3925 fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3926 word.segments().any(|segment| {
3927 matches!(
3928 segment.kind(),
3929 LexedWordSegmentKind::SingleQuoted
3930 | LexedWordSegmentKind::DollarSingleQuoted
3931 | LexedWordSegmentKind::DoubleQuoted
3932 | LexedWordSegmentKind::DollarDoubleQuoted
3933 )
3934 }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3935 }
3936
3937 fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3938 text.contains(['*', '?'])
3939 || text.ends_with('}') && text.contains("${")
3940 || text.ends_with(']')
3941 && text
3942 .rfind('[')
3943 .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3944 }
3945
3946 fn is_word_char(ch: char) -> bool {
3947 !matches!(
3948 ch,
3949 ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3950 )
3951 }
3952
3953 const fn is_ascii_word_byte(byte: u8) -> bool {
3954 !matches!(
3955 byte,
3956 b' ' | b'\t'
3957 | b'\n'
3958 | b';'
3959 | b'|'
3960 | b'&'
3961 | b'>'
3962 | b'<'
3963 | b'('
3964 | b')'
3965 | b'{'
3966 | b'}'
3967 | b'\''
3968 | b'"'
3969 )
3970 }
3971
3972 const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3973 Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3974 }
3975
3976 fn is_plain_word_char(ch: char) -> bool {
3977 Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3978 }
3979
3980 pub(super) fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
3982 let mut content = String::with_capacity(64);
3983 let mut current_line = String::with_capacity(64);
3984
3985 let mut rest_of_line = String::with_capacity(32);
3992 let rest_of_line_start = self.current_position();
3993 let mut in_double_quote = false;
3994 let mut in_single_quote = false;
3995 let mut in_comment = false;
3996 let mut saw_non_whitespace_tail = false;
3997 let mut consecutive_backslashes = 0usize;
3998 let mut previous_tail_char = None;
3999 while let Some(ch) = self.peek_char() {
4000 self.advance();
4001 if in_comment {
4002 if ch == '\n' {
4003 break;
4004 }
4005 rest_of_line.push(ch);
4006 previous_tail_char = Some(ch);
4007 continue;
4008 }
4009 if ch == '#'
4010 && !in_single_quote
4011 && !in_double_quote
4012 && self.comments_enabled()
4013 && heredoc_tail_hash_starts_comment(previous_tail_char)
4014 {
4015 in_comment = true;
4016 rest_of_line.push(ch);
4017 previous_tail_char = Some(ch);
4018 consecutive_backslashes = 0;
4019 continue;
4020 }
4021 let backslash_continues_line = ch == '\\'
4022 && !in_single_quote
4023 && self.peek_char() == Some('\n')
4024 && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
4025 && consecutive_backslashes.is_multiple_of(2);
4026 if backslash_continues_line {
4027 rest_of_line.push(ch);
4028 rest_of_line.push('\n');
4029 self.advance();
4030 consecutive_backslashes = 0;
4031 continue;
4032 }
4033 if ch == '\n' && !in_double_quote && !in_single_quote {
4034 break;
4035 }
4036 if ch == '"' && !in_single_quote {
4037 in_double_quote = !in_double_quote;
4038 } else if ch == '\'' && !in_double_quote {
4039 in_single_quote = !in_single_quote;
4040 } else if ch == '\\' && in_double_quote {
4041 rest_of_line.push(ch);
4043 if let Some(next) = self.peek_char() {
4044 rest_of_line.push(next);
4045 self.advance();
4046 }
4047 continue;
4048 }
4049 rest_of_line.push(ch);
4050 if !ch.is_whitespace() {
4051 saw_non_whitespace_tail = true;
4052 }
4053 if ch == '\\' && !in_single_quote {
4054 consecutive_backslashes += 1;
4055 } else {
4056 consecutive_backslashes = 0;
4057 }
4058 previous_tail_char = Some(ch);
4059 }
4060
4061 self.sync_offset_to_cursor();
4065 let content_start = self.current_position();
4066 let mut current_line_start = content_start;
4067 let content_end;
4068
4069 loop {
4071 if self.reinject_buf.is_empty() {
4072 self.sync_offset_to_cursor();
4078 let rest = self.cursor.rest();
4079 if rest.is_empty() {
4080 content_end = self.current_position();
4081 break;
4082 }
4083
4084 let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
4085 let line = &rest[..line_len];
4086 let has_newline = line_len < rest.len();
4087
4088 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
4089 content_end = current_line_start;
4090 self.consume_source_bytes(line_len);
4091 if has_newline {
4092 self.consume_ascii_chars(1);
4093 }
4094 break;
4095 }
4096
4097 content.push_str(line);
4098 self.consume_source_bytes(line_len);
4099
4100 if has_newline {
4101 self.consume_ascii_chars(1);
4102 content.push('\n');
4103 current_line_start = self.current_position();
4104 continue;
4105 }
4106
4107 content_end = self.current_position();
4108 break;
4109 }
4110
4111 match self.peek_char() {
4112 Some('\n') => {
4113 self.advance();
4114 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
4116 content_end = current_line_start;
4117 break;
4118 }
4119 content.push_str(¤t_line);
4120 content.push('\n');
4121 current_line.clear();
4122 current_line_start = self.current_position();
4123 }
4124 Some(ch) => {
4125 current_line.push(ch);
4126 self.advance();
4127 }
4128 None => {
4129 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
4131 content_end = current_line_start;
4132 break;
4133 }
4134 if !current_line.is_empty() {
4135 content.push_str(¤t_line);
4136 }
4137 content_end = self.current_position();
4138 break;
4139 }
4140 }
4141 }
4142
4143 let post_heredoc_offset = self.offset;
4148 self.offset = rest_of_line_start.offset;
4149 for ch in rest_of_line.chars() {
4150 self.reinject_buf.push_back(ch);
4151 }
4152 self.reinject_buf.push_back('\n');
4153 self.reinject_resume_offset = Some(post_heredoc_offset);
4154
4155 HeredocRead {
4156 content,
4157 content_span: Span::from_positions(content_start, content_end),
4158 }
4159 }
4160
4161 fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
4162 let mut chars = self.cursor.rest().chars();
4163 if chars.next() != Some('\n') {
4164 return false;
4165 }
4166
4167 for ch in chars {
4168 if matches!(ch, ' ' | '\t') {
4169 continue;
4170 }
4171 if ch == '\n' {
4172 return false;
4173 }
4174 return matches!(ch, '|' | '&' | ';' | '<' | '>')
4175 || (ch == '#' && self.comments_enabled());
4176 }
4177
4178 false
4179 }
4180}
4181
/// Reports whether `line` terminates a heredoc with the given `delimiter`.
///
/// With `strip_tabs` set, leading tabs on `line` are ignored first. The line
/// matches when it starts with the delimiter and is followed by nothing but
/// spaces and tabs (possibly none at all).
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    };

    match candidate.strip_prefix(delimiter) {
        // An empty remainder (exact match) trivially satisfies `all`.
        Some(trailing) => trailing.bytes().all(|byte| matches!(byte, b' ' | b'\t')),
        None => false,
    }
}
4199
/// Reports whether a `#` in a heredoc's command tail begins a comment, judged
/// by the character before it: nothing at all, whitespace, or one of
/// `; | & < > )`.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(';' | '|' | '&' | '<' | '>' | ')') => true,
        Some(prev) => prev.is_whitespace(),
    }
}
4205
/// Returns the character starting at byte `index` of `input` together with the
/// byte index just past it.
///
/// Yields `None` when `index` is out of range, not on a character boundary, or
/// at the end of the string.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    input
        .get(index..)
        .and_then(|tail| tail.chars().next())
        .map(|ch| (ch, index + ch.len_utf8()))
}
4210
4211fn line_has_unclosed_double_paren(prefix: &str) -> bool {
4212 let mut index = 0usize;
4213 let mut depth = 0usize;
4214 let mut in_single = false;
4215 let mut in_double = false;
4216 let mut in_backtick = false;
4217 let mut escaped = false;
4218
4219 while let Some((ch, next_index)) = next_char_boundary(prefix, index) {
4220 let was_escaped = escaped;
4221 if ch == '\\' && !in_single {
4222 escaped = !escaped;
4223 index = next_index;
4224 continue;
4225 }
4226 escaped = false;
4227
4228 match ch {
4229 '\'' if !in_double && !in_backtick && !was_escaped => in_single = !in_single,
4230 '"' if !in_single && !in_backtick && !was_escaped => in_double = !in_double,
4231 '`' if !in_single && !in_double && !was_escaped => in_backtick = !in_backtick,
4232 '(' if !in_single
4233 && !in_double
4234 && !in_backtick
4235 && !was_escaped
4236 && prefix[next_index..].starts_with('(') =>
4237 {
4238 depth += 1;
4239 index = next_index + '('.len_utf8();
4240 continue;
4241 }
4242 ')' if !in_single
4243 && !in_double
4244 && !in_backtick
4245 && !was_escaped
4246 && prefix[next_index..].starts_with(')') =>
4247 {
4248 depth = depth.saturating_sub(1);
4249 index = next_index + ')'.len_utf8();
4250 continue;
4251 }
4252 _ => {}
4253 }
4254
4255 index = next_index;
4256 }
4257
4258 depth > 0
4259}
4260
4261fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4262 let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4263 let prefix = &input[line_start..index];
4264 line_has_unclosed_double_paren(prefix)
4265}
4266
4267fn hash_starts_comment(input: &str, index: usize) -> bool {
4268 if inside_unclosed_double_paren_on_line(input, index) {
4269 return false;
4270 }
4271
4272 let next = &input[index + '#'.len_utf8()..];
4273 input[..index]
4274 .chars()
4275 .next_back()
4276 .is_none_or(|prev| match prev {
4277 '(' => {
4278 let whitespace_index = next.find(char::is_whitespace);
4279 let close_index = next.find(')');
4280
4281 match (whitespace_index, close_index) {
4282 (Some(whitespace), Some(close)) => whitespace < close,
4283 (Some(_), None) | (None, None) => true,
4284 (None, Some(_)) => false,
4285 }
4286 }
4287 _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4288 })
4289}
4290
/// Reports whether `ch` ends a heredoc delimiter word: outside quotes and not
/// escaped, any whitespace or one of `| & ; < > ( )` terminates it.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }
    ch.is_whitespace() || matches!(ch, '|' | '&' | ';' | '<' | '>' | '(' | ')')
}
4302
4303fn scan_double_quoted_command_substitution_segment(
4304 input: &str,
4305 mut index: usize,
4306 subst_depth: usize,
4307) -> Option<usize> {
4308 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4309 match ch {
4310 '"' => return Some(next_index),
4311 '\\' => {
4312 index = next_index;
4313 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4314 index = escaped_next;
4315 }
4316 }
4317 '$' if input[next_index..].starts_with('{') => {
4318 let consumed = scan_command_subst_parameter_expansion_len(
4319 &input[next_index + '{'.len_utf8()..],
4320 subst_depth,
4321 0,
4322 )?;
4323 index = next_index + '{'.len_utf8() + consumed;
4324 }
4325 '$' if input[next_index..].starts_with('(')
4326 && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4327 {
4328 let consumed = scan_command_substitution_body_len_inner(
4329 &input[next_index + '('.len_utf8()..],
4330 subst_depth + 1,
4331 )?;
4332 index = next_index + '('.len_utf8() + consumed;
4333 }
4334 _ => index = next_index,
4335 }
4336 }
4337
4338 None
4339}
4340
/// Measures a `${...}` parameter expansion inside a command substitution.
///
/// `input` starts just after the opening `${`. Returns the number of bytes up
/// to and including the closing `}`, or `None` if the input ends first.
///
/// Nested `${...}` expansions are handled by recursion, with `parameter_depth`
/// counting the nesting level; once it reaches
/// `MAX_PARAMETER_EXPANSION_SCAN_DEPTH` the scan is handed to
/// `scan_command_subst_parameter_expansion_len_balanced` instead of recursing
/// further. `subst_depth` is passed on (incremented) to nested command
/// substitution scans.
fn scan_command_subst_parameter_expansion_len(
    input: &str,
    subst_depth: usize,
    parameter_depth: usize,
) -> Option<usize> {
    if parameter_depth >= MAX_PARAMETER_EXPANSION_SCAN_DEPTH {
        return scan_command_subst_parameter_expansion_len_balanced(input, subst_depth);
    }

    let mut index = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // True when the previous character was an unquoted, unescaped `$`, so a
    // following `'` opens a `$'...'` quote.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        // Outside plain single quotes a backslash toggles the escape state.
        if ch == '\\' && !in_single {
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        // `$` still introduces nested constructs inside double quotes.
        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            // Nested `${...}`: skip it as a unit. If the nested scan fails, the
            // `$` falls through and is treated as an ordinary character.
            if input[next_index..].starts_with('{')
                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                    parameter_depth + 1,
                )
            {
                index = next_index + '{'.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }

            // Nested `$(...)`; `$((` is deliberately not matched here.
            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // `<(...)` / `>(...)` outside every kind of quote is skipped as a unit.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            // The first unquoted, unescaped `}` closes this expansion.
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                return Some(next_index);
            }
            _ => {}
        }

        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4449
/// Non-recursive variant of `scan_command_subst_parameter_expansion_len`.
///
/// `input` starts just after an opening `${`. Nested `${` openers are tracked
/// with a brace counter rather than by recursion. Returns the number of bytes
/// up to and including the `}` that brings the counter back to zero, or `None`
/// if the input ends first. `subst_depth` is passed on (incremented) to nested
/// command substitution scans.
fn scan_command_subst_parameter_expansion_len_balanced(
    input: &str,
    subst_depth: usize,
) -> Option<usize> {
    let mut index = 0usize;
    // Starts at one for the `${` that precedes `input`.
    let mut brace_depth = 1usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // True when the previous character was an unquoted, unescaped `$`, so a
    // following `'` opens a `$'...'` quote.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        // Outside plain single quotes a backslash toggles the escape state.
        if ch == '\\' && !in_single {
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        // `$` still introduces nested constructs inside double quotes.
        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            // A nested `${` only bumps the counter; both characters are consumed.
            if input[next_index..].starts_with('{') {
                brace_depth = brace_depth.saturating_add(1);
                index = next_index + '{'.len_utf8();
                ansi_c_quote_pending = false;
                continue;
            }

            // Nested `$(...)`; `$((` is deliberately not matched here. If the
            // nested scan fails, the `$` is treated as an ordinary character.
            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // `<(...)` / `>(...)` outside every kind of quote is skipped as a unit.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            // An unquoted, unescaped `}` closes one level; the scan ends when
            // the outermost level is closed.
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                brace_depth = brace_depth.saturating_sub(1);
                if brace_depth == 0 {
                    return Some(next_index);
                }
            }
            _ => {}
        }

        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4552
4553fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
4554 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4555 if !matches!(ch, ' ' | '\t') {
4556 break;
4557 }
4558 index = next_index;
4559 }
4560
4561 let start = index;
4562 let mut cooked = String::new();
4563 let mut in_single = false;
4564 let mut in_double = false;
4565 let mut escaped = false;
4566
4567 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4568 if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
4569 break;
4570 }
4571
4572 index = next_index;
4573 if escaped {
4574 cooked.push(ch);
4575 escaped = false;
4576 continue;
4577 }
4578
4579 match ch {
4580 '\\' if !in_single => escaped = true,
4581 '\'' if !in_double => in_single = !in_single,
4582 '"' if !in_single => in_double = !in_double,
4583 _ => cooked.push(ch),
4584 }
4585 }
4586
4587 (index > start).then_some((index, cooked))
4588}
4589
4590fn skip_command_subst_pending_heredoc(
4591 input: &str,
4592 mut index: usize,
4593 delimiter: &str,
4594 strip_tabs: bool,
4595) -> usize {
4596 while index <= input.len() {
4597 let rest = &input[index..];
4598 let line_len = rest.find('\n').unwrap_or(rest.len());
4599 let line = &rest[..line_len];
4600 let has_newline = line_len < rest.len();
4601
4602 index += line_len;
4603 if has_newline {
4604 index += '\n'.len_utf8();
4605 }
4606
4607 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4608 return index;
4609 }
4610 }
4611
4612 index
4613}
4614
4615fn scan_command_subst_ansi_c_single_quoted_segment(
4616 input: &str,
4617 quote_index: usize,
4618) -> Option<usize> {
4619 let mut index = quote_index + '\''.len_utf8();
4620
4621 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4622 index = next_index;
4623 if ch == '\\' {
4624 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4625 index = escaped_next;
4626 }
4627 continue;
4628 }
4629
4630 if ch == '\'' {
4631 return Some(index);
4632 }
4633 }
4634
4635 None
4636}
4637
4638fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
4639 let mut index = start;
4640
4641 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4642 index = next_index;
4643 if ch == '\\' {
4644 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4645 index = escaped_next;
4646 }
4647 continue;
4648 }
4649
4650 if ch == '`' {
4651 return Some(index);
4652 }
4653 }
4654
4655 None
4656}
4657
4658fn flush_scanned_command_subst_keyword(
4659 current_word: &mut String,
4660 pending_case_headers: &mut usize,
4661 case_clause_depths: &mut SmallVec<[usize; 4]>,
4662 depth: usize,
4663 word_started_at_command_start: &mut bool,
4664) {
4665 if current_word.is_empty() {
4666 *word_started_at_command_start = false;
4667 return;
4668 }
4669
4670 match current_word.as_str() {
4671 "case" if *word_started_at_command_start => *pending_case_headers += 1,
4672 "in" if *pending_case_headers > 0 => {
4673 *pending_case_headers -= 1;
4674 case_clause_depths.push(depth);
4675 }
4676 "esac" if *word_started_at_command_start => {
4677 case_clause_depths.pop();
4678 }
4679 _ => {}
4680 }
4681
4682 current_word.clear();
4683 *word_started_at_command_start = false;
4684}
4685
/// Measures the body of a command substitution.
///
/// `input` starts just after the opening `(`. Returns the number of bytes up to
/// and including the `)` that closes the substitution, or `None` when the input
/// ends first, when a nested quoted/expansion scan fails, or when `subst_depth`
/// has reached `DEFAULT_MAX_SUBST_DEPTH`.
///
/// While scanning, the function skips comments, quoted segments, backtick
/// segments, `${...}` expansions, nested `$(...)` substitutions and pending
/// heredoc bodies, and it tracks `case ... in ... esac` so that the `)` ending
/// a case pattern is not mistaken for the closing parenthesis.
pub(super) fn scan_command_substitution_body_len_inner(
    input: &str,
    subst_depth: usize,
) -> Option<usize> {
    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
        return None;
    }

    let mut index = 0usize;
    // Parenthesis depth; starts at one for the `(` that precedes `input`.
    let mut depth = 1;
    // Heredocs announced on the current line as (delimiter, strip_tabs); their
    // bodies are skipped at the next newline.
    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
    // Number of `case` keywords seen whose `in` has not been seen yet.
    let mut pending_case_headers = 0usize;
    // Parenthesis depth recorded for each open case clause.
    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
    // The alphanumeric/underscore word currently being accumulated.
    let mut current_word = String::with_capacity(16);
    let mut at_command_start = true;
    let mut expecting_redirection_target = false;
    let mut current_word_started_at_command_start = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        match ch {
            // Comment: skip to the end of the line, then skip any heredoc
            // bodies that were announced on that line.
            '#' if hash_starts_comment(input, index) => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
                    index = comment_next;
                    if comment_ch == '\n' {
                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                            index = skip_command_subst_pending_heredoc(
                                input, index, &delimiter, strip_tabs,
                            );
                        }
                        at_command_start = true;
                        expecting_redirection_target = false;
                        break;
                    }
                }
            }
            // A plain `(` opens one more nesting level and a new command position.
            '(' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                depth += 1;
                index = next_index;
                at_command_start = true;
                expecting_redirection_target = false;
            }
            ')' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                // Inside a case clause opened at this depth, `)` is taken as
                // the end of a case pattern and does not change the depth.
                if case_clause_depths
                    .last()
                    .is_some_and(|case_depth| *case_depth == depth)
                {
                    index = next_index;
                    at_command_start = true;
                    expecting_redirection_target = false;
                    continue;
                }
                depth -= 1;
                index = next_index;
                // Depth zero means the substitution's own `)` was just consumed.
                if depth == 0 {
                    return Some(index);
                }
                at_command_start = false;
                expecting_redirection_target = false;
            }
            // Double-quoted segment, skipped as a unit; an unterminated one
            // aborts the scan.
            '"' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_double_quoted_command_substitution_segment(
                    input,
                    next_index,
                    subst_depth,
                )?;
                // A quoted segment either serves as the awaited redirection
                // target or marks that the command has started.
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Single-quoted segment: skip to the closing quote, or to the end
            // of input if there is none.
            '\'' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
                    index = quoted_next;
                    if quoted_ch == '\'' {
                        break;
                    }
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Backtick segment, skipped as a unit; an unterminated one aborts
            // the scan.
            '`' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_backtick_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // `$'...'` segment; `next_index` is the position of the opening quote.
            '$' if input[next_index..].starts_with('\'') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // A backslash skips the character that follows it.
            '\\' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
                    index = escaped_next;
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Output redirection: the next word is its target.
            '>' => {
                // An all-digit word at command start directly before the
                // operator is treated as a file-descriptor prefix, so the
                // command itself has not started yet.
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                index = next_index;
                expecting_redirection_target = true;
            }
            // `<<`: a heredoc, a `<<<` here-string, or a shift inside `((`.
            '<' if input[next_index..].starts_with('<') => {
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                // After an unclosed `((` on this line, `<<` is not a heredoc
                // operator; just step over both characters.
                if inside_unclosed_double_paren_on_line(input, index) {
                    index = next_index + '<'.len_utf8();
                    continue;
                }

                // `<<<`: skip the operator and expect its word next.
                if input[next_index + '<'.len_utf8()..].starts_with('<') {
                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
                    expecting_redirection_target = true;
                    continue;
                }

                // `<<-` requests tab stripping when matching the delimiter line.
                let strip_tabs = input[next_index..].starts_with("<-");
                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
                if let Some((delimiter_index, delimiter)) =
                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
                {
                    // The body is skipped later, at the next newline.
                    pending_heredocs.push((delimiter, strip_tabs));
                    index = delimiter_index;
                    expecting_redirection_target = false;
                } else {
                    index = next_index;
                    expecting_redirection_target = true;
                }
            }
            // End of line: skip the bodies of all heredocs announced on it.
            '\n' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                index = next_index;
                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                    index =
                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
                }
                at_command_start = true;
                expecting_redirection_target = false;
            }
            // `${...}` parameter expansion, skipped as a unit; an unterminated
            // one aborts the scan.
            '$' if input[next_index..].starts_with('{') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                    0,
                )?;
                index = next_index + '{'.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Nested `$(...)` (but not `$((`), scanned recursively one level deeper.
            '$' if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
            {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )?;
                index = next_index + '('.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            _ => {
                if ch.is_ascii_alphanumeric() || ch == '_' {
                    // Word characters accumulate so keywords can be recognised
                    // when the word is flushed.
                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
                    {
                        current_word_started_at_command_start = true;
                        at_command_start = false;
                    }
                    current_word.push(ch);
                } else {
                    let had_word = !current_word.is_empty();
                    flush_scanned_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    match ch {
                        // Blanks separate words without changing command state.
                        ' ' | '\t' => {}
                        // Command separators start a new command.
                        ';' | '|' | '&' => {
                            at_command_start = true;
                            expecting_redirection_target = false;
                        }
                        _ => {
                            if !expecting_redirection_target {
                                at_command_start = false;
                            }
                        }
                    }
                }
                index = next_index;
            }
        }
    }

    None
}
5043
5044pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
5045 scan_command_substitution_body_len_inner(input, 0)
5046}
5047
5048#[cfg(test)]
5049mod tests {
5050 use super::*;
5051
5052 fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
5053 match token.kind {
5054 kind if kind.is_word_like() => token.word_string(),
5055 TokenKind::Comment => token
5056 .span
5057 .slice(source)
5058 .strip_prefix('#')
5059 .map(str::to_string),
5060 TokenKind::Error => token
5061 .error_kind()
5062 .map(LexerErrorKind::message)
5063 .map(str::to_string),
5064 _ => None,
5065 }
5066 }
5067
5068 fn assert_next_token(
5069 lexer: &mut Lexer<'_>,
5070 expected_kind: TokenKind,
5071 expected_text: Option<&str>,
5072 ) {
5073 let token = lexer.next_lexed_token().unwrap();
5074 assert_eq!(token.kind, expected_kind);
5075 assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
5076 }
5077
5078 fn assert_next_token_with_comments(
5079 lexer: &mut Lexer<'_>,
5080 expected_kind: TokenKind,
5081 expected_text: Option<&str>,
5082 ) {
5083 let token = lexer.next_lexed_token_with_comments().unwrap();
5084 assert_eq!(token.kind, expected_kind);
5085 assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
5086 }
5087
5088 fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
5089 let mut lexer = Lexer::new(input);
5090
5091 while let Some(token) = lexer.next_lexed_token() {
5092 if token.kind == TokenKind::Newline {
5093 continue;
5094 }
5095
5096 assert_eq!(
5097 token.span.start.line, token.span.end.line,
5098 "token should stay on one line: {:?}",
5099 token
5100 );
5101 }
5102 }
5103
5104 #[test]
5105 fn test_simple_words() {
5106 let mut lexer = Lexer::new("echo hello world");
5107
5108 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5109 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5110 assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5111 assert!(lexer.next_lexed_token().is_none());
5112 }
5113
5114 #[test]
5115 fn test_single_quoted_string() {
5116 let mut lexer = Lexer::new("echo 'hello world'");
5117
5118 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5119 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("hello world"));
5121 assert!(lexer.next_lexed_token().is_none());
5122 }
5123
5124 #[test]
5125 fn test_double_quoted_string() {
5126 let mut lexer = Lexer::new("echo \"hello world\"");
5127
5128 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5129 assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("hello world"));
5130 assert!(lexer.next_lexed_token().is_none());
5131 }
5132
5133 #[test]
5134 fn test_brace_expansion_token_ignores_quoted_closers() {
5135 let mut lexer = Lexer::new("echo {\"}\",a}\n");
5136
5137 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5138 assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{"}",a}"#));
5139 assert_next_token(&mut lexer, TokenKind::Newline, None);
5140 assert!(lexer.next_lexed_token().is_none());
5141 }
5142
5143 #[test]
5144 fn test_brace_expansion_token_preserves_single_quoted_backslash_member_boundary() {
5145 let mut lexer = Lexer::new("echo {'a\\',b} next\n");
5146
5147 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5148 assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{'a\',b}"#));
5149 assert_next_token(&mut lexer, TokenKind::Word, Some("next"));
5150 assert_next_token(&mut lexer, TokenKind::Newline, None);
5151 assert!(lexer.next_lexed_token().is_none());
5152 }
5153
5154 #[test]
5155 fn test_double_quoted_expansion_token_keeps_source_backing() {
5156 let source = r#""$bar""#;
5157 let mut lexer = Lexer::new(source);
5158
5159 let token = lexer.next_lexed_token().unwrap();
5160 assert_eq!(token.kind, TokenKind::QuotedWord);
5161 assert_eq!(token.word_text(), Some("$bar"));
5162
5163 let word = token.word().unwrap();
5164 let segment = word.single_segment().unwrap();
5165 assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
5166 assert_eq!(segment.span().unwrap().slice(source), "$bar");
5167 }
5168
5169 #[test]
5170 fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
5171 let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
5172 let mut lexer = Lexer::new(source);
5173
5174 let token = lexer.next_lexed_token().unwrap();
5175 assert_eq!(token.kind, TokenKind::QuotedWord);
5176 assert_eq!(
5177 token.word_text(),
5178 Some(r#"$(echo "$line" | cut -d' ' -f2-)"#)
5179 );
5180 }
5181
5182 #[test]
5183 fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
5184 let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
5185 let mut lexer = Lexer::new(source);
5186
5187 let token = lexer.next_lexed_token().unwrap();
5188 assert_eq!(token.kind, TokenKind::QuotedWord);
5189 assert_eq!(
5190 token.word_text(),
5191 Some(r#"$(echo "${@}" | tr -d '[:space:]')"#)
5192 );
5193 }
5194
5195 #[test]
5196 fn test_deep_command_substitution_preserves_simple_parameter_expansion() {
5197 let source = r#""$(echo "$(echo "$(echo "$(echo "${name}")")")")""#;
5198 let mut lexer = Lexer::new(source);
5199
5200 let token = lexer.next_lexed_token().unwrap();
5201 assert_eq!(token.kind, TokenKind::QuotedWord);
5202 assert_eq!(
5203 token.word_text(),
5204 Some(r#"$(echo "$(echo "$(echo "$(echo "${name}")")")")"#)
5205 );
5206 }
5207
5208 #[test]
5209 fn test_command_substitution_preserves_deep_parameter_operand_paren() {
5210 let source = r#""$(echo "${a:-${b:-${c:-${d:-${e:-x})}}}}")""#;
5211 let mut lexer = Lexer::new(source);
5212
5213 let token = lexer.next_lexed_token().unwrap();
5214 assert_eq!(token.kind, TokenKind::QuotedWord);
5215 assert_eq!(
5216 token.word_text(),
5217 Some(r#"$(echo "${a:-${b:-${c:-${d:-${e:-x})}}}}")"#)
5218 );
5219 }
5220
5221 #[test]
5222 fn test_mixed_word_keeps_segment_kinds() {
5223 let source = r#"foo"bar"'baz'"#;
5224 let mut lexer = Lexer::new(source);
5225
5226 let token = lexer.next_lexed_token().unwrap();
5227 assert_eq!(token.kind, TokenKind::Word);
5228
5229 let word = token.word().unwrap();
5230 let segments: Vec<_> = word
5231 .segments()
5232 .map(|segment| (segment.kind(), segment.as_str().to_string()))
5233 .collect();
5234
5235 assert_eq!(
5236 segments,
5237 vec![
5238 (LexedWordSegmentKind::Plain, "foo".to_string()),
5239 (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
5240 (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
5241 ]
5242 );
5243 assert_eq!(word.joined_text(), "foobarbaz");
5244 assert_eq!(
5245 word.segments()
5246 .next()
5247 .and_then(LexedWordSegment::span)
5248 .unwrap()
5249 .slice(source),
5250 "foo"
5251 );
5252 }
5253
5254 #[test]
5255 fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
5256 let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";
5257
5258 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5259 let body = &source[..consumed];
5260
5261 assert!(body.contains("field, direction"));
5262 assert!(body.ends_with(')'));
5263 }
5264
5265 #[test]
5266 fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
5267 let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";
5268
5269 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5270 let body = &source[..consumed];
5271
5272 assert!(body.contains("printf '%s' y"));
5273 assert!(body.ends_with(')'));
5274 }
5275
5276 #[test]
5277 fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
5278 let source = " (# comment with )\nprintf %s 1,2\n) )\"";
5279
5280 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5281 let body = &source[..consumed];
5282
5283 assert!(body.contains("printf %s 1,2"));
5284 assert!(body.ends_with(')'));
5285 }
5286
5287 #[test]
5288 fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
5289 let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";
5290
5291 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5292 let body = &source[..consumed];
5293
5294 assert!(body.contains("field, direction"));
5295 assert!(body.ends_with(')'));
5296 }
5297
5298 #[test]
5299 fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
5300 let source = "printf %s ${x//foo/)},1)\"";
5301
5302 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5303 let body = &source[..consumed];
5304
5305 assert!(body.contains("${x//foo/)},1"));
5306 assert!(body.ends_with(')'));
5307 }
5308
5309 #[test]
5310 fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
5311 let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";
5312
5313 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5314 let body = &source[..consumed];
5315
5316 assert!(body.contains("printf %s 1,2"));
5317 assert!(body.ends_with(')'));
5318 }
5319
5320 #[test]
5321 fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
5322 let source = "[[ \"$buf\" == (#b)(*) ]]";
5323 let index = source.find('#').expect("expected hash");
5324
5325 assert!(!hash_starts_comment(source, index));
5326 }
5327
5328 #[test]
5329 fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
5330 let source = "(#comment with )";
5331 let index = source.find('#').expect("expected hash");
5332
5333 assert!(hash_starts_comment(source, index));
5334 }
5335
5336 #[test]
5337 fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
5338 let source = "(( #c < 256 ))";
5339 let index = source.find('#').expect("expected hash");
5340
5341 assert!(!hash_starts_comment(source, index));
5342 }
5343
5344 #[test]
5345 fn test_hash_starts_comment_respects_quoted_double_parens() {
5346 let source = "printf '((' # comment";
5347 let index = source.find('#').expect("expected hash");
5348
5349 assert!(hash_starts_comment(source, index));
5350 }
5351
5352 #[test]
5353 fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
5354 let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";
5355
5356 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5357 let body = &source[..consumed];
5358
5359 assert!(body.contains("printf %s 1,2"));
5360 assert!(body.ends_with(')'));
5361 }
5362
5363 #[test]
5364 fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
5365 let source = " (#comment with )\nprintf %s 1,2\n) )\"";
5366
5367 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5368 let body = &source[..consumed];
5369
5370 assert!(body.contains("printf %s 1,2"));
5371 assert!(body.ends_with(')'));
5372 }
5373
5374 #[test]
5375 fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
5376 let source = "((x<<2))\nprintf %s 1,2\n)\"";
5377
5378 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5379 let body = &source[..consumed];
5380
5381 assert!(body.contains("printf %s 1,2"));
5382 assert!(body.ends_with(')'));
5383 }
5384
5385 #[test]
5386 fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
5387 let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";
5388
5389 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5390 let body = &source[..consumed];
5391
5392 assert!(body.contains("printf %s 1,2"));
5393 assert!(body.ends_with("))"));
5394 }
5395
5396 #[test]
5397 fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
5398 let source = "printf %s 1,2; echo case in)\"";
5399
5400 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5401 let body = &source[..consumed];
5402
5403 assert!(body.contains("echo case in"));
5404 assert!(body.ends_with(')'));
5405 }
5406
5407 #[test]
5408 fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
5409 let source = "printf %s $'a\\'b'; printf %s 1,2)\"";
5410
5411 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5412 let body = &source[..consumed];
5413
5414 assert!(body.contains("$'a\\'b'"));
5415 assert!(body.contains("printf %s 1,2"));
5416 assert!(body.ends_with(')'));
5417 }
5418
5419 #[test]
5420 fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
5421 let source = "printf %s `echo foo)`; printf %s ok)\"";
5422
5423 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5424 let body = &source[..consumed];
5425
5426 assert!(body.contains("`echo foo)`"));
5427 assert!(body.contains("printf %s ok"));
5428 assert!(body.ends_with(')'));
5429 }
5430
5431 #[test]
5432 fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
5433 let source = "printf %s ${x/`echo }`/foo)},1)\"";
5434
5435 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5436 let body = &source[..consumed];
5437
5438 assert!(body.contains("${x/`echo }`/foo)},1"));
5439 assert!(body.ends_with(')'));
5440 }
5441
5442 #[test]
5443 fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
5444 {
5445 let source = "printf %s ${x/<(echo })/foo)},1)\"";
5446
5447 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5448 let body = &source[..consumed];
5449
5450 assert!(body.contains("${x/<(echo })/foo)},1"));
5451 assert!(body.ends_with(')'));
5452 }
5453
5454 #[test]
5455 fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
5456 let source = "printf %s 1,2; echo case in)";
5457
5458 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5459 let body = &source[..consumed];
5460
5461 assert_eq!(body, source);
5462 }
5463
5464 #[test]
5465 fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
5466 let source = "printf %s $'a\\'b'; printf %s 1,2)";
5467
5468 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5469 let body = &source[..consumed];
5470
5471 assert_eq!(body, source);
5472 }
5473
5474 #[test]
5475 fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
5476 let source = "printf %s `echo foo)`; printf %s ok)";
5477
5478 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5479 let body = &source[..consumed];
5480
5481 assert_eq!(body, source);
5482 }
5483
5484 #[test]
5485 fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
5486 let source = "echo \"$line\" | cut -d' ' -f2-)";
5487
5488 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5489 let body = &source[..consumed];
5490
5491 assert_eq!(body, source);
5492 }
5493
5494 #[test]
5495 fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
5496 let source = "echo \"${@}\" | tr -d '[:space:]')";
5497
5498 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5499 let body = &source[..consumed];
5500
5501 assert_eq!(body, source);
5502 }
5503
5504 #[test]
5505 fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
5506 let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";
5507
5508 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5509 let body = &source[..consumed];
5510
5511 assert_eq!(body, source);
5512 }
5513
5514 #[test]
5515 fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
5516 let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";
5517
5518 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5519 let body = &source[..consumed];
5520
5521 assert_eq!(body, source);
5522 }
5523
5524 #[test]
5525 fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
5526 let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
5527 let mut lexer = Lexer::new(source);
5528
5529 let first = lexer.next_lexed_token().expect("expected first token");
5530 assert!(first.kind.is_word_like(), "{:?}", first.kind);
5531 assert_eq!(first.word_string().as_deref(), Some("echo"));
5532
5533 let second = lexer.next_lexed_token().expect("expected second token");
5534 assert!(second.kind.is_word_like(), "{:?}", second.kind);
5535 assert_eq!(
5536 second.word_string().as_deref(),
5537 Some("$(echo \"$(( $(printf ')') + 1 ))\")")
5538 );
5539 }
5540
5541 #[test]
5542 fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
5543 let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
5544 let start = source.find("$(").expect("expected command substitution") + 2;
5545 let consumed =
5546 scan_command_substitution_body_len(&source[start..]).expect("expected match");
5547 assert_eq!(&source[start..start + consumed], "echo $var | tr A-Z a-z)");
5548 }
5549
5550 #[test]
5551 fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
5552 let source = "echo $(echo $(basename $filename .fuzz))";
5553 let start = source.find("$(").expect("expected command substitution") + 2;
5554 let consumed =
5555 scan_command_substitution_body_len(&source[start..]).expect("expected match");
5556 assert_eq!(
5557 &source[start..start + consumed],
5558 "echo $(basename $filename .fuzz))"
5559 );
5560 }
5561
5562 #[test]
5563 fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
5564 let source = "\n [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n )";
5565 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5566 assert_eq!(consumed, source.len());
5567 }
5568
5569 #[test]
5570 fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
5571 let source = "'foo'bar";
5572 let mut lexer = Lexer::new(source);
5573
5574 let token = lexer.next_lexed_token().unwrap();
5575 assert_eq!(token.kind, TokenKind::LiteralWord);
5576
5577 let word = token.word().unwrap();
5578 let segments: Vec<_> = word
5579 .segments()
5580 .map(|segment| (segment.kind(), segment.as_str().to_string()))
5581 .collect();
5582
5583 assert_eq!(
5584 segments,
5585 vec![
5586 (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
5587 (LexedWordSegmentKind::Plain, "bar".to_string()),
5588 ]
5589 );
5590 assert_eq!(word.joined_text(), "foobar");
5591 assert_eq!(
5592 word.segments()
5593 .nth(1)
5594 .and_then(LexedWordSegment::span)
5595 .unwrap()
5596 .slice(source),
5597 "bar"
5598 );
5599 }
5600
5601 #[test]
5602 fn test_unquoted_command_substitution_word_keeps_source_backing() {
5603 let source = "$(printf hi)";
5604 let mut lexer = Lexer::new(source);
5605
5606 let token = lexer.next_lexed_token().unwrap();
5607 assert_eq!(token.kind, TokenKind::Word);
5608
5609 let word = token.word().unwrap();
5610 let segment = word.single_segment().unwrap();
5611 assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5612 assert_eq!(segment.as_str(), source);
5613 assert_eq!(segment.span().unwrap().slice(source), source);
5614 }
5615
5616 #[test]
5617 fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
5618 let source = "${arr[$RANDOM % ${#arr[@]}]}";
5619 let mut lexer = Lexer::new(source);
5620
5621 let token = lexer.next_lexed_token().unwrap();
5622 assert_eq!(token.kind, TokenKind::Word);
5623
5624 let word = token.word().unwrap();
5625 let segment = word.single_segment().unwrap();
5626 assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5627 assert_eq!(segment.as_str(), source);
5628 assert_eq!(segment.span().unwrap().slice(source), source);
5629 }
5630
5631 #[test]
5632 fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
5633 let source = "\"foo\"$(printf hi)";
5634 let mut lexer = Lexer::new(source);
5635
5636 let token = lexer.next_lexed_token().unwrap();
5637 assert_eq!(token.kind, TokenKind::Word);
5638
5639 let word = token.word().unwrap();
5640 let continuation = word.segments().nth(1).unwrap();
5641 assert_eq!(continuation.kind(), LexedWordSegmentKind::Plain);
5642 assert_eq!(continuation.as_str(), "$(printf hi)");
5643 assert_eq!(continuation.span().unwrap().slice(source), "$(printf hi)");
5644 }
5645
5646 #[test]
5647 fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
5648 let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
5649 let mut lexer = Lexer::new(source);
5650
5651 let token = lexer.next_lexed_token().unwrap();
5652 assert_eq!(token.kind, TokenKind::QuotedWord);
5653
5654 let word = token.word().unwrap();
5655 let segment = word.single_segment().unwrap();
5656 assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
5657 assert_eq!(segment.as_str(), "${arr[$RANDOM % ${#arr[@]}]}");
5658 assert_eq!(
5659 segment.span().unwrap().slice(source),
5660 "${arr[$RANDOM % ${#arr[@]}]}"
5661 );
5662 }
5663
5664 #[test]
5665 fn test_ansi_c_control_escape_can_consume_quote() {
5666 let mut lexer = Lexer::new("echo $'\\c''");
5667
5668 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5669 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("\x07"));
5670 assert!(lexer.next_lexed_token().is_none());
5671 }
5672
5673 #[test]
5674 fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
5675 let source = r#"out_line="${out_line//'"'/'\"'}"
5676"#;
5677 let mut lexer = Lexer::new(source);
5678
5679 assert_next_token(
5680 &mut lexer,
5681 TokenKind::Word,
5682 Some(r#"out_line=${out_line//'"'/'"'}"#),
5683 );
5684 assert_next_token(&mut lexer, TokenKind::Newline, None);
5685 assert!(lexer.next_lexed_token().is_none());
5686 }
5687
5688 #[test]
5689 fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
5690 let source = r#"out_line="${out_line//'"'/'\"'}"
5691echo "Error: Missing python3!"
5692cat << 'EOF' > "${pywrapper}"
5693import os
5694EOF
5695"#;
5696 let mut lexer = Lexer::new(source);
5697
5698 assert_next_token(
5699 &mut lexer,
5700 TokenKind::Word,
5701 Some(r#"out_line=${out_line//'"'/'"'}"#),
5702 );
5703 assert_next_token(&mut lexer, TokenKind::Newline, None);
5704 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5705 assert_next_token(
5706 &mut lexer,
5707 TokenKind::QuotedWord,
5708 Some("Error: Missing python3!"),
5709 );
5710 assert_next_token(&mut lexer, TokenKind::Newline, None);
5711 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5712 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5713 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("EOF"));
5714 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5715 assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("${pywrapper}"));
5716 }
5717
5718 #[test]
5719 fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
5720 let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
5721 let mut lexer = Lexer::new(source);
5722
5723 let token = lexer.next_lexed_token().unwrap();
5724 assert_eq!(token.kind, TokenKind::Word);
5725 assert_eq!(token.span.slice(source), "crypt=${crypt//\\\\/\\\\\\\\}");
5726 assert!(token.source_slice(source).is_none());
5727 assert_eq!(
5728 token.word_string().as_deref(),
5729 Some("crypt=${crypt//\\/\\\\}")
5730 );
5731 assert_next_token(&mut lexer, TokenKind::Newline, None);
5732 assert!(lexer.next_lexed_token().is_none());
5733 }
5734
5735 #[test]
5736 fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
5737 let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n if true; then\n txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n fi\n}\n";
5738 let mut lexer = Lexer::new(source);
5739
5740 assert_next_token(
5741 &mut lexer,
5742 TokenKind::Word,
5743 Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
5744 );
5745 assert_next_token(&mut lexer, TokenKind::Newline, None);
5746 assert_next_token(&mut lexer, TokenKind::Newline, None);
5747 assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5748 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5749 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5750 assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
5751 assert_next_token(&mut lexer, TokenKind::Newline, None);
5752 assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5753 assert_next_token(&mut lexer, TokenKind::Word, Some("true"));
5754 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5755 assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5756 assert_next_token(&mut lexer, TokenKind::Newline, None);
5757 assert_next_token(
5758 &mut lexer,
5759 TokenKind::Word,
5760 Some(
5761 "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
5762 ),
5763 );
5764 assert_next_token(&mut lexer, TokenKind::Newline, None);
5765 assert_next_token(&mut lexer, TokenKind::Word, Some("fi"));
5766 assert_next_token(&mut lexer, TokenKind::Newline, None);
5767 assert_next_token(&mut lexer, TokenKind::RightBrace, None);
5768 assert_next_token(&mut lexer, TokenKind::Newline, None);
5769 assert!(lexer.next_lexed_token().is_none());
5770 }
5771
5772 #[test]
5773 fn test_case_pattern_literal_left_brace_does_not_swallow_following_arms() {
5774 let source = "case \"$word\" in\n {) : ;;\n :) : ;;\nesac\n";
5775 let mut lexer = Lexer::new(source);
5776
5777 assert_next_token(&mut lexer, TokenKind::Word, Some("case"));
5778 assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$word"));
5779 assert_next_token(&mut lexer, TokenKind::Word, Some("in"));
5780 assert_next_token(&mut lexer, TokenKind::Newline, None);
5781 assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
5782 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5783 assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5784 assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
5785 assert_next_token(&mut lexer, TokenKind::Newline, None);
5786 assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5787 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5788 assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5789 assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
5790 assert_next_token(&mut lexer, TokenKind::Newline, None);
5791 assert_next_token(&mut lexer, TokenKind::Word, Some("esac"));
5792 assert_next_token(&mut lexer, TokenKind::Newline, None);
5793 assert!(lexer.next_lexed_token().is_none());
5794 }
5795
5796 #[test]
5797 fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
5798 let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
5799 let mut lexer = Lexer::new(source);
5800
5801 assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5802 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5803 assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5804 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5805 assert_next_token(&mut lexer, TokenKind::And, None);
5806 assert_next_token(&mut lexer, TokenKind::Word, Some("!"));
5807 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5808 assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5809 assert_next_token(&mut lexer, TokenKind::Word, Some("=~"));
5810 assert_next_token(&mut lexer, TokenKind::Word, Some("^{"));
5811 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5812 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5813 assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5814 assert_next_token(&mut lexer, TokenKind::Newline, None);
5815 assert!(lexer.next_lexed_token().is_none());
5816 }
5817
5818 #[test]
5819 fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
5820 let source = "echo -{$(echo a),b}-\n";
5821 let mut lexer = Lexer::new(source);
5822
5823 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5824 assert_next_token(&mut lexer, TokenKind::Word, Some("-{$(echo a),b}-"));
5825 assert_next_token(&mut lexer, TokenKind::Newline, None);
5826 assert!(lexer.next_lexed_token().is_none());
5827 }
5828
5829 #[test]
5830 fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
5831 let source = "echo -{$((1 + 2)),b}-\n";
5832 let mut lexer = Lexer::new(source);
5833
5834 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5835 assert_next_token(&mut lexer, TokenKind::Word, Some("-{$((1 + 2)),b}-"));
5836 assert_next_token(&mut lexer, TokenKind::Newline, None);
5837 assert!(lexer.next_lexed_token().is_none());
5838 }
5839
5840 #[test]
5841 fn test_operators() {
5842 let mut lexer = Lexer::new("a |& b | c && d || e; f &");
5843
5844 assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5845 assert_next_token(&mut lexer, TokenKind::PipeBoth, None);
5846 assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5847 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5848 assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5849 assert_next_token(&mut lexer, TokenKind::And, None);
5850 assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5851 assert_next_token(&mut lexer, TokenKind::Or, None);
5852 assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5853 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5854 assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5855 assert_next_token(&mut lexer, TokenKind::Background, None);
5856 assert!(lexer.next_lexed_token().is_none());
5857 }
5858
5859 #[test]
5860 fn test_double_left_bracket_requires_separator() {
5861 let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");
5862
5863 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5864 assert_next_token(&mut lexer, TokenKind::Word, Some("foo"));
5865 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5866 assert_next_token(&mut lexer, TokenKind::Newline, None);
5867 assert_next_token(&mut lexer, TokenKind::Word, Some("[[z]"));
5868 assert_next_token(&mut lexer, TokenKind::Newline, None);
5869 assert!(lexer.next_lexed_token().is_none());
5870 }
5871
5872 #[test]
5873 fn test_redirects() {
5874 let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");
5875
5876 assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5877 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5878 assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5879 assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5880 assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5881 assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5882 assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5883 assert_next_token(&mut lexer, TokenKind::RedirectFdAppend, None);
5884 assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5885 let token = lexer.next_lexed_token().unwrap();
5886 assert_eq!(token.kind, TokenKind::Clobber);
5887 assert_eq!(token.fd_value(), Some(2));
5888 assert_eq!(token_text(&token, lexer.input), None);
5889 assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5890 assert_next_token(&mut lexer, TokenKind::RedirectIn, None);
5891 assert_next_token(&mut lexer, TokenKind::Word, Some("g"));
5892 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5893 assert_next_token(&mut lexer, TokenKind::Word, Some("h"));
5894 assert_next_token(&mut lexer, TokenKind::HereString, None);
5895 assert_next_token(&mut lexer, TokenKind::Word, Some("i"));
5896 assert_next_token(&mut lexer, TokenKind::RedirectBothAppend, None);
5897 assert_next_token(&mut lexer, TokenKind::Word, Some("j"));
5898 assert_next_token(&mut lexer, TokenKind::RedirectReadWrite, None);
5899 assert_next_token(&mut lexer, TokenKind::Word, Some("k"));
5900 }
5901
5902 #[test]
5903 fn test_comment() {
5904 let mut lexer = Lexer::new("echo hello # this is a comment\necho world");
5905
5906 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5907 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5908 assert_next_token(&mut lexer, TokenKind::Newline, None);
5909 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5910 assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5911 }
5912
5913 #[test]
5914 fn test_comment_token_with_span() {
5915 let mut lexer = Lexer::new("# lead\necho hi # tail");
5916
5917 let comment = lexer.next_lexed_token_with_comments().unwrap();
5918 assert_eq!(comment.kind, TokenKind::Comment);
5919 assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
5920 assert_eq!(comment.span.start.line, 1);
5921 assert_eq!(comment.span.start.column, 1);
5922 assert_eq!(comment.span.end.line, 1);
5923 assert_eq!(comment.span.end.column, 7);
5924
5925 assert_next_token(&mut lexer, TokenKind::Newline, None);
5926 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5927 assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));
5928
5929 let inline = lexer.next_lexed_token_with_comments().unwrap();
5930 assert_eq!(inline.kind, TokenKind::Comment);
5931 assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
5932 assert_eq!(inline.span.start.line, 2);
5933 assert_eq!(inline.span.start.column, 9);
5934 }
5935
5936 #[test]
5937 fn test_comment_token_preserves_hash_boundaries() {
5938 let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");
5939
5940 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5941 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
5942 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
5943 assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
5944 assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
5945 assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
5946 assert!(lexer.next_lexed_token_with_comments().is_none());
5947 }
5948
5949 #[test]
5950 fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5951 let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5952
5953 let mut saw_comment = false;
5954 while let Some(token) = lexer.next_lexed_token_with_comments() {
5955 if token.kind == TokenKind::Comment {
5956 saw_comment = true;
5957 break;
5958 }
5959 }
5960
5961 assert!(
5962 !saw_comment,
5963 "zsh inline glob controls inside [[ ]] should not lex as comments"
5964 );
5965 }
5966
5967 #[test]
5968 fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5969 let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5970
5971 let mut saw_comment = false;
5972 while let Some(token) = lexer.next_lexed_token_with_comments() {
5973 if token.kind == TokenKind::Comment {
5974 saw_comment = true;
5975 break;
5976 }
5977 }
5978
5979 assert!(
5980 !saw_comment,
5981 "zsh arithmetic char literals inside (( )) should not lex as comments"
5982 );
5983 }
5984
5985 #[test]
5986 fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
5987 let mut lexer = Lexer::new(
5988 "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
5989 );
5990
5991 assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5992 assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5993 assert_next_token(
5994 &mut lexer,
5995 TokenKind::LiteralWord,
5996 Some("\\e]133;C;cmdline_url=%s\\a"),
5997 );
5998 assert_next_token(
5999 &mut lexer,
6000 TokenKind::QuotedWord,
6001 Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
6002 );
6003 assert_next_token(&mut lexer, TokenKind::Newline, None);
6004 }
6005
6006 #[test]
6007 fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
6008 let mut lexer = Lexer::new(
6009 "() {\n builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
6010 );
6011
6012 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6013 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6014 assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
6015 assert_next_token(&mut lexer, TokenKind::Newline, None);
6016 assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
6017 assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
6018 assert_next_token(
6019 &mut lexer,
6020 TokenKind::LiteralWord,
6021 Some("\\e]133;C;cmdline_url=%s\\a"),
6022 );
6023 assert_next_token(
6024 &mut lexer,
6025 TokenKind::QuotedWord,
6026 Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
6027 );
6028 assert_next_token(&mut lexer, TokenKind::Newline, None);
6029 assert_next_token(&mut lexer, TokenKind::RightBrace, None);
6030 assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
6031 assert_next_token(&mut lexer, TokenKind::Newline, None);
6032 }
6033
6034 #[test]
6035 fn test_variable_words() {
6036 let mut lexer = Lexer::new("echo $HOME $USER");
6037
6038 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6039 assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
6040 assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
6041 assert!(lexer.next_lexed_token().is_none());
6042 }
6043
6044 #[test]
6045 fn test_pipeline_tokens() {
6046 let mut lexer = Lexer::new("echo hello | cat");
6047
6048 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6049 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
6050 assert_next_token(&mut lexer, TokenKind::Pipe, None);
6051 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6052 assert!(lexer.next_lexed_token().is_none());
6053 }
6054
6055 #[test]
6056 fn test_read_heredoc() {
6057 let mut lexer = Lexer::new("\nhello\nworld\nEOF");
6059 let content = lexer.read_heredoc("EOF", false);
6060 assert_eq!(content.content, "hello\nworld\n");
6061 }
6062
6063 #[test]
6064 fn test_read_heredoc_single_line() {
6065 let mut lexer = Lexer::new("\ntest\nEOF");
6066 let content = lexer.read_heredoc("EOF", false);
6067 assert_eq!(content.content, "test\n");
6068 }
6069
6070 #[test]
6071 fn test_read_heredoc_full_scenario() {
6072 let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
6074
6075 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6077 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6078 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6079
6080 let content = lexer.read_heredoc("EOF", false);
6082 assert_eq!(content.content, "hello\nworld\n");
6083 }
6084
6085 #[test]
6086 fn test_read_heredoc_with_redirect() {
6087 let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
6089 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6090 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6091 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6092 let content = lexer.read_heredoc("EOF", false);
6093 assert_eq!(content.content, "hello\n");
6094 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
6096 assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
6097 }
6098
6099 #[test]
6100 fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
6101 let source = "cat <<EOF | grep hello \\\n | sort \\\n > out.txt\nhello\nEOF\n";
6102 let mut lexer = Lexer::new(source);
6103
6104 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6105 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6106 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6107
6108 let heredoc = lexer.read_heredoc("EOF", false);
6109 assert_eq!(heredoc.content, "hello\n");
6110
6111 assert_next_token(&mut lexer, TokenKind::Pipe, None);
6112 assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
6113 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
6114 assert_next_token(&mut lexer, TokenKind::Pipe, None);
6115 assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
6116 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
6117 assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
6118 }
6119
6120 #[test]
6121 fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
6122 let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
6123 let mut lexer = Lexer::new(source);
6124
6125 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6126 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6127 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6128
6129 let heredoc = lexer.read_heredoc("EOF", false);
6130 assert_eq!(heredoc.content, "1\n2\n3\n");
6131 }
6132
6133 #[test]
6134 fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
6135 let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
6136 let mut lexer = Lexer::new(source);
6137
6138 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6139 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6140 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6141
6142 let heredoc = lexer.read_heredoc("EOF", false);
6143 assert_eq!(heredoc.content, "body\n");
6144 }
6145
6146 #[test]
6147 fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
6148 let source = "cat <<EOF # note \\\nbody\nEOF\n";
6149 let mut lexer = Lexer::new(source);
6150
6151 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6152 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6153 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6154
6155 let heredoc = lexer.read_heredoc("EOF", false);
6156 assert_eq!(heredoc.content, "body\n");
6157 }
6158
6159 #[test]
6160 fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
6161 let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
6162 let mut lexer = Lexer::new(source);
6163
6164 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6165 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6166 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6167 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6168
6169 let heredoc = lexer.read_heredoc("EOF", false);
6170 assert_eq!(heredoc.content, "body\n");
6171
6172 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6173 }
6174
6175 #[test]
6176 fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
6177 let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
6178 let mut lexer = Lexer::new(source);
6179
6180 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6181 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6182 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6183
6184 let heredoc = lexer.read_heredoc("EOF", false);
6185 assert_eq!(heredoc.content, "1\n");
6186
6187 assert_next_token(&mut lexer, TokenKind::Pipe, None);
6188 assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
6189 }
6190
6191 #[test]
6192 fn test_read_heredoc_with_redirect_preserves_following_spans() {
6193 let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
6194 let mut lexer = Lexer::new(source);
6195
6196 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6197 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6198 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6199
6200 let heredoc = lexer.read_heredoc("EOF", false);
6201 assert_eq!(heredoc.content, "hello\n");
6202
6203 let redirect = lexer.next_lexed_token_with_comments().unwrap();
6204 assert_eq!(redirect.kind, TokenKind::RedirectOut);
6205 assert_eq!(redirect.span.slice(source), ">");
6206
6207 let target = lexer.next_lexed_token_with_comments().unwrap();
6208 assert_eq!(target.kind, TokenKind::Word);
6209 assert_eq!(
6210 token_text(&target, lexer.input).as_deref(),
6211 Some("file.txt")
6212 );
6213 assert_eq!(target.span.slice(source), "file.txt");
6214
6215 let newline = lexer.next_lexed_token_with_comments().unwrap();
6216 assert_eq!(newline.kind, TokenKind::Newline);
6217 assert_eq!(newline.span.slice(source), "\n");
6218
6219 let comment = lexer.next_lexed_token_with_comments().unwrap();
6220 assert_eq!(comment.kind, TokenKind::Comment);
6221 assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
6222 assert_eq!(comment.span.slice(source), "# done");
6223 }
6224
6225 #[test]
6226 fn test_comment_with_unicode() {
6227 let source = "# café résumé\necho ok";
6229 let mut lexer = Lexer::new(source);
6230
6231 let comment = lexer.next_lexed_token_with_comments().unwrap();
6232 assert_eq!(comment.kind, TokenKind::Comment);
6233 assert_eq!(
6234 token_text(&comment, lexer.input).as_deref(),
6235 Some(" café résumé")
6236 );
6237 let start = comment.span.start.offset;
6239 let end = comment.span.end.offset;
6240 assert_eq!(start, 0);
6241 assert_eq!(&source[start..end], "# café résumé");
6242 assert!(source.is_char_boundary(start));
6243 assert!(source.is_char_boundary(end));
6244
6245 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6246 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
6247 }
6248
6249 #[test]
6250 fn test_comment_with_cjk_characters() {
6251 let source = "# 你好世界\necho ok";
6253 let mut lexer = Lexer::new(source);
6254
6255 let comment = lexer.next_lexed_token_with_comments().unwrap();
6256 assert_eq!(comment.kind, TokenKind::Comment);
6257 assert_eq!(
6258 token_text(&comment, lexer.input).as_deref(),
6259 Some(" 你好世界")
6260 );
6261 let start = comment.span.start.offset;
6262 let end = comment.span.end.offset;
6263 assert_eq!(&source[start..end], "# 你好世界");
6264 assert!(source.is_char_boundary(start));
6265 assert!(source.is_char_boundary(end));
6266 }
6267
6268 #[test]
6269 fn test_heredoc_with_comments_inside() {
6270 let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
6272 let mut lexer = Lexer::new(source);
6273
6274 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6275 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6276 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6277
6278 let heredoc = lexer.read_heredoc("EOF", false);
6279 assert_eq!(heredoc.content, "# not a comment\nreal line\n");
6280
6281 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6284 let comment = lexer.next_lexed_token_with_comments().unwrap();
6285 assert_eq!(comment.kind, TokenKind::Comment);
6286 assert_eq!(
6287 token_text(&comment, lexer.input).as_deref(),
6288 Some(" real comment")
6289 );
6290 }
6291
6292 #[test]
6293 fn test_heredoc_with_hash_in_variable() {
6294 let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
6296 let mut lexer = Lexer::new(source);
6297
6298 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6299 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6300 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6301
6302 let heredoc = lexer.read_heredoc("EOF", false);
6303 assert_eq!(heredoc.content, "val=${x#prefix}\n");
6304 }
6305
6306 #[test]
6307 fn test_heredoc_span_does_not_leak() {
6308 let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
6311 let mut lexer = Lexer::new(source);
6312
6313 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6314 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6315 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6316
6317 let heredoc = lexer.read_heredoc("EOF", false);
6318 let start = heredoc.content_span.start.offset;
6319 let end = heredoc.content_span.end.offset;
6320 assert!(
6321 end <= source.len(),
6322 "heredoc span end ({end}) exceeds source length ({})",
6323 source.len()
6324 );
6325 assert_eq!(&source[start..end], "hello\nworld\n");
6326
6327 assert_next_token(&mut lexer, TokenKind::Newline, None);
6329 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6330 assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
6331 }
6332
6333 #[test]
6334 fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
6335 let source = "\
6336cat <<\\_ACEOF
6337Use these variables to override the choices made by `configure' or to help
6338it to find libraries and programs with nonstandard names/locations.
6339_ACEOF
6340ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
6341ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
6342";
6343 let mut lexer = Lexer::new(source);
6344
6345 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6346 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6347 let delimiter = lexer.next_lexed_token_with_comments().unwrap();
6348 assert_eq!(delimiter.kind, TokenKind::Word);
6349 assert_eq!(delimiter.span.slice(source), "\\_ACEOF");
6350
6351 let heredoc = lexer.read_heredoc("_ACEOF", false);
6352 assert_eq!(
6353 heredoc.content,
6354 "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
6355 );
6356
6357 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6358
6359 let first = lexer.next_lexed_token_with_comments().unwrap();
6360 assert_eq!(first.kind, TokenKind::Word);
6361 assert_eq!(
6362 first.span.slice(source),
6363 "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
6364 );
6365 let first_segments = first
6366 .word()
6367 .unwrap()
6368 .segments()
6369 .map(|segment| {
6370 (
6371 segment.kind(),
6372 segment.as_str().to_string(),
6373 segment.span().map(|span| span.slice(source).to_string()),
6374 )
6375 })
6376 .collect::<Vec<_>>();
6377 assert_eq!(
6378 first_segments,
6379 vec![
6380 (
6381 LexedWordSegmentKind::Plain,
6382 "ac_dir_suffix=/".to_string(),
6383 Some("ac_dir_suffix=/".to_string()),
6384 ),
6385 (
6386 LexedWordSegmentKind::Plain,
6387 "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
6388 Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
6389 ),
6390 ]
6391 );
6392
6393 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6394
6395 let second = lexer.next_lexed_token_with_comments().unwrap();
6396 assert_eq!(second.kind, TokenKind::Word);
6397 assert_eq!(
6398 second.span.slice(source),
6399 "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6400 );
6401 let second_segments = second
6402 .word()
6403 .unwrap()
6404 .segments()
6405 .map(|segment| {
6406 (
6407 segment.kind(),
6408 segment.as_str().to_string(),
6409 segment.span().map(|span| span.slice(source).to_string()),
6410 )
6411 })
6412 .collect::<Vec<_>>();
6413 assert_eq!(
6414 second_segments,
6415 vec![
6416 (
6417 LexedWordSegmentKind::Plain,
6418 "ac_top_builddir_sub=".to_string(),
6419 Some("ac_top_builddir_sub=".to_string()),
6420 ),
6421 (
6422 LexedWordSegmentKind::Plain,
6423 "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
6424 Some(
6425 "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6426 .to_string(),
6427 ),
6428 ),
6429 ]
6430 );
6431 }
6432
6433 #[test]
6434 fn test_heredoc_with_unicode_content() {
6435 let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
6437 let mut lexer = Lexer::new(source);
6438
6439 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6440 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6441 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6442
6443 let heredoc = lexer.read_heredoc("EOF", false);
6444 assert_eq!(heredoc.content, "# 你好\ncafé\n");
6445 let start = heredoc.content_span.start.offset;
6446 let end = heredoc.content_span.end.offset;
6447 assert!(
6448 source.is_char_boundary(start),
6449 "heredoc span start ({start}) not on char boundary"
6450 );
6451 assert!(
6452 source.is_char_boundary(end),
6453 "heredoc span end ({end}) not on char boundary"
6454 );
6455 assert_eq!(&source[start..end], "# 你好\ncafé\n");
6456 }
6457
6458 #[test]
6459 fn test_assoc_compound_assignment() {
6460 let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6463 assert_next_token(
6464 &mut lexer,
6465 TokenKind::Word,
6466 Some(r#"m=([foo]="bar" [baz]="qux")"#),
6467 );
6468 assert!(lexer.next_lexed_token().is_none());
6469 }
6470
6471 #[test]
6472 fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6473 let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6474 let mut lexer = Lexer::new(source);
6475
6476 let token = lexer.next_lexed_token().unwrap();
6477 assert_eq!(token.kind, TokenKind::Word);
6478 assert_eq!(token.span.slice(source), source);
6479 assert!(lexer.next_lexed_token().is_none());
6480 }
6481
6482 #[test]
6483 fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6484 let source = r#"foo\_bar@(baz|qux)"#;
6485 let mut lexer = Lexer::new(source);
6486
6487 let token = lexer.next_lexed_token().unwrap();
6488 assert_eq!(token.kind, TokenKind::Word);
6489 assert_eq!(token.span.slice(source), source);
6490 assert!(lexer.next_lexed_token().is_none());
6491 }
6492
6493 #[test]
6494 fn test_zsh_alternative_glob_after_dot_keeps_suffix_group() {
6495 let source = "file.(txt|doc|pdf)";
6496 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6497 let mut lexer = Lexer::with_profile(source, &profile);
6498
6499 let token = lexer.next_lexed_token().unwrap();
6500 assert_eq!(token.kind, TokenKind::Word);
6501 assert_eq!(token.span.slice(source), source);
6502 assert!(lexer.next_lexed_token().is_none());
6503 }
6504
6505 #[test]
6506 fn test_zsh_path_glob_modifier_keeps_suffix_group() {
6507 let source = "/path/file(:h)";
6508 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6509 let mut lexer = Lexer::with_profile(source, &profile);
6510
6511 let token = lexer.next_lexed_token().unwrap();
6512 assert_eq!(token.kind, TokenKind::Word);
6513 assert_eq!(token.span.slice(source), source);
6514 assert!(lexer.next_lexed_token().is_none());
6515
6516 let mut default_lexer = Lexer::new(source);
6517 let token = default_lexer.next_lexed_token().unwrap();
6518 assert_eq!(token.kind, TokenKind::Word);
6519 assert_eq!(token.span.slice(source), "/path/file");
6520 }
6521
6522 #[test]
6523 fn test_indexed_array_not_collapsed() {
6524 let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6527 assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6528 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6529 }
6530
6531 #[test]
6532 fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
6533 let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
6534 let mut lexer = Lexer::new(source);
6535
6536 assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
6537 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6538
6539 let token = lexer.next_lexed_token().unwrap();
6540 assert_eq!(token.kind, TokenKind::Word);
6541 assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);
6542
6543 let word = token.word().unwrap();
6544 let segments: Vec<_> = word
6545 .segments()
6546 .map(|segment| (segment.kind(), segment.as_str().to_string()))
6547 .collect();
6548 assert_eq!(
6549 segments,
6550 vec![
6551 (
6552 LexedWordSegmentKind::DoubleQuoted,
6553 "$plugin_dir".to_string()
6554 ),
6555 (LexedWordSegmentKind::Plain, "/*".to_string()),
6556 (LexedWordSegmentKind::Plain, "(:t)".to_string()),
6557 ]
6558 );
6559
6560 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6561 assert!(lexer.next_lexed_token().is_none());
6562 }
6563
6564 #[test]
6565 fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
6566 let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
6567 let mut lexer = Lexer::new(source);
6568
6569 assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
6570 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6571
6572 let token = lexer.next_lexed_token().unwrap();
6573 assert_eq!(token.kind, TokenKind::Word);
6574 assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);
6575
6576 let word = token.word().unwrap();
6577 let segments: Vec<_> = word
6578 .segments()
6579 .map(|segment| (segment.kind(), segment.as_str().to_string()))
6580 .collect();
6581 assert_eq!(
6582 segments,
6583 vec![
6584 (
6585 LexedWordSegmentKind::DoubleQuoted,
6586 "$__GREP_CACHE_FILE".to_string()
6587 ),
6588 (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
6589 ]
6590 );
6591
6592 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6593 assert!(lexer.next_lexed_token().is_none());
6594 }
6595
6596 #[test]
6597 fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6598 let source = r#"$dir/${~pats}(N)"#;
6599 let mut lexer = Lexer::new(source);
6600
6601 let token = lexer.next_lexed_token().unwrap();
6602 assert_eq!(token.kind, TokenKind::Word);
6603 assert_eq!(token.span.slice(source), source);
6604 assert!(lexer.next_lexed_token().is_none());
6605 }
6606
6607 #[test]
6608 fn test_dollar_word_does_not_absorb_function_parens() {
6609 let mut lexer = Lexer::new(r#"foo$x()"#);
6610
6611 assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6612 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6613 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6614 assert!(lexer.next_lexed_token().is_none());
6615 }
6616
6617 #[test]
6618 fn test_command_substitution_word_does_not_absorb_function_parens() {
6619 let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6620
6621 assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6622 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6623 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6624 assert!(lexer.next_lexed_token().is_none());
6625 }
6626
6627 #[test]
6630 fn test_digit_at_eof_no_panic() {
6631 let mut lexer = Lexer::new("2");
6633 let token = lexer.next_lexed_token();
6634 assert!(token.is_some());
6635 }
6636
6637 #[test]
6639 fn test_nested_brace_expansion_single_token() {
6640 let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6642 assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6643 assert!(lexer.next_lexed_token().is_none());
6645 }
6646
6647 #[test]
6649 fn test_simple_brace_expansion_unchanged() {
6650 let mut lexer = Lexer::new("${foo}");
6651 assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6652 assert!(lexer.next_lexed_token().is_none());
6653 }
6654
6655 #[test]
6656 fn test_nvm_fixture_lexes_without_stalling() {
6657 let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6658 let mut lexer = Lexer::new(input);
6659 let mut tokens = 0usize;
6660
6661 while lexer.next_lexed_token().is_some() {
6662 tokens += 1;
6663 assert!(
6664 tokens < 100_000,
6665 "lexer should continue making progress on the nvm fixture"
6666 );
6667 }
6668
6669 assert!(tokens > 0, "nvm fixture should produce at least one token");
6670 }
6671
6672 #[test]
6673 fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
6674 let input = concat!(
6675 "case \"${_input_type:-}\" in\n",
6676 " html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
6677 " org) _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
6678 "esac\n",
6679 );
6680
6681 assert_non_newline_tokens_stay_on_one_line(input);
6682
6683 let mut lexer = Lexer::new(input);
6684 let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6685 .map(|token| (token.kind, token_text(&token, input)))
6686 .collect::<Vec<_>>();
6687 assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
6688 assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
6689 }
6690
6691 #[test]
6692 fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6693 let input = concat!(
6694 "case $2 in\n",
6695 " cygwin*) bin='cygwin32/bin' ;|\n",
6696 "esac\n",
6697 );
6698
6699 let mut lexer = Lexer::new(input);
6700 let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6701 .map(|token| (token.kind, token_text(&token, input)))
6702 .collect::<Vec<_>>();
6703
6704 assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6705 assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6706 assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6707 }
6708
6709 #[test]
6710 fn test_inline_if_with_array_append_stays_line_local() {
6711 let input = concat!(
6712 "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
6713 "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
6714 );
6715
6716 assert_non_newline_tokens_stay_on_one_line(input);
6717 }
6718
6719 #[test]
6720 fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6721 let source = "unsetopt interactive_comments\n#literal\n";
6722 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6723 let mut lexer = Lexer::with_profile(source, &profile);
6724
6725 assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6726 assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6727 assert_next_token(&mut lexer, TokenKind::Newline, None);
6728 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6729 }
6730
/// With a zsh profile, `'a''b'` following `setopt rc_quotes` is expected
/// to lex as one `LiteralWord` with the text `a'b`.
#[test]
fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
    let script = "setopt rc_quotes\nprint 'a''b'\n";
    let zsh_profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
    let mut lexer = Lexer::with_profile(script, &zsh_profile);

    // Expected token stream, in order, up to and including the quoted word.
    let expected: [(TokenKind, Option<&str>); 5] = [
        (TokenKind::Word, Some("setopt")),
        (TokenKind::Word, Some("rc_quotes")),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("print")),
        (TokenKind::LiteralWord, Some("a'b")),
    ];

    for (kind, text) in expected {
        assert_next_token(&mut lexer, kind, text);
    }
}
6743
/// With a zsh profile, `{` and `}` following `setopt ignore_braces` are
/// expected to lex as plain `Word` tokens.
#[test]
fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
    let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
    let mut lexer = Lexer::with_profile("setopt ignore_braces\n{ echo }\n", &profile);

    // Thin wrapper so each expectation reads as a single (kind, text) pair.
    let mut expect = |kind: TokenKind, text: Option<&str>| {
        assert_next_token(&mut lexer, kind, text);
    };

    // Line 1: the option change.
    expect(TokenKind::Word, Some("setopt"));
    expect(TokenKind::Word, Some("ignore_braces"));
    expect(TokenKind::Newline, None);

    // Line 2: braces surface as ordinary words.
    expect(TokenKind::Word, Some("{"));
    expect(TokenKind::Word, Some("echo"));
    expect(TokenKind::Word, Some("}"));
}
6757
/// With a zsh profile, `{ab}{0-2}` following `setopt brace_ccl` is
/// expected to lex as one `Word` spanning both brace groups.
#[test]
fn test_zsh_midfile_setopt_brace_ccl_keeps_adjacent_brace_expansions_in_one_word() {
    let script = "setopt brace_ccl\n{ab}{0-2}\n";
    let zsh = ShellProfile::native(crate::parser::ShellDialect::Zsh);
    let mut lexer = Lexer::with_profile(script, &zsh);

    // Consume the `setopt` line first.
    let option_line = [
        (TokenKind::Word, Some("setopt")),
        (TokenKind::Word, Some("brace_ccl")),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in option_line {
        assert_next_token(&mut lexer, kind, text);
    }

    // Both adjacent brace groups must arrive as one token.
    assert_next_token(&mut lexer, TokenKind::Word, Some("{ab}{0-2}"));
}
6769
/// Feeds a fixed byte corpus, wrapped in `echo $(( … ))`, to the parser.
///
/// Going by the test name, this is a regression input found by fuzzing.
/// The corpus is all ASCII and mixes `((`, `$5 + 1`, repeated `<<E`
/// sequences (bytes `60, 60, 69`), single quotes, and control bytes such
/// as NUL. The parse result is discarded, so the test fails only if
/// parsing panics (or if the corpus stops being valid UTF-8).
#[test]
fn test_heredoc_in_arithmetic_fuzz_crash() {
    // Raw corpus bytes; do not reformat or "clean up" — the exact byte
    // sequence is the test.
    let data: &[u8] = &[
        35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
        40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
        33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
        119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
        0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
        119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
        122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
        122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
        122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
        49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
        119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
        122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
        61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
        32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
        122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
        39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
        122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
        32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
        0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
    ];
    // Every byte above is < 128, so this conversion is expected to succeed.
    let input = std::str::from_utf8(data).unwrap();
    // Embed the corpus inside an arithmetic expansion of an `echo` command.
    let script = format!("echo $(({input}))\n");
    // Only the absence of a panic matters; the parse outcome is ignored.
    let _ = crate::parser::Parser::new(&script).parse();
}
6801}